From 4fe7ed9a752897738aaa0f90e36296c6a7300e08 Mon Sep 17 00:00:00 2001 From: Yaming Pei Date: Thu, 20 Feb 2025 18:02:04 +0800 Subject: [PATCH 01/34] enh: add csv-related parameters --- tools/taos-tools/inc/bench.h | 6 +++- tools/taos-tools/src/benchJsonOpt.c | 44 ++++++++++++++++++----------- 2 files changed, 33 insertions(+), 17 deletions(-) diff --git a/tools/taos-tools/inc/bench.h b/tools/taos-tools/inc/bench.h index d47bafbaf0..968f91d493 100644 --- a/tools/taos-tools/inc/bench.h +++ b/tools/taos-tools/inc/bench.h @@ -780,7 +780,11 @@ typedef struct SArguments_S { bool mistMode; bool escape_character; bool pre_load_tb_meta; - char csvPath[MAX_FILE_NAME_LEN]; + + char* csv_output_dir; + char* csv_file_prefix; + char* csv_ts_format; + char* csv_ts_interval; bool bind_vgroup; } SArguments; diff --git a/tools/taos-tools/src/benchJsonOpt.c b/tools/taos-tools/src/benchJsonOpt.c index 3e41908668..d77306682e 100644 --- a/tools/taos-tools/src/benchJsonOpt.c +++ b/tools/taos-tools/src/benchJsonOpt.c @@ -1586,25 +1586,37 @@ static int getMetaFromCommonJsonFile(tools_cJSON *json) { } } - g_arguments->csvPath[0] = 0; - tools_cJSON *csv = tools_cJSON_GetObjectItem(json, "csvPath"); - if (csv && (csv->type == tools_cJSON_String) - && (csv->valuestring != NULL)) { - tstrncpy(g_arguments->csvPath, csv->valuestring, MAX_FILE_NAME_LEN); + // csv output dir + tools_cJSON* csv_od = tools_cJSON_GetObjectItem(json, "csv_output_dir"); + if (csv_od && csv_od->type == tools_cJSON_String && csv_od->valuestring != NULL) { + g_arguments->csv_output_dir = csv_od->valuestring; + } else { + g_arguments->csv_output_dir = "./output/"; + } + (void)mkdir(g_arguments->csv_output_dir, 0775); + + // csv file prefix + tools_cJSON* csv_fp = tools_cJSON_GetObjectItem(json, "csv_file_prefix"); + if (csv_fp && csv_fp->type == tools_cJSON_String && csv_fp->valuestring != NULL) { + g_arguments->csv_file_prefix = csv_fp->valuestring; + } else { + g_arguments->csv_file_prefix = "data"; } - size_t len = strlen(g_arguments->csvPath); - - if(len == 0) { - // set default with current path - strcpy(g_arguments->csvPath, "./output/"); - mkdir(g_arguments->csvPath, 0775); + // csv timestamp format + tools_cJSON* csv_tf = tools_cJSON_GetObjectItem(json, "csv_ts_format"); + if (csv_tf && csv_tf->type == tools_cJSON_String && csv_tf->valuestring != NULL) { + g_arguments->csv_ts_format = csv_tf->valuestring; } else { - // append end - if (g_arguments->csvPath[len-1] != '/' ) { - strcat(g_arguments->csvPath, "/"); - } - mkdir(g_arguments->csvPath, 0775); + g_arguments->csv_ts_format = "YYYYMMDDHHmmSS"; + } + + // csv timestamp format + tools_cJSON* csv_ti = tools_cJSON_GetObjectItem(json, "csv_ts_interval"); + if (csv_ti && csv_ti->type == tools_cJSON_String && csv_ti->valuestring != NULL) { + g_arguments->csv_ts_interval = csv_ti->valuestring; + } else { + g_arguments->csv_ts_interval = "1d"; } code = 0; From b93428432c56043c7321089cce11ddd597ebffb0 Mon Sep 17 00:00:00 2001 From: Yaming Pei Date: Fri, 21 Feb 2025 17:04:41 +0800 Subject: [PATCH 02/34] enh: csv-related parameters validity check --- tools/taos-tools/inc/bench.h | 4 +- tools/taos-tools/src/benchCsv.c | 264 ++++++++++++++++++++-------- tools/taos-tools/src/benchJsonOpt.c | 12 +- tools/taos-tools/src/benchMain.c | 2 +- 4 files changed, 205 insertions(+), 77 deletions(-) diff --git a/tools/taos-tools/inc/bench.h b/tools/taos-tools/inc/bench.h index 968f91d493..caabd39d3b 100644 --- a/tools/taos-tools/inc/bench.h +++ b/tools/taos-tools/inc/bench.h @@ -781,10 +781,12 @@ typedef struct 
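Patch 01 reads each of the new options with the same shape: take the JSON string when present, otherwise fall back to a built-in default ("./output/", "data", "YYYYMMDDHHmmSS", "1d"). If more csv options follow, the pattern could be folded into a helper along these lines; csvGetStrOption is a hypothetical name, not part of the patch, and only the tools_cJSON calls already used above are assumed:

    static char* csvGetStrOption(tools_cJSON* json, const char* key, char* def) {
        // return the JSON string value when present and non-NULL, else the default
        tools_cJSON* item = tools_cJSON_GetObjectItem(json, key);
        if (item && item->type == tools_cJSON_String && item->valuestring != NULL) {
            return item->valuestring;
        }
        return def;
    }

    // equivalent to one hunk above:
    // g_arguments->csv_file_prefix = csvGetStrOption(json, "csv_file_prefix", "data");

As in the patch itself, the returned pointer aliases the parsed JSON tree, so it is only valid for as long as the tree is kept alive.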
SArguments_S { bool escape_character; bool pre_load_tb_meta; - char* csv_output_dir; + char* csv_output_path; + char csv_output_path_buf[MAX_PATH_LEN]; char* csv_file_prefix; char* csv_ts_format; char* csv_ts_interval; + long csv_ts_intv_secs; bool bind_vgroup; } SArguments; diff --git a/tools/taos-tools/src/benchCsv.c b/tools/taos-tools/src/benchCsv.c index 8186438643..6c08f1281b 100644 --- a/tools/taos-tools/src/benchCsv.c +++ b/tools/taos-tools/src/benchCsv.c @@ -10,6 +10,11 @@ * FITNESS FOR A PARTICULAR PURPOSE. */ +#include +#include +#include +#include + #include #include "benchLog.h" #include @@ -22,73 +27,6 @@ #define SHOW_CNT 100000 -static void *csvWriteThread(void *param) { - // write thread - for (int i = 0; i < g_arguments->databases->size; i++) { - // database - SDataBase * db = benchArrayGet(g_arguments->databases, i); - for (int j=0; j < db->superTbls->size; j++) { - // stb - SSuperTable* stb = benchArrayGet(db->superTbls, j); - // gen csv - int ret = genWithSTable(db, stb, g_arguments->csvPath); - if(ret != 0) { - errorPrint("failed generate to csv. db=%s stb=%s error code=%d \n", db->dbName, stb->stbName, ret); - return NULL; - } - } - } - return NULL; -} - -int csvTestProcess() { - pthread_t handle; - int ret = pthread_create(&handle, NULL, csvWriteThread, NULL); - if (ret != 0) { - errorPrint("pthread_create failed. error code =%d \n", ret); - return -1; - } - - infoPrint("start output to csv %s ...\n", g_arguments->csvPath); - int64_t start = toolsGetTimestampMs(); - pthread_join(handle, NULL); - int64_t delay = toolsGetTimestampMs() - start; - infoPrint("output to csv %s finished. delay:%"PRId64"s \n", g_arguments->csvPath, delay/1000); - - return 0; -} - -int genWithSTable(SDataBase* db, SSuperTable* stb, char* outDir) { - // filename - int ret = 0; - char outFile[MAX_FILE_NAME_LEN] = {0}; - obtainCsvFile(outFile, db, stb, outDir); - FILE * fs = fopen(outFile, "w"); - if(fs == NULL) { - errorPrint("failed create csv file. 
file=%s, last errno=%d strerror=%s \n", outFile, errno, strerror(errno)); - return -1; - } - - int rowLen = TSDB_TABLE_NAME_LEN + stb->lenOfTags + stb->lenOfCols + stb->tags->size + stb->cols->size; - int bufLen = rowLen * g_arguments->reqPerReq; - char* buf = benchCalloc(1, bufLen, true); - - infoPrint("start write csv file: %s \n", outFile); - - if (stb->interlaceRows > 0) { - // interlace mode - ret = interlaceWriteCsv(db, stb, fs, buf, bufLen, rowLen * 2); - } else { - // batch mode - ret = batchWriteCsv(db, stb, fs, buf, bufLen, rowLen * 2); - } - - tmfree(buf); - fclose(fs); - - succPrint("end write csv file: %s \n", outFile); - return ret; -} void obtainCsvFile(char * outFile, SDataBase* db, SSuperTable* stb, char* outDir) { @@ -125,7 +63,7 @@ int batchWriteCsv(SDataBase* db, SSuperTable* stb, FILE* fs, char* buf, int bufL for(int64_t j = 0; j < stb->insertRows; j++) { genColumnData(colData, stb, ts, db->precision, &ck); // combine - pos += sprintf(buf + pos, "%s,%s\n", tagData, colData); + pos += sprintf(buf + pos, "%s,%s.\n", tagData, colData); if (bufLen - pos < minRemain) { // submit ret = writeCsvFile(fs, buf, pos); @@ -197,7 +135,7 @@ int interlaceWriteCsv(SDataBase* db, SSuperTable* stb, FILE* fs, char* buf, int for (int64_t j = 0; j < needInserts; j++) { genColumnData(colData, stb, ts, db->precision, &ck); // combine tags,cols - pos += sprintf(buf + pos, "%s,%s\n", tagDatas[i], colData); + pos += sprintf(buf + pos, "%s,%s.\n", tagDatas[i], colData); if (bufLen - pos < minRemain) { // submit ret = writeCsvFile(fs, buf, pos); @@ -300,3 +238,191 @@ int32_t genRowByField(char* buf, BArray* fields, int16_t fieldCnt, char* binanry return pos1; } + + +int genWithSTable(SDataBase* db, SSuperTable* stb) { + + + + + int ret = 0; + char outFile[MAX_FILE_NAME_LEN] = {0}; + obtainCsvFile(outFile, db, stb, outDir); + FILE * fs = fopen(outFile, "w"); + if(fs == NULL) { + errorPrint("failed create csv file. 
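Both write paths above share one buffering scheme: rows are appended into a heap buffer sized for reqPerReq worst-case rows, and the buffer is flushed whenever fewer than two rows of head-room remain. Restated as a minimal sketch with the patch's own names:

    // rowLen is a worst-case estimate of one combined "tags,cols" line:
    int rowLen = TSDB_TABLE_NAME_LEN + stb->lenOfTags + stb->lenOfCols
               + stb->tags->size + stb->cols->size;   // fields + separators
    int bufLen = rowLen * g_arguments->reqPerReq;

    // inside the row loop, minRemain is passed as rowLen * 2:
    if (bufLen - pos < rowLen * 2) {   // next row might not fit
        writeCsvFile(fs, buf, pos);    // flush what is buffered
        pos = 0;
    }

Because every check leaves at least two rows of slack (or an empty buffer), the sprintf that appends the next row can never run past bufLen.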
file=%s, last errno=%d strerror=%s \n", outFile, errno, strerror(errno)); + return -1; + } + + int rowLen = TSDB_TABLE_NAME_LEN + stb->lenOfTags + stb->lenOfCols + stb->tags->size + stb->cols->size; + int bufLen = rowLen * g_arguments->reqPerReq; + char* buf = benchCalloc(1, bufLen, true); + + infoPrint("start write csv file: %s \n", outFile); + + if (stb->interlaceRows > 0) { + // interlace mode + ret = interlaceWriteCsv(db, stb, fs, buf, bufLen, rowLen * 2); + } else { + // batch mode + ret = batchWriteCsv(db, stb, fs, buf, bufLen, rowLen * 2); + } + + tmfree(buf); + fclose(fs); + + succPrint("end write csv file: %s \n", outFile); + return ret; +} + + +static int is_valid_csv_ts_format(const char* csv_ts_format) { + if (!csv_ts_format) return 0; + + struct tm test_tm = { + .tm_year = 70, + .tm_mon = 0, + .tm_mday = 1, + .tm_hour = 0, + .tm_min = 0, + .tm_sec = 0, + .tm_isdst = -1 + }; + mktime(&test_tm); + + char buffer[1024]; + size_t len = strftime(buffer, sizeof(buffer), csv_ts_format, &test_tm); + if (len == 0) { + return -1; + } + + const char* invalid_chars = "/\\:*?\"<>|"; + if (strpbrk(buffer, invalid_chars) != NULL) { + return -1; + } + + return 0; +} + + +static long validate_csv_ts_interval(const char* csv_ts_interval) { + if (!csv_ts_interval || *csv_ts_interval == '\0') return -1; + + char* endptr; + errno = 0; + const long num = strtol(csv_ts_interval, &endptr, 10); + + if (errno == ERANGE || + endptr == csv_ts_interval || + num <= 0) { + return -1; + } + + if (*endptr == '\0' || + *(endptr + 1) != '\0') { + return -1; + } + + switch (tolower(*endptr)) { + case 's': return num; + case 'm': return num * 60; + case 'h': return num * 60 * 60; + case 'd': return num * 60 * 60 * 24; + default : return -1; + } +} + + +static int csvParseParameter() { + // csv_output_path + { + size_t len = strlen(g_arguments->csv_output_path); + if (len == 0) { + errorPrint("Failed to generate CSV, the specified output path is empty. Please provide a valid path. database: %s, super table: %s.\n", + db->dbName, stb->stbName); + return -1; + } + if (g_arguments->csv_output_path[len - 1] != '/') { + int n = snprintf(g_arguments->csv_output_path_buf, sizeof(g_arguments->csv_output_path_buf), "%s/", g_arguments->csv_output_path); + if (n < 0 || n >= sizeof(g_arguments->csv_output_path_buf)) { + errorPrint("Failed to generate CSV, path buffer overflow risk when appending '/'. path: %s, database: %s, super table: %s.\n", + g_arguments->csv_output_path, db->dbName, stb->stbName); + return -1; + } + g_arguments->csv_output_path = g_arguments->csv_output_path_buf; + } + } + + // csv_ts_format + { + if (g_arguments->csv_ts_format) { + if (is_valid_csv_ts_format(g_arguments->csv_ts_format) != 0) { + errorPrint("Failed to generate CSV, the parameter `csv_ts_format` is invalid. csv_ts_format: %s, database: %s, super table: %s.\n", + g_arguments->csv_ts_format, db->dbName, stb->stbName); + return -1; + } + } + } + + // csv_ts_interval + { + long csv_ts_intv_secs = validate_csv_ts_interval(g_arguments->csv_ts_interval); + if (csv_ts_intv_secs <= 0) { + errorPrint("Failed to generate CSV, the parameter `csv_ts_interval` is invalid. 
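is_valid_csv_ts_format() probes the format by rendering the Unix epoch through strftime() and then rejecting any output that contains characters unusable in file names. Some illustrative inputs under that rule:

    // "%Y%m%d%H%M%S"  -> "19700101000000"  accepted
    // "%Y-%m-%d"      -> "1970-01-01"      accepted
    // "%Y/%m/%d"      -> "1970/01/01"      rejected: '/' is in invalid_chars
    // "%T"            -> "00:00:00"        rejected: ':' is in invalid_chars
    // "raw-text"      -> "raw-text"        accepted: strftime only reports
    //                                      failure (len == 0) for empty output

Note the validator inspects the rendered result, not the format string itself, which is what lets it catch separators that only appear after expansion.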
csv_ts_interval: %s, database: %s, super table: %s.\n", + g_arguments->csv_ts_interval, db->dbName, stb->stbName); + return -1; + } + g_arguments->csv_ts_intv_secs = csv_ts_intv_secs; + } + + return 0; +} + + +static void csvWriteThread() { + for (size_t i = 0; i < g_arguments->databases->size; ++i) { + // database + SDataBase* db = benchArrayGet(g_arguments->databases, i); + if (database->superTbls) { + for (size_t j = 0; j < db->superTbls->size; ++j) { + // stb + SSuperTable* stb = benchArrayGet(db->superTbls, j); + if (stb->insertRows == 0) { + continue; + } + + // gen csv + int ret = genWithSTable(db, stb); + if(ret != 0) { + errorPrint("Failed to generate CSV files. database: %s, super table: %s, error code: %d.\n", + db->dbName, stb->stbName, ret); + return; + } + } + } + } + + return; +} + + + +int csvTestProcess() { + // parse parameter + if (csvParseParameter() != 0) { + errorPrint("Failed to generate CSV files. database: %s, super table: %s, error code: %d.\n", + db->dbName, stb->stbName, ret); + return -1; + } + + + + + infoPrint("Starting to output data to CSV files in directory: %s ...\n", g_arguments->csv_output_path); + int64_t start = toolsGetTimestampMs(); + csvWriteThread(); + int64_t delay = toolsGetTimestampMs() - start; + infoPrint("Data export to CSV files in directory: %s has been completed. Time elapsed: %.3f seconds\n", + g_arguments->csv_output_path, delay / 1000.0); + return 0; +} diff --git a/tools/taos-tools/src/benchJsonOpt.c b/tools/taos-tools/src/benchJsonOpt.c index d77306682e..a2bf4f07d8 100644 --- a/tools/taos-tools/src/benchJsonOpt.c +++ b/tools/taos-tools/src/benchJsonOpt.c @@ -1587,13 +1587,13 @@ static int getMetaFromCommonJsonFile(tools_cJSON *json) { } // csv output dir - tools_cJSON* csv_od = tools_cJSON_GetObjectItem(json, "csv_output_dir"); - if (csv_od && csv_od->type == tools_cJSON_String && csv_od->valuestring != NULL) { - g_arguments->csv_output_dir = csv_od->valuestring; + tools_cJSON* csv_op = tools_cJSON_GetObjectItem(json, "csv_output_path"); + if (csv_op && csv_op->type == tools_cJSON_String && csv_op->valuestring != NULL) { + g_arguments->csv_output_path = csv_op->valuestring; } else { - g_arguments->csv_output_dir = "./output/"; + g_arguments->csv_output_path = "./output/"; } - (void)mkdir(g_arguments->csv_output_dir, 0775); + (void)mkdir(g_arguments->csv_output_path, 0775); // csv file prefix tools_cJSON* csv_fp = tools_cJSON_GetObjectItem(json, "csv_file_prefix"); @@ -1608,7 +1608,7 @@ static int getMetaFromCommonJsonFile(tools_cJSON *json) { if (csv_tf && csv_tf->type == tools_cJSON_String && csv_tf->valuestring != NULL) { g_arguments->csv_ts_format = csv_tf->valuestring; } else { - g_arguments->csv_ts_format = "YYYYMMDDHHmmSS"; + g_arguments->csv_ts_format = NULL; } // csv timestamp format diff --git a/tools/taos-tools/src/benchMain.c b/tools/taos-tools/src/benchMain.c index 86ad795d05..e82da29468 100644 --- a/tools/taos-tools/src/benchMain.c +++ b/tools/taos-tools/src/benchMain.c @@ -153,7 +153,7 @@ int main(int argc, char* argv[]) { } } else if (g_arguments->test_mode == CSVFILE_TEST) { if (csvTestProcess()) { - errorPrint("%s", "query test process failed\n"); + errorPrint("%s", "generate csv process failed\n"); ret = -1; } } else if (g_arguments->test_mode == QUERY_TEST) { From a1b7986cbdb3aba332097f12548d2a1adcd4b84a Mon Sep 17 00:00:00 2001 From: Yaming Pei Date: Fri, 21 Feb 2025 17:08:33 +0800 Subject: [PATCH 03/34] enh: csv-related parameters code indent adjustment --- tools/taos-tools/src/benchCsv.c | 52 
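validate_csv_ts_interval() accepts a positive decimal count followed by exactly one unit character and nothing else, and normalizes the result to seconds:

    // "30s" -> 30        "10m" -> 600
    // "2h"  -> 7200      "1d"  -> 86400
    // "1"   -> -1  (no unit: *endptr is '\0')
    // "1w"  -> -1  (unknown unit falls through to the default case)
    // "5mm" -> -1  (trailing character after the unit)
    // "-5m" -> -1  (num <= 0)

The strtol/errno combination also rejects overflow (ERANGE) and strings with no leading digits (endptr == csv_ts_interval).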
+++++++++++++++------------------ 1 file changed, 23 insertions(+), 29 deletions(-) diff --git a/tools/taos-tools/src/benchCsv.c b/tools/taos-tools/src/benchCsv.c index 6c08f1281b..6f88d2864d 100644 --- a/tools/taos-tools/src/benchCsv.c +++ b/tools/taos-tools/src/benchCsv.c @@ -335,45 +335,39 @@ static long validate_csv_ts_interval(const char* csv_ts_interval) { static int csvParseParameter() { // csv_output_path - { - size_t len = strlen(g_arguments->csv_output_path); - if (len == 0) { - errorPrint("Failed to generate CSV, the specified output path is empty. Please provide a valid path. database: %s, super table: %s.\n", - db->dbName, stb->stbName); + size_t len = strlen(g_arguments->csv_output_path); + if (len == 0) { + errorPrint("Failed to generate CSV, the specified output path is empty. Please provide a valid path. database: %s, super table: %s.\n", + db->dbName, stb->stbName); + return -1; + } + if (g_arguments->csv_output_path[len - 1] != '/') { + int n = snprintf(g_arguments->csv_output_path_buf, sizeof(g_arguments->csv_output_path_buf), "%s/", g_arguments->csv_output_path); + if (n < 0 || n >= sizeof(g_arguments->csv_output_path_buf)) { + errorPrint("Failed to generate CSV, path buffer overflow risk when appending '/'. path: %s, database: %s, super table: %s.\n", + g_arguments->csv_output_path, db->dbName, stb->stbName); return -1; } - if (g_arguments->csv_output_path[len - 1] != '/') { - int n = snprintf(g_arguments->csv_output_path_buf, sizeof(g_arguments->csv_output_path_buf), "%s/", g_arguments->csv_output_path); - if (n < 0 || n >= sizeof(g_arguments->csv_output_path_buf)) { - errorPrint("Failed to generate CSV, path buffer overflow risk when appending '/'. path: %s, database: %s, super table: %s.\n", - g_arguments->csv_output_path, db->dbName, stb->stbName); - return -1; - } - g_arguments->csv_output_path = g_arguments->csv_output_path_buf; - } + g_arguments->csv_output_path = g_arguments->csv_output_path_buf; } // csv_ts_format - { - if (g_arguments->csv_ts_format) { - if (is_valid_csv_ts_format(g_arguments->csv_ts_format) != 0) { - errorPrint("Failed to generate CSV, the parameter `csv_ts_format` is invalid. csv_ts_format: %s, database: %s, super table: %s.\n", - g_arguments->csv_ts_format, db->dbName, stb->stbName); - return -1; - } + if (g_arguments->csv_ts_format) { + if (is_valid_csv_ts_format(g_arguments->csv_ts_format) != 0) { + errorPrint("Failed to generate CSV, the parameter `csv_ts_format` is invalid. csv_ts_format: %s, database: %s, super table: %s.\n", + g_arguments->csv_ts_format, db->dbName, stb->stbName); + return -1; } } // csv_ts_interval - { - long csv_ts_intv_secs = validate_csv_ts_interval(g_arguments->csv_ts_interval); - if (csv_ts_intv_secs <= 0) { - errorPrint("Failed to generate CSV, the parameter `csv_ts_interval` is invalid. csv_ts_interval: %s, database: %s, super table: %s.\n", - g_arguments->csv_ts_interval, db->dbName, stb->stbName); - return -1; - } - g_arguments->csv_ts_intv_secs = csv_ts_intv_secs; + long csv_ts_intv_secs = validate_csv_ts_interval(g_arguments->csv_ts_interval); + if (csv_ts_intv_secs <= 0) { + errorPrint("Failed to generate CSV, the parameter `csv_ts_interval` is invalid. 
csv_ts_interval: %s, database: %s, super table: %s.\n", + g_arguments->csv_ts_interval, db->dbName, stb->stbName); + return -1; } + g_arguments->csv_ts_intv_secs = csv_ts_intv_secs; return 0; } From 5cb31be1e68fde702d1665b2ff52337af3ec2962 Mon Sep 17 00:00:00 2001 From: Yaming Pei Date: Mon, 24 Feb 2025 16:17:20 +0800 Subject: [PATCH 04/34] enh: add csv writing meta --- tools/taos-tools/inc/bench.h | 4 +- tools/taos-tools/inc/benchCsv.h | 33 +- tools/taos-tools/src/benchCsv.c | 506 +++++++++++++++++++++------- tools/taos-tools/src/benchJsonOpt.c | 12 +- 4 files changed, 407 insertions(+), 148 deletions(-) diff --git a/tools/taos-tools/inc/bench.h b/tools/taos-tools/inc/bench.h index caabd39d3b..e8c94016f8 100644 --- a/tools/taos-tools/inc/bench.h +++ b/tools/taos-tools/inc/bench.h @@ -781,8 +781,8 @@ typedef struct SArguments_S { bool escape_character; bool pre_load_tb_meta; - char* csv_output_path; - char csv_output_path_buf[MAX_PATH_LEN]; + char* output_path; + char output_path_buf[MAX_PATH_LEN]; char* csv_file_prefix; char* csv_ts_format; char* csv_ts_interval; diff --git a/tools/taos-tools/inc/benchCsv.h b/tools/taos-tools/inc/benchCsv.h index 25d0c55eba..a65d5d1c9c 100644 --- a/tools/taos-tools/inc/benchCsv.h +++ b/tools/taos-tools/inc/benchCsv.h @@ -18,19 +18,26 @@ #include + +typedef enum { + CSV_NAMING_SINGLE, + CSV_NAMING_TIME_SLICE, + CSV_NAMING_THREAD, + CSV_NAMING_THREAD_TIME_SLICE +} CsvNamingType; + +typedef struct { + CsvNamingType naming_type; + time_t start_secs; + time_t end_secs; + time_t end_ts; + size_t thread_id; + size_t total_threads; + char thread_formatter[TINY_BUFF_LEN]; +} CsvWriteMeta; + + + int csvTestProcess(); -int genWithSTable(SDataBase* db, SSuperTable* stb, char* outDir); - -char * genTagData(char* buf, SSuperTable* stb, int64_t i, int64_t *k); - -char * genColumnData(char* colData, SSuperTable* stb, int64_t ts, int32_t precision, int64_t *k); - -int32_t genRowByField(char* buf, BArray* fields, int16_t fieldCnt, char* binanryPrefix, char* ncharPrefix, int64_t *k); - -void obtainCsvFile(char * outFile, SDataBase* db, SSuperTable* stb, char* outDir); - -int interlaceWriteCsv(SDataBase* db, SSuperTable* stb, FILE* fs, char* buf, int bufLen, int minRemain); -int batchWriteCsv(SDataBase* db, SSuperTable* stb, FILE* fs, char* buf, int bufLen, int minRemain); - #endif // INC_BENCHCSV_H_ diff --git a/tools/taos-tools/src/benchCsv.c b/tools/taos-tools/src/benchCsv.c index 6f88d2864d..c7d455c66a 100644 --- a/tools/taos-tools/src/benchCsv.c +++ b/tools/taos-tools/src/benchCsv.c @@ -15,11 +15,10 @@ #include #include -#include #include "benchLog.h" -#include -#include - +#include "benchData.h" +#include "benchDataMix.h" +#include "benchCsv.h" // // main etry @@ -29,9 +28,7 @@ -void obtainCsvFile(char * outFile, SDataBase* db, SSuperTable* stb, char* outDir) { - sprintf(outFile, "%s%s-%s.csv", outDir, db->dbName, stb->stbName); -} + int32_t writeCsvFile(FILE* f, char * buf, int32_t len) { size_t size = fwrite(buf, 1, len, f); @@ -42,29 +39,36 @@ int32_t writeCsvFile(FILE* f, char * buf, int32_t len) { return 0; } -int batchWriteCsv(SDataBase* db, SSuperTable* stb, FILE* fs, char* buf, int bufLen, int minRemain) { +int batchWriteCsv(SDataBase* db, SSuperTable* stb, FILE* fs, char* buf, int rows_buf_len, int minRemain) { int ret = 0; int pos = 0; int64_t tk = 0; int64_t show = 0; - int tagDataLen = stb->lenOfTags + stb->tags->size + 256; - char * tagData = (char *) benchCalloc(1, tagDataLen, true); - int colDataLen = stb->lenOfCols + stb->cols->size + 256; - char * 
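The CsvNamingType introduced here encodes both the write mode and whether time-sliced files were requested; csvGetFileNamingType(), later in this patch, derives it as:

    // interlaceRows > 0  (interlace mode, one logical stream)
    //     csv_ts_format set   -> CSV_NAMING_TIME_SLICE   (one file per interval)
    //     csv_ts_format unset -> CSV_NAMING_SINGLE       (one file)
    // interlaceRows == 0 (batch mode, sharded by generator thread)
    //     csv_ts_format set   -> CSV_NAMING_THREAD_TIME_SLICE
    //     csv_ts_format unset -> CSV_NAMING_THREAD

so the enum value alone tells the filename builder which of thread id, slice start, and slice end must appear in the name.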
colData = (char *) benchCalloc(1, colDataLen, true); + + uint32_t tags_length = accumulateRowLen(stbInfo->tags, stbInfo->iface); + uint32_t cols_length = accumulateRowLen(stbInfo->cols, stbInfo->iface); + + size_t tags_csv_length = tags_length + stb->tags->size; + size_t cols_csv_length = cols_length + stb->cols->size; + char* tags_csv_buf = (char*)benchCalloc(1, tags_csv_length, true); + char* cols_csv_buf = (char*)benchCalloc(1, cols_csv_length, true); // gen child name - for (int64_t i = 0; i < stb->childTblCount; i++) { + for (int64_t i = 0; i < stb->childTblCount; ++i) { int64_t ts = stb->startTimestamp; int64_t ck = 0; + + // child table + // tags - genTagData(tagData, stb, i, &tk); + csvGenRowTagData(tags_csv_buf, stb, i, &tk); // insert child column data for(int64_t j = 0; j < stb->insertRows; j++) { - genColumnData(colData, stb, ts, db->precision, &ck); + genColumnData(cols_csv_buf, stb, ts, db->precision, &ck); // combine - pos += sprintf(buf + pos, "%s,%s.\n", tagData, colData); - if (bufLen - pos < minRemain) { + pos += sprintf(buf + pos, "%s,%s.\n", tags_csv_buf, cols_csv_buf); + if (rows_buf_len - pos < minRemain) { // submit ret = writeCsvFile(fs, buf, pos); if (ret != 0) { @@ -99,48 +103,277 @@ int batchWriteCsv(SDataBase* db, SSuperTable* stb, FILE* fs, char* buf, int bufL END: // free - tmfree(tagData); - tmfree(colData); + tmfree(tags_csv_buf); + tmfree(cols_csv_buf); return ret; } -int interlaceWriteCsv(SDataBase* db, SSuperTable* stb, FILE* fs, char* buf, int bufLen, int minRemain) { + +static time_t csvGetStartSeconds(SDataBase* db, SSuperTable* stb) { + time_t start_seconds = 0; + + if (db->precision == TSDB_TIME_PRECISION_MICRO) { + start_seconds = stb->startTimestamp / 1000000L; + } else if (db->precision == TSDB_TIME_PRECISION_NANO) { + start_seconds = stb->startTimestamp / 1000000000L; + } else { + start_seconds = stb->startTimestamp / 1000L; + } + return start_seconds; +} + + +void csvConvertTime2String(time_t time_value, char* time_buf, size_t buf_size) { + struct tm tm_result; + char *old_locale = setlocale(LC_TIME, "C"); +#ifdef _WIN32 + gmtime_s(&tm_result, &time_value); +#else + gmtime_r(&time_value, &tm_result); +#endif + strftime(time_buf, buf_size, g_arguments->csv_ts_format, &tm_result); + if (old_locale) { + (LC_TIME, old_locale); + } +} + + +static CsvNamingType csvGetFileNamingType(SSuperTable* stb) { + if (stb->interlaceRows > 0) { + if (g_arguments->csv_ts_format) { + return CSV_NAMING_TIME_SLICE; + } else { + return CSV_NAMING_SINGLE; + } + } else { + if (g_arguments->csv_ts_format) { + return CSV_NAMING_THREAD_TIME_SLICE; + } else { + return CSV_NAMING_THREAD; + } + } +} + + +static void csvGenEndTimestamp(CsvWriteMeta* meta, SDataBase* db) { + time_t end_ts = 0; + + if (db->precision == TSDB_TIME_PRECISION_MICRO) { + end_ts = meta->end_secs * 1000000L; + } else if (db->precision == TSDB_TIME_PRECISION_NANO) { + end_ts = meta->end_secs * 1000000000L; + } else { + end_ts = meta->end_secs * 1000L; + } + meta->end_ts = end_ts; + return; +} + + +static void csvGenThreadFormatter(CsvWriteMeta* meta) { + int digits = 0; + if (meta->total_threads == 0) { + digits = 1; + } else { + for (int n = meta->total_threads; n > 0; n /= 10) { + digits++; + } + } + + if (digits <= 1) { + (void)sprintf(meta->thread_formatter, "%%d"); + } else { + (void)snprintf(meta->thread_formatter, sizeof(meta->thread_formatter), "%%0%dd", digits); + } +} + + +static CsvWriteMeta csvInitFileNamingMeta(SDataBase* db, SSuperTable* stb) { + CsvWriteMeta meta = { + .naming_type = 
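csvConvertTime2String() means to pin LC_TIME to the C locale around strftime() and then put the caller's locale back. For reference, a minimal sketch of that save/restore idiom; formatUtcTime is a hypothetical name, and the copy matters because the pointer returned by setlocale() may be invalidated by the next setlocale() call:

    static void formatUtcTime(time_t t, char* out, size_t n, const char* fmt) {
        char saved[64] = {0};
        const char* cur = setlocale(LC_TIME, NULL);   // query current locale
        if (cur) {
            snprintf(saved, sizeof(saved), "%s", cur);
        }
        setlocale(LC_TIME, "C");                      // stable month/day names
        struct tm tm_result;
    #ifdef _WIN32
        gmtime_s(&tm_result, &t);
    #else
        gmtime_r(&t, &tm_result);
    #endif
        strftime(out, n, fmt, &tm_result);
        if (saved[0]) {
            setlocale(LC_TIME, saved);                // restore caller's locale
        }
    }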
CSV_NAMING_SINGLE, + .start_secs = 0, + .end_secs = 0, + .thread_id = 0, + .total_threads = 1, + .thread_formatter = {} + }; + + meta.naming_type = csvGetFileNamingType(stb); + + switch (meta.naming_type) { + case CSV_NAMING_SINGLE: { + break; + } + case CSV_NAMING_TIME_SLICE: { + meta.start_secs = csvGetStartSeconds(db, stb); + meta.end_secs = meta.start_secs + g_arguments->csv_ts_intv_secs; + csvGenEndTimestamp(&meta, db); + break; + } + case CSV_NAMING_THREAD: { + meta.thread_id = 1; + meta.total_threads = g_arguments->nthreads; + csvGenThreadFormatter(&meta); + break; + } + case CSV_NAMING_THREAD_TIME_SLICE: { + meta.thread_id = 1; + meta.total_threads = g_arguments->nthreads; + csvGenThreadFormatter(&meta); + meta.start_secs = csvGetStartSeconds(db, stb); + meta.end_secs = meta.start_secs + g_arguments->csv_ts_intv_secs; + csvGenEndTimestamp(&meta, db); + break; + } + default: { + meta.naming_type = CSV_NAMING_SINGLE; + break; + } + } + + return meta; +} + + +int csvGetFileFullname(CsvWriteMeta* meta, char* fullname, size_t size) { + char thread_buf[SMALL_BUFF_LEN]; + char start_time_buf[MIDDLE_BUFF_LEN]; + char end_time_buf[MIDDLE_BUFF_LEN]; + int ret = -1; + const char* base_path = g_arguments->output_path; + const char* file_prefix = g_arguments->csv_file_prefix; + + switch (meta->naming_type) { + case CSV_NAMING_SINGLE: { + ret = snprintf(fullname, size, "%s%s.csv", base_path, file_prefix); + break; + } + case CSV_NAMING_TIME_SLICE: { + csvConvertTime2String(meta->start_secs, start_time_buf, sizeof(start_time_buf)); + csvConvertTime2String(meta->end_secs, end_time_buf, sizeof(end_time_buf)); + ret = snprintf(fullname, size, "%s%s_%s_%s.csv", base_path, file_prefix, start_time_buf, end_time_buf); + break; + } + case CSV_NAMING_THREAD: { + (void)snprintf(thread_buf, sizeof(thread_buf), meta->thread_formatter, meta->thread_id); + ret = snprintf(fullname, size, "%s%s_%s.csv", base_path, file_prefix, thread_buf); + break; + } + case CSV_NAMING_THREAD_TIME_SLICE: { + (void)snprintf(thread_buf, sizeof(thread_buf), meta->thread_formatter, meta->thread_id); + csvConvertTime2String(meta->start_secs, start_time_buf, sizeof(start_time_buf)); + csvConvertTime2String(meta->end_secs, end_time_buf, sizeof(end_time_buf)); + ret = snprintf(fullname, size, "%s%s_%s_%s_%s.csv", base_path, file_prefix, thread_buf, start_time_buf, end_time_buf); + break; + } + default: { + ret = -1; + break; + } + } + + return (ret > 0 && (size_t)ret < size) ? 
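With illustrative settings (output path "./output/", prefix "data", csv_ts_format "%Y%m%d", interval "1d"), the four branches above yield names like:

    // CSV_NAMING_SINGLE             ./output/data.csv
    // CSV_NAMING_TIME_SLICE         ./output/data_19700101_19700102.csv
    // CSV_NAMING_THREAD             ./output/data_1.csv ... ./output/data_8.csv
    // CSV_NAMING_THREAD_TIME_SLICE  ./output/data_3_19700101_19700102.csv

The snprintf return check at the bottom turns any truncated name into an error rather than a silently shortened path.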
0 : -1; +} + + +uint32_t csvCalcInterlaceRows(CsvWriteMeta* meta, SSuperTable* stb, int64_t ts) { + uint32_t need_rows = 0; + + + switch (meta->naming_type) { + case CSV_NAMING_SINGLE: { + need_rows = stb->interlaceRows; + break; + } + case CSV_NAMING_TIME_SLICE: { + (meta->end_ts - ts) / stb->timestamp_step + need_rows = stb->interlaceRows; + + break; + } + case CSV_NAMING_THREAD: { + (void)snprintf(thread_buf, sizeof(thread_buf), meta->thread_formatter, meta->thread_id); + ret = snprintf(fullname, size, "%s%s_%s.csv", base_path, file_prefix, thread_buf); + break; + } + case CSV_NAMING_THREAD_TIME_SLICE: { + (void)snprintf(thread_buf, sizeof(thread_buf), meta->thread_formatter, meta->thread_id); + csvConvertTime2String(meta->start_secs, start_time_buf, sizeof(start_time_buf)); + csvConvertTime2String(meta->end_secs, end_time_buf, sizeof(end_time_buf)); + ret = snprintf(fullname, size, "%s%s_%s_%s_%s.csv", base_path, file_prefix, thread_buf, start_time_buf, end_time_buf); + break; + } + default: { + ret = -1; + break; + } + } +} + + + + +static int interlaceWriteCsv(SDataBase* db, SSuperTable* stb, FILE* fp, char* rows_buf, int rows_buf_len) { + char fullname[MAX_PATH_LEN] = {}; + CsvWriteMeta meta = csvInitFileNamingMeta(); + + int ret = csvGetFileFullname(&meta, fullname, sizeof(fullname)); + if (ret < 0) { + errorPrint("Failed to generate csv filename. database: %s, super table: %s, naming type: %d.\n", + db->dbName, stb->stbName, meta.naming_type); + return -1; + } + int ret = 0; int pos = 0; int64_t n = 0; // already inserted rows for one child table int64_t tk = 0; int64_t show = 0; + int64_t ts = 0; + int64_t last_ts = stb->startTimestamp; + + // init buffer + char** tags_buf_bucket = (char **)benchCalloc(stb->childTblCount, sizeof(char *), true); + int cols_buf_length = stb->lenOfCols + stb->cols->size; + char* cols_buf = (char *)benchCalloc(1, cols_buf_length, true); + + for (int64_t i = 0; i < stb->childTblCount; ++i) { + int tags_buf_length = TSDB_TABLE_NAME_LEN + stb->lenOfTags + stb->tags->size; + tags_buf_bucket[i] = benchCalloc(1, tags_buf_length, true); + if (!tags_buf_bucket[i]) { + ret = -1; + goto end; + } + + ret = csvGenRowTagData(tags_buf_bucket[i], tags_buf_length, stb, i, &tk); + if (!ret) { + goto end; + } + } - char **tagDatas = (char **)benchCalloc(stb->childTblCount, sizeof(char *), true); - int colDataLen = stb->lenOfCols + stb->cols->size + 256; - char * colData = (char *) benchCalloc(1, colDataLen, true); - int64_t last_ts = stb->startTimestamp; - while (n < stb->insertRows ) { - for (int64_t i = 0; i < stb->childTblCount; i++) { - // start one table - int64_t ts = last_ts; + for (int64_t i = 0; i < stb->childTblCount; ++i) { + ts = last_ts; int64_t ck = 0; - // tags - if (tagDatas[i] == NULL) { - tagDatas[i] = genTagData(NULL, stb, i, &tk); - } + // calc need insert rows + uint32_t need_rows = csvCalcInterlaceRows(&meta, stb, ts) + int64_t needInserts = stb->interlaceRows; if(needInserts > stb->insertRows - n) { needInserts = stb->insertRows - n; - } + } for (int64_t j = 0; j < needInserts; j++) { - genColumnData(colData, stb, ts, db->precision, &ck); + genColumnData(cols_buf, stb, ts, db->precision, &ck); // combine tags,cols - pos += sprintf(buf + pos, "%s,%s.\n", tagDatas[i], colData); - if (bufLen - pos < minRemain) { + pos += sprintf(buf + pos, "%s,%s\n", tags_buf_bucket[i], cols_buf); + if (rows_buf_len - pos < minRemain) { // submit - ret = writeCsvFile(fs, buf, pos); + ret = writeCsvFile(fp, buf, pos); if (ret != 0) { - goto END; + goto end; } pos = 
0; } @@ -152,7 +385,7 @@ int interlaceWriteCsv(SDataBase* db, SSuperTable* stb, FILE* fs, char* buf, int if(g_arguments->terminate) { infoPrint("%s", "You are cancel, exiting ... \n"); ret = -1; - goto END; + goto end; } // print show @@ -170,113 +403,131 @@ int interlaceWriteCsv(SDataBase* db, SSuperTable* stb, FILE* fs, char* buf, int } if (pos > 0) { - ret = writeCsvFile(fs, buf, pos); + ret = writeCsvFile(fp, buf, pos); pos = 0; } -END: +end: // free for (int64_t m = 0 ; m < stb->childTblCount; m ++) { - tmfree(tagDatas[m]); + tmfree(tags_buf_bucket[m]); } - tmfree(tagDatas); - tmfree(colData); + tmfree(tags_buf_bucket); + tmfree(cols_buf); return ret; } + // gen tag data -char * genTagData(char* buf, SSuperTable* stb, int64_t i, int64_t *k) { - // malloc - char* tagData; - if (buf == NULL) { - int tagDataLen = TSDB_TABLE_NAME_LEN + stb->lenOfTags + stb->tags->size + 32; - tagData = benchCalloc(1, tagDataLen, true); - } else { - tagData = buf; - } - - int pos = 0; +int csvGenRowTagData(char* buf, size_t size, SSuperTable* stb, int64_t index, int64_t* k) { // tbname - pos += sprintf(tagData, "\'%s%"PRId64"\'", stb->childTblPrefix, i); + int pos = snprintf(buf, size, "\'%s%"PRId64"\'", stb->childTblPrefix, index); // tags - pos += genRowByField(tagData + pos, stb->tags, stb->tags->size, stb->binaryPrefex, stb->ncharPrefex, k); + pos += csvGenRowFields(buf + pos, stb->tags, stb->tags->size, stb->binaryPrefex, stb->ncharPrefex, k); - return tagData; + return (pos > 0 && (size_t)pos < size) ? 0 : -1; } // gen column data -char * genColumnData(char* colData, SSuperTable* stb, int64_t ts, int32_t precision, int64_t *k) { +char * genColumnData(char* cols_csv_buf, SSuperTable* stb, int64_t ts, int32_t precision, int64_t *k) { char szTime[128] = {0}; toolsFormatTimestamp(szTime, ts, precision); - int pos = sprintf(colData, "\'%s\'", szTime); + int pos = sprintf(cols_csv_buf, "\'%s\'", szTime); // columns - genRowByField(colData + pos, stb->cols, stb->cols->size, stb->binaryPrefex, stb->ncharPrefex, k); - return colData; + csvGenRowFields(cols_csv_buf + pos, stb->cols, stb->cols->size, stb->binaryPrefex, stb->ncharPrefex, k); + return cols_csv_buf; } -int32_t genRowByField(char* buf, BArray* fields, int16_t fieldCnt, char* binanryPrefix, char* ncharPrefix, int64_t *k) { +int32_t csvGenRowFields(char* buf, BArray* fields, int16_t field_count, char* binanry_prefix, char* nchar_prefix, int64_t* k) { + int32_t pos = 0; - // other cols data - int32_t pos1 = 0; - for(uint16_t i = 0; i < fieldCnt; i++) { - Field* fd = benchArrayGet(fields, i); - char* prefix = ""; - if(fd->type == TSDB_DATA_TYPE_BINARY || fd->type == TSDB_DATA_TYPE_VARBINARY) { - if(binanryPrefix) { - prefix = binanryPrefix; - } - } else if(fd->type == TSDB_DATA_TYPE_NCHAR) { - if(ncharPrefix) { - prefix = ncharPrefix; - } + for (uint16_t i = 0; i < field_count; ++i) { + Field* field = benchArrayGet(fields, i); + char* prefix = ""; + if(field->type == TSDB_DATA_TYPE_BINARY || field->type == TSDB_DATA_TYPE_VARBINARY) { + if (binanry_prefix) { + prefix = binanry_prefix; + } + } else if(field->type == TSDB_DATA_TYPE_NCHAR) { + if (nchar_prefix) { + prefix = nchar_prefix; + } + } + pos += dataGenByField(field, buf, pos, prefix, k, ""); } - pos1 += dataGenByField(fd, buf, pos1, prefix, k, ""); - } - - return pos1; + return pos; } -int genWithSTable(SDataBase* db, SSuperTable* stb) { - +int csvGenStbInterlace(SDataBase* db, SSuperTable* stb) { int ret = 0; char outFile[MAX_FILE_NAME_LEN] = {0}; obtainCsvFile(outFile, db, stb, outDir); - FILE 
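Since a row is printed as the tag buffer followed by the column buffer, the resulting field order is: quoted child-table name, tag values, quoted timestamp, column values. An illustrative line for a child table d7 with two tags and two columns might look like:

    'd7','prefix_nchar_7',42,'2025-02-20 18:02:04.000',0.8731,215

The table name and timestamp are quoted by the generators shown above; the remaining fields are whatever dataGenByField() emits for each field type.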
* fs = fopen(outFile, "w"); - if(fs == NULL) { + FILE* fp = fopen(outFile, "w"); + if(fp == NULL) { errorPrint("failed create csv file. file=%s, last errno=%d strerror=%s \n", outFile, errno, strerror(errno)); return -1; } - int rowLen = TSDB_TABLE_NAME_LEN + stb->lenOfTags + stb->lenOfCols + stb->tags->size + stb->cols->size; - int bufLen = rowLen * g_arguments->reqPerReq; - char* buf = benchCalloc(1, bufLen, true); + int row_buf_len = TSDB_TABLE_NAME_LEN + stb->lenOfTags + stb->lenOfCols + stb->tags->size + stb->cols->size; + int rows_buf_len = row_buf_len * g_arguments->interlaceRows; + char* rows_buf = benchCalloc(1, rows_buf_len, true); infoPrint("start write csv file: %s \n", outFile); - if (stb->interlaceRows > 0) { - // interlace mode - ret = interlaceWriteCsv(db, stb, fs, buf, bufLen, rowLen * 2); - } else { - // batch mode - ret = batchWriteCsv(db, stb, fs, buf, bufLen, rowLen * 2); - } + // interlace mode + ret = interlaceWriteCsv(db, stb, fp, rows_buf, rows_buf_len); - tmfree(buf); - fclose(fs); + + tmfree(rows_buf); + fclose(fp); succPrint("end write csv file: %s \n", outFile); + + + // wait threads + for (int i = 0; i < threadCnt; i++) { + infoPrint("pthread_join %d ...\n", i); + pthread_join(pids[i], NULL); + } + + return ret; } -static int is_valid_csv_ts_format(const char* csv_ts_format) { +void csvGenPrepare(SDataBase* db, SSuperTable* stb) { + stbInfo->lenOfTags = accumulateRowLen(stbInfo->tags, stbInfo->iface); + stbInfo->lenOfCols = accumulateRowLen(stbInfo->cols, stbInfo->iface); + return; +} + + +int csvGenStb(SDataBase* db, SSuperTable* stb) { + // prepare + csvGenPrepare(db, stb); + + + int ret = 0; + if (stb->interlaceRows > 0) { + // interlace mode + ret = csvGenStbInterlace(db, stb); + } else { + // batch mode + ret = csvGenStbBatch(db, stb); + } + + return ret; +} + + +static int csvValidateParamTsFormat(const char* csv_ts_format) { if (!csv_ts_format) return 0; struct tm test_tm = { @@ -296,7 +547,11 @@ static int is_valid_csv_ts_format(const char* csv_ts_format) { return -1; } +#ifdef _WIN32 const char* invalid_chars = "/\\:*?\"<>|"; +#else + const char* invalid_chars = "/\\?\"<>|"; +#endif if (strpbrk(buffer, invalid_chars) != NULL) { return -1; } @@ -305,7 +560,7 @@ static int is_valid_csv_ts_format(const char* csv_ts_format) { } -static long validate_csv_ts_interval(const char* csv_ts_interval) { +static long csvValidateParamTsInterval(const char* csv_ts_interval) { if (!csv_ts_interval || *csv_ts_interval == '\0') return -1; char* endptr; @@ -335,35 +590,35 @@ static long validate_csv_ts_interval(const char* csv_ts_interval) { static int csvParseParameter() { // csv_output_path - size_t len = strlen(g_arguments->csv_output_path); + size_t len = strlen(g_arguments->output_path); if (len == 0) { - errorPrint("Failed to generate CSV, the specified output path is empty. Please provide a valid path. database: %s, super table: %s.\n", + errorPrint("Failed to generate CSV files, the specified output path is empty. Please provide a valid path. database: %s, super table: %s.\n", db->dbName, stb->stbName); return -1; } - if (g_arguments->csv_output_path[len - 1] != '/') { - int n = snprintf(g_arguments->csv_output_path_buf, sizeof(g_arguments->csv_output_path_buf), "%s/", g_arguments->csv_output_path); - if (n < 0 || n >= sizeof(g_arguments->csv_output_path_buf)) { - errorPrint("Failed to generate CSV, path buffer overflow risk when appending '/'. 
path: %s, database: %s, super table: %s.\n", - g_arguments->csv_output_path, db->dbName, stb->stbName); + if (g_arguments->output_path[len - 1] != '/') { + int n = snprintf(g_arguments->output_path_buf, sizeof(g_arguments->output_path_buf), "%s/", g_arguments->output_path); + if (n < 0 || n >= sizeof(g_arguments->output_path_buf)) { + errorPrint("Failed to generate CSV files, path buffer overflow risk when appending '/'. path: %s, database: %s, super table: %s.\n", + g_arguments->csv_output_path, db->dbName, stb->stbName); return -1; } - g_arguments->csv_output_path = g_arguments->csv_output_path_buf; + g_arguments->output_path = g_arguments->output_path_buf; } // csv_ts_format if (g_arguments->csv_ts_format) { - if (is_valid_csv_ts_format(g_arguments->csv_ts_format) != 0) { - errorPrint("Failed to generate CSV, the parameter `csv_ts_format` is invalid. csv_ts_format: %s, database: %s, super table: %s.\n", + if (csvValidateParamTsFormat(g_arguments->csv_ts_format) != 0) { + errorPrint("Failed to generate CSV files, the parameter `csv_ts_format` is invalid. csv_ts_format: %s, database: %s, super table: %s.\n", g_arguments->csv_ts_format, db->dbName, stb->stbName); return -1; } } // csv_ts_interval - long csv_ts_intv_secs = validate_csv_ts_interval(g_arguments->csv_ts_interval); + long csv_ts_intv_secs = csvValidateParamTsInterval(g_arguments->csv_ts_interval); if (csv_ts_intv_secs <= 0) { - errorPrint("Failed to generate CSV, the parameter `csv_ts_interval` is invalid. csv_ts_interval: %s, database: %s, super table: %s.\n", + errorPrint("Failed to generate CSV files, the parameter `csv_ts_interval` is invalid. csv_ts_interval: %s, database: %s, super table: %s.\n", g_arguments->csv_ts_interval, db->dbName, stb->stbName); return -1; } @@ -373,12 +628,12 @@ static int csvParseParameter() { } -static void csvWriteThread() { - for (size_t i = 0; i < g_arguments->databases->size; ++i) { +static int csvWriteThread() { + for (size_t i = 0; i < g_arguments->databases->size && !g_arguments->terminate; ++i) { // database SDataBase* db = benchArrayGet(g_arguments->databases, i); if (database->superTbls) { - for (size_t j = 0; j < db->superTbls->size; ++j) { + for (size_t j = 0; j < db->superTbls->size && !g_arguments->terminate; ++j) { // stb SSuperTable* stb = benchArrayGet(db->superTbls, j); if (stb->insertRows == 0) { @@ -386,37 +641,34 @@ static void csvWriteThread() { } // gen csv - int ret = genWithSTable(db, stb); + int ret = csvGenStb(db, stb); if(ret != 0) { errorPrint("Failed to generate CSV files. database: %s, super table: %s, error code: %d.\n", db->dbName, stb->stbName, ret); - return; + return -1; } } } } - return; + return 0; } - int csvTestProcess() { - // parse parameter + // parsing parameters if (csvParseParameter() != 0) { - errorPrint("Failed to generate CSV files. database: %s, super table: %s, error code: %d.\n", - db->dbName, stb->stbName, ret); return -1; } - - - - infoPrint("Starting to output data to CSV files in directory: %s ...\n", g_arguments->csv_output_path); + infoPrint("Starting to output data to CSV files in directory: %s ...\n", g_arguments->output_path); int64_t start = toolsGetTimestampMs(); - csvWriteThread(); + int ret = csvWriteThread(); + if (ret != 0) { + return -1; + } int64_t delay = toolsGetTimestampMs() - start; - infoPrint("Data export to CSV files in directory: %s has been completed. Time elapsed: %.3f seconds\n", - g_arguments->csv_output_path, delay / 1000.0); + infoPrint("Generating CSV files in directory: %s has been completed. 
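The normalization above, by example:

    // "./csv"   -> "./csv/" written into output_path_buf, and output_path
    //              repointed at the buffer
    // "./csv/"  -> used unchanged (last byte is already '/')
    // ""        -> rejected up front (len == 0)

The n < 0 || n >= sizeof(...) check makes an over-long path a hard error instead of letting snprintf truncate it into a different directory.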
Time elapsed: %.3f seconds\n", + g_arguments->output_path, delay / 1000.0); return 0; } diff --git a/tools/taos-tools/src/benchJsonOpt.c b/tools/taos-tools/src/benchJsonOpt.c index a2bf4f07d8..a88526c278 100644 --- a/tools/taos-tools/src/benchJsonOpt.c +++ b/tools/taos-tools/src/benchJsonOpt.c @@ -1586,14 +1586,14 @@ static int getMetaFromCommonJsonFile(tools_cJSON *json) { } } - // csv output dir - tools_cJSON* csv_op = tools_cJSON_GetObjectItem(json, "csv_output_path"); - if (csv_op && csv_op->type == tools_cJSON_String && csv_op->valuestring != NULL) { - g_arguments->csv_output_path = csv_op->valuestring; + // output dir + tools_cJSON* opp = tools_cJSON_GetObjectItem(json, "output_path"); + if (opp && opp->type == tools_cJSON_String && opp->valuestring != NULL) { + g_arguments->output_path = opp->valuestring; } else { - g_arguments->csv_output_path = "./output/"; + g_arguments->output_path = "./output/"; } - (void)mkdir(g_arguments->csv_output_path, 0775); + (void)mkdir(g_arguments->output_path, 0775); // csv file prefix tools_cJSON* csv_fp = tools_cJSON_GetObjectItem(json, "csv_file_prefix"); From 80ecd4feb42fe475ef08616fffb66a1a30bcd370 Mon Sep 17 00:00:00 2001 From: Yaming Pei Date: Thu, 27 Feb 2025 14:00:00 +0800 Subject: [PATCH 05/34] enh: framework that supports concurrent writing to csv --- tools/taos-tools/inc/bench.h | 5 + tools/taos-tools/inc/benchCsv.h | 43 +- tools/taos-tools/src/benchCsv.c | 761 ++++++++++++++++------------ tools/taos-tools/src/benchJsonOpt.c | 17 + 4 files changed, 491 insertions(+), 335 deletions(-) diff --git a/tools/taos-tools/inc/bench.h b/tools/taos-tools/inc/bench.h index e8c94016f8..4dd19d83b9 100644 --- a/tools/taos-tools/inc/bench.h +++ b/tools/taos-tools/inc/bench.h @@ -20,6 +20,9 @@ #define CURL_STATICLIB #define ALLOW_FORBID_FUNC +#define MAX(a, b) ((a) > (b) ? (a) : (b)) +#define MIN(a, b) ((a) < (b) ? 
(a) : (b)) + #ifdef LINUX #ifndef _ALPINE @@ -787,6 +790,8 @@ typedef struct SArguments_S { char* csv_ts_format; char* csv_ts_interval; long csv_ts_intv_secs; + bool csv_output_header; + bool csv_tbname_alias; bool bind_vgroup; } SArguments; diff --git a/tools/taos-tools/inc/benchCsv.h b/tools/taos-tools/inc/benchCsv.h index a65d5d1c9c..19331b8976 100644 --- a/tools/taos-tools/inc/benchCsv.h +++ b/tools/taos-tools/inc/benchCsv.h @@ -20,22 +20,49 @@ typedef enum { - CSV_NAMING_SINGLE, - CSV_NAMING_TIME_SLICE, - CSV_NAMING_THREAD, - CSV_NAMING_THREAD_TIME_SLICE + CSV_NAMING_I_SINGLE, + CSV_NAMING_I_TIME_SLICE, + CSV_NAMING_B_THREAD, + CSV_NAMING_B_THREAD_TIME_SLICE } CsvNamingType; +typedef struct { + char* buf; + int buf_size; + int length; +} CsvRowFieldsBuf; + typedef struct { CsvNamingType naming_type; - time_t start_secs; - time_t end_secs; - time_t end_ts; - size_t thread_id; size_t total_threads; char thread_formatter[TINY_BUFF_LEN]; + SDataBase* db; + SSuperTable* stb; + int64_t start_ts; + int64_t end_ts; + int64_t ts_step; + int64_t interlace_step; } CsvWriteMeta; +typedef struct { + uint64_t ctb_start_idx; + uint64_t ctb_end_idx; + uint64_t ctb_count; + time_t start_secs; + time_t end_secs; + size_t thread_id; + bool output_header; + CsvRowFieldsBuf* tags_buf_bucket; + CsvRowFieldsBuf* cols_buf; +} CsvThreadMeta; + +typedef struct { + CsvWriteMeta* write_meta; + CsvThreadMeta thread_meta; +} CsvThreadArgs; + + + int csvTestProcess(); diff --git a/tools/taos-tools/src/benchCsv.c b/tools/taos-tools/src/benchCsv.c index c7d455c66a..cec38628ad 100644 --- a/tools/taos-tools/src/benchCsv.c +++ b/tools/taos-tools/src/benchCsv.c @@ -25,105 +25,26 @@ // #define SHOW_CNT 100000 +#define GEN_ROW_FIELDS_TAG 0 +#define GEN_ROW_FIELDS_COL 1 - - -int32_t writeCsvFile(FILE* f, char * buf, int32_t len) { - size_t size = fwrite(buf, 1, len, f); - if(size != len) { - errorPrint("failed to write csv file. 
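CsvThreadArgs pairs the shared, read-only CsvWriteMeta with one private CsvThreadMeta per worker, which is enough to see the intended spawn pattern even though the driver itself appears later. A sketch of the likely wiring, assuming the patch's csvInitThreadMeta() and 1-based thread ids; csvWriteThreadFunc and the local arrays are illustrative names only:

    static void* csvWriteThreadFunc(void* arg) {
        CsvThreadArgs* args = (CsvThreadArgs*)arg;
        // each worker walks its own child-table range
        // [thread_meta.ctb_start_idx, thread_meta.ctb_end_idx), opening files
        // via csvGetFileFullname(args->write_meta, &args->thread_meta, ...)
        return NULL;
    }

    // in the driver:
    // pthread_t      pids[g_arguments->nthreads];
    // CsvThreadArgs  args[g_arguments->nthreads];
    // for (uint32_t tid = 1; tid <= g_arguments->nthreads; ++tid) {
    //     args[tid - 1].write_meta  = &write_meta;
    //     args[tid - 1].thread_meta = csvInitThreadMeta(&write_meta, tid);
    //     pthread_create(&pids[tid - 1], NULL, csvWriteThreadFunc, &args[tid - 1]);
    // }
    // for (uint32_t i = 0; i < g_arguments->nthreads; ++i) pthread_join(pids[i], NULL);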
expect write length:%d real write length:%d \n", len, (int32_t)size); - return -1; - } - return 0; -} - -int batchWriteCsv(SDataBase* db, SSuperTable* stb, FILE* fs, char* buf, int rows_buf_len, int minRemain) { - int ret = 0; - int pos = 0; - int64_t tk = 0; - int64_t show = 0; - - - uint32_t tags_length = accumulateRowLen(stbInfo->tags, stbInfo->iface); - uint32_t cols_length = accumulateRowLen(stbInfo->cols, stbInfo->iface); - - size_t tags_csv_length = tags_length + stb->tags->size; - size_t cols_csv_length = cols_length + stb->cols->size; - char* tags_csv_buf = (char*)benchCalloc(1, tags_csv_length, true); - char* cols_csv_buf = (char*)benchCalloc(1, cols_csv_length, true); - - // gen child name - for (int64_t i = 0; i < stb->childTblCount; ++i) { - int64_t ts = stb->startTimestamp; - int64_t ck = 0; - - // child table - - // tags - csvGenRowTagData(tags_csv_buf, stb, i, &tk); - // insert child column data - for(int64_t j = 0; j < stb->insertRows; j++) { - genColumnData(cols_csv_buf, stb, ts, db->precision, &ck); - // combine - pos += sprintf(buf + pos, "%s,%s.\n", tags_csv_buf, cols_csv_buf); - if (rows_buf_len - pos < minRemain) { - // submit - ret = writeCsvFile(fs, buf, pos); - if (ret != 0) { - goto END; - } - - pos = 0; - } - - // ts move next - ts += stb->timestamp_step; - - // check cancel - if(g_arguments->terminate) { - infoPrint("%s", "You are cancel, exiting ...\n"); - ret = -1; - goto END; - } - - // print show - if (++show % SHOW_CNT == 0) { - infoPrint("batch write child table cnt = %"PRId64 " all rows = %" PRId64 "\n", i+1, show); - } - - } - } - - if (pos > 0) { - ret = writeCsvFile(fs, buf, pos); - pos = 0; - } - -END: - // free - tmfree(tags_csv_buf); - tmfree(cols_csv_buf); - return ret; -} - - -static time_t csvGetStartSeconds(SDataBase* db, SSuperTable* stb) { +static time_t csvGetStartSeconds(int precision, int64_t start_ts) { time_t start_seconds = 0; - if (db->precision == TSDB_TIME_PRECISION_MICRO) { - start_seconds = stb->startTimestamp / 1000000L; - } else if (db->precision == TSDB_TIME_PRECISION_NANO) { - start_seconds = stb->startTimestamp / 1000000000L; + if (precision == TSDB_TIME_PRECISION_MICRO) { + start_seconds = start_ts / 1000000L; + } else if (precision == TSDB_TIME_PRECISION_NANO) { + start_seconds = start_ts / 1000000000L; } else { - start_seconds = stb->startTimestamp / 1000L; + start_seconds = start_ts / 1000L; } return start_seconds; } -void csvConvertTime2String(time_t time_value, char* time_buf, size_t buf_size) { +static void csvConvertTime2String(time_t time_value, char* time_buf, size_t buf_size) { struct tm tm_result; char *old_locale = setlocale(LC_TIME, "C"); #ifdef _WIN32 @@ -133,45 +54,73 @@ void csvConvertTime2String(time_t time_value, char* time_buf, size_t buf_size) { #endif strftime(time_buf, buf_size, g_arguments->csv_ts_format, &tm_result); if (old_locale) { - (LC_TIME, old_locale); + setlocale(LC_TIME, old_locale); } + return; } static CsvNamingType csvGetFileNamingType(SSuperTable* stb) { if (stb->interlaceRows > 0) { if (g_arguments->csv_ts_format) { - return CSV_NAMING_TIME_SLICE; + return CSV_NAMING_I_TIME_SLICE; } else { - return CSV_NAMING_SINGLE; + return CSV_NAMING_I_SINGLE; } } else { if (g_arguments->csv_ts_format) { - return CSV_NAMING_THREAD_TIME_SLICE; + return CSV_NAMING_B_THREAD_TIME_SLICE; } else { - return CSV_NAMING_THREAD; + return CSV_NAMING_B_THREAD; } } } -static void csvGenEndTimestamp(CsvWriteMeta* meta, SDataBase* db) { - time_t end_ts = 0; +static void csvCalcTimestampStep(CsvWriteMeta* meta) { + 
time_t ts_step = 0; - if (db->precision == TSDB_TIME_PRECISION_MICRO) { - end_ts = meta->end_secs * 1000000L; + if (meta->db->precision == TSDB_TIME_PRECISION_MICRO) { + ts_step = g_arguments->csv_ts_intv_secs * 1000000L; } else if (db->precision == TSDB_TIME_PRECISION_NANO) { - end_ts = meta->end_secs * 1000000000L; + ts_step = g_arguments->csv_ts_intv_secs * 1000000000L; } else { - end_ts = meta->end_secs * 1000L; + ts_step = g_arguments->csv_ts_intv_secs * 1000L; } - meta->end_ts = end_ts; + meta->ts_step = ts_step; + return; +} + + +static void csvCalcCtbRange(CsvThreadMeta* meta, size_t total_threads, int64_t ctb_offset, int64_t ctb_count) { + uint64_t ctb_start_idx = 0; + uint64_t ctb_end_idx = 0; + size_t tid_idx = meta->thread_id - 1; + size_t base = ctb_count / total_threads; + size_t remainder = ctb_count % total_threads; + + if (tid_idx < remainder) { + ctb_start_idx = ctb_offset + tid_idx * (base + 1); + ctb_end_idx = ctb_start_idx + (base + 1); + } else { + ctb_start_idx = ctb_offset + remainder * (base + 1) + (tid_idx - remainder) * base; + ctb_end_idx = ctb_start_idx + base; + } + + if (ctb_end_idx > ctb_offset + ctb_count) { + ctb_end_idx = ctb_offset + ctb_count; + } + + meta->ctb_start_idx = ctb_start_idx; + meta->ctb_end_idx = ctb_end_idx; + meta->ctb_count = ctb_count; return; } static void csvGenThreadFormatter(CsvWriteMeta* meta) { int digits = 0; + if (meta->total_threads == 0) { digits = 1; } else { @@ -181,52 +130,50 @@ static void csvGenThreadFormatter(CsvWriteMeta* meta) { } if (digits <= 1) { - (void)sprintf(meta->thread_formatter, "%%d"); + (void)snprintf(meta->thread_formatter, sizeof(meta->thread_formatter), "%%d"); } else { (void)snprintf(meta->thread_formatter, sizeof(meta->thread_formatter), "%%0%dd", digits); } + return; } -static CsvWriteMeta csvInitFileNamingMeta(SDataBase* db, SSuperTable* stb) { +static CsvWriteMeta csvInitWriteMeta(SDataBase* db, SSuperTable* stb) { CsvWriteMeta meta = { - .naming_type = CSV_NAMING_SINGLE, - .start_secs = 0, - .end_secs = 0, - .thread_id = 0, + .naming_type = CSV_NAMING_I_SINGLE, .total_threads = 1, - .thread_formatter = {} + .thread_formatter = {}, + .db = db, + .stb = stb, + .start_ts = stb->startTimestamp, + .end_ts = stb->startTimestamp + stb->timestamp_step * stb->insertRows, + .ts_step = stb->timestamp_step * stb->insertRows, + .interlace_step = stb->timestamp_step * stb->interlaceRows }; meta.naming_type = csvGetFileNamingType(stb); switch (meta.naming_type) { - case CSV_NAMING_SINGLE: { + case CSV_NAMING_I_SINGLE: { break; } - case CSV_NAMING_TIME_SLICE: { - meta.start_secs = csvGetStartSeconds(db, stb); - meta.end_secs = meta.start_secs + g_arguments->csv_ts_intv_secs; - csvGenEndTimestamp(&meta, db); + case CSV_NAMING_I_TIME_SLICE: { + csvCalcTimestampStep(&meta); break; } - case CSV_NAMING_THREAD: { - meta.thread_id = 1; + case CSV_NAMING_B_THREAD: { meta.total_threads = g_arguments->nthreads; csvGenThreadFormatter(&meta); break; } - case CSV_NAMING_THREAD_TIME_SLICE: { - meta.thread_id = 1; + case CSV_NAMING_B_THREAD_TIME_SLICE: { meta.total_threads = g_arguments->nthreads; csvGenThreadFormatter(&meta); - meta.start_secs = csvGetStartSeconds(db, stb); - meta.end_secs = meta.start_secs + g_arguments->csv_ts_intv_secs; - csvGenEndTimestamp(&meta, db); + csvCalcTimestampStep(&meta); break; } default: { - meta.naming_type = CSV_NAMING_SINGLE; + meta.naming_type = CSV_NAMING_I_SINGLE; break; } } @@ -235,7 +182,67 @@ static CsvWriteMeta csvInitFileNamingMeta(SDataBase* db, SSuperTable* stb) { } -int 
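csvCalcCtbRange() hands each worker a half-open child-table range, giving the first ctb_count % total_threads workers one extra table. Worked through for ctb_offset 0, ctb_count 10, total_threads 3 (thread_id is 1-based):

    // thread_id 1: base 3, remainder slot -> [0, 4)
    // thread_id 2:                           [4, 7)
    // thread_id 3:                           [7, 10)

Every table is covered exactly once and the range sizes differ by at most one, so no worker carries more than one extra table.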
csvGetFileFullname(CsvWriteMeta* meta, char* fullname, size_t size) { +static CsvThreadMeta csvInitThreadMeta(CsvWriteMeta* write_meta, uint32_t thread_id) { + SDataBase* db = write_meta->db; + SSuperTable* stb = write_meta->stb; + CsvThreadMeta meta = { + .ctb_start_idx = 0, + .ctb_end_idx = 0, + .ctb_count = 0, + .start_secs = 0, + .end_secs = 0, + .thread_id = thread_id, + .tags_buf_bucket = NULL, + .cols_buf = NULL + }; + + csvCalcCtbRange(&meta, write_meta->total_threads, stb->childTblFrom, stb->childTblCount); + + switch (write_meta->naming_type) { + case CSV_NAMING_I_SINGLE: + case CSV_NAMING_B_THREAD: { + break; + } + case CSV_NAMING_I_TIME_SLICE: + case CSV_NAMING_B_THREAD_TIME_SLICE: { + meta.start_secs = csvGetStartSeconds(db->precision, stb->startTimestamp); + meta.end_secs = meta.start_secs + g_arguments->csv_ts_intv_secs; + break; + } + default: { + meta.naming_type = CSV_NAMING_I_SINGLE; + break; + } + } + + return meta; +} + + +static void csvUpdateSliceRange(CsvWriteMeta* write_meta, CsvThreadMeta* thread_meta, int64_t last_end_ts) { + SDataBase* db = write_meta->db; + + switch (write_meta->naming_type) { + case CSV_NAMING_I_SINGLE: + case CSV_NAMING_B_THREAD: { + break; + } + case CSV_NAMING_I_TIME_SLICE: + case CSV_NAMING_B_THREAD_TIME_SLICE: { + thread_meta->start_secs = csvGetStartSeconds(db->precision, last_end_ts); + thread_meta->end_secs = thread_meta.start_secs + g_arguments->csv_ts_intv_secs; + break; + } + default: { + break; + } + } + + return; +} + + +static int csvGetFileFullname(CsvWriteMeta* write_meta, CsvThreadMeta* thread_meta, char* fullname, size_t size) { char thread_buf[SMALL_BUFF_LEN]; char start_time_buf[MIDDLE_BUFF_LEN]; char end_time_buf[MIDDLE_BUFF_LEN]; @@ -244,22 +251,22 @@ int csvGetFileFullname(CsvWriteMeta* meta, char* fullname, size_t size) { const char* file_prefix = g_arguments->csv_file_prefix; switch (meta->naming_type) { - case CSV_NAMING_SINGLE: { + case CSV_NAMING_I_SINGLE: { ret = snprintf(fullname, size, "%s%s.csv", base_path, file_prefix); break; } - case CSV_NAMING_TIME_SLICE: { + case CSV_NAMING_I_TIME_SLICE: { csvConvertTime2String(meta->start_secs, start_time_buf, sizeof(start_time_buf)); csvConvertTime2String(meta->end_secs, end_time_buf, sizeof(end_time_buf)); ret = snprintf(fullname, size, "%s%s_%s_%s.csv", base_path, file_prefix, start_time_buf, end_time_buf); break; } - case CSV_NAMING_THREAD: { + case CSV_NAMING_B_THREAD: { (void)snprintf(thread_buf, sizeof(thread_buf), meta->thread_formatter, meta->thread_id); ret = snprintf(fullname, size, "%s%s_%s.csv", base_path, file_prefix, thread_buf); break; } - case CSV_NAMING_THREAD_TIME_SLICE: { + case CSV_NAMING_B_THREAD_TIME_SLICE: { (void)snprintf(thread_buf, sizeof(thread_buf), meta->thread_formatter, meta->thread_id); csvConvertTime2String(meta->start_secs, start_time_buf, sizeof(start_time_buf)); csvConvertTime2String(meta->end_secs, end_time_buf, sizeof(end_time_buf)); @@ -276,184 +283,55 @@ int csvGetFileFullname(CsvWriteMeta* meta, char* fullname, size_t size) { } -uint32_t csvCalcInterlaceRows(CsvWriteMeta* meta, SSuperTable* stb, int64_t ts) { - uint32_t need_rows = 0; +static int64_t csvCalcSliceBatchTimestamp(CsvWriteMeta* write_meta, int64_t slice_cur_ts, int64_t slice_end_ts) { + int64_t slice_batch_ts = 0; - - switch (meta->naming_type) { - case CSV_NAMING_SINGLE: { - need_rows = stb->interlaceRows; + switch (write_meta->naming_type) { + case CSV_NAMING_I_SINGLE: + case CSV_NAMING_I_TIME_SLICE: { + slice_batch_ts = MIN(slice_cur_ts + write_meta->interlace_step, 
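Slice bookkeeping by example, for csv_ts_interval "1d" (csv_ts_intv_secs 86400) and millisecond precision with startTimestamp 1740000000000:

    // csvInitThreadMeta():  start_secs = 1740000000, end_secs = 1740086400
    // when a slice is exhausted, csvUpdateSliceRange(..., last_end_ts)
    // re-derives start_secs from last_end_ts and adds csv_ts_intv_secs,
    // so successive files cover back-to-back windows:
    //   [1740000000, 1740086400), [1740086400, 1740172800), ...

csvCalcSliceBatchTimestamp() then caps each interlace batch at the slice end, which is what keeps a row from spilling into the file of the previous window.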
slice_end_ts); break; } - case CSV_NAMING_TIME_SLICE: { - (meta->end_ts - ts) / stb->timestamp_step - need_rows = stb->interlaceRows; - - break; - } - case CSV_NAMING_THREAD: { - (void)snprintf(thread_buf, sizeof(thread_buf), meta->thread_formatter, meta->thread_id); - ret = snprintf(fullname, size, "%s%s_%s.csv", base_path, file_prefix, thread_buf); - break; - } - case CSV_NAMING_THREAD_TIME_SLICE: { - (void)snprintf(thread_buf, sizeof(thread_buf), meta->thread_formatter, meta->thread_id); - csvConvertTime2String(meta->start_secs, start_time_buf, sizeof(start_time_buf)); - csvConvertTime2String(meta->end_secs, end_time_buf, sizeof(end_time_buf)); - ret = snprintf(fullname, size, "%s%s_%s_%s_%s.csv", base_path, file_prefix, thread_buf, start_time_buf, end_time_buf); + case CSV_NAMING_B_THREAD: + case CSV_NAMING_B_THREAD_TIME_SLICE: { + slice_batch_ts = slice_end_ts; break; } default: { - ret = -1; break; } } + + return slice_batch_ts; } +static int csvGenRowFields(char* buf, int size, SSuperTable* stb, int fields_cate, int64_t* k) { + int pos = 0; + BArray* fields = NULL; + int16_t field_count = 0; + char* binanry_prefix = stb->binaryPrefex ? stb->binaryPrefex : ""; + char* nchar_prefix = stb->ncharPrefex ? stb->ncharPrefex : ""; - -static int interlaceWriteCsv(SDataBase* db, SSuperTable* stb, FILE* fp, char* rows_buf, int rows_buf_len) { - char fullname[MAX_PATH_LEN] = {}; - CsvWriteMeta meta = csvInitFileNamingMeta(); - - int ret = csvGetFileFullname(&meta, fullname, sizeof(fullname)); - if (ret < 0) { - errorPrint("Failed to generate csv filename. database: %s, super table: %s, naming type: %d.\n", - db->dbName, stb->stbName, meta.naming_type); + if (!buf || !stb || !k || size <= 0) { return -1; } - int ret = 0; - int pos = 0; - int64_t n = 0; // already inserted rows for one child table - int64_t tk = 0; - int64_t show = 0; - int64_t ts = 0; - int64_t last_ts = stb->startTimestamp; - - // init buffer - char** tags_buf_bucket = (char **)benchCalloc(stb->childTblCount, sizeof(char *), true); - int cols_buf_length = stb->lenOfCols + stb->cols->size; - char* cols_buf = (char *)benchCalloc(1, cols_buf_length, true); - - for (int64_t i = 0; i < stb->childTblCount; ++i) { - int tags_buf_length = TSDB_TABLE_NAME_LEN + stb->lenOfTags + stb->tags->size; - tags_buf_bucket[i] = benchCalloc(1, tags_buf_length, true); - if (!tags_buf_bucket[i]) { - ret = -1; - goto end; - } - - ret = csvGenRowTagData(tags_buf_bucket[i], tags_buf_length, stb, i, &tk); - if (!ret) { - goto end; - } + if (fields_cate == GEN_ROW_FIELDS_TAG) { + fields = stb->tags; + field_count = stb->tags->size; + } else { + fields = stb->cols; + field_count = stb->cols->size; } - while (n < stb->insertRows ) { - for (int64_t i = 0; i < stb->childTblCount; ++i) { - ts = last_ts; - int64_t ck = 0; - - - // calc need insert rows - uint32_t need_rows = csvCalcInterlaceRows(&meta, stb, ts) - - int64_t needInserts = stb->interlaceRows; - if(needInserts > stb->insertRows - n) { - needInserts = stb->insertRows - n; - } - - for (int64_t j = 0; j < needInserts; j++) { - genColumnData(cols_buf, stb, ts, db->precision, &ck); - // combine tags,cols - pos += sprintf(buf + pos, "%s,%s\n", tags_buf_bucket[i], cols_buf); - if (rows_buf_len - pos < minRemain) { - // submit - ret = writeCsvFile(fp, buf, pos); - if (ret != 0) { - goto end; - } - pos = 0; - } - - // ts move next - ts += stb->timestamp_step; - - // check cancel - if(g_arguments->terminate) { - infoPrint("%s", "You are cancel, exiting ... 
\n"); - ret = -1; - goto end; - } - - // print show - if (++show % SHOW_CNT == 0) { - infoPrint("interlace write child table index = %"PRId64 " all rows = %"PRId64 "\n", i+1, show); - } - } - - // if last child table - if (i + 1 == stb->childTblCount ) { - n += needInserts; - last_ts = ts; - } - } - } - - if (pos > 0) { - ret = writeCsvFile(fp, buf, pos); - pos = 0; - } - -end: - // free - for (int64_t m = 0 ; m < stb->childTblCount; m ++) { - tmfree(tags_buf_bucket[m]); - } - tmfree(tags_buf_bucket); - tmfree(cols_buf); - return ret; -} - - -// gen tag data -int csvGenRowTagData(char* buf, size_t size, SSuperTable* stb, int64_t index, int64_t* k) { - // tbname - int pos = snprintf(buf, size, "\'%s%"PRId64"\'", stb->childTblPrefix, index); - // tags - pos += csvGenRowFields(buf + pos, stb->tags, stb->tags->size, stb->binaryPrefex, stb->ncharPrefex, k); - - return (pos > 0 && (size_t)pos < size) ? 0 : -1; -} - -// gen column data -char * genColumnData(char* cols_csv_buf, SSuperTable* stb, int64_t ts, int32_t precision, int64_t *k) { - char szTime[128] = {0}; - toolsFormatTimestamp(szTime, ts, precision); - int pos = sprintf(cols_csv_buf, "\'%s\'", szTime); - - // columns - csvGenRowFields(cols_csv_buf + pos, stb->cols, stb->cols->size, stb->binaryPrefex, stb->ncharPrefex, k); - return cols_csv_buf; -} - - -int32_t csvGenRowFields(char* buf, BArray* fields, int16_t field_count, char* binanry_prefix, char* nchar_prefix, int64_t* k) { - int32_t pos = 0; - for (uint16_t i = 0; i < field_count; ++i) { Field* field = benchArrayGet(fields, i); char* prefix = ""; if(field->type == TSDB_DATA_TYPE_BINARY || field->type == TSDB_DATA_TYPE_VARBINARY) { - if (binanry_prefix) { - prefix = binanry_prefix; - } + prefix = binanry_prefix; } else if(field->type == TSDB_DATA_TYPE_NCHAR) { - if (nchar_prefix) { - prefix = nchar_prefix; - } + prefix = nchar_prefix; } pos += dataGenByField(field, buf, pos, prefix, k, ""); } @@ -462,68 +340,297 @@ int32_t csvGenRowFields(char* buf, BArray* fields, int16_t field_count, char* bi } - -int csvGenStbInterlace(SDataBase* db, SSuperTable* stb) { - - - int ret = 0; - char outFile[MAX_FILE_NAME_LEN] = {0}; - obtainCsvFile(outFile, db, stb, outDir); - FILE* fp = fopen(outFile, "w"); - if(fp == NULL) { - errorPrint("failed create csv file. file=%s, last errno=%d strerror=%s \n", outFile, errno, strerror(errno)); +static int csvGenRowTagData(char* buf, int size, SSuperTable* stb, int64_t index, int64_t* k) { + if (!buf || !stb || !k || size <= 0) { return -1; } - int row_buf_len = TSDB_TABLE_NAME_LEN + stb->lenOfTags + stb->lenOfCols + stb->tags->size + stb->cols->size; - int rows_buf_len = row_buf_len * g_arguments->interlaceRows; - char* rows_buf = benchCalloc(1, rows_buf_len, true); + // tbname + int pos = snprintf(buf, size, "\'%s%"PRId64"\'", stb->childTblPrefix, index); - infoPrint("start write csv file: %s \n", outFile); + // tags + pos += csvGenRowFields(buf + pos, size - pos, stb, GEN_ROW_FIELDS_TAG, k); - // interlace mode - ret = interlaceWriteCsv(db, stb, fp, rows_buf, rows_buf_len); - - - tmfree(rows_buf); - fclose(fp); - - succPrint("end write csv file: %s \n", outFile); - - - // wait threads - for (int i = 0; i < threadCnt; i++) { - infoPrint("pthread_join %d ...\n", i); - pthread_join(pids[i], NULL); - } - - - return ret; + return (pos > 0 && pos < size) ? 
pos : -1; } -void csvGenPrepare(SDataBase* db, SSuperTable* stb) { - stbInfo->lenOfTags = accumulateRowLen(stbInfo->tags, stbInfo->iface); - stbInfo->lenOfCols = accumulateRowLen(stbInfo->cols, stbInfo->iface); +static int csvGenRowColData(char* buf, int size, SSuperTable* stb, int64_t ts, int32_t precision, int64_t *k) { + char ts_fmt[128] = {0}; + toolsFormatTimestamp(ts_fmt, ts, precision); + int pos = snprintf(buf, size, "\'%s\'", ts_fmt); + + // columns + pos += csvGenRowFields(buf + pos, size - pos, stb, GEN_ROW_FIELDS_COL, k); + return (pos > 0 && pos < size) ? pos : -1; +} + + +static CsvRowFieldsBuf* csvGenCtbTagData(CsvWriteMeta* write_meta, CsvThreadMeta* thread_meta) { + SSuperTable* stb = write_meta->stb; + int ret = 0; + int64_t tk = 0; + + if (!write_meta || !thread_meta) { + return NULL; + } + + CsvRowFieldsBuf* tags_buf_bucket = (CsvRowFieldsBuf*)benchCalloc(thread_meta->ctb_count, sizeof(CsvRowFieldsBuf), true); + if (!tags_buf_bucket) { + return NULL; + } + + char* tags_buf = NULL; + int tags_buf_size = TSDB_TABLE_NAME_LEN + stb->lenOfTags + stb->tags->size; + for (uint64_t i = 0; i < thread_meta->ctb_count; ++i) { + tags_buf = benchCalloc(1, tags_buf_size, true); + if (!tags_buf) { + goto error; + } + + tags_buf_bucket[i].buf = tags_buf; + tags_buf_bucket[i].buf_size = tags_buf_size; + + ret = csvGenRowTagData(tags_buf, tags_buf_size, stb, thread_meta->ctb_start_idx + i, &tk); + if (ret <= 0) { + goto error; + } + + tags_buf_bucket[i].length = ret; + } + + return tags_buf_bucket; + +error: + csvFreeCtbTagData(thread_meta, tags_buf_bucket); + return NULL; +} + + +static void csvFreeCtbTagData(CsvThreadMeta* thread_meta, CsvRowFieldsBuf* tags_buf_bucket) { + if (!thread_meta || !tags_buf_bucket) { + return; + } + + for (uint64_t i = 0 ; i < thread_meta->ctb_count; ++i) { + char* tags_buf = tags_buf_bucket[i].buf; + if (tags_buf) { + tmfree(tags_buf_bucket); + } else { + break; + } + } + tmfree(tags_buf_bucket); return; } -int csvGenStb(SDataBase* db, SSuperTable* stb) { +static int csvWriteFile(FILE* fp, uint64_t ctb_idx, int64_t cur_ts, int64_t* ck, CsvWriteMeta* write_meta, CsvThreadMeta* thread_meta) { + SDataBase* db = write_meta->db; + SSuperTable* stb = write_meta->stb; + CsvRowFieldsBuf* tags_buf_bucket = thread_meta->tags_buf_bucket; + CsvRowFieldsBuf* tags_buf = &tags_buf_bucket[ctb_idx]; + CsvRowFieldsBuf* cols_buf = thread_meta->cols_buf; + int ret = 0; + + + ret = csvGenRowColData(cols_buf->buf, cols_buf->buf_size, stb, cur_ts, db->precision, ck); + if (ret <= 0) { + errorPrint("Failed to generate csv column data. database: %s, super table: %s, naming type: %d, thread index: %d, ctb index: %" PRIu64 ".\n", + db->dbName, stb->stbName, write_meta.naming_type, thread_meta->thread_id, ctb_idx); + return -1; + } + + cols_buf->length = ret; + + + // write header + if (thread_meta->output_header) { + // TODO + + thread_meta->output_header = false; + } + + + // write columns + size_t written = fwrite(cols_buf->buf, 1, cols_buf->length, fp); + if (written != cols_buf->length) { + errorPrint("Failed to write csv column data, expected written %d but got %zu. 
database: %s, super table: %s, naming type: %d, thread index: %d, ctb index: %" PRIu64 ".\n", + cols_buf->length, written, db->dbName, stb->stbName, write_meta.naming_type, thread_meta->thread_id, ctb_idx); + return -1; + } + + + // write tags + size_t written = fwrite(tags_buf->buf, 1, tags_buf->length, fp); + if (written != tags_buf->length) { + errorPrint("Failed to write csv tag data, expected written %d but got %zu. database: %s, super table: %s, naming type: %d, thread index: %d, ctb index: %" PRIu64 ".\n", + tags_buf->length, written, db->dbName, stb->stbName, write_meta.naming_type, thread_meta->thread_id, ctb_idx); + return -1; + } + + return 0; +} + + +static void* csvGenStbThread(void* arg) { + CsvThreadArgs* thread_arg = (CsvThreadArgs*)arg; + CsvWriteMeta* write_meta = thread_arg->write_meta; + CsvThreadMeta* thread_meta = &thread_arg->thread_meta; + SDataBase* db = write_meta->db; + SSuperTable* stb = write_meta->stb; + + int64_t cur_ts = 0; + int64_t slice_cur_ts = 0; + int64_t slice_end_ts = 0; + int64_t slice_batch_ts = 0; + int64_t slice_ctb_cur_ts = 0; + int64_t ck = 0; + uint64_t ctb_idx = 0; + int ret = 0; + FILE* fp = NULL; + char fullname[MAX_PATH_LEN] = {}; + + + // tags buffer + CsvRowFieldsBuf* tags_buf_bucket = csvGenCtbTagData(write_meta, thread_meta); + if (!tags_buf_bucket) { + errorPrint("Failed to generate csv tag data. database: %s, super table: %s, naming type: %d, thread index: %d.\n", + db->dbName, stb->stbName, write_meta.naming_type, thread_meta->thread_id); + return NULL; + } + + // column buffer + int buf_size = stb->lenOfCols + stb->cols->size; + char* buf = (char*)benchCalloc(1, buf_size, true); + if (!buf) { + errorPrint("Failed to malloc csv column buffer. database: %s, super table: %s, naming type: %d, thread index: %d.\n", + db->dbName, stb->stbName, write_meta.naming_type, thread_meta->thread_id); + goto end; + } + + CsvRowFieldsBuf cols_buf = { + .buf = buf, + .buf_size = buf_size, + .length = 0 + }; + + thread_meta->tags_buf_bucket = tags_buf_bucket; + thread_meta->cols_buf = &cols_buf; + + + for (cur_ts = write_meta->start_ts; cur_ts < write_meta->end_ts; cur_ts += write_meta->ts_step) { + // get filename + fullname[MAX_PATH_LEN] = {}; + ret = csvGetFileFullname(write_meta, thread_meta, fullname, sizeof(fullname)); + if (ret < 0) { + errorPrint("Failed to generate csv filename. database: %s, super table: %s, naming type: %d, thread index: %d.\n", + db->dbName, stb->stbName, write_meta.naming_type, thread_meta->thread_id); + goto end; + } + + // create fd + fp = fopen(fullname, "w"); + if (fp == NULL) { + errorPrint("Failed to create csv file. thread index: %d, file: %s, errno: %d, strerror: %s.\n", + thread_meta->thread_id, fullname, errno, strerror(errno)); + goto end; + } + + + thread_meta->output_header = g_arguments->csv_output_header; + slice_cur_ts = cur_ts; + slice_end_ts = MIN(cur_ts + write_meta->ts_step, write_meta->end_ts); + + // write data + while (slice_cur_ts < slice_end_ts) { + slice_batch_ts = csvCalcSliceBatchTimestamp(write_meta, slice_cur_ts, slice_end_ts); + + for (ctb_idx = 0; ctb_idx < thread_meta->ctb_count; ++ctb_idx) { + for (slice_ctb_cur_ts = slice_cur_ts; slice_ctb_cur_ts < slice_batch_ts; slice_ctb_cur_ts += write_meta->stb->timestamp_step) { + ret = csvWriteFile(fp, ctb_idx, slice_ctb_cur_ts, &ck, write_meta, thread_meta); + if (!ret) { + errorPrint("Failed to write csv file. 
thread index: %d, file: %s, errno: %d, strerror: %s.\n", + thread_meta->thread_id, fullname, errno, strerror(errno)); + fclose(fp); + goto end; + } + + ck += 1; + } + } + + slice_cur_ts = slice_batch_ts; + } + + fclose(fp); + csvUpdateSliceRange(write_meta, thread_meta, last_end_ts); + } + +end: + csvFreeCtbTagData(tags_buf_bucket); + tmfree(cols_buf); + return NULL; +} + + +static int csvGenStbProcess(SDataBase* db, SSuperTable* stb) { + int ret = 0; + CsvWriteMeta write_meta = csvInitWriteMeta(db, stb); + + pthread_t* pids = benchCalloc(write_meta.total_threads, sizeof(pthread_t), false); + if (!pids) { + ret = -1; + goto end; + } + CsvThreadArgs* args = benchCalloc(write_meta.total_threads, sizeof(CsvThreadArgs), false); + if (!args) { + ret = -1; + goto end; + } + + for (uint32_t i = 0; (i < write_meta.total_threads && !g_arguments->terminate); ++i) { + CsvThreadArgs* arg = &args[i]; + arg->write_meta = &write_meta; + arg->thread_meta = csvInitThreadMeta(&write_meta, i + 1); + + ret = pthread_create(&pids[i], NULL, csvGenStbThread, arg); + if (!ret) { + perror("Failed to create thread"); + goto end; + } + } + + // wait threads + for (uint32_t i = 0; i < write_meta.total_threads; ++i) { + infoPrint("pthread_join %d ...\n", i); + pthread_join(pids[i], NULL); + } + +end: + tmfree(pids); + tmfree(args); + return ret; +} + + +static void csvGenPrepare(SDataBase* db, SSuperTable* stb) { + stb->lenOfTags = accumulateRowLen(stb->tags, stb->iface); + stb->lenOfCols = accumulateRowLen(stb->cols, stb->iface); + + if (stb->childTblTo) { + stb->childTblCount = stb->childTblTo - stb->childTblFrom; + } + + return; +} + + +static int csvGenStb(SDataBase* db, SSuperTable* stb) { // prepare csvGenPrepare(db, stb); - - int ret = 0; - if (stb->interlaceRows > 0) { - // interlace mode - ret = csvGenStbInterlace(db, stb); - } else { - // batch mode - ret = csvGenStbBatch(db, stb); - } - - return ret; + return csvGenStbProcess(db, stb); } diff --git a/tools/taos-tools/src/benchJsonOpt.c b/tools/taos-tools/src/benchJsonOpt.c index a88526c278..4cf690204e 100644 --- a/tools/taos-tools/src/benchJsonOpt.c +++ b/tools/taos-tools/src/benchJsonOpt.c @@ -1619,6 +1619,23 @@ static int getMetaFromCommonJsonFile(tools_cJSON *json) { g_arguments->csv_ts_interval = "1d"; } + // csv output header + g_arguments->csv_output_header = false; + tools_cJSON* oph = tools_cJSON_GetObjectItem(json, "csv_output_header"); + if (oph && oph->type == tools_cJSON_String && oph->valuestring != NULL) { + if (0 == strcasecmp(oph->valuestring, "yes")) { + g_arguments->csv_output_header = true; + } + } + + // csv tbname alias + tools_cJSON* tba = tools_cJSON_GetObjectItem(json, "csv_tbname_alias"); + if (tba && tba->type == tools_cJSON_String && tba->valuestring != NULL) { + g_arguments->csv_tbname_alias = tba->valuestring; + } else { + g_arguments->csv_tbname_alias = "device_id"; + } + code = 0; return code; } From 3627a54c133b4ec9191c365ae51baebb69f2d586 Mon Sep 17 00:00:00 2001 From: Yaming Pei Date: Thu, 27 Feb 2025 14:17:23 +0800 Subject: [PATCH 06/34] enh: extract bufer size of all child tables --- tools/taos-tools/inc/benchCsv.h | 12 +++++++++--- tools/taos-tools/src/benchCsv.c | 20 ++++++++++---------- 2 files changed, 19 insertions(+), 13 deletions(-) diff --git a/tools/taos-tools/inc/benchCsv.h b/tools/taos-tools/inc/benchCsv.h index 19331b8976..717dfd8a71 100644 --- a/tools/taos-tools/inc/benchCsv.h +++ b/tools/taos-tools/inc/benchCsv.h @@ -26,11 +26,16 @@ typedef enum { CSV_NAMING_B_THREAD_TIME_SLICE } CsvNamingType; +typedef 
struct {
+    char* buf;
+    int length;
+} CsvRowTagsBuf;
+
 typedef struct {
     char* buf;
     int buf_size;
     int length;
-} CsvRowFieldsBuf;
+} CsvRowColsBuf;
 
 typedef struct {
     CsvNamingType naming_type;
@@ -52,8 +57,9 @@ typedef struct {
     time_t end_secs;
     size_t thread_id;
     bool output_header;
-    CsvRowFieldsBuf* tags_buf_bucket;
-    CsvRowFieldsBuf* cols_buf;
+    int tags_buf_size;
+    CsvRowTagsBuf* tags_buf_bucket;
+    CsvRowColsBuf* cols_buf;
 } CsvThreadMeta;
 
 typedef struct {
diff --git a/tools/taos-tools/src/benchCsv.c b/tools/taos-tools/src/benchCsv.c
index cec38628ad..95c94dc57d 100644
--- a/tools/taos-tools/src/benchCsv.c
+++ b/tools/taos-tools/src/benchCsv.c
@@ -366,7 +366,7 @@ static int csvGenRowColData(char* buf, int size, SSuperTable* stb, int64_t ts, i
 }
 
 
-static CsvRowFieldsBuf* csvGenCtbTagData(CsvWriteMeta* write_meta, CsvThreadMeta* thread_meta) {
+static CsvRowTagsBuf* csvGenCtbTagData(CsvWriteMeta* write_meta, CsvThreadMeta* thread_meta) {
     SSuperTable* stb = write_meta->stb;
     int ret = 0;
     int64_t tk = 0;
@@ -375,7 +375,7 @@ static CsvRowFieldsBuf* csvGenCtbTagData(CsvWriteMeta* write_meta, CsvThreadMeta
         return NULL;
     }
 
-    CsvRowFieldsBuf* tags_buf_bucket = (CsvRowFieldsBuf*)benchCalloc(thread_meta->ctb_count, sizeof(CsvRowFieldsBuf), true);
+    CsvRowTagsBuf* tags_buf_bucket = (CsvRowTagsBuf*)benchCalloc(thread_meta->ctb_count, sizeof(CsvRowTagsBuf), true);
     if (!tags_buf_bucket) {
         return NULL;
     }
@@ -388,8 +388,8 @@ static CsvRowFieldsBuf* csvGenCtbTagData(CsvWriteMeta* write_meta, CsvThreadMeta
             goto error;
         }
 
-        tags_buf_bucket[i].buf = tags_buf;
-        tags_buf_bucket[i].buf_size = tags_buf_size;
+        tags_buf_bucket[i].buf = tags_buf;
+        thread_meta->tags_buf_size = tags_buf_size;
 
         ret = csvGenRowTagData(tags_buf, tags_buf_size, stb, thread_meta->ctb_start_idx + i, &tk);
         if (ret <= 0) {
@@ -407,7 +407,7 @@ error:
 }
 
 
-static void csvFreeCtbTagData(CsvThreadMeta* thread_meta, CsvRowFieldsBuf* tags_buf_bucket) {
+static void csvFreeCtbTagData(CsvThreadMeta* thread_meta, CsvRowTagsBuf* tags_buf_bucket) {
     if (!thread_meta || !tags_buf_bucket) {
         return;
     }
@@ -428,9 +428,9 @@ static void csvFreeCtbTagData(CsvThreadMeta* thread_meta, CsvRowFieldsBuf* tags_
 static int csvWriteFile(FILE* fp, uint64_t ctb_idx, int64_t cur_ts, int64_t* ck, CsvWriteMeta* write_meta, CsvThreadMeta* thread_meta) {
     SDataBase* db = write_meta->db;
     SSuperTable* stb = write_meta->stb;
-    CsvRowFieldsBuf* tags_buf_bucket = thread_meta->tags_buf_bucket;
-    CsvRowFieldsBuf* tags_buf = &tags_buf_bucket[ctb_idx];
-    CsvRowFieldsBuf* cols_buf = thread_meta->cols_buf;
+    CsvRowTagsBuf* tags_buf_bucket = thread_meta->tags_buf_bucket;
+    CsvRowTagsBuf* tags_buf = &tags_buf_bucket[ctb_idx];
+    CsvRowColsBuf* cols_buf = thread_meta->cols_buf;
     int ret = 0;
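
The split between CsvRowTagsBuf and CsvRowColsBuf above reflects how the generator
amortizes work: the tag string of each child table is rendered once up front and then
reused for every row, so only the timestamp/column half is rebuilt per row. A minimal
standalone sketch of that caching pattern (plain C; make_tag_string() and the constants
are illustrative assumptions, not taosBenchmark APIs):

    #include <stdio.h>
    #include <stdlib.h>

    #define CTB_COUNT      3   /* illustrative child-table count */
    #define ROWS_PER_TABLE 2

    /* hypothetical tag renderer: quoted table name plus one integer tag */
    static char* make_tag_string(int idx) {
        char* s = malloc(64);
        if (s) {
            snprintf(s, 64, "'d%d',%d", idx, idx * 10);
        }
        return s;
    }

    int main(void) {
        char* tags[CTB_COUNT] = {0};
        for (int i = 0; i < CTB_COUNT; ++i) {
            tags[i] = make_tag_string(i);          /* rendered once per table */
            if (!tags[i]) return 1;
        }
        for (int i = 0; i < CTB_COUNT; ++i) {
            for (int r = 0; r < ROWS_PER_TABLE; ++r) {
                /* per row, only the column part changes; the tag string is reused */
                printf("'2025-01-01 00:00:0%d',%d,%s\n", r, r, tags[i]);
            }
        }
        for (int i = 0; i < CTB_COUNT; ++i) free(tags[i]);
        return 0;
    }

@@ -493,7 +493,7 @@ static void* csvGenStbThread(void* arg) {
 
 
     // tags buffer
-    CsvRowFieldsBuf* tags_buf_bucket = csvGenCtbTagData(write_meta, thread_meta);
+    CsvRowTagsBuf* tags_buf_bucket = csvGenCtbTagData(write_meta, thread_meta);
     if (!tags_buf_bucket) {
         errorPrint("Failed to generate csv tag data. 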
database: %s, super table: %s, naming type: %d, thread index: %d.\n", db->dbName, stb->stbName, write_meta.naming_type, thread_meta->thread_id); @@ -509,7 +509,7 @@ static void* csvGenStbThread(void* arg) { goto end; } - CsvRowFieldsBuf cols_buf = { + CsvRowColsBuf cols_buf = { .buf = buf, .buf_size = buf_size, .length = 0 From 5ce4bd2465fd2e58f1d2b0da91c4cb85221aa0a6 Mon Sep 17 00:00:00 2001 From: Yaming Pei Date: Thu, 27 Feb 2025 15:13:58 +0800 Subject: [PATCH 07/34] refactor: csv init write & thread meta --- tools/taos-tools/src/benchCsv.c | 81 +++++++++++++++++---------------- 1 file changed, 42 insertions(+), 39 deletions(-) diff --git a/tools/taos-tools/src/benchCsv.c b/tools/taos-tools/src/benchCsv.c index 95c94dc57d..673e223959 100644 --- a/tools/taos-tools/src/benchCsv.c +++ b/tools/taos-tools/src/benchCsv.c @@ -138,38 +138,34 @@ static void csvGenThreadFormatter(CsvWriteMeta* meta) { } -static CsvWriteMeta csvInitWriteMeta(SDataBase* db, SSuperTable* stb) { - CsvWriteMeta meta = { - .naming_type = CSV_NAMING_I_SINGLE, - .total_threads = 1, - .thread_formatter = {}, - .db = db, - .stb = stb, - .start_ts = stb->startTimestamp, - .end_ts = stb->startTimestamp + stb->timestamp_step * stb->insertRows, - .ts_step = stb->timestamp_step * stb->insertRows, - .interlace_step = stb->timestamp_step * stb->interlaceRows - }; +static void csvInitWriteMeta(SDataBase* db, SSuperTable* stb, CsvWriteMeta* write_meta) { + write_meta->naming_type = csvGetFileNamingType(stb); + write_meta->total_threads = 1; + write_meta->db = db; + write_meta->stb = stb; + write_meta->start_ts = stb->startTimestamp; + write_meta->end_ts = stb->startTimestamp + stb->timestamp_step * stb->insertRows; + write_meta->ts_step = stb->timestamp_step * stb->insertRows; + write_meta->interlace_step = stb->timestamp_step * stb->interlaceRows; - meta.naming_type = csvGetFileNamingType(stb); switch (meta.naming_type) { case CSV_NAMING_I_SINGLE: { break; } case CSV_NAMING_I_TIME_SLICE: { - csvCalcTimestampStep(&meta); + csvCalcTimestampStep(write_meta); break; } case CSV_NAMING_B_THREAD: { meta.total_threads = g_arguments->nthreads; - csvGenThreadFormatter(&meta); + csvGenThreadFormatter(write_meta); break; } case CSV_NAMING_B_THREAD_TIME_SLICE: { meta.total_threads = g_arguments->nthreads; - csvGenThreadFormatter(&meta); - csvCalcTimestampStep(&meta); + csvGenThreadFormatter(write_meta); + csvCalcTimestampStep(write_meta); break; } default: { @@ -178,25 +174,26 @@ static CsvWriteMeta csvInitWriteMeta(SDataBase* db, SSuperTable* stb) { } } - return meta; + return; } -static CsvThreadMeta csvInitThreadMeta(CsvWriteMeta* write_meta, uint32_t thread_id) { +static void csvInitThreadMeta(CsvWriteMeta* write_meta, uint32_t thread_id, CsvThreadMeta* thread_meta) { SDataBase* db = write_meta->db; SSuperTable* stb = write_meta->stb; - CsvThreadMeta meta = { - .ctb_start_idx = 0, - .ctb_end_idx = 0, - .ctb_count = 0, - .start_secs = 0, - .end_secs = 0, - .thread_id = thread_id, - .tags_buf_bucket = NULL, - .cols_buf = NULL - }; - csvCalcCtbRange(&meta, write_meta->total_threads, stb->childTblFrom, stb->childTblCount); + thread_meta->ctb_start_idx = 0; + thread_meta->ctb_end_idx = 0; + thread_meta->ctb_count = 0; + thread_meta->start_secs = 0; + thread_meta->end_secs = 0; + thread_meta->thread_id = thread_id; + thread_meta->output_header = false; + thread_meta->tags_buf_size = 0; + thread_meta->tags_buf_bucket = NULL; + thread_meta->cols_buf = NULL; + + csvCalcCtbRange(write_meta, write_meta->total_threads, stb->childTblFrom, 
stb->childTblCount); switch (write_meta->naming_type) { case CSV_NAMING_I_SINGLE: @@ -205,17 +202,17 @@ static CsvThreadMeta csvInitThreadMeta(CsvWriteMeta* write_meta, uint32_t thread } case CSV_NAMING_I_TIME_SLICE: case CSV_NAMING_B_THREAD_TIME_SLICE: { - meta.start_secs = csvGetStartSeconds(db->precision, stb->startTimestamp); - meta.end_secs = meta.start_secs + g_arguments->csv_ts_intv_secs; + thread_meta->start_secs = csvGetStartSeconds(db->precision, stb->startTimestamp); + thread_meta->end_secs = thread_meta->start_secs + g_arguments->csv_ts_intv_secs; break; } default: { - meta.naming_type = CSV_NAMING_I_SINGLE; + thread_meta->naming_type = CSV_NAMING_I_SINGLE; break; } } - return meta; + return; } @@ -576,23 +573,28 @@ end: static int csvGenStbProcess(SDataBase* db, SSuperTable* stb) { int ret = 0; - CsvWriteMeta write_meta = csvInitWriteMeta(db, stb); pthread_t* pids = benchCalloc(write_meta.total_threads, sizeof(pthread_t), false); if (!pids) { ret = -1; goto end; } + CsvWriteMeta* write_meta = benchCalloc(1, sizeof(CsvWriteMeta), false); + if (!args) { + ret = -1; + goto end; + } CsvThreadArgs* args = benchCalloc(write_meta.total_threads, sizeof(CsvThreadArgs), false); if (!args) { ret = -1; goto end; } - for (uint32_t i = 0; (i < write_meta.total_threads && !g_arguments->terminate); ++i) { + csvInitWriteMeta(db, stb, write_meta); + for (uint32_t i = 0; (i < write_meta->total_threads && !g_arguments->terminate); ++i) { CsvThreadArgs* arg = &args[i]; - arg->write_meta = &write_meta; - arg->thread_meta = csvInitThreadMeta(&write_meta, i + 1); + arg->write_meta = write_meta; + csvInitThreadMeta(write_meta, i + 1, &arg->thread_meta); ret = pthread_create(&pids[i], NULL, csvGenStbThread, arg); if (!ret) { @@ -602,13 +604,14 @@ static int csvGenStbProcess(SDataBase* db, SSuperTable* stb) { } // wait threads - for (uint32_t i = 0; i < write_meta.total_threads; ++i) { + for (uint32_t i = 0; i < write_meta->total_threads; ++i) { infoPrint("pthread_join %d ...\n", i); pthread_join(pids[i], NULL); } end: tmfree(pids); + tmfree(write_meta); tmfree(args); return ret; } From 8203389adab54cb76fb9c4e180645a431000964e Mon Sep 17 00:00:00 2001 From: Yaming Pei Date: Thu, 27 Feb 2025 16:18:12 +0800 Subject: [PATCH 08/34] feat: csv supports optional table header --- tools/taos-tools/inc/benchCsv.h | 2 + tools/taos-tools/src/benchCsv.c | 83 +++++++++++++++++++++++++-------- 2 files changed, 66 insertions(+), 19 deletions(-) diff --git a/tools/taos-tools/inc/benchCsv.h b/tools/taos-tools/inc/benchCsv.h index 717dfd8a71..c522a12c50 100644 --- a/tools/taos-tools/inc/benchCsv.h +++ b/tools/taos-tools/inc/benchCsv.h @@ -41,6 +41,8 @@ typedef struct { CsvNamingType naming_type; size_t total_threads; char thread_formatter[TINY_BUFF_LEN]; + char csv_header[LARGE_BUFF_LEN]; + int csv_header_length; SDataBase* db; SSuperTable* stb; int64_t start_ts; diff --git a/tools/taos-tools/src/benchCsv.c b/tools/taos-tools/src/benchCsv.c index 673e223959..f1aacaec66 100644 --- a/tools/taos-tools/src/benchCsv.c +++ b/tools/taos-tools/src/benchCsv.c @@ -138,9 +138,44 @@ static void csvGenThreadFormatter(CsvWriteMeta* meta) { } -static void csvInitWriteMeta(SDataBase* db, SSuperTable* stb, CsvWriteMeta* write_meta) { +static int csvGenCsvHeader(CsvWriteMeta* write_meta) { + SDataBase* db = write_meta->db; + SSuperTable* stb = write_meta->stb; + char* buf = write_meta->csv_header; + int pos = 0; + int size = sizeof(write_meta->csv_header); + + if (!g_arguments->csv_output_header) { + return 0; + } + + // ts + pos += 
snprintf(buf + pos, size - pos, "ts"); + + // columns + for (size_t i = 0; i < stb->cols->size; ++i) { + Field* col = benchArrayGet(stb->cols, i); + pos += snprintf(buf + pos, size - pos, ",%s", col->name); + } + + // tbname + pos += snprintf(buf + pos, size - pos, ",%s", g_arguments->csv_tbname_alias); + + // tags + for (size_t i = 0; i < stb->tags->size; ++i) { + Field* tag = benchArrayGet(stb->tags, i); + pos += snprintf(buf + pos, size - pos, ",%s", tag->name); + } + + write_meta->csv_header_length = (pos > 0 && pos < size) ? pos : 0; + return (pos > 0 && pos < size) ? 0 : -1; +} + + +static int csvInitWriteMeta(SDataBase* db, SSuperTable* stb, CsvWriteMeta* write_meta) { write_meta->naming_type = csvGetFileNamingType(stb); write_meta->total_threads = 1; + write_meta->csv_header_length = 0; write_meta->db = db; write_meta->stb = stb; write_meta->start_ts = stb->startTimestamp; @@ -148,6 +183,12 @@ static void csvInitWriteMeta(SDataBase* db, SSuperTable* stb, CsvWriteMeta* writ write_meta->ts_step = stb->timestamp_step * stb->insertRows; write_meta->interlace_step = stb->timestamp_step * stb->interlaceRows; + int ret = csvGenCsvHeader(write_meta); + if (ret < 0) { + errorPrint("Failed to generate csv header data. database: %s, super table: %s, naming type: %d.\n", + db->dbName, stb->stbName, write_meta->naming_type); + return -1; + } switch (meta.naming_type) { case CSV_NAMING_I_SINGLE: { @@ -174,7 +215,7 @@ static void csvInitWriteMeta(SDataBase* db, SSuperTable* stb, CsvWriteMeta* writ } } - return; + return 0; } @@ -428,7 +469,8 @@ static int csvWriteFile(FILE* fp, uint64_t ctb_idx, int64_t cur_ts, int64_t* ck, CsvRowTagsBuf* tags_buf_bucket = thread_meta->tags_buf_bucket; CsvRowColsBuf* tags_buf = &tags_buf_bucket[ctb_idx]; CsvRowColsBuf* cols_buf = thread_meta->cols_buf; - int ret = 0; + int ret = 0; + size_t written = 0; ret = csvGenRowColData(cols_buf->buf, cols_buf->buf_size, stb, cur_ts, db->precision, ck); @@ -440,26 +482,22 @@ static int csvWriteFile(FILE* fp, uint64_t ctb_idx, int64_t cur_ts, int64_t* ck, cols_buf->length = ret; - // write header if (thread_meta->output_header) { - // TODO - + written = fwrite(write_meta->csv_header, 1, write_meta->csv_header_length, fp); thread_meta->output_header = false; } - // write columns - size_t written = fwrite(cols_buf->buf, 1, cols_buf->length, fp); + written = fwrite(cols_buf->buf, 1, cols_buf->length, fp); if (written != cols_buf->length) { errorPrint("Failed to write csv column data, expected written %d but got %zu. database: %s, super table: %s, naming type: %d, thread index: %d, ctb index: %" PRIu64 ".\n", cols_buf->length, written, db->dbName, stb->stbName, write_meta.naming_type, thread_meta->thread_id, ctb_idx); return -1; } - // write tags - size_t written = fwrite(tags_buf->buf, 1, tags_buf->length, fp); + written = fwrite(tags_buf->buf, 1, tags_buf->length, fp); if (written != tags_buf->length) { errorPrint("Failed to write csv tag data, expected written %d but got %zu. 
database: %s, super table: %s, naming type: %d, thread index: %d, ctb index: %" PRIu64 ".\n", tags_buf->length, written, db->dbName, stb->stbName, write_meta.naming_type, thread_meta->thread_id, ctb_idx); @@ -574,23 +612,30 @@ end: static int csvGenStbProcess(SDataBase* db, SSuperTable* stb) { int ret = 0; - pthread_t* pids = benchCalloc(write_meta.total_threads, sizeof(pthread_t), false); - if (!pids) { - ret = -1; - goto end; - } CsvWriteMeta* write_meta = benchCalloc(1, sizeof(CsvWriteMeta), false); - if (!args) { + if (!write_meta) { ret = -1; goto end; } - CsvThreadArgs* args = benchCalloc(write_meta.total_threads, sizeof(CsvThreadArgs), false); + + ret = csvInitWriteMeta(db, stb, write_meta); + if (ret < 0) { + ret = -1; + goto end; + } + + CsvThreadArgs* args = benchCalloc(write_meta->total_threads, sizeof(CsvThreadArgs), false); if (!args) { ret = -1; goto end; } - csvInitWriteMeta(db, stb, write_meta); + pthread_t* pids = benchCalloc(write_meta.total_threads, sizeof(pthread_t), false); + if (!pids) { + ret = -1; + goto end; + } + for (uint32_t i = 0; (i < write_meta->total_threads && !g_arguments->terminate); ++i) { CsvThreadArgs* arg = &args[i]; arg->write_meta = write_meta; @@ -611,8 +656,8 @@ static int csvGenStbProcess(SDataBase* db, SSuperTable* stb) { end: tmfree(pids); - tmfree(write_meta); tmfree(args); + tmfree(write_meta); return ret; } From 32b575f73a31cbff9f2cd530ef97541cc2b302aa Mon Sep 17 00:00:00 2001 From: Yaming Pei Date: Fri, 28 Feb 2025 15:26:20 +0800 Subject: [PATCH 09/34] feat: csv supports gzip --- tools/taos-tools/inc/bench.h | 3 +- tools/taos-tools/inc/benchCsv.h | 26 ++++- tools/taos-tools/src/benchCsv.c | 143 ++++++++++++++++++++++++---- tools/taos-tools/src/benchJsonOpt.c | 17 ++++ 4 files changed, 166 insertions(+), 23 deletions(-) diff --git a/tools/taos-tools/inc/bench.h b/tools/taos-tools/inc/bench.h index 4dd19d83b9..4f41abb903 100644 --- a/tools/taos-tools/inc/bench.h +++ b/tools/taos-tools/inc/bench.h @@ -783,6 +783,7 @@ typedef struct SArguments_S { bool mistMode; bool escape_character; bool pre_load_tb_meta; + bool bind_vgroup; char* output_path; char output_path_buf[MAX_PATH_LEN]; @@ -792,8 +793,8 @@ typedef struct SArguments_S { long csv_ts_intv_secs; bool csv_output_header; bool csv_tbname_alias; + CsvCompressionLevel csv_compress_level; - bool bind_vgroup; } SArguments; typedef struct SBenchConn { diff --git a/tools/taos-tools/inc/benchCsv.h b/tools/taos-tools/inc/benchCsv.h index c522a12c50..2db2ec324e 100644 --- a/tools/taos-tools/inc/benchCsv.h +++ b/tools/taos-tools/inc/benchCsv.h @@ -16,7 +16,8 @@ #ifndef INC_BENCHCSV_H_ #define INC_BENCHCSV_H_ -#include +#include +#include "bench.h" typedef enum { @@ -26,6 +27,29 @@ typedef enum { CSV_NAMING_B_THREAD_TIME_SLICE } CsvNamingType; +typedef enum { + CSV_COMPRESS_NONE = 0, + CSV_COMPRESS_FAST = 1, + CSV_COMPRESS_BALANCE = 6, + CSV_COMPRESS_BEST = 9 +} CsvCompressionLevel; + +typedef enum { + CSV_ERR_OK = 0, + CSV_ERR_OPEN_FAILED, + CSV_ERR_WRITE_FAILED +} CsvIoError; + +typedef struct { + const char* filename; + CsvCompressionLevel compress_level; + CsvIoError result; + union { + gzFile gf; + FILE* fp; + } handle; +} CsvFileHandle; + typedef struct { char* buf; int length; diff --git a/tools/taos-tools/src/benchCsv.c b/tools/taos-tools/src/benchCsv.c index f1aacaec66..97a1a74c0f 100644 --- a/tools/taos-tools/src/benchCsv.c +++ b/tools/taos-tools/src/benchCsv.c @@ -280,6 +280,15 @@ static void csvUpdateSliceRange(CsvWriteMeta* write_meta, CsvThreadMeta* thread_ } +static const char* 
csvGetGzipFilePrefix() {
+    if (g_arguments->csv_compress_level == CSV_COMPRESS_NONE) {
+        return "";
+    } else {
+        return ".gz";
+    }
+}
+
+
 static int csvGetFileFullname(CsvWriteMeta* write_meta, CsvThreadMeta* thread_meta, char* fullname, size_t size) {
     char thread_buf[SMALL_BUFF_LEN];
     char start_time_buf[MIDDLE_BUFF_LEN];
     char end_time_buf[MIDDLE_BUFF_LEN];
     int ret = -1;
     const char* base_path = g_arguments->output_path;
     const char* file_prefix = g_arguments->csv_file_prefix;
+    const char* gzip_suffix = csvGetGzipFilePrefix();
 
     switch (write_meta->naming_type) {
         case CSV_NAMING_I_SINGLE: {
-            ret = snprintf(fullname, size, "%s%s.csv", base_path, file_prefix);
+            ret = snprintf(fullname, size, "%s%s.csv%s", base_path, file_prefix, gzip_suffix);
             break;
         }
         case CSV_NAMING_I_TIME_SLICE: {
             csvConvertTime2String(thread_meta->start_secs, start_time_buf, sizeof(start_time_buf));
             csvConvertTime2String(thread_meta->end_secs, end_time_buf, sizeof(end_time_buf));
-            ret = snprintf(fullname, size, "%s%s_%s_%s.csv", base_path, file_prefix, start_time_buf, end_time_buf);
+            ret = snprintf(fullname, size, "%s%s_%s_%s.csv%s", base_path, file_prefix, start_time_buf, end_time_buf, gzip_suffix);
             break;
         }
         case CSV_NAMING_B_THREAD: {
             (void)snprintf(thread_buf, sizeof(thread_buf), write_meta->thread_formatter, thread_meta->thread_id);
-            ret = snprintf(fullname, size, "%s%s_%s.csv", base_path, file_prefix, thread_buf);
+            ret = snprintf(fullname, size, "%s%s_%s.csv%s", base_path, file_prefix, thread_buf, gzip_suffix);
             break;
         }
         case CSV_NAMING_B_THREAD_TIME_SLICE: {
             (void)snprintf(thread_buf, sizeof(thread_buf), write_meta->thread_formatter, thread_meta->thread_id);
             csvConvertTime2String(thread_meta->start_secs, start_time_buf, sizeof(start_time_buf));
             csvConvertTime2String(thread_meta->end_secs, end_time_buf, sizeof(end_time_buf));
-            ret = snprintf(fullname, size, "%s%s_%s_%s_%s.csv", base_path, file_prefix, thread_buf, start_time_buf, end_time_buf);
+            ret = snprintf(fullname, size, "%s%s_%s_%s_%s.csv%s", base_path, file_prefix, thread_buf, start_time_buf, end_time_buf, gzip_suffix);
             break;
         }
         default: {
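
zlib folds the compression level into gzopen()'s mode string, which is how the
CSV_COMPRESS_FAST/BALANCE/BEST values above (1/6/9) become "wb1"/"wb6"/"wb9". A minimal
self-contained sketch of that zlib usage, independent of the CsvFileHandle wrapper (the
output file name and the row payload are illustrative):

    #include <stdio.h>
    #include <zlib.h>

    int main(void) {
        /* "wb6" = write, binary, compression level 6 (the "balance" level) */
        gzFile gf = gzopen("sample.csv.gz", "wb6");
        if (!gf) {
            fprintf(stderr, "gzopen failed\n");
            return 1;
        }

        const char row[] = "'2025-01-01 00:00:00',1,'d0'\n";
        /* gzwrite returns the number of uncompressed bytes written, 0 on error */
        if (gzwrite(gf, row, (unsigned int)(sizeof(row) - 1)) != (int)(sizeof(row) - 1)) {
            int errnum = 0;
            fprintf(stderr, "gzwrite: %s\n", gzerror(gf, &errnum));
            gzclose(gf);
            return 1;
        }

        gzclose(gf);
        return 0;
    }

static CsvFileHandle* csvOpen(const char* filename, CsvCompressionLevel compress_level) {
    CsvFileHandle* fhdl = NULL;
    bool failed = false;

    fhdl = (CsvFileHandle*)benchCalloc(1, sizeof(CsvFileHandle), false);
    if (!fhdl) {
        errorPrint("Failed to malloc csv file handle. filename: %s, compress level: %d.\n",
                filename, compress_level);
        return NULL;
    }

    if (compress_level == CSV_COMPRESS_NONE) {
        fhdl->handle.fp = fopen(filename, "w");
        failed = (!fhdl->handle.fp);
    } else {
        char mode[TINY_BUFF_LEN];
        (void)snprintf(mode, sizeof(mode), "wb%d", compress_level);
        fhdl->handle.gf = gzopen(filename, mode);
        failed = (!fhdl->handle.gf);
    }

    if (failed) {
        tmfree(fhdl);
        errorPrint("Failed to open csv file handle. 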
filename: %s, compress level: %d.\n", + filename, compress_level); + return NULL; + } else { + fhdl->filename = filename; + fhdl->compress_level = compress_level; + fhdl->result = CSV_ERR_OK; + return fhdl; + } +} + + +static CsvIoError csvWrite(CsvFileHandle* fhdl, const char* buf, size_t size) { + if (fhdl->compress_level == CSV_COMPRESS_NONE) { + size_t ret = fwrite(buf, 1, size, fhdl->handle.fp); + if (ret != size) { + errorPrint("Failed to write csv file: %s. expected written %zu but %zu.\n", + fhdl->filename, size, ret); + if (ferror(fhdl->handle.fp)) { + perror("error"); + } + fhdl->result = CSV_ERR_WRITE_FAILED; + return CSV_ERR_WRITE_FAILED; + } + } else { + unsigned int ret = gzwrite(fhdl->handle.gf, buf, size); + if (ret != size) { + errorPrint("Failed to write csv file: %s. expected written %zu but %zu.\n", + fhdl->filename, size, ret); + int errnum; + const char* errmsg = gzerror(fhdl->handle.gf, &errnum); + errorPrint("gzwrite error: %s\n", errmsg); + fhdl->result = CSV_ERR_WRITE_FAILED; + return CSV_ERR_WRITE_FAILED; + } + } + return CSV_ERR_OK; +} + + +static void csvClose(CsvFileHandle* fhdl) { + if (fhdl->compress_level == CSV_COMPRESS_NONE) { + if (fhdl->handle.fp) { + fclose(fhdl->handle.fp); + fhdl->handle.fp = NULL; + } + } else { + if (fhdl->handle.gf) { + gzclose(fhdl->handle.gf); + fhdl->handle.gf = NULL; + } + } +} + + +static int csvWriteFile(CsvFileHandle* fhdl, uint64_t ctb_idx, int64_t cur_ts, int64_t* ck, CsvWriteMeta* write_meta, CsvThreadMeta* thread_meta) { SDataBase* db = write_meta->db; SSuperTable* stb = write_meta->stb; CsvRowTagsBuf* tags_buf_bucket = thread_meta->tags_buf_bucket; CsvRowColsBuf* tags_buf = &tags_buf_bucket[ctb_idx]; CsvRowColsBuf* cols_buf = thread_meta->cols_buf; - int ret = 0; - size_t written = 0; + int ret = 0; ret = csvGenRowColData(cols_buf->buf, cols_buf->buf_size, stb, cur_ts, db->precision, ck); @@ -484,23 +571,37 @@ static int csvWriteFile(FILE* fp, uint64_t ctb_idx, int64_t cur_ts, int64_t* ck, // write header if (thread_meta->output_header) { - written = fwrite(write_meta->csv_header, 1, write_meta->csv_header_length, fp); + ret = csvWrite(fhdl, write_meta->csv_header, write_meta->csv_header_length); + if (ret != CSV_ERR_OK) { + errorPrint("Failed to write csv header data. database: %s, super table: %s, naming type: %d, thread index: %d, ctb index: %" PRIu64 ".\n", + db->dbName, stb->stbName, write_meta->naming_type, thread_meta->thread_id, ctb_idx); + return -1; + } + thread_meta->output_header = false; } // write columns - written = fwrite(cols_buf->buf, 1, cols_buf->length, fp); - if (written != cols_buf->length) { + ret = csvWrite(fhdl, cols_buf->buf, cols_buf->length); + if (ret != CSV_ERR_OK) { errorPrint("Failed to write csv column data, expected written %d but got %zu. database: %s, super table: %s, naming type: %d, thread index: %d, ctb index: %" PRIu64 ".\n", - cols_buf->length, written, db->dbName, stb->stbName, write_meta.naming_type, thread_meta->thread_id, ctb_idx); + db->dbName, stb->stbName, write_meta->naming_type, thread_meta->thread_id, ctb_idx); return -1; } // write tags - written = fwrite(tags_buf->buf, 1, tags_buf->length, fp); - if (written != tags_buf->length) { - errorPrint("Failed to write csv tag data, expected written %d but got %zu. 
database: %s, super table: %s, naming type: %d, thread index: %d, ctb index: %" PRIu64 ".\n", - tags_buf->length, written, db->dbName, stb->stbName, write_meta.naming_type, thread_meta->thread_id, ctb_idx); + ret = csvWrite(fhdl, tags_buf->buf, tags_buf->length); + if (ret != CSV_ERR_OK) { + errorPrint("Failed to write csv tag data. database: %s, super table: %s, naming type: %d, thread index: %d, ctb index: %" PRIu64 ".\n", + db->dbName, stb->stbName, write_meta->naming_type, thread_meta->thread_id, ctb_idx); + return -1; + } + + // write line break + ret = csvWrite(fhdl, "\n", 1); + if (ret != CSV_ERR_OK) { + errorPrint("Failed to write csv line break data. database: %s, super table: %s, naming type: %d, thread index: %d, ctb index: %" PRIu64 ".\n", + db->dbName, stb->stbName, write_meta->naming_type, thread_meta->thread_id, ctb_idx); return -1; } @@ -523,7 +624,7 @@ static void* csvGenStbThread(void* arg) { int64_t ck = 0; uint64_t ctb_idx = 0; int ret = 0; - FILE* fp = NULL; + CsvFileHandle* fhdl = NULL; char fullname[MAX_PATH_LEN] = {}; @@ -565,8 +666,8 @@ static void* csvGenStbThread(void* arg) { } // create fd - fp = fopen(fullname, "w"); - if (fp == NULL) { + fhdl = csvOpen(fullname, g_arguments->csv_compress_level); + if (fhdl == NULL) { errorPrint("Failed to create csv file. thread index: %d, file: %s, errno: %d, strerror: %s.\n", thread_meta->thread_id, fullname, errno, strerror(errno)); goto end; @@ -583,11 +684,11 @@ static void* csvGenStbThread(void* arg) { for (ctb_idx = 0; ctb_idx < thread_meta->ctb_count; ++ctb_idx) { for (slice_ctb_cur_ts = slice_cur_ts; slice_ctb_cur_ts < slice_batch_ts; slice_ctb_cur_ts += write_meta->stb->timestamp_step) { - ret = csvWriteFile(fp, ctb_idx, slice_ctb_cur_ts, &ck, write_meta, thread_meta); + ret = csvWriteFile(fhdl, ctb_idx, slice_ctb_cur_ts, &ck, write_meta, thread_meta); if (!ret) { errorPrint("Failed to write csv file. 
thread index: %d, file: %s, errno: %d, strerror: %s.\n", thread_meta->thread_id, fullname, errno, strerror(errno)); - fclose(fp); + csvClose(fhdl); goto end; } @@ -598,7 +699,7 @@ static void* csvGenStbThread(void* arg) { slice_cur_ts = slice_batch_ts; } - fclose(fp); + csvClose(fhdl); csvUpdateSliceRange(write_meta, thread_meta, last_end_ts); } diff --git a/tools/taos-tools/src/benchJsonOpt.c b/tools/taos-tools/src/benchJsonOpt.c index 4cf690204e..26c6200157 100644 --- a/tools/taos-tools/src/benchJsonOpt.c +++ b/tools/taos-tools/src/benchJsonOpt.c @@ -14,6 +14,7 @@ #include #include #include "benchLog.h" +#include "benchCsv.h" extern char g_configDir[MAX_PATH_LEN]; @@ -1636,6 +1637,22 @@ static int getMetaFromCommonJsonFile(tools_cJSON *json) { g_arguments->csv_tbname_alias = "device_id"; } + // csv compression level + tools_cJSON* cl = tools_cJSON_GetObjectItem(json, "csv_compress_level"); + if (cl && cl->type == tools_cJSON_String && cl->valuestring != NULL) { + if (0 == strcasecmp(cl->valuestring, "none")) { + g_arguments->csv_compress_level = CSV_COMPRESS_NONE; + } else if (0 == strcasecmp(cl->valuestring, "fast")) { + g_arguments->csv_compress_level = CSV_COMPRESS_FAST; + } else if (0 == strcasecmp(cl->valuestring, "balance")) { + g_arguments->csv_compress_level = CSV_COMPRESS_BALANCE; + } else if (0 == strcasecmp(cl->valuestring, "best")) { + g_arguments->csv_compress_level = CSV_COMPRESS_BEST; + } + } else { + g_arguments->csv_compress_level = CSV_COMPRESS_NONE; + } + code = 0; return code; } From 13d845935e9eb3168094632434815a839004906b Mon Sep 17 00:00:00 2001 From: Yaming Pei Date: Fri, 28 Feb 2025 16:01:28 +0800 Subject: [PATCH 10/34] feat: abort exit message when writing to csv --- tools/taos-tools/src/benchCsv.c | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/tools/taos-tools/src/benchCsv.c b/tools/taos-tools/src/benchCsv.c index 97a1a74c0f..e7db3481b1 100644 --- a/tools/taos-tools/src/benchCsv.c +++ b/tools/taos-tools/src/benchCsv.c @@ -693,6 +693,11 @@ static void* csvGenStbThread(void* arg) { } ck += 1; + + if (!g_arguments->terminate) { + csvClose(fhdl); + goto end; + } } } @@ -705,13 +710,14 @@ static void* csvGenStbThread(void* arg) { end: csvFreeCtbTagData(tags_buf_bucket); - tmfree(cols_buf); + tmfree(buf); return NULL; } static int csvGenStbProcess(SDataBase* db, SSuperTable* stb) { int ret = 0; + bool prompt = true; CsvWriteMeta* write_meta = benchCalloc(1, sizeof(CsvWriteMeta), false); if (!write_meta) { @@ -751,6 +757,11 @@ static int csvGenStbProcess(SDataBase* db, SSuperTable* stb) { // wait threads for (uint32_t i = 0; i < write_meta->total_threads; ++i) { + if (g_arguments->terminate && prompt) { + infoPrint("Operation cancelled by user, exiting gracefully...\n"); + prompt = false; + } + infoPrint("pthread_join %d ...\n", i); pthread_join(pids[i], NULL); } From 695e92110568fad2381d613a1f9469c1b0300333 Mon Sep 17 00:00:00 2001 From: Yaming Pei Date: Fri, 28 Feb 2025 17:39:11 +0800 Subject: [PATCH 11/34] feat: csv writing statitics --- tools/taos-tools/inc/benchCsv.h | 1 + tools/taos-tools/src/benchCsv.c | 78 +++++++++++++++++++++++++++------ 2 files changed, 65 insertions(+), 14 deletions(-) diff --git a/tools/taos-tools/inc/benchCsv.h b/tools/taos-tools/inc/benchCsv.h index 2db2ec324e..11666a0b45 100644 --- a/tools/taos-tools/inc/benchCsv.h +++ b/tools/taos-tools/inc/benchCsv.h @@ -79,6 +79,7 @@ typedef struct { uint64_t ctb_start_idx; uint64_t ctb_end_idx; uint64_t ctb_count; + uint64_t total_rows; time_t start_secs; time_t 
end_secs; size_t thread_id; diff --git a/tools/taos-tools/src/benchCsv.c b/tools/taos-tools/src/benchCsv.c index e7db3481b1..92641f8ea8 100644 --- a/tools/taos-tools/src/benchCsv.c +++ b/tools/taos-tools/src/benchCsv.c @@ -622,11 +622,19 @@ static void* csvGenStbThread(void* arg) { int64_t slice_batch_ts = 0; int64_t slice_ctb_cur_ts = 0; int64_t ck = 0; - uint64_t ctb_idx = 0; + uint64_t ctb_idx = 0; int ret = 0; CsvFileHandle* fhdl = NULL; char fullname[MAX_PATH_LEN] = {}; + uint64_t total_rows = 0; + uint64_t pre_total_rows = 0; + uint64_t file_rows = 0; + int64_t start_print_ts = 0; + int64_t pre_print_ts = 0; + int64_t cur_print_ts = 0; + int64_t print_ts_elapse = 0; + // tags buffer CsvRowTagsBuf* tags_buf_bucket = csvGenCtbTagData(write_meta, thread_meta); @@ -653,7 +661,7 @@ static void* csvGenStbThread(void* arg) { thread_meta->tags_buf_bucket = tags_buf_bucket; thread_meta->cols_buf = &cols_buf; - + start_print_ts = toolsGetTimestampMs(); for (cur_ts = write_meta->start_ts; cur_ts < write_meta->end_ts; cur_ts += write_meta->ts_step) { // get filename @@ -677,6 +685,9 @@ static void* csvGenStbThread(void* arg) { thread_meta->output_header = g_arguments->csv_output_header; slice_cur_ts = cur_ts; slice_end_ts = MIN(cur_ts + write_meta->ts_step, write_meta->end_ts); + file_rows = 0; + + infoPrint("thread[%d] begin to write csv file: %s.\n", thread_meta->thread_id, fullname); // write data while (slice_cur_ts < slice_end_ts) { @@ -692,7 +703,20 @@ static void* csvGenStbThread(void* arg) { goto end; } - ck += 1; + ck += 1; + total_rows += 1; + file_rows += 1; + + cur_print_ts = toolsGetTimestampMs(); + print_ts_elapse = cur_print_ts - pre_print_ts; + if (print_ts_elapse > 30000) { + infoPrint("thread[%d] has currently inserted rows: %" PRIu64 ", period insert rate: %.2f rows/s.\n", + thread_meta->thread_id, total_rows, (total_rows - pre_total_rows) * 1000.0 / print_ts_elapse); + + pre_print_ts = cur_print_ts; + pre_total_rows = total_rows; + } + if (!g_arguments->terminate) { csvClose(fhdl); @@ -708,7 +732,14 @@ static void* csvGenStbThread(void* arg) { csvUpdateSliceRange(write_meta, thread_meta, last_end_ts); } + cur_print_ts = toolsGetTimestampMs(); + print_ts_elapse = cur_print_ts - start_print_ts; + + succPrint("thread [%d] has completed inserting rows: %" PRIu64 ", insert rate %.2f rows/s.\n", + thread_meta->thread_id, total_rows, total_rows * 1000.0 / print_ts_elapse); + end: + thread_meta->total_rows = total_rows; csvFreeCtbTagData(tags_buf_bucket); tmfree(buf); return NULL; @@ -716,8 +747,12 @@ end: static int csvGenStbProcess(SDataBase* db, SSuperTable* stb) { - int ret = 0; - bool prompt = true; + int ret = 0; + bool prompt = true; + uint64_t total_rows = 0; + int64_t start_ts = 0; + int64_t ts_elapse = 0; + CsvWriteMeta* write_meta = benchCalloc(1, sizeof(CsvWriteMeta), false); if (!write_meta) { @@ -743,6 +778,7 @@ static int csvGenStbProcess(SDataBase* db, SSuperTable* stb) { goto end; } + start_ts = toolsGetTimestampMs(); for (uint32_t i = 0; (i < write_meta->total_threads && !g_arguments->terminate); ++i) { CsvThreadArgs* arg = &args[i]; arg->write_meta = write_meta; @@ -766,6 +802,20 @@ static int csvGenStbProcess(SDataBase* db, SSuperTable* stb) { pthread_join(pids[i], NULL); } + // statistics + total_rows = 0; + for (uint32_t i = 0; i < write_meta->total_threads; ++i) { + CsvThreadArgs* arg = &args[i]; + CsvThreadMeta* thread_meta = &arg->thread_meta; + total_rows += thread_meta->total_rows; + } + + ts_elapse = toolsGetTimestampMs() - start_ts; + if (ts_elapse > 0) { 
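+        // note: ts_elapse is measured in milliseconds, so the rate below is
+        // total_rows * 1000.0 / ts_elapse rows/s; the guard above avoids a
+        // division by zero when the whole run completes within one millisecond.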
+ succPrint("Spent %.6f seconds to insert rows: %" PRIu64 " with %d thread(s) into %s, at a rate of %.2f rows/s.\n", + ts_elapse / 1000.0, total_rows, write_meta->total_threads, g_arguments->output_path, total_rows * 1000.0 / ts_elapse); + } + end: tmfree(pids); tmfree(args); @@ -859,14 +909,14 @@ static int csvParseParameter() { // csv_output_path size_t len = strlen(g_arguments->output_path); if (len == 0) { - errorPrint("Failed to generate CSV files, the specified output path is empty. Please provide a valid path. database: %s, super table: %s.\n", + errorPrint("Failed to generate csv files, the specified output path is empty. Please provide a valid path. database: %s, super table: %s.\n", db->dbName, stb->stbName); return -1; } if (g_arguments->output_path[len - 1] != '/') { int n = snprintf(g_arguments->output_path_buf, sizeof(g_arguments->output_path_buf), "%s/", g_arguments->output_path); if (n < 0 || n >= sizeof(g_arguments->output_path_buf)) { - errorPrint("Failed to generate CSV files, path buffer overflow risk when appending '/'. path: %s, database: %s, super table: %s.\n", + errorPrint("Failed to generate csv files, path buffer overflow risk when appending '/'. path: %s, database: %s, super table: %s.\n", g_arguments->csv_output_path, db->dbName, stb->stbName); return -1; } @@ -876,7 +926,7 @@ static int csvParseParameter() { // csv_ts_format if (g_arguments->csv_ts_format) { if (csvValidateParamTsFormat(g_arguments->csv_ts_format) != 0) { - errorPrint("Failed to generate CSV files, the parameter `csv_ts_format` is invalid. csv_ts_format: %s, database: %s, super table: %s.\n", + errorPrint("Failed to generate csv files, the parameter `csv_ts_format` is invalid. csv_ts_format: %s, database: %s, super table: %s.\n", g_arguments->csv_ts_format, db->dbName, stb->stbName); return -1; } @@ -885,7 +935,7 @@ static int csvParseParameter() { // csv_ts_interval long csv_ts_intv_secs = csvValidateParamTsInterval(g_arguments->csv_ts_interval); if (csv_ts_intv_secs <= 0) { - errorPrint("Failed to generate CSV files, the parameter `csv_ts_interval` is invalid. csv_ts_interval: %s, database: %s, super table: %s.\n", + errorPrint("Failed to generate csv files, the parameter `csv_ts_interval` is invalid. csv_ts_interval: %s, database: %s, super table: %s.\n", g_arguments->csv_ts_interval, db->dbName, stb->stbName); return -1; } @@ -910,7 +960,7 @@ static int csvWriteThread() { // gen csv int ret = csvGenStb(db, stb); if(ret != 0) { - errorPrint("Failed to generate CSV files. database: %s, super table: %s, error code: %d.\n", + errorPrint("Failed to generate csv files. database: %s, super table: %s, error code: %d.\n", db->dbName, stb->stbName, ret); return -1; } @@ -928,14 +978,14 @@ int csvTestProcess() { return -1; } - infoPrint("Starting to output data to CSV files in directory: %s ...\n", g_arguments->output_path); + infoPrint("Starting to output data to csv files in directory: %s ...\n", g_arguments->output_path); int64_t start = toolsGetTimestampMs(); int ret = csvWriteThread(); if (ret != 0) { return -1; } - int64_t delay = toolsGetTimestampMs() - start; - infoPrint("Generating CSV files in directory: %s has been completed. Time elapsed: %.3f seconds\n", - g_arguments->output_path, delay / 1000.0); + int64_t elapse = toolsGetTimestampMs() - start; + infoPrint("Generating csv files in directory: %s has been completed. 
Time elapsed: %.3f seconds\n", + g_arguments->output_path, elapse / 1000.0); return 0; } From 4d7c1a40671111a02903917bcc3b0c3099f0c1e4 Mon Sep 17 00:00:00 2001 From: Yaming Pei Date: Mon, 3 Mar 2025 09:06:08 +0800 Subject: [PATCH 12/34] feat: csv supports exporting create sql stmt --- tools/taos-tools/inc/benchCsv.h | 3 - tools/taos-tools/src/benchCsv.c | 288 ++++++++++++++++++++++++++++++++ 2 files changed, 288 insertions(+), 3 deletions(-) diff --git a/tools/taos-tools/inc/benchCsv.h b/tools/taos-tools/inc/benchCsv.h index 11666a0b45..62e0dea7d7 100644 --- a/tools/taos-tools/inc/benchCsv.h +++ b/tools/taos-tools/inc/benchCsv.h @@ -95,9 +95,6 @@ typedef struct { } CsvThreadArgs; - - - int csvTestProcess(); #endif // INC_BENCHCSV_H_ diff --git a/tools/taos-tools/src/benchCsv.c b/tools/taos-tools/src/benchCsv.c index 92641f8ea8..246ff79287 100644 --- a/tools/taos-tools/src/benchCsv.c +++ b/tools/taos-tools/src/benchCsv.c @@ -167,11 +167,296 @@ static int csvGenCsvHeader(CsvWriteMeta* write_meta) { pos += snprintf(buf + pos, size - pos, ",%s", tag->name); } + // line break + pos += snprintf(buf + pos, size - pos, "\n"); + write_meta->csv_header_length = (pos > 0 && pos < size) ? pos : 0; return (pos > 0 && pos < size) ? 0 : -1; } +int csvGenCreateDbSql(SDataBase* db, char* buf, int size) { + int pos = 0; + + pos += snprintf(buf + pos, size - pos, "CREATE DATABASE IF NOT EXISTS "); + if (pos <= 0 || pos >= size) return -1; + + pos += snprintf(buf + pos, size - pos, g_arguments->escape_character ? "`%s`" : "%s", db->dbName); + if (pos <= 0 || pos >= size) return -1; + + if (-1 != g_arguments->inputted_vgroups) { + pos += snprintf(buf + pos, size - pos, " VGROUPS %d", g_arguments->inputted_vgroups); + if (pos <= 0 || pos >= size) return -1; + } + + if (db->cfgs) { + for (size i = 0; i < db->cfgs->size; ++i) { + SDbCfg* cfg = benchArrayGet(db->cfgs, i); + if (cfg->valuestring) { + pos += snprintf(buf + pos, size - pos, " %s %s", cfg->name, cfg->valuestring); + } else { + pos += snprintf(buf + pos, size - pos, " %s %d", cfg->name, cfg->valueint); + } + if (pos <= 0 || pos >= size) return -1; + } + } + + switch (db->precision) { + case TSDB_TIME_PRECISION_MILLI: + pos += snprintf(buf + pos, size - pos, " PRECISION 'ms';\n"); + break; + case TSDB_TIME_PRECISION_MICRO: + pos += snprintf(buf + pos, size - pos, " PRECISION 'us';\n"); + break; + case TSDB_TIME_PRECISION_NANO: + pos += snprintf(buf + pos, size - pos, " PRECISION 'ns';\n"); + break; + } + + return (pos > 0 && pos < size) ? pos : -1; +} + + +static int csvExportCreateDbSql(CsvWriteMeta* write_meta, FILE* fp) { + char buf[LARGE_BUFF_LEN] = {}; + int ret = 0; + int length = 0; + + length = csvGenCreateDbSql(write_meta->db, buf, sizeof(buf)); + if (length < 0) { + errorPrint("Failed to generate create db sql, maybe buffer[%d] not enough.\n", sizeof(buf)); + return -1; + } + + ret = fwrite(buf, 1, length, fp); + if (ret != length) { + errorPrint("Failed to write create db sql: %s. expected written %d but %d.\n", + buf, length, ret); + if (ferror(fp)) { + perror("error"); + } + return -1; + } + + return 0; +} + + +int csvGenCreateStbSql(SDataBase* db, SSuperTable* stb, char* buf, int size) { + int pos = 0; + + pos += snprintf(buf + pos, size - pos, "CREATE TABLE IF NOT EXISTS "); + if (pos <= 0 || pos >= size) return -1; + + pos += snprintf(buf + pos, size - pos, g_arguments->escape_character ? 
"`%s`.`%s`" : "%s.%s", db->dbName, stb->stbName); + if (pos <= 0 || pos >= size) return -1; + + pos += snprintf(buf + pos, size - pos, " (ts TIMESTAMP"); + if (pos <= 0 || pos >= size) return -1; + + + // columns + for (sizt_t i = 0; i < stb->cols->size; ++i) { + Field* col = benchArrayGet(stb->cols, i); + + if (col->type == TSDB_DATA_TYPE_BINARY + || col->type == TSDB_DATA_TYPE_NCHAR + || col->type == TSDB_DATA_TYPE_VARBINARY + || col->type == TSDB_DATA_TYPE_GEOMETRY) { + + if (col->type == TSDB_DATA_TYPE_GEOMETRY && col->length < 21) { + errorPrint("%s() LN%d, geometry filed len must be greater than 21 on %d\n", __func__, __LINE__, i); + return -1; + } + + pos += snprintf(buf + pos, size - pos, ",%s %s(%d)", col->name, convertDatatypeToString(col->type), col->length); + } else { + pos += snprintf(buf + pos, size - pos, ",%s %s", col->name, convertDatatypeToString(col->type)); + } + if (pos <= 0 || pos >= size) return -1; + + // primary key + if (stb->primary_key && i == 0) { + pos += snprintf(buf + pos, size - pos, " %s", PRIMARY_KEY); + if (pos <= 0 || pos >= size) return -1; + } + + // compress key + if (strlen(col->encode) > 0) { + pos += snprintf(buf + pos, size - pos, " encode '%s'", col->encode); + if (pos <= 0 || pos >= size) return -1; + } + if (strlen(col->compress) > 0) { + pos += snprintf(buf + pos, size - pos, " compress '%s'", col->compress); + if (pos <= 0 || pos >= size) return -1; + } + if (strlen(col->level) > 0) { + pos += snprintf(buf + pos, size - pos, " level '%s'", col->level); + if (pos <= 0 || pos >= size) return -1; + } + } + + pos += snprintf(buf + pos, size - pos, ") TAGS ("); + if (pos <= 0 || pos >= size) return -1; + + + // tags + for (sizt_t i = 0; i < stb->tags->size; ++i) { + Field* tag = benchArrayGet(stb->tags, i); + + if (i > 0) { + pos += snprintf(buf + pos, size - pos, ","); + if (pos <= 0 || pos >= size) return -1; + } + + if (tag->type == TSDB_DATA_TYPE_BINARY + || tag->type == TSDB_DATA_TYPE_NCHAR + || tag->type == TSDB_DATA_TYPE_VARBINARY + || tag->type == TSDB_DATA_TYPE_GEOMETRY) { + + if (tag->type == TSDB_DATA_TYPE_GEOMETRY && tag->length < 21) { + errorPrint("%s() LN%d, geometry filed len must be greater than 21 on %d\n", __func__, __LINE__, i); + return -1; + } + + pos += snprintf(buf + pos, size - pos, "%s %s(%d)", tag->name, convertDatatypeToString(tag->type), tag->length); + + } else { + pos += snprintf(buf + pos, size - pos, "%s %s", tag->name, convertDatatypeToString(tag->type)); + } + if (pos <= 0 || pos >= size) return -1; + } + + pos += snprintf(buf + pos, size - pos, ")"); + if (pos <= 0 || pos >= size) return -1; + + + // comment + if (stb->comment != NULL) { + pos += snprintf(buf + pos, size - pos," COMMENT '%s'", stb->comment); + if (pos <= 0 || pos >= size) return -1; + } + + // delay + if (stb->delay >= 0) { + pos += snprintf(buf + pos, size - pos, " DELAY %d", stb->delay); + if (pos <= 0 || pos >= size) return -1; + } + + // file factor + if (stb->file_factor >= 0) { + pos += snprintf(buf + pos, size - pos, " FILE_FACTOR %f", stb->file_factor / 100.0); + if (pos <= 0 || pos >= size) return -1; + } + + // rollup + if (stb->rollup != NULL) { + pos += snprintf(buf + pos, size - pos, " ROLLUP(%s)", stb->rollup); + if (pos <= 0 || pos >= size) return -1; + } + + // max delay + if (stb->max_delay != NULL) { + pos += snprintf(buf + pos, size - pos, " MAX_DELAY %s", stb->max_delay); + if (pos <= 0 || pos >= size) return -1; + } + + // watermark + if (stb->watermark != NULL) { + pos += snprintf(buf + pos, size - pos, " WATERMARK %s", 
stb->watermark); + if (pos <= 0 || pos >= size) return -1; + } + + bool first_sma = true; + for (size_t i = 0; i < stb->cols->size; ++i) { + Field* col = benchArrayGet(stb->cols, i); + if (col->sma) { + if (first_sma) { + pos += snprintf(buf + pos, size - pos, " SMA(%s", col->name); + first_sma = false; + } else { + pos += snprintf(buf + pos, size - pos, ",%s", col->name); + } + if (pos <= 0 || pos >= size) return -1; + } + } + if (!first_sma) { + pos += snprintf(buf + pos, size - pos, ")"); + if (pos <= 0 || pos >= size) return -1; + } + + infoPrint("create stable: <%s>\n", buf); + return (pos > 0 && pos < size) ? pos : -1; +} + + +static int csvExportCreateStbSql(CsvWriteMeta* write_meta, FILE* fp) { + char buf[4096] = {}; + int ret = 0; + int length = 0; + + length = csvGenCreateStbSql(write_meta->db, write_meta->stb, buf, sizeof(buf)); + if (length < 0) { + errorPrint("Failed to generate create stb sql, maybe buffer[%d] not enough.\n", sizeof(buf)); + return -1; + } + + ret = fwrite(buf, 1, length, fp); + if (ret != length) { + errorPrint("Failed to write create stb sql: %s. expected written %d but %d.\n", + buf, length, ret); + if (ferror(fp)) { + perror("error"); + } + return -1; + } + + return 0; +} + + +static int csvExportCreateSql(CsvWriteMeta* write_meta) { + char fullname[MAX_PATH_LEN] = {}; + char buf[LARGE_BUFF_LEN] = {}; + int ret = 0; + int length = 0; + FILE* fp = NULL; + + + length = snprintf(fullname, sizeof(fullname), "%s%s.txt", g_arguments->output_path, "create_stmt"); + if (length <= 0 || length >= sizeof(fullname)) { + return -1; + } + + FILE* fp = fopen(fullname, "w"); + if (!fp) { + return -1; + } + + + // export db + ret = csvExportCreateDbSql(write_meta, fp); + if (ret < 0) { + goto end; + } + + // export stb + ret = csvExportCreateStbSql(write_meta, fp); + if (ret < 0) { + goto end; + } + + succPrint("Export create sql to file: %s successfully..\n", fullname); + +end: + if (fp) { + fclose(fp); + } + + return ret; +} + + static int csvInitWriteMeta(SDataBase* db, SSuperTable* stb, CsvWriteMeta* write_meta) { write_meta->naming_type = csvGetFileNamingType(stb); write_meta->total_threads = 1; @@ -816,6 +1101,9 @@ static int csvGenStbProcess(SDataBase* db, SSuperTable* stb) { ts_elapse / 1000.0, total_rows, write_meta->total_threads, g_arguments->output_path, total_rows * 1000.0 / ts_elapse); } + // export create db/stb sql + ret = csvExportCreateSql(write_meta); + end: tmfree(pids); tmfree(args); From 8ba478cad0ffd60897601c78ca8c245c1b93a833 Mon Sep 17 00:00:00 2001 From: Yaming Pei Date: Mon, 3 Mar 2025 14:34:15 +0800 Subject: [PATCH 13/34] fix: resolve csv compilation errors --- tools/taos-tools/inc/bench.h | 11 +- tools/taos-tools/inc/benchCsv.h | 9 +- tools/taos-tools/inc/benchLog.h | 32 ++-- tools/taos-tools/src/benchCsv.c | 222 ++++++++++++++-------------- tools/taos-tools/src/benchJsonOpt.c | 1 - 5 files changed, 139 insertions(+), 136 deletions(-) diff --git a/tools/taos-tools/inc/bench.h b/tools/taos-tools/inc/bench.h index ac187d2575..30973170a3 100644 --- a/tools/taos-tools/inc/bench.h +++ b/tools/taos-tools/inc/bench.h @@ -719,6 +719,14 @@ typedef struct STmqMetaInfo_S { uint16_t iface; } STmqMetaInfo; + +typedef enum { + CSV_COMPRESS_NONE = 0, + CSV_COMPRESS_FAST = 1, + CSV_COMPRESS_BALANCE = 6, + CSV_COMPRESS_BEST = 9 +} CsvCompressionLevel; + typedef struct SArguments_S { uint8_t taosc_version; char * metaFile; @@ -786,9 +794,10 @@ typedef struct SArguments_S { char* csv_file_prefix; char* csv_ts_format; char* csv_ts_interval; + char* 
csv_tbname_alias; long csv_ts_intv_secs; bool csv_output_header; - bool csv_tbname_alias; + CsvCompressionLevel csv_compress_level; } SArguments; diff --git a/tools/taos-tools/inc/benchCsv.h b/tools/taos-tools/inc/benchCsv.h index 62e0dea7d7..f9f87aa341 100644 --- a/tools/taos-tools/inc/benchCsv.h +++ b/tools/taos-tools/inc/benchCsv.h @@ -27,13 +27,6 @@ typedef enum { CSV_NAMING_B_THREAD_TIME_SLICE } CsvNamingType; -typedef enum { - CSV_COMPRESS_NONE = 0, - CSV_COMPRESS_FAST = 1, - CSV_COMPRESS_BALANCE = 6, - CSV_COMPRESS_BEST = 9 -} CsvCompressionLevel; - typedef enum { CSV_ERR_OK = 0, CSV_ERR_OPEN_FAILED, @@ -85,7 +78,7 @@ typedef struct { size_t thread_id; bool output_header; int tags_buf_size; - CsvRowTagsBuf* tags_buf_bucket; + CsvRowTagsBuf* tags_buf_array; CsvRowColsBuf* cols_buf; } CsvThreadMeta; diff --git a/tools/taos-tools/inc/benchLog.h b/tools/taos-tools/inc/benchLog.h index 426112bcd8..961a037e3c 100644 --- a/tools/taos-tools/inc/benchLog.h +++ b/tools/taos-tools/inc/benchLog.h @@ -16,6 +16,8 @@ #ifndef INC_BENCHLOG_H_ #define INC_BENCHLOG_H_ +#include + // // suport thread safe log module // @@ -53,7 +55,7 @@ void exitLog(); (int32_t)timeSecs.tv_usec); \ fprintf(stdout, "DEBG: "); \ fprintf(stdout, "%s(%d) ", __FILE__, __LINE__); \ - fprintf(stdout, "" fmt, __VA_ARGS__); \ + fprintf(stdout, "" fmt, ##__VA_ARGS__); \ unlockLog(LOG_STDOUT); \ } \ } while (0) @@ -74,7 +76,7 @@ void exitLog(); (int32_t)timeSecs.tv_usec); \ fprintf(stdout, "DEBG: "); \ fprintf(stdout, "%s(%d) ", __FILE__, __LINE__); \ - fprintf(stdout, "" fmt, __VA_ARGS__); \ + fprintf(stdout, "" fmt, ##__VA_ARGS__); \ unlockLog(LOG_STDOUT); \ } \ } while (0) @@ -94,7 +96,7 @@ void exitLog(); do { \ if (g_arguments->debug_print) { \ lockLog(LOG_STDOUT); \ - fprintf(stdout, "" fmt, __VA_ARGS__); \ + fprintf(stdout, "" fmt, ##__VA_ARGS__); \ unlockLog(LOG_STDOUT); \ } \ } while (0) @@ -102,14 +104,14 @@ void exitLog(); #define infoPrintNoTimestamp(fmt, ...) \ do { \ lockLog(LOG_STDOUT); \ - fprintf(stdout, "" fmt, __VA_ARGS__); \ + fprintf(stdout, "" fmt, ##__VA_ARGS__); \ unlockLog(LOG_STDOUT); \ } while (0) #define infoPrintNoTimestampToFile(fmt, ...) 
\ do { \ lockLog(LOG_RESULT); \ - fprintf(g_arguments->fpOfInsertResult, "" fmt, __VA_ARGS__); \ + fprintf(g_arguments->fpOfInsertResult, "" fmt, ##__VA_ARGS__); \ unlockLog(LOG_RESULT); \ } while (0) @@ -126,7 +128,7 @@ void exitLog(); ptm->tm_mon + 1, \ ptm->tm_mday, ptm->tm_hour, ptm->tm_min, ptm->tm_sec, \ (int32_t)timeSecs.tv_usec); \ - fprintf(stdout, "INFO: " fmt, __VA_ARGS__); \ + fprintf(stdout, "INFO: " fmt, ##__VA_ARGS__); \ unlockLog(LOG_STDOUT); \ } while (0) @@ -142,7 +144,7 @@ void exitLog(); fprintf(g_arguments->fpOfInsertResult,"[%02d/%02d %02d:%02d:%02d.%06d] ", ptm->tm_mon + 1, \ ptm->tm_mday, ptm->tm_hour, ptm->tm_min, ptm->tm_sec, \ (int32_t)timeSecs.tv_usec); \ - fprintf(g_arguments->fpOfInsertResult, "INFO: " fmt, __VA_ARGS__);\ + fprintf(g_arguments->fpOfInsertResult, "INFO: " fmt, ##__VA_ARGS__);\ unlockLog(LOG_RESULT); \ } while (0) @@ -160,7 +162,7 @@ void exitLog(); ptm->tm_mon + 1, \ ptm->tm_mday, ptm->tm_hour, ptm->tm_min, ptm->tm_sec, \ (int32_t)timeSecs.tv_usec); \ - fprintf(stderr, "PERF: " fmt, __VA_ARGS__); \ + fprintf(stderr, "PERF: " fmt, ##__VA_ARGS__); \ unlockLog(LOG_STDERR); \ if (g_arguments->fpOfInsertResult && !g_arguments->terminate) { \ lockLog(LOG_RESULT); \ @@ -172,7 +174,7 @@ void exitLog(); (int32_t)timeSecs.tv_usec); \ fprintf(g_arguments->fpOfInsertResult, "PERF: "); \ fprintf(g_arguments->fpOfInsertResult, \ - "" fmt, __VA_ARGS__); \ + "" fmt, ##__VA_ARGS__); \ unlockLog(LOG_RESULT); \ } \ } \ @@ -196,7 +198,7 @@ void exitLog(); if (g_arguments->debug_print) { \ fprintf(stderr, "%s(%d) ", __FILE__, __LINE__); \ } \ - fprintf(stderr, "" fmt, __VA_ARGS__); \ + fprintf(stderr, "" fmt, ##__VA_ARGS__); \ fprintf(stderr, "\033[0m"); \ unlockLog(LOG_STDERR); \ if (g_arguments->fpOfInsertResult && !g_arguments->terminate) { \ @@ -206,7 +208,7 @@ void exitLog(); ptm->tm_mday, ptm->tm_hour, ptm->tm_min, ptm->tm_sec, \ (int32_t)timeSecs.tv_usec); \ fprintf(g_arguments->fpOfInsertResult, "ERROR: "); \ - fprintf(g_arguments->fpOfInsertResult, "" fmt, __VA_ARGS__); \ + fprintf(g_arguments->fpOfInsertResult, "" fmt, ##__VA_ARGS__); \ unlockLog(LOG_RESULT); \ } \ } while (0) @@ -229,7 +231,7 @@ void exitLog(); if (g_arguments->debug_print) { \ fprintf(stderr, "%s(%d) ", __FILE__, __LINE__); \ } \ - fprintf(stderr, "" fmt, __VA_ARGS__); \ + fprintf(stderr, "" fmt, ##__VA_ARGS__); \ fprintf(stderr, "\033[0m"); \ unlockLog(LOG_STDERR); \ if (g_arguments->fpOfInsertResult && !g_arguments->terminate) { \ @@ -239,7 +241,7 @@ void exitLog(); ptm->tm_mday, ptm->tm_hour, ptm->tm_min, ptm->tm_sec, \ (int32_t)timeSecs.tv_usec); \ fprintf(g_arguments->fpOfInsertResult, "WARN: "); \ - fprintf(g_arguments->fpOfInsertResult, "" fmt, __VA_ARGS__); \ + fprintf(g_arguments->fpOfInsertResult, "" fmt, ##__VA_ARGS__); \ unlockLog(LOG_RESULT); \ } \ } while (0) @@ -262,7 +264,7 @@ void exitLog(); if (g_arguments->debug_print) { \ fprintf(stderr, "%s(%d) ", __FILE__, __LINE__); \ } \ - fprintf(stderr, "" fmt, __VA_ARGS__); \ + fprintf(stderr, "" fmt, ##__VA_ARGS__); \ fprintf(stderr, "\033[0m"); \ unlockLog(LOG_STDERR); \ if (g_arguments->fpOfInsertResult && !g_arguments->terminate) { \ @@ -272,7 +274,7 @@ void exitLog(); ptm->tm_mday, ptm->tm_hour, ptm->tm_min, ptm->tm_sec, \ (int32_t)timeSecs.tv_usec); \ fprintf(g_arguments->fpOfInsertResult, "SUCC: "); \ - fprintf(g_arguments->fpOfInsertResult, "" fmt, __VA_ARGS__); \ + fprintf(g_arguments->fpOfInsertResult, "" fmt, ##__VA_ARGS__); \ unlockLog(LOG_RESULT); \ } \ } while (0) diff --git a/tools/taos-tools/src/benchCsv.c 
b/tools/taos-tools/src/benchCsv.c index 246ff79287..c491e94606 100644 --- a/tools/taos-tools/src/benchCsv.c +++ b/tools/taos-tools/src/benchCsv.c @@ -14,6 +14,7 @@ #include #include #include +#include #include "benchLog.h" #include "benchData.h" @@ -77,25 +78,25 @@ static CsvNamingType csvGetFileNamingType(SSuperTable* stb) { } -static void csvCalcTimestampStep(CsvWriteMeta* meta) { +static void csvCalcTimestampStep(CsvWriteMeta* write_meta) { time_t ts_step = 0; - if (meta->db->precision == TSDB_TIME_PRECISION_MICRO) { + if (write_meta->db->precision == TSDB_TIME_PRECISION_MICRO) { ts_step = g_arguments->csv_ts_intv_secs * 1000000L; - } else if (db->precision == TSDB_TIME_PRECISION_NANO) { + } else if (write_meta->db->precision == TSDB_TIME_PRECISION_NANO) { ts_step = g_arguments->csv_ts_intv_secs * 1000000000L; } else { ts_step = g_arguments->csv_ts_intv_secs * 1000L; } - meta->ts_step = ts_step; + write_meta->ts_step = ts_step; return; } -static void csvCalcCtbRange(CsvThreadMeta* meta, size_t total_threads, int64_t ctb_offset, int64_t ctb_count) { +static void csvCalcCtbRange(CsvThreadMeta* thread_meta, size_t total_threads, int64_t ctb_offset, int64_t ctb_count) { uint64_t ctb_start_idx = 0; uint64_t ctb_end_idx = 0; - size_t tid_idx = meta->thread_id - 1; + size_t tid_idx = thread_meta->thread_id - 1; size_t base = ctb_count / total_threads; size_t remainder = ctb_count % total_threads; @@ -111,35 +112,34 @@ static void csvCalcCtbRange(CsvThreadMeta* meta, size_t total_threads, int64_t c ctb_end_idx = ctb_offset + ctb_count; } - meta->ctb_start_idx = ctb_start_idx; - meta->ctb_end_idx = ctb_end_idx; - meta->ctb_count = ctb_count; + thread_meta->ctb_start_idx = ctb_start_idx; + thread_meta->ctb_end_idx = ctb_end_idx; + thread_meta->ctb_count = ctb_count; return; } -static void csvGenThreadFormatter(CsvWriteMeta* meta) { +static void csvGenThreadFormatter(CsvWriteMeta* write_meta) { int digits = 0; - if (meta->total_threads == 0) { + if (write_meta->total_threads == 0) { digits = 1; } else { - for (int n = meta->total_threads; n > 0; n /= 10) { + for (int n = write_meta->total_threads; n > 0; n /= 10) { digits++; } } if (digits <= 1) { - (void)snprintf(meta->thread_formatter, sizeof(meta->thread_formatter), "%%d"); + (void)snprintf(write_meta->thread_formatter, sizeof(write_meta->thread_formatter), "%%d"); } else { - (void)snprintf(meta->thread_formatter, sizeof(meta->thread_formatter), "%%0%dd", digits); + (void)snprintf(write_meta->thread_formatter, sizeof(write_meta->thread_formatter), "%%0%dd", digits); } return; } static int csvGenCsvHeader(CsvWriteMeta* write_meta) { - SDataBase* db = write_meta->db; SSuperTable* stb = write_meta->stb; char* buf = write_meta->csv_header; int pos = 0; @@ -190,7 +190,7 @@ int csvGenCreateDbSql(SDataBase* db, char* buf, int size) { } if (db->cfgs) { - for (size i = 0; i < db->cfgs->size; ++i) { + for (size_t i = 0; i < db->cfgs->size; ++i) { SDbCfg* cfg = benchArrayGet(db->cfgs, i); if (cfg->valuestring) { pos += snprintf(buf + pos, size - pos, " %s %s", cfg->name, cfg->valuestring); @@ -224,7 +224,7 @@ static int csvExportCreateDbSql(CsvWriteMeta* write_meta, FILE* fp) { length = csvGenCreateDbSql(write_meta->db, buf, sizeof(buf)); if (length < 0) { - errorPrint("Failed to generate create db sql, maybe buffer[%d] not enough.\n", sizeof(buf)); + errorPrint("Failed to generate create db sql, maybe buffer[%zu] not enough.\n", sizeof(buf)); return -1; } @@ -256,7 +256,7 @@ int csvGenCreateStbSql(SDataBase* db, SSuperTable* stb, char* buf, int size) { 
// columns - for (sizt_t i = 0; i < stb->cols->size; ++i) { + for (size_t i = 0; i < stb->cols->size; ++i) { Field* col = benchArrayGet(stb->cols, i); if (col->type == TSDB_DATA_TYPE_BINARY @@ -265,7 +265,7 @@ int csvGenCreateStbSql(SDataBase* db, SSuperTable* stb, char* buf, int size) { || col->type == TSDB_DATA_TYPE_GEOMETRY) { if (col->type == TSDB_DATA_TYPE_GEOMETRY && col->length < 21) { - errorPrint("%s() LN%d, geometry filed len must be greater than 21 on %d\n", __func__, __LINE__, i); + errorPrint("%s() LN%d, geometry filed len must be greater than 21 on %zu.\n", __func__, __LINE__, i); return -1; } @@ -301,7 +301,7 @@ int csvGenCreateStbSql(SDataBase* db, SSuperTable* stb, char* buf, int size) { // tags - for (sizt_t i = 0; i < stb->tags->size; ++i) { + for (size_t i = 0; i < stb->tags->size; ++i) { Field* tag = benchArrayGet(stb->tags, i); if (i > 0) { @@ -315,7 +315,7 @@ int csvGenCreateStbSql(SDataBase* db, SSuperTable* stb, char* buf, int size) { || tag->type == TSDB_DATA_TYPE_GEOMETRY) { if (tag->type == TSDB_DATA_TYPE_GEOMETRY && tag->length < 21) { - errorPrint("%s() LN%d, geometry filed len must be greater than 21 on %d\n", __func__, __LINE__, i); + errorPrint("%s() LN%d, geometry filed len must be greater than 21 on %zu.\n", __func__, __LINE__, i); return -1; } @@ -397,7 +397,7 @@ static int csvExportCreateStbSql(CsvWriteMeta* write_meta, FILE* fp) { length = csvGenCreateStbSql(write_meta->db, write_meta->stb, buf, sizeof(buf)); if (length < 0) { - errorPrint("Failed to generate create stb sql, maybe buffer[%d] not enough.\n", sizeof(buf)); + errorPrint("Failed to generate create stb sql, maybe buffer[%zu] not enough.\n", sizeof(buf)); return -1; } @@ -417,7 +417,6 @@ static int csvExportCreateStbSql(CsvWriteMeta* write_meta, FILE* fp) { static int csvExportCreateSql(CsvWriteMeta* write_meta) { char fullname[MAX_PATH_LEN] = {}; - char buf[LARGE_BUFF_LEN] = {}; int ret = 0; int length = 0; FILE* fp = NULL; @@ -428,7 +427,7 @@ static int csvExportCreateSql(CsvWriteMeta* write_meta) { return -1; } - FILE* fp = fopen(fullname, "w"); + fp = fopen(fullname, "w"); if (!fp) { return -1; } @@ -475,7 +474,7 @@ static int csvInitWriteMeta(SDataBase* db, SSuperTable* stb, CsvWriteMeta* write return -1; } - switch (meta.naming_type) { + switch (write_meta->naming_type) { case CSV_NAMING_I_SINGLE: { break; } @@ -484,18 +483,18 @@ static int csvInitWriteMeta(SDataBase* db, SSuperTable* stb, CsvWriteMeta* write break; } case CSV_NAMING_B_THREAD: { - meta.total_threads = g_arguments->nthreads; + write_meta->total_threads = g_arguments->nthreads; csvGenThreadFormatter(write_meta); break; } case CSV_NAMING_B_THREAD_TIME_SLICE: { - meta.total_threads = g_arguments->nthreads; + write_meta->total_threads = g_arguments->nthreads; csvGenThreadFormatter(write_meta); csvCalcTimestampStep(write_meta); break; } default: { - meta.naming_type = CSV_NAMING_I_SINGLE; + write_meta->naming_type = CSV_NAMING_I_SINGLE; break; } } @@ -516,10 +515,10 @@ static void csvInitThreadMeta(CsvWriteMeta* write_meta, uint32_t thread_id, CsvT thread_meta->thread_id = thread_id; thread_meta->output_header = false; thread_meta->tags_buf_size = 0; - thread_meta->tags_buf_bucket = NULL; + thread_meta->tags_buf_array = NULL; thread_meta->cols_buf = NULL; - csvCalcCtbRange(write_meta, write_meta->total_threads, stb->childTblFrom, stb->childTblCount); + csvCalcCtbRange(thread_meta, write_meta->total_threads, stb->childTblFrom, stb->childTblCount); switch (write_meta->naming_type) { case CSV_NAMING_I_SINGLE: @@ -533,7 +532,6 
@@ static void csvInitThreadMeta(CsvWriteMeta* write_meta, uint32_t thread_id, CsvT break; } default: { - thread_meta->naming_type = CSV_NAMING_I_SINGLE; break; } } @@ -553,7 +551,7 @@ static void csvUpdateSliceRange(CsvWriteMeta* write_meta, CsvThreadMeta* thread_ case CSV_NAMING_I_TIME_SLICE: case CSV_NAMING_B_THREAD_TIME_SLICE: { thread_meta->start_secs = csvGetStartSeconds(db->precision, last_end_ts); - thread_meta->end_secs = thread_meta.start_secs + g_arguments->csv_ts_intv_secs; + thread_meta->end_secs = thread_meta->start_secs + g_arguments->csv_ts_intv_secs; break; } default: { @@ -569,7 +567,7 @@ static const char* csvGetGzipFilePrefix() { if (g_arguments->csv_compress_level == CSV_COMPRESS_NONE) { return ""; } else { - return ".gz" + return ".gz"; } } @@ -583,26 +581,26 @@ static int csvGetFileFullname(CsvWriteMeta* write_meta, CsvThreadMeta* thread_me const char* file_prefix = g_arguments->csv_file_prefix; const char* gzip_suffix = csvGetGzipFilePrefix(); - switch (meta->naming_type) { + switch (write_meta->naming_type) { case CSV_NAMING_I_SINGLE: { - ret = snprintf(fullname, size, "%s%s.csv%s", base_path, file_prefix, g_arguments->csv_compress_level, gzip_suffix); + ret = snprintf(fullname, size, "%s%s.csv%s", base_path, file_prefix, gzip_suffix); break; } case CSV_NAMING_I_TIME_SLICE: { - csvConvertTime2String(meta->start_secs, start_time_buf, sizeof(start_time_buf)); - csvConvertTime2String(meta->end_secs, end_time_buf, sizeof(end_time_buf)); + csvConvertTime2String(thread_meta->start_secs, start_time_buf, sizeof(start_time_buf)); + csvConvertTime2String(thread_meta->end_secs, end_time_buf, sizeof(end_time_buf)); ret = snprintf(fullname, size, "%s%s_%s_%s.csv%s", base_path, file_prefix, start_time_buf, end_time_buf, gzip_suffix); break; } case CSV_NAMING_B_THREAD: { - (void)snprintf(thread_buf, sizeof(thread_buf), meta->thread_formatter, meta->thread_id); + (void)snprintf(thread_buf, sizeof(thread_buf), write_meta->thread_formatter, thread_meta->thread_id); ret = snprintf(fullname, size, "%s%s_%s.csv%s", base_path, file_prefix, thread_buf, gzip_suffix); break; } case CSV_NAMING_B_THREAD_TIME_SLICE: { - (void)snprintf(thread_buf, sizeof(thread_buf), meta->thread_formatter, meta->thread_id); - csvConvertTime2String(meta->start_secs, start_time_buf, sizeof(start_time_buf)); - csvConvertTime2String(meta->end_secs, end_time_buf, sizeof(end_time_buf)); + (void)snprintf(thread_buf, sizeof(thread_buf), write_meta->thread_formatter, thread_meta->thread_id); + csvConvertTime2String(thread_meta->start_secs, start_time_buf, sizeof(start_time_buf)); + csvConvertTime2String(thread_meta->end_secs, end_time_buf, sizeof(end_time_buf)); ret = snprintf(fullname, size, "%s%s_%s_%s_%s.csv%s", base_path, file_prefix, thread_buf, start_time_buf, end_time_buf, gzip_suffix); break; } @@ -699,6 +697,24 @@ static int csvGenRowColData(char* buf, int size, SSuperTable* stb, int64_t ts, i } +static void csvFreeCtbTagData(CsvThreadMeta* thread_meta, CsvRowTagsBuf* tags_buf_array) { + if (!thread_meta || !tags_buf_array) { + return; + } + + for (uint64_t i = 0 ; i < thread_meta->ctb_count; ++i) { + char* tags_buf = tags_buf_array[i].buf; + if (tags_buf) { + tmfree(tags_buf_array); + } else { + break; + } + } + tmfree(tags_buf_array); + return; +} + + static CsvRowTagsBuf* csvGenCtbTagData(CsvWriteMeta* write_meta, CsvThreadMeta* thread_meta) { SSuperTable* stb = write_meta->stb; int ret = 0; @@ -708,8 +724,8 @@ static CsvRowTagsBuf* csvGenCtbTagData(CsvWriteMeta* write_meta, CsvThreadMeta* return NULL; } - 
CsvRowTagsBuf* tags_buf_bucket = (CsvRowTagsBuf*)benchCalloc(thread_meta->ctb_count, sizeof(CsvRowTagsBuf), true); - if (!tags_buf_bucket) { + CsvRowTagsBuf* tags_buf_array = (CsvRowTagsBuf*)benchCalloc(thread_meta->ctb_count, sizeof(CsvRowTagsBuf), true); + if (!tags_buf_array) { return NULL; } @@ -721,43 +737,25 @@ static CsvRowTagsBuf* csvGenCtbTagData(CsvWriteMeta* write_meta, CsvThreadMeta* goto error; } - tags_buf_bucket[i].buf = tags_buf; - write_meta->tags_buf_size = tags_buf_size; + tags_buf_array[i].buf = tags_buf; + thread_meta->tags_buf_size = tags_buf_size; ret = csvGenRowTagData(tags_buf, tags_buf_size, stb, thread_meta->ctb_start_idx + i, &tk); if (ret <= 0) { goto error; } - tags_buf_bucket[i].length = ret; + tags_buf_array[i].length = ret; } - return tags_buf_bucket; + return tags_buf_array; error: - csvFreeCtbTagData(thread_meta, tags_buf_bucket); + csvFreeCtbTagData(thread_meta, tags_buf_array); return NULL; } -static void csvFreeCtbTagData(CsvThreadMeta* thread_meta, CsvRowTagsBuf* tags_buf_bucket) { - if (!thread_meta || !tags_buf_bucket) { - return; - } - - for (uint64_t i = 0 ; i < thread_meta->ctb_count; ++i) { - char* tags_buf = tags_buf_bucket[i].buf; - if (tags_buf) { - tmfree(tags_buf_bucket); - } else { - break; - } - } - tmfree(tags_buf_bucket); - return; -} - - static CsvFileHandle* csvOpen(const char* filename, CsvCompressionLevel compress_level) { CsvFileHandle* fhdl = NULL; bool failed = false; @@ -770,13 +768,13 @@ static CsvFileHandle* csvOpen(const char* filename, CsvCompressionLevel compress } if (compress_level == CSV_COMPRESS_NONE) { - fhdl.handle.fp = fopen(filename, "w"); - failed = (!fhdl.handle.fp); + fhdl->handle.fp = fopen(filename, "w"); + failed = (!fhdl->handle.fp); } else { char mode[TINY_BUFF_LEN]; (void)snprintf(mode, sizeof(mode), "wb%d", compress_level); - fhdl.handle.gf = gzopen(filename, mode); - failed = (!fhdl.handle.gf); + fhdl->handle.gf = gzopen(filename, mode); + failed = (!fhdl->handle.gf); } if (failed) { @@ -806,9 +804,9 @@ static CsvIoError csvWrite(CsvFileHandle* fhdl, const char* buf, size_t size) { return CSV_ERR_WRITE_FAILED; } } else { - unsigned int ret = gzwrite(fhdl->handle.gf, buf, size); + int ret = gzwrite(fhdl->handle.gf, buf, size); if (ret != size) { - errorPrint("Failed to write csv file: %s. expected written %zu but %zu.\n", + errorPrint("Failed to write csv file: %s. expected written %zu but %d.\n", fhdl->filename, size, ret); int errnum; const char* errmsg = gzerror(fhdl->handle.gf, &errnum); @@ -839,16 +837,16 @@ static void csvClose(CsvFileHandle* fhdl) { static int csvWriteFile(CsvFileHandle* fhdl, uint64_t ctb_idx, int64_t cur_ts, int64_t* ck, CsvWriteMeta* write_meta, CsvThreadMeta* thread_meta) { SDataBase* db = write_meta->db; SSuperTable* stb = write_meta->stb; - CsvRowTagsBuf* tags_buf_bucket = thread_meta->tags_buf_bucket; - CsvRowColsBuf* tags_buf = &tags_buf_bucket[ctb_idx]; + CsvRowTagsBuf* tags_buf_array = thread_meta->tags_buf_array; + CsvRowTagsBuf* tags_buf = &tags_buf_array[ctb_idx]; CsvRowColsBuf* cols_buf = thread_meta->cols_buf; int ret = 0; ret = csvGenRowColData(cols_buf->buf, cols_buf->buf_size, stb, cur_ts, db->precision, ck); if (ret <= 0) { - errorPrint("Failed to generate csv column data. database: %s, super table: %s, naming type: %d, thread index: %d, ctb index: %" PRIu64 ".\n", - db->dbName, stb->stbName, write_meta.naming_type, thread_meta->thread_id, ctb_idx); + errorPrint("Failed to generate csv column data. 
database: %s, super table: %s, naming type: %d, thread index: %zu, ctb index: %" PRIu64 ".\n", + db->dbName, stb->stbName, write_meta->naming_type, thread_meta->thread_id, ctb_idx); return -1; } @@ -858,7 +856,7 @@ static int csvWriteFile(CsvFileHandle* fhdl, uint64_t ctb_idx, int64_t cur_ts, i if (thread_meta->output_header) { ret = csvWrite(fhdl, write_meta->csv_header, write_meta->csv_header_length); if (ret != CSV_ERR_OK) { - errorPrint("Failed to write csv header data. database: %s, super table: %s, naming type: %d, thread index: %d, ctb index: %" PRIu64 ".\n", + errorPrint("Failed to write csv header data. database: %s, super table: %s, naming type: %d, thread index: %zu, ctb index: %" PRIu64 ".\n", db->dbName, stb->stbName, write_meta->naming_type, thread_meta->thread_id, ctb_idx); return -1; } @@ -869,7 +867,7 @@ static int csvWriteFile(CsvFileHandle* fhdl, uint64_t ctb_idx, int64_t cur_ts, i // write columns ret = csvWrite(fhdl, cols_buf->buf, cols_buf->length); if (ret != CSV_ERR_OK) { - errorPrint("Failed to write csv column data, expected written %d but got %zu. database: %s, super table: %s, naming type: %d, thread index: %d, ctb index: %" PRIu64 ".\n", + errorPrint("Failed to write csv column data. database: %s, super table: %s, naming type: %d, thread index: %zu, ctb index: %" PRIu64 ".\n", db->dbName, stb->stbName, write_meta->naming_type, thread_meta->thread_id, ctb_idx); return -1; } @@ -877,7 +875,7 @@ static int csvWriteFile(CsvFileHandle* fhdl, uint64_t ctb_idx, int64_t cur_ts, i // write tags ret = csvWrite(fhdl, tags_buf->buf, tags_buf->length); if (ret != CSV_ERR_OK) { - errorPrint("Failed to write csv tag data. database: %s, super table: %s, naming type: %d, thread index: %d, ctb index: %" PRIu64 ".\n", + errorPrint("Failed to write csv tag data. database: %s, super table: %s, naming type: %d, thread index: %zu, ctb index: %" PRIu64 ".\n", db->dbName, stb->stbName, write_meta->naming_type, thread_meta->thread_id, ctb_idx); return -1; } @@ -885,7 +883,7 @@ static int csvWriteFile(CsvFileHandle* fhdl, uint64_t ctb_idx, int64_t cur_ts, i // write line break ret = csvWrite(fhdl, "\n", 1); if (ret != CSV_ERR_OK) { - errorPrint("Failed to write csv line break data. database: %s, super table: %s, naming type: %d, thread index: %d, ctb index: %" PRIu64 ".\n", + errorPrint("Failed to write csv line break data. database: %s, super table: %s, naming type: %d, thread index: %zu, ctb index: %" PRIu64 ".\n", db->dbName, stb->stbName, write_meta->naming_type, thread_meta->thread_id, ctb_idx); return -1; } @@ -922,10 +920,10 @@ static void* csvGenStbThread(void* arg) { // tags buffer - CsvRowTagsBuf* tags_buf_bucket = csvGenCtbTagData(write_meta, thread_meta); - if (!tags_buf_bucket) { - errorPrint("Failed to generate csv tag data. database: %s, super table: %s, naming type: %d, thread index: %d.\n", - db->dbName, stb->stbName, write_meta.naming_type, thread_meta->thread_id); + CsvRowTagsBuf* tags_buf_array = csvGenCtbTagData(write_meta, thread_meta); + if (!tags_buf_array) { + errorPrint("Failed to generate csv tag data. database: %s, super table: %s, naming type: %d, thread index: %zu.\n", + db->dbName, stb->stbName, write_meta->naming_type, thread_meta->thread_id); return NULL; } @@ -933,8 +931,8 @@ static void* csvGenStbThread(void* arg) { int buf_size = stb->lenOfCols + stb->cols->size; char* buf = (char*)benchCalloc(1, buf_size, true); if (!buf) { - errorPrint("Failed to malloc csv column buffer. 
database: %s, super table: %s, naming type: %d, thread index: %d.\n", - db->dbName, stb->stbName, write_meta.naming_type, thread_meta->thread_id); + errorPrint("Failed to malloc csv column buffer. database: %s, super table: %s, naming type: %d, thread index: %zu.\n", + db->dbName, stb->stbName, write_meta->naming_type, thread_meta->thread_id); goto end; } @@ -944,24 +942,23 @@ static void* csvGenStbThread(void* arg) { .length = 0 }; - thread_meta->tags_buf_bucket = tags_buf_bucket; + thread_meta->tags_buf_array = tags_buf_array; thread_meta->cols_buf = &cols_buf; start_print_ts = toolsGetTimestampMs(); for (cur_ts = write_meta->start_ts; cur_ts < write_meta->end_ts; cur_ts += write_meta->ts_step) { // get filename - fullname[MAX_PATH_LEN] = {}; ret = csvGetFileFullname(write_meta, thread_meta, fullname, sizeof(fullname)); if (ret < 0) { - errorPrint("Failed to generate csv filename. database: %s, super table: %s, naming type: %d, thread index: %d.\n", - db->dbName, stb->stbName, write_meta.naming_type, thread_meta->thread_id); + errorPrint("Failed to generate csv filename. database: %s, super table: %s, naming type: %d, thread index: %zu.\n", + db->dbName, stb->stbName, write_meta->naming_type, thread_meta->thread_id); goto end; } // create fd fhdl = csvOpen(fullname, g_arguments->csv_compress_level); if (fhdl == NULL) { - errorPrint("Failed to create csv file. thread index: %d, file: %s, errno: %d, strerror: %s.\n", + errorPrint("Failed to create csv file. thread index: %zu, file: %s, errno: %d, strerror: %s.\n", thread_meta->thread_id, fullname, errno, strerror(errno)); goto end; } @@ -972,7 +969,7 @@ static void* csvGenStbThread(void* arg) { slice_end_ts = MIN(cur_ts + write_meta->ts_step, write_meta->end_ts); file_rows = 0; - infoPrint("thread[%d] begin to write csv file: %s.\n", thread_meta->thread_id, fullname); + infoPrint("thread[%zu] begin to write csv file: %s.\n", thread_meta->thread_id, fullname); // write data while (slice_cur_ts < slice_end_ts) { @@ -982,7 +979,7 @@ static void* csvGenStbThread(void* arg) { for (slice_ctb_cur_ts = slice_cur_ts; slice_ctb_cur_ts < slice_batch_ts; slice_ctb_cur_ts += write_meta->stb->timestamp_step) { ret = csvWriteFile(fhdl, ctb_idx, slice_ctb_cur_ts, &ck, write_meta, thread_meta); if (!ret) { - errorPrint("Failed to write csv file. thread index: %d, file: %s, errno: %d, strerror: %s.\n", + errorPrint("Failed to write csv file. 
thread index: %zu, file: %s, errno: %d, strerror: %s.\n", thread_meta->thread_id, fullname, errno, strerror(errno)); csvClose(fhdl); goto end; @@ -995,7 +992,7 @@ static void* csvGenStbThread(void* arg) { cur_print_ts = toolsGetTimestampMs(); print_ts_elapse = cur_print_ts - pre_print_ts; if (print_ts_elapse > 30000) { - infoPrint("thread[%d] has currently inserted rows: %" PRIu64 ", period insert rate: %.2f rows/s.\n", + infoPrint("thread[%zu] has currently inserted rows: %" PRIu64 ", period insert rate: %.2f rows/s.\n", thread_meta->thread_id, total_rows, (total_rows - pre_total_rows) * 1000.0 / print_ts_elapse); pre_print_ts = cur_print_ts; @@ -1014,18 +1011,18 @@ static void* csvGenStbThread(void* arg) { } csvClose(fhdl); - csvUpdateSliceRange(write_meta, thread_meta, last_end_ts); + csvUpdateSliceRange(write_meta, thread_meta, slice_end_ts); } cur_print_ts = toolsGetTimestampMs(); print_ts_elapse = cur_print_ts - start_print_ts; - succPrint("thread [%d] has completed inserting rows: %" PRIu64 ", insert rate %.2f rows/s.\n", + succPrint("thread [%zu] has completed inserting rows: %" PRIu64 ", insert rate %.2f rows/s.\n", thread_meta->thread_id, total_rows, total_rows * 1000.0 / print_ts_elapse); end: thread_meta->total_rows = total_rows; - csvFreeCtbTagData(tags_buf_bucket); + csvFreeCtbTagData(thread_meta, tags_buf_array); tmfree(buf); return NULL; } @@ -1038,8 +1035,12 @@ static int csvGenStbProcess(SDataBase* db, SSuperTable* stb) { int64_t start_ts = 0; int64_t ts_elapse = 0; + CsvWriteMeta* write_meta = NULL; + CsvThreadArgs* args = NULL; + pthread_t* pids = NULL; - CsvWriteMeta* write_meta = benchCalloc(1, sizeof(CsvWriteMeta), false); + + write_meta = benchCalloc(1, sizeof(CsvWriteMeta), false); if (!write_meta) { ret = -1; goto end; @@ -1051,13 +1052,13 @@ static int csvGenStbProcess(SDataBase* db, SSuperTable* stb) { goto end; } - CsvThreadArgs* args = benchCalloc(write_meta->total_threads, sizeof(CsvThreadArgs), false); + args = benchCalloc(write_meta->total_threads, sizeof(CsvThreadArgs), false); if (!args) { ret = -1; goto end; } - pthread_t* pids = benchCalloc(write_meta.total_threads, sizeof(pthread_t), false); + pids = benchCalloc(write_meta->total_threads, sizeof(pthread_t), false); if (!pids) { ret = -1; goto end; @@ -1083,7 +1084,7 @@ static int csvGenStbProcess(SDataBase* db, SSuperTable* stb) { prompt = false; } - infoPrint("pthread_join %d ...\n", i); + infoPrint("pthread_join %u ...\n", i); pthread_join(pids[i], NULL); } @@ -1097,7 +1098,7 @@ static int csvGenStbProcess(SDataBase* db, SSuperTable* stb) { ts_elapse = toolsGetTimestampMs() - start_ts; if (ts_elapse > 0) { - succPrint("Spent %.6f seconds to insert rows: %" PRIu64 " with %d thread(s) into %s, at a rate of %.2f rows/s.\n", + succPrint("Spent %.6f seconds to insert rows: %" PRIu64 " with %zu thread(s) into %s, at a rate of %.2f rows/s.\n", ts_elapse / 1000.0, total_rows, write_meta->total_threads, g_arguments->output_path, total_rows * 1000.0 / ts_elapse); } @@ -1197,15 +1198,14 @@ static int csvParseParameter() { // csv_output_path size_t len = strlen(g_arguments->output_path); if (len == 0) { - errorPrint("Failed to generate csv files, the specified output path is empty. Please provide a valid path. database: %s, super table: %s.\n", - db->dbName, stb->stbName); + errorPrint("Failed to generate csv files, the specified output path is empty. 
Please provide a valid path.\n");
             return -1;
         }
         if (g_arguments->output_path[len - 1] != '/') {
             int n = snprintf(g_arguments->output_path_buf, sizeof(g_arguments->output_path_buf), "%s/", g_arguments->output_path);
             if (n < 0 || n >= sizeof(g_arguments->output_path_buf)) {
-            errorPrint("Failed to generate csv files, path buffer overflow risk when appending '/'. path: %s, database: %s, super table: %s.\n",
-                    g_arguments->csv_output_path, db->dbName, stb->stbName);
+            errorPrint("Failed to generate csv files, path buffer overflow risk when appending '/'. path: %s.\n",
+                    g_arguments->output_path);
             return -1;
         }
         g_arguments->output_path = g_arguments->output_path_buf;
@@ -1214,8 +1214,8 @@ static int csvParseParameter() {
     // csv_ts_format
     if (g_arguments->csv_ts_format) {
         if (csvValidateParamTsFormat(g_arguments->csv_ts_format) != 0) {
-            errorPrint("Failed to generate csv files, the parameter `csv_ts_format` is invalid. csv_ts_format: %s, database: %s, super table: %s.\n",
-                    g_arguments->csv_ts_format, db->dbName, stb->stbName);
+            errorPrint("Failed to generate csv files, the parameter `csv_ts_format` is invalid. csv_ts_format: %s.\n",
+                    g_arguments->csv_ts_format);
             return -1;
         }
     }
@@ -1223,8 +1223,8 @@ static int csvParseParameter() {
     // csv_ts_interval
     long csv_ts_intv_secs = csvValidateParamTsInterval(g_arguments->csv_ts_interval);
     if (csv_ts_intv_secs <= 0) {
-        errorPrint("Failed to generate csv files, the parameter `csv_ts_interval` is invalid. csv_ts_interval: %s, database: %s, super table: %s.\n",
-                g_arguments->csv_ts_interval, db->dbName, stb->stbName);
+        errorPrint("Failed to generate csv files, the parameter `csv_ts_interval` is invalid. csv_ts_interval: %s.\n",
+                g_arguments->csv_ts_interval);
         return -1;
     }
     g_arguments->csv_ts_intv_secs = csv_ts_intv_secs;
@@ -1237,7 +1237,7 @@ static int csvWriteThread() {
     for (size_t i = 0; i < g_arguments->databases->size && !g_arguments->terminate; ++i) {
         // database
         SDataBase* db = benchArrayGet(g_arguments->databases, i);
-        if (database->superTbls) {
+        if (db->superTbls) {
             for (size_t j = 0; j < db->superTbls->size && !g_arguments->terminate; ++j) {
                 // stb
                 SSuperTable* stb = benchArrayGet(db->superTbls, j);
diff --git a/tools/taos-tools/src/benchJsonOpt.c b/tools/taos-tools/src/benchJsonOpt.c
index 21393b8d29..967b465dff 100644
--- a/tools/taos-tools/src/benchJsonOpt.c
+++ b/tools/taos-tools/src/benchJsonOpt.c
@@ -14,7 +14,6 @@
 #include
 #include
 #include "benchLog.h"
-#include "benchCsv.h"
 
 extern char g_configDir[MAX_PATH_LEN];
 

From 2815227d2518ef2ff14e4cc0893a3b660f349e32 Mon Sep 17 00:00:00 2001
From: Yaming Pei
Date: Mon, 3 Mar 2025 15:35:45 +0800
Subject: [PATCH 14/34] fix: limit thread concurrency to the child table count

---
 tools/taos-tools/src/benchCsv.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tools/taos-tools/src/benchCsv.c b/tools/taos-tools/src/benchCsv.c
index c491e94606..850d3b9a98 100644
--- a/tools/taos-tools/src/benchCsv.c
+++ b/tools/taos-tools/src/benchCsv.c
@@ -483,12 +483,12 @@ static int csvInitWriteMeta(SDataBase* db, SSuperTable* stb, CsvWriteMeta* write
         break;
     }
     case CSV_NAMING_B_THREAD: {
-        write_meta->total_threads = g_arguments->nthreads;
+        write_meta->total_threads = MIN(g_arguments->nthreads, stb->childTblCount);
         csvGenThreadFormatter(write_meta);
         break;
     }
     case CSV_NAMING_B_THREAD_TIME_SLICE: {
-        write_meta->total_threads = g_arguments->nthreads;
+        write_meta->total_threads = MIN(g_arguments->nthreads, stb->childTblCount);
         csvGenThreadFormatter(write_meta);
         csvCalcTimestampStep(write_meta);
         break;
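Note on PATCH 14: the clamp matters because csvCalcCtbRange() splits the child tables evenly
across the writer threads; without it, asking for more threads than there are child tables would
leave the trailing threads with empty ranges. The standalone sketch below illustrates the intended
split under that assumption -- `splitRange` and `CtbRange` are illustrative names for this note,
not symbols from taosBenchmark:

    #include <stdio.h>
    #include <stdint.h>
    #include <stddef.h>

    /* Illustrative stand-ins for the benchmark's per-thread bookkeeping. */
    typedef struct {
        uint64_t start_idx;  /* first child table owned by this thread */
        uint64_t end_idx;    /* one past the last child table */
    } CtbRange;

    /* Even split with remainder spreading: the first (count % threads)
     * threads each take one extra table. */
    static CtbRange splitRange(size_t tid, size_t threads, uint64_t offset, uint64_t count) {
        uint64_t base = count / threads;
        uint64_t rem  = count % threads;
        CtbRange r;
        if (tid < rem) {
            r.start_idx = offset + tid * (base + 1);
            r.end_idx   = r.start_idx + base + 1;
        } else {
            r.start_idx = offset + rem * (base + 1) + (tid - rem) * base;
            r.end_idx   = r.start_idx + base;
        }
        return r;
    }

    int main(void) {
        uint64_t child_tables = 3;
        size_t   requested    = 8;
        /* The PATCH 14 clamp: never spawn more writer threads than child
         * tables, so no thread ends up with an empty range. */
        size_t threads = requested < child_tables ? requested : (size_t)child_tables;
        for (size_t tid = 0; tid < threads; ++tid) {
            CtbRange r = splitRange(tid, threads, 0, child_tables);
            printf("thread %zu -> ctb [%llu, %llu)\n", tid,
                   (unsigned long long)r.start_idx, (unsigned long long)r.end_idx);
        }
        return 0;
    }
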
From 20f6c7dd53eb391225a87cccaf1a68e7ec7826dd Mon Sep 17 00:00:00 2001
From: Yaming Pei
Date: Mon, 3 Mar 2025 15:37:08 +0800
Subject: [PATCH 15/34] feat: csv output header supports true/false options

---
 tools/taos-tools/src/benchJsonOpt.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/tools/taos-tools/src/benchJsonOpt.c b/tools/taos-tools/src/benchJsonOpt.c
index 967b465dff..9bc8527130 100644
--- a/tools/taos-tools/src/benchJsonOpt.c
+++ b/tools/taos-tools/src/benchJsonOpt.c
@@ -1620,11 +1620,13 @@ static int getMetaFromCommonJsonFile(tools_cJSON *json) {
     }
 
     // csv output header
-    g_arguments->csv_output_header = false;
+    g_arguments->csv_output_header = true;
     tools_cJSON* oph = tools_cJSON_GetObjectItem(json, "csv_output_header");
     if (oph && oph->type == tools_cJSON_String && oph->valuestring != NULL) {
-        if (0 == strcasecmp(oph->valuestring, "yes")) {
+        if (0 == strcasecmp(oph->valuestring, "yes") || 0 == strcasecmp(oph->valuestring, "true")) {
             g_arguments->csv_output_header = true;
+        } else if (0 == strcasecmp(oph->valuestring, "no") || 0 == strcasecmp(oph->valuestring, "false")) {
+            g_arguments->csv_output_header = false;
         }
     }
 

From 6bc6223db31fd332d8c2b950ed39bf487c274584 Mon Sep 17 00:00:00 2001
From: Yaming Pei
Date: Mon, 3 Mar 2025 16:47:07 +0800
Subject: [PATCH 16/34] fix: correct memory release and return value errors

---
 tools/taos-tools/src/benchCsv.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/tools/taos-tools/src/benchCsv.c b/tools/taos-tools/src/benchCsv.c
index 850d3b9a98..36f92c99e8 100644
--- a/tools/taos-tools/src/benchCsv.c
+++ b/tools/taos-tools/src/benchCsv.c
@@ -705,7 +705,7 @@ static void csvFreeCtbTagData(CsvThreadMeta* thread_meta, CsvRowTagsBuf* tags_bu
     for (uint64_t i = 0 ; i < thread_meta->ctb_count; ++i) {
         char* tags_buf = tags_buf_array[i].buf;
         if (tags_buf) {
-            tmfree(tags_buf_array);
+            tmfree(tags_buf);
         } else {
             break;
         }
@@ -738,7 +738,6 @@ static CsvRowTagsBuf* csvGenCtbTagData(CsvWriteMeta* write_meta, CsvThreadMeta*
         }
 
         tags_buf_array[i].buf = tags_buf;
-        thread_meta->tags_buf_size = tags_buf_size;
 
         ret = csvGenRowTagData(tags_buf, tags_buf_size, stb, thread_meta->ctb_start_idx + i, &tk);
         if (ret <= 0) {
@@ -747,6 +746,7 @@ static CsvRowTagsBuf* csvGenCtbTagData(CsvWriteMeta* write_meta, CsvThreadMeta*
 
         tags_buf_array[i].length = ret;
     }
+    thread_meta->tags_buf_size = tags_buf_size;
 
     return tags_buf_array;
 
error:
@@ -978,7 +978,7 @@ static void* csvGenStbThread(void* arg) {
         for (ctb_idx = 0; ctb_idx < thread_meta->ctb_count; ++ctb_idx) {
             for (slice_ctb_cur_ts = slice_cur_ts; slice_ctb_cur_ts < slice_batch_ts; slice_ctb_cur_ts += write_meta->stb->timestamp_step) {
                 ret = csvWriteFile(fhdl, ctb_idx, slice_ctb_cur_ts, &ck, write_meta, thread_meta);
-                if (!ret) {
+                if (ret) {
                     errorPrint("Failed to write csv file. thread index: %zu, file: %s, errno: %d, strerror: %s.\n",
                             thread_meta->thread_id, fullname, errno, strerror(errno));
                     csvClose(fhdl);
                     goto end;
@@ -1071,7 +1071,7 @@ static int csvGenStbProcess(SDataBase* db, SSuperTable* stb) {
         csvInitThreadMeta(write_meta, i + 1, &arg->thread_meta);
 
         ret = pthread_create(&pids[i], NULL, csvGenStbThread, arg);
-        if (!ret) {
+        if (ret) {
             perror("Failed to create thread");
             goto end;
         }

From 7eef6659ace4907b84757140488a51e20f78dc55 Mon Sep 17 00:00:00 2001
From: Yaming Pei
Date: Mon, 3 Mar 2025 17:28:26 +0800
Subject: [PATCH 17/34] fix: correct the count of child tables handled by each
 thread

---
 tools/taos-tools/src/benchCsv.c | 12 ++++++++----
 1 file changed, 8 insertions(+), 4 deletions(-)

diff --git a/tools/taos-tools/src/benchCsv.c b/tools/taos-tools/src/benchCsv.c
index 36f92c99e8..61334c418d 100644
--- a/tools/taos-tools/src/benchCsv.c
+++ b/tools/taos-tools/src/benchCsv.c
@@ -114,7 +114,7 @@ static void csvCalcCtbRange(CsvThreadMeta* thread_meta, size_t total_threads, in
 
     thread_meta->ctb_start_idx = ctb_start_idx;
     thread_meta->ctb_end_idx   = ctb_end_idx;
-    thread_meta->ctb_count     = ctb_count;
+    thread_meta->ctb_count     = ctb_end_idx - ctb_start_idx;
     return;
 }
 
@@ -385,7 +385,10 @@ int csvGenCreateStbSql(SDataBase* db, SSuperTable* stb, char* buf, int size) {
         if (pos <= 0 || pos >= size) return -1;
     }
 
-    infoPrint("create stable: <%s>\n", buf);
+    pos += snprintf(buf + pos, size - pos, "\n");
+    if (pos <= 0 || pos >= size) return -1;
+
+    // infoPrint("create stable: <%s>.\n", buf);
     return (pos > 0 && pos < size) ? pos : -1;
 }
 
@@ -677,7 +680,7 @@ static int csvGenRowTagData(char* buf, int size, SSuperTable* stb, int64_t index
     }
 
     // tbname
-    int pos = snprintf(buf, size, "\'%s%"PRId64"\'", stb->childTblPrefix, index);
+    int pos = snprintf(buf, size, ",'%s%"PRId64"'", stb->childTblPrefix, index);
 
     // tags
     pos += csvGenRowFields(buf + pos, size - pos, stb, GEN_ROW_FIELDS_TAG, k);
@@ -968,6 +971,7 @@ static void* csvGenStbThread(void* arg) {
         slice_cur_ts = cur_ts;
         slice_end_ts = MIN(cur_ts + write_meta->ts_step, write_meta->end_ts);
         file_rows    = 0;
+        pre_print_ts = toolsGetTimestampMs();
 
         infoPrint("thread[%zu] begin to write csv file: %s.\n", thread_meta->thread_id, fullname);
 
@@ -1000,7 +1004,7 @@ static void* csvGenStbThread(void* arg) {
         }
 
 
-        if (!g_arguments->terminate) {
+        if (g_arguments->terminate) {
             csvClose(fhdl);
             goto end;
         }

From cd958c9f17ce240221ea3ff56b5466705d67d189 Mon Sep 17 00:00:00 2001
From: Yaming Pei
Date: Mon, 3 Mar 2025 17:59:58 +0800
Subject: [PATCH 18/34] feat: print csv export mode

---
 tools/taos-tools/inc/benchCsv.h | 3 ++-
 tools/taos-tools/src/benchCsv.c | 6 ++++++
 2 files changed, 8 insertions(+), 1 deletion(-)

diff --git a/tools/taos-tools/inc/benchCsv.h b/tools/taos-tools/inc/benchCsv.h
index f9f87aa341..e80f73bcda 100644
--- a/tools/taos-tools/inc/benchCsv.h
+++ b/tools/taos-tools/inc/benchCsv.h
@@ -57,7 +57,8 @@ typedef struct {
     CsvNamingType   naming_type;
     size_t          total_threads;
-    char            thread_formatter[TINY_BUFF_LEN];
+    char            mode[MIDDLE_BUFF_LEN];
+    char            thread_formatter[SMALL_BUFF_LEN];
     char            csv_header[LARGE_BUFF_LEN];
     int             csv_header_length;
     SDataBase*      db;
diff --git a/tools/taos-tools/src/benchCsv.c b/tools/taos-tools/src/benchCsv.c
index 61334c418d..32a48e70d0 100644
--- a/tools/taos-tools/src/benchCsv.c
+++ b/tools/taos-tools/src/benchCsv.c
@@ -479,18 +479,22 @@ static int csvInitWriteMeta(SDataBase* db, SSuperTable* stb, CsvWriteMeta* write
     switch (write_meta->naming_type) {
         case CSV_NAMING_I_SINGLE: {
+            (void)snprintf(write_meta->mode, sizeof(write_meta->mode), "interlace|no-time-slice");
             break;
         }
         case CSV_NAMING_I_TIME_SLICE: {
+            (void)snprintf(write_meta->mode, sizeof(write_meta->mode), "interlace|time-slice");
             csvCalcTimestampStep(write_meta);
             break;
         }
         case CSV_NAMING_B_THREAD: {
+            (void)snprintf(write_meta->mode, sizeof(write_meta->mode), "batch|no-time-slice");
             write_meta->total_threads = MIN(g_arguments->nthreads, stb->childTblCount);
             csvGenThreadFormatter(write_meta);
             break;
         }
         case CSV_NAMING_B_THREAD_TIME_SLICE: {
+            (void)snprintf(write_meta->mode, sizeof(write_meta->mode), "batch|time-slice");
             write_meta->total_threads = MIN(g_arguments->nthreads, stb->childTblCount);
             csvGenThreadFormatter(write_meta);
             csvCalcTimestampStep(write_meta);
             break;
@@ -1056,6 +1060,8 @@ static int csvGenStbProcess(SDataBase* db, SSuperTable* stb) {
         goto end;
     }
 
+    infoPrint("export csv mode: %s.\n", write_meta->mode);
+
     args = benchCalloc(write_meta->total_threads, sizeof(CsvThreadArgs), false);
     if (!args) {
         ret = -1;
         goto end;
     }

From 56ac7c9ef4281b3e0374a71ac04f3f0c544b85a2 Mon Sep 17 00:00:00 2001
From: Yaming Pei
Date: Mon, 3 Mar 2025 20:07:09 +0800
Subject: [PATCH 19/34] feat: include the thread count in the export mode
 string

---
 tools/taos-tools/src/benchCsv.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/tools/taos-tools/src/benchCsv.c b/tools/taos-tools/src/benchCsv.c
index 32a48e70d0..cf8527d375 100644
--- a/tools/taos-tools/src/benchCsv.c
+++ b/tools/taos-tools/src/benchCsv.c
@@ -385,7 +385,7 @@ int csvGenCreateStbSql(SDataBase* db, SSuperTable* stb, char* buf, int size) {
         if (pos <= 0 || pos >= size) return -1;
     }
 
-    pos += snprintf(buf + pos, size - pos, "\n");
+    pos += snprintf(buf + pos, size - pos, ";\n");
     if (pos <= 0 || pos >= size) return -1;
 
     // infoPrint("create stable: <%s>.\n", buf);
@@ -488,14 +488,14 @@ static int csvInitWriteMeta(SDataBase* db, SSuperTable* stb, CsvWriteMeta* write
             break;
         }
         case CSV_NAMING_B_THREAD: {
-            (void)snprintf(write_meta->mode, sizeof(write_meta->mode), "batch|no-time-slice");
             write_meta->total_threads = MIN(g_arguments->nthreads, stb->childTblCount);
+            (void)snprintf(write_meta->mode, sizeof(write_meta->mode), "batch[%zu]|no-time-slice", write_meta->total_threads);
             csvGenThreadFormatter(write_meta);
             break;
         }
         case CSV_NAMING_B_THREAD_TIME_SLICE: {
-            (void)snprintf(write_meta->mode, sizeof(write_meta->mode), "batch|time-slice");
             write_meta->total_threads = MIN(g_arguments->nthreads, stb->childTblCount);
+            (void)snprintf(write_meta->mode, sizeof(write_meta->mode), "batch[%zu]|time-slice", write_meta->total_threads);
             csvGenThreadFormatter(write_meta);
             csvCalcTimestampStep(write_meta);
             break;

From 0d630ecdf9a9265b9e29b400719fc804d26abb40 Mon Sep 17 00:00:00 2001
From: Yaming Pei
Date: Tue, 4 Mar 2025 14:19:57 +0800
Subject: [PATCH 20/34] fix: plug csv file handle leaks

---
 tools/taos-tools/src/benchCsv.c | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/tools/taos-tools/src/benchCsv.c b/tools/taos-tools/src/benchCsv.c
index cf8527d375..848cd9a6ef 100644
--- a/tools/taos-tools/src/benchCsv.c
+++ b/tools/taos-tools/src/benchCsv.c
@@ -448,7 +448,7 @@ static int csvExportCreateSql(CsvWriteMeta* write_meta) {
         goto end;
     }
 
-    succPrint("Export create sql to file: %s successfully..\n", fullname);
+    succPrint("Export create sql to file: %s successfully.\n", fullname);
 
end:
     if (fp) {
@@ -827,6 +827,10 @@ static CsvIoError csvWrite(CsvFileHandle* fhdl, const char* buf, size_t size) {
 
 
 static void csvClose(CsvFileHandle* fhdl) {
+    if (!fhdl) {
+        return;
+    }
+
     if (fhdl->compress_level == CSV_COMPRESS_NONE) {
         if (fhdl->handle.fp) {
             fclose(fhdl->handle.fp);
@@ -838,6 +842,7 @@ static void csvClose(CsvFileHandle* fhdl) {
             fhdl->handle.gf = NULL;
         }
     }
+    tmfree(fhdl);
 }

From ecff4313186ecef32bc9a1e4404427025a6025e6 Mon Sep 17 00:00:00 2001
From: Yaming Pei
Date: Tue, 4 Mar 2025 16:10:49 +0800
Subject: [PATCH 21/34] build: add dependency library z

---
 tools/taos-tools/src/CMakeLists.txt | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/tools/taos-tools/src/CMakeLists.txt b/tools/taos-tools/src/CMakeLists.txt
index 1f0899db5c..4fa7fb0dc4 100644
--- a/tools/taos-tools/src/CMakeLists.txt
+++ b/tools/taos-tools/src/CMakeLists.txt
@@ -363,3 +363,5 @@ ELSE ()
         TARGET_LINK_LIBRARIES(taosBenchmark taos msvcregex pthread toolscJson ${WEBSOCKET_LINK_FLAGS})
     ENDIF ()
 
+
+target_link_libraries(taosBenchmark PRIVATE z)

From 0c55e8a0ad630c12ca3e6a4e373227e5a67b7d7a Mon Sep 17 00:00:00 2001
From: Yaming Pei
Date: Tue, 4 Mar 2025 16:21:55 +0800
Subject: [PATCH 22/34] build: remove the PRIVATE keyword from
 target_link_libraries

---
 tools/taos-tools/src/CMakeLists.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tools/taos-tools/src/CMakeLists.txt b/tools/taos-tools/src/CMakeLists.txt
index 4fa7fb0dc4..5bc2703165 100644
--- a/tools/taos-tools/src/CMakeLists.txt
+++ b/tools/taos-tools/src/CMakeLists.txt
@@ -364,4 +364,4 @@ ELSE ()
     ENDIF ()
 
-target_link_libraries(taosBenchmark PRIVATE z)
+target_link_libraries(taosBenchmark z)

From 47ded1b71de8334f0eae6f5211acb24fa50447fe Mon Sep 17 00:00:00 2001
From: Yaming Pei
Date: Wed, 5 Mar 2025 10:03:46 +0800
Subject: [PATCH 23/34] feat: support csv parameters at the super table level

---
 tools/taos-tools/inc/bench.h        |  32 ++--
 tools/taos-tools/src/benchCsv.c     | 265 +++++++++++++++-------------
 tools/taos-tools/src/benchJsonOpt.c | 118 ++++++-------
 3 files changed, 214 insertions(+), 201 deletions(-)

diff --git a/tools/taos-tools/inc/bench.h b/tools/taos-tools/inc/bench.h
index 30973170a3..c413d953b7 100644
--- a/tools/taos-tools/inc/bench.h
+++ b/tools/taos-tools/inc/bench.h
@@ -479,6 +479,13 @@ typedef struct SChildTable_S {
     int32_t pkCnt;
 } SChildTable;
 
+typedef enum {
+    CSV_COMPRESS_NONE    = 0,
+    CSV_COMPRESS_FAST    = 1,
+    CSV_COMPRESS_BALANCE = 6,
+    CSV_COMPRESS_BEST    = 9
+} CsvCompressionLevel;
+
 #define PRIMARY_KEY "PRIMARY KEY"
 typedef struct SSuperTable_S {
     char *stbName;
@@ -581,6 +588,15 @@ typedef struct SSuperTable_S {
 
     // execute sqls after create super table
     char **sqls;
+
+    char* csv_file_prefix;
+    char* csv_ts_format;
+    char* csv_ts_interval;
+    char* csv_tbname_alias;
+    long  csv_ts_intv_secs;
+    bool  csv_output_header;
+    CsvCompressionLevel csv_compress_level;
+
 } SSuperTable;
 
 typedef struct SDbCfg_S {
@@ -719,14 +735,6 @@ typedef struct STmqMetaInfo_S {
     uint16_t iface;
 } STmqMetaInfo;
 
-
-typedef enum {
-    CSV_COMPRESS_NONE    = 0,
-    CSV_COMPRESS_FAST    = 1,
-    CSV_COMPRESS_BALANCE = 6,
-    CSV_COMPRESS_BEST    = 9
-} CsvCompressionLevel;
-
 typedef struct SArguments_S {
     uint8_t  taosc_version;
     char *   metaFile;
@@ -791,14 +799,6 @@ typedef struct SArguments_S {
     char* output_path;
     char  output_path_buf[MAX_PATH_LEN];
-    char* csv_file_prefix;
-    char* csv_ts_format;
-    char* csv_ts_interval;
-    char* csv_tbname_alias;
-    long  csv_ts_intv_secs;
-    bool  csv_output_header;
-
-    CsvCompressionLevel csv_compress_level;
 } SArguments;
 
diff --git a/tools/taos-tools/src/benchCsv.c b/tools/taos-tools/src/benchCsv.c
index 848cd9a6ef..dd6ce3360a 100644
--- a/tools/taos-tools/src/benchCsv.c
+++ b/tools/taos-tools/src/benchCsv.c
@@ -31,6 +31,111 @@
 
 
+static int csvValidateParamTsFormat(const char* csv_ts_format) {
+    if (!csv_ts_format) return 0;
+
+    struct tm test_tm = {
+        .tm_year  = 70,
+        .tm_mon   = 0,
+        .tm_mday  = 1,
+        .tm_hour  = 0,
+        .tm_min   = 0,
+        .tm_sec   = 0,
+        .tm_isdst = -1
+    };
+    mktime(&test_tm);
+
+    char buffer[1024];
+    size_t len = strftime(buffer, sizeof(buffer), csv_ts_format, &test_tm);
+    if (len == 0) {
+        return -1;
+    }
+
+#ifdef _WIN32
+    const char* invalid_chars = "/\\:*?\"<>|";
+#else
+    const char* invalid_chars = "/\\?\"<>|";
+#endif
+    if (strpbrk(buffer, invalid_chars) != NULL) {
+        return -1;
+    }
+
+    return 0;
+}
+
+
+static long csvValidateParamTsInterval(const char* csv_ts_interval) {
+    if (!csv_ts_interval || *csv_ts_interval == '\0') return -1;
+
+    char* endptr;
+    errno = 0;
+    const long num = strtol(csv_ts_interval, &endptr, 10);
+
+    if (errno == ERANGE ||
+        endptr == csv_ts_interval ||
+        num <= 0) {
+        return -1;
+    }
+
+    if (*endptr == '\0' ||
+        *(endptr + 1) != '\0') {
+        return -1;
+    }
+
+    switch (tolower(*endptr)) {
+        case 's': return num;
+        case 'm': return num * 60;
+        case 'h': return num * 60 * 60;
+        case 'd': return num * 60 * 60 * 24;
+        default : return -1;
+    }
+}
+
+
+static int csvParseParameter() {
+    // csv_output_path
+    size_t len = strlen(g_arguments->output_path);
+    if (len == 0) {
+        errorPrint("Failed to generate csv files, the specified output path is empty. Please provide a valid path.\n");
+        return -1;
+    }
+    if (g_arguments->output_path[len - 1] != '/') {
+        int n = snprintf(g_arguments->output_path_buf, sizeof(g_arguments->output_path_buf), "%s/", g_arguments->output_path);
+        if (n < 0 || n >= sizeof(g_arguments->output_path_buf)) {
+            errorPrint("Failed to generate csv files, path buffer overflow risk when appending '/'. path: %s.\n",
+                    g_arguments->output_path);
+            return -1;
+        }
+        g_arguments->output_path = g_arguments->output_path_buf;
+    }
+
+    return 0;
+}
+
+
+static int csvParseStbParameter(SSuperTable* stb) {
+    // csv_ts_format
+    if (stb->csv_ts_format) {
+        if (csvValidateParamTsFormat(stb->csv_ts_format) != 0) {
+            errorPrint("Failed to generate csv files, the parameter `csv_ts_format` is invalid. csv_ts_format: %s.\n",
+                    stb->csv_ts_format);
+            return -1;
+        }
+    }
+
+    // csv_ts_interval
+    long csv_ts_intv_secs = csvValidateParamTsInterval(stb->csv_ts_interval);
+    if (csv_ts_intv_secs <= 0) {
+        errorPrint("Failed to generate csv files, the parameter `csv_ts_interval` is invalid. csv_ts_interval: %s.\n",
+                stb->csv_ts_interval);
+        return -1;
+    }
+    stb->csv_ts_intv_secs = csv_ts_intv_secs;
+
+    return 0;
+}
+
+
 static time_t csvGetStartSeconds(int precision, int64_t start_ts) {
     time_t start_seconds = 0;
 
@@ -45,7 +150,7 @@ static time_t csvGetStartSeconds(int precision, int64_t start_ts) {
 }
 
 
-static void csvConvertTime2String(time_t time_value, char* time_buf, size_t buf_size) {
+static void csvConvertTime2String(time_t time_value, char* ts_format, char* time_buf, size_t buf_size) {
     struct tm tm_result;
     char *old_locale = setlocale(LC_TIME, "C");
 #ifdef _WIN32
@@ -53,7 +158,7 @@ static void csvConvertTime2String(time_t time_value, char* time_buf, size_t buf_
 #else
     gmtime_r(&time_value, &tm_result);
 #endif
-    strftime(time_buf, buf_size, g_arguments->csv_ts_format, &tm_result);
+    strftime(time_buf, buf_size, ts_format, &tm_result);
     if (old_locale) {
         setlocale(LC_TIME, old_locale);
     }
@@ -63,13 +168,13 @@ static void csvConvertTime2String(time_t time_value, char* time_buf, size_t buf_
 
 static CsvNamingType csvGetFileNamingType(SSuperTable* stb) {
     if (stb->interlaceRows > 0) {
-        if (g_arguments->csv_ts_format) {
+        if (stb->csv_ts_format) {
             return CSV_NAMING_I_TIME_SLICE;
         } else {
             return CSV_NAMING_I_SINGLE;
         }
     } else {
-        if (g_arguments->csv_ts_format) {
+        if (stb->csv_ts_format) {
             return CSV_NAMING_B_THREAD_TIME_SLICE;
         } else {
             return CSV_NAMING_B_THREAD;
@@ -82,11 +187,11 @@ static void csvCalcTimestampStep(CsvWriteMeta* write_meta) {
     time_t ts_step = 0;
 
     if (write_meta->db->precision == TSDB_TIME_PRECISION_MICRO) {
-        ts_step = g_arguments->csv_ts_intv_secs * 1000000L;
+        ts_step = write_meta->stb->csv_ts_intv_secs * 1000000L;
     } else if (write_meta->db->precision == TSDB_TIME_PRECISION_NANO) {
-        ts_step = g_arguments->csv_ts_intv_secs * 1000000000L;
+        ts_step = write_meta->stb->csv_ts_intv_secs * 1000000000L;
     } else {
-        ts_step = g_arguments->csv_ts_intv_secs * 1000L;
+        ts_step = write_meta->stb->csv_ts_intv_secs * 1000L;
     }
     write_meta->ts_step = ts_step;
     return;
@@ -145,7 +250,7 @@ static int csvGenCsvHeader(CsvWriteMeta* write_meta) {
     int pos = 0;
     int size = sizeof(write_meta->csv_header);
 
-    if (!g_arguments->csv_output_header) {
+    if (!write_meta->stb->csv_output_header) {
         return 0;
     }
 
@@ -159,7 +264,7 @@ static int csvGenCsvHeader(CsvWriteMeta* write_meta) {
     }
 
     // tbname
-    pos += snprintf(buf + pos, size - pos, ",%s", g_arguments->csv_tbname_alias);
+    pos += snprintf(buf + pos, size - pos, ",%s", write_meta->stb->csv_tbname_alias);
 
     // tags
     for (size_t i = 0; i < stb->tags->size; ++i) {
@@ -479,23 +584,23 @@ static int csvInitWriteMeta(SDataBase* db, SSuperTable* stb, CsvWriteMeta* write
 
     switch (write_meta->naming_type) {
         case CSV_NAMING_I_SINGLE: {
-            (void)snprintf(write_meta->mode, sizeof(write_meta->mode), "interlace|no-time-slice");
+            (void)snprintf(write_meta->mode, sizeof(write_meta->mode), "interlace::normal");
             break;
         }
         case CSV_NAMING_I_TIME_SLICE: {
-            (void)snprintf(write_meta->mode, sizeof(write_meta->mode), "interlace|time-slice");
+            (void)snprintf(write_meta->mode, sizeof(write_meta->mode), "interlace::time-slice");
             csvCalcTimestampStep(write_meta);
             break;
         }
         case CSV_NAMING_B_THREAD: {
             write_meta->total_threads = MIN(g_arguments->nthreads, stb->childTblCount);
-            (void)snprintf(write_meta->mode, sizeof(write_meta->mode), "batch[%zu]|no-time-slice", write_meta->total_threads);
+            (void)snprintf(write_meta->mode, sizeof(write_meta->mode), "batch[%zu]::normal", write_meta->total_threads);
             csvGenThreadFormatter(write_meta);
             break;
         }
         case CSV_NAMING_B_THREAD_TIME_SLICE: {
             write_meta->total_threads = MIN(g_arguments->nthreads, stb->childTblCount);
-            (void)snprintf(write_meta->mode, sizeof(write_meta->mode), "batch[%zu]|time-slice", write_meta->total_threads);
+            (void)snprintf(write_meta->mode, sizeof(write_meta->mode), "batch[%zu]::time-slice", write_meta->total_threads);
             csvGenThreadFormatter(write_meta);
             csvCalcTimestampStep(write_meta);
             break;
@@ -535,7 +640,7 @@ static void csvInitThreadMeta(CsvWriteMeta* write_meta, uint32_t thread_id, CsvT
         case CSV_NAMING_I_TIME_SLICE:
         case CSV_NAMING_B_THREAD_TIME_SLICE: {
             thread_meta->start_secs = csvGetStartSeconds(db->precision, stb->startTimestamp);
-            thread_meta->end_secs   = thread_meta->start_secs + g_arguments->csv_ts_intv_secs;
+            thread_meta->end_secs   = thread_meta->start_secs + write_meta->stb->csv_ts_intv_secs;
             break;
         }
         default: {
@@ -558,7 +663,7 @@ static void csvUpdateSliceRange(CsvWriteMeta* write_meta, CsvThreadMeta* thread_
         case CSV_NAMING_I_TIME_SLICE:
         case CSV_NAMING_B_THREAD_TIME_SLICE: {
             thread_meta->start_secs = csvGetStartSeconds(db->precision, last_end_ts);
-            thread_meta->end_secs   = thread_meta->start_secs + g_arguments->csv_ts_intv_secs;
+            thread_meta->end_secs   = thread_meta->start_secs + write_meta->stb->csv_ts_intv_secs;
             break;
         }
         default: {
@@ -570,8 +675,8 @@ static void csvUpdateSliceRange(CsvWriteMeta* write_meta, CsvThreadMeta* thread_
 }
 
 
-static const char* csvGetGzipFilePrefix() {
-    if (g_arguments->csv_compress_level == CSV_COMPRESS_NONE) {
+static const char* csvGetGzipFilePrefix(CsvCompressionLevel csv_compress_level) {
+    if (csv_compress_level == CSV_COMPRESS_NONE) {
         return "";
     } else {
         return ".gz";
@@ -585,8 +690,8 @@ static int csvGetFileFullname(CsvWriteMeta* write_meta, CsvThreadMeta* thread_me
     int ret = -1;
     const char* base_path   = g_arguments->output_path;
-    const char* file_prefix = g_arguments->csv_file_prefix;
-    const char* gzip_suffix = csvGetGzipFilePrefix();
+    const char* file_prefix = write_meta->stb->csv_file_prefix;
+    const char* gzip_suffix = csvGetGzipFilePrefix(write_meta->stb->csv_compress_level);
 
     switch (write_meta->naming_type) {
         case CSV_NAMING_I_SINGLE: {
@@ -594,8 +699,8 @@ static int csvGetFileFullname(CsvWriteMeta* write_meta, CsvThreadMeta* thread_me
         case CSV_NAMING_I_TIME_SLICE: {
-            csvConvertTime2String(thread_meta->start_secs, start_time_buf, sizeof(start_time_buf));
-            csvConvertTime2String(thread_meta->end_secs, end_time_buf, sizeof(end_time_buf));
+            csvConvertTime2String(thread_meta->start_secs, write_meta->stb->csv_ts_format, start_time_buf, sizeof(start_time_buf));
+            csvConvertTime2String(thread_meta->end_secs, write_meta->stb->csv_ts_format, end_time_buf, sizeof(end_time_buf));
             ret = snprintf(fullname, size, "%s%s_%s_%s.csv%s", base_path, file_prefix, start_time_buf, end_time_buf, gzip_suffix);
             break;
         }
@@ -606,8 +711,8 @@ static int csvGetFileFullname(CsvWriteMeta* write_meta, CsvThreadMeta* thread_me
         case CSV_NAMING_B_THREAD_TIME_SLICE: {
             (void)snprintf(thread_buf, sizeof(thread_buf), write_meta->thread_formatter, thread_meta->thread_id);
-            csvConvertTime2String(thread_meta->start_secs, start_time_buf, sizeof(start_time_buf));
-            csvConvertTime2String(thread_meta->end_secs, end_time_buf, sizeof(end_time_buf));
+            csvConvertTime2String(thread_meta->start_secs, write_meta->stb->csv_ts_format, start_time_buf, sizeof(start_time_buf));
+            csvConvertTime2String(thread_meta->end_secs, write_meta->stb->csv_ts_format, end_time_buf, sizeof(end_time_buf));
             ret = snprintf(fullname, size, "%s%s_%s_%s_%s.csv%s", base_path, file_prefix, thread_buf, start_time_buf, end_time_buf, gzip_suffix);
             break;
         }
@@ -968,7 +1073,7 @@ static void* csvGenStbThread(void* arg) {
         }
 
         // create fd
-        fhdl = csvOpen(fullname, g_arguments->csv_compress_level);
+        fhdl = csvOpen(fullname, stb->csv_compress_level);
         if (fhdl == NULL) {
             errorPrint("Failed to create csv file. thread index: %zu, file: %s, errno: %d, strerror: %s.\n",
                     thread_meta->thread_id, fullname, errno, strerror(errno));
             goto end;
         }
 
-        thread_meta->output_header = g_arguments->csv_output_header;
+        thread_meta->output_header = stb->csv_output_header;
         slice_cur_ts = cur_ts;
         slice_end_ts = MIN(cur_ts + write_meta->ts_step, write_meta->end_ts);
         file_rows    = 0;
@@ -1148,106 +1253,6 @@ static int csvGenStb(SDataBase* db, SSuperTable* stb) {
 }
 
 
-static int csvValidateParamTsFormat(const char* csv_ts_format) {
-    if (!csv_ts_format) return 0;
-
-    struct tm test_tm = {
-        .tm_year  = 70,
-        .tm_mon   = 0,
-        .tm_mday  = 1,
-        .tm_hour  = 0,
-        .tm_min   = 0,
-        .tm_sec   = 0,
-        .tm_isdst = -1
-    };
-    mktime(&test_tm);
-
-    char buffer[1024];
-    size_t len = strftime(buffer, sizeof(buffer), csv_ts_format, &test_tm);
-    if (len == 0) {
-        return -1;
-    }
-
-#ifdef _WIN32
-    const char* invalid_chars = "/\\:*?\"<>|";
-#else
-    const char* invalid_chars = "/\\?\"<>|";
-#endif
-    if (strpbrk(buffer, invalid_chars) != NULL) {
-        return -1;
-    }
-
-    return 0;
-}
-
-
-static long csvValidateParamTsInterval(const char* csv_ts_interval) {
-    if (!csv_ts_interval || *csv_ts_interval == '\0') return -1;
-
-    char* endptr;
-    errno = 0;
-    const long num = strtol(csv_ts_interval, &endptr, 10);
-
-    if (errno == ERANGE ||
-        endptr == csv_ts_interval ||
-        num <= 0) {
-        return -1;
-    }
-
-    if (*endptr == '\0' ||
-        *(endptr + 1) != '\0') {
-        return -1;
-    }
-
-    switch (tolower(*endptr)) {
-        case 's': return num;
-        case 'm': return num * 60;
-        case 'h': return num * 60 * 60;
-        case 'd': return num * 60 * 60 * 24;
-        default : return -1;
-    }
-}
-
-
-static int csvParseParameter() {
-    // csv_output_path
-    size_t len = strlen(g_arguments->output_path);
-    if (len == 0) {
-        errorPrint("Failed to generate csv files, the specified output path is empty. Please provide a valid path.\n");
-        return -1;
-    }
-    if (g_arguments->output_path[len - 1] != '/') {
-        int n = snprintf(g_arguments->output_path_buf, sizeof(g_arguments->output_path_buf), "%s/", g_arguments->output_path);
-        if (n < 0 || n >= sizeof(g_arguments->output_path_buf)) {
-            errorPrint("Failed to generate csv files, path buffer overflow risk when appending '/'. path: %s.\n",
-                    g_arguments->output_path);
-            return -1;
-        }
-        g_arguments->output_path = g_arguments->output_path_buf;
-    }
-
-    // csv_ts_format
-    if (g_arguments->csv_ts_format) {
-        if (csvValidateParamTsFormat(g_arguments->csv_ts_format) != 0) {
-            errorPrint("Failed to generate csv files, the parameter `csv_ts_format` is invalid. csv_ts_format: %s.\n",
-                    g_arguments->csv_ts_format);
-            return -1;
-        }
-    }
-
-    // csv_ts_interval
-    long csv_ts_intv_secs = csvValidateParamTsInterval(g_arguments->csv_ts_interval);
-    if (csv_ts_intv_secs <= 0) {
-        errorPrint("Failed to generate csv files, the parameter `csv_ts_interval` is invalid. csv_ts_interval: %s.\n",
csv_ts_interval: %s.\n",
-                g_arguments->csv_ts_interval);
-        return -1;
-    }
-    g_arguments->csv_ts_intv_secs = csv_ts_intv_secs;
-
-    return 0;
-}
-
-
 static int csvWriteThread() {
     for (size_t i = 0; i < g_arguments->databases->size && !g_arguments->terminate; ++i) {
         // database
@@ -1260,8 +1265,16 @@ static int csvWriteThread() {
             continue;
         }
 
+        // parse parameters
+        int ret = csvParseStbParameter(stb);
+        if (ret != 0) {
+            errorPrint("Failed to parse csv parameters. database: %s, super table: %s, error code: %d.\n",
+                    db->dbName, stb->stbName, ret);
+            return -1;
+        }
+
         // gen csv
-        int ret = csvGenStb(db, stb);
+        ret = csvGenStb(db, stb);
         if(ret != 0) {
             errorPrint("Failed to generate csv files. database: %s, super table: %s, error code: %d.\n",
                     db->dbName, stb->stbName, ret);
diff --git a/tools/taos-tools/src/benchJsonOpt.c b/tools/taos-tools/src/benchJsonOpt.c
index 9bc8527130..83edc5c6ef 100644
--- a/tools/taos-tools/src/benchJsonOpt.c
+++ b/tools/taos-tools/src/benchJsonOpt.c
@@ -1405,6 +1405,65 @@ static int getStableInfo(tools_cJSON *dbinfos, int index) {
             }
         }
     }
+
+    // csv file prefix
+    tools_cJSON* csv_fp = tools_cJSON_GetObjectItem(stbInfo, "csv_file_prefix");
+    if (csv_fp && csv_fp->type == tools_cJSON_String && csv_fp->valuestring != NULL) {
+        superTable->csv_file_prefix = csv_fp->valuestring;
+    } else {
+        superTable->csv_file_prefix = "data";
+    }
+
+    // csv timestamp format
+    tools_cJSON* csv_tf = tools_cJSON_GetObjectItem(stbInfo, "csv_ts_format");
+    if (csv_tf && csv_tf->type == tools_cJSON_String && csv_tf->valuestring != NULL) {
+        superTable->csv_ts_format = csv_tf->valuestring;
+    } else {
+        superTable->csv_ts_format = NULL;
+    }
+
+    // csv timestamp interval
+    tools_cJSON* csv_ti = tools_cJSON_GetObjectItem(stbInfo, "csv_ts_interval");
+    if (csv_ti && csv_ti->type == tools_cJSON_String && csv_ti->valuestring != NULL) {
+        superTable->csv_ts_interval = csv_ti->valuestring;
+    } else {
+        superTable->csv_ts_interval = "1d";
+    }
+
+    // csv output header
+    superTable->csv_output_header = true;
+    tools_cJSON* oph = tools_cJSON_GetObjectItem(stbInfo, "csv_output_header");
+    if (oph && oph->type == tools_cJSON_String && oph->valuestring != NULL) {
+        if (0 == strcasecmp(oph->valuestring, "yes") || 0 == strcasecmp(oph->valuestring, "true")) {
+            superTable->csv_output_header = true;
+        } else if (0 == strcasecmp(oph->valuestring, "no") || 0 == strcasecmp(oph->valuestring, "false")) {
+            superTable->csv_output_header = false;
+        }
+    }
+
+    // csv tbname alias
+    tools_cJSON* tba = tools_cJSON_GetObjectItem(stbInfo, "csv_tbname_alias");
+    if (tba && tba->type == tools_cJSON_String && tba->valuestring != NULL) {
+        superTable->csv_tbname_alias = tba->valuestring;
+    } else {
+        superTable->csv_tbname_alias = "device_id";
+    }
+
+    // csv compression level
+    tools_cJSON* cl = tools_cJSON_GetObjectItem(stbInfo, "csv_compress_level");
+    if (cl && cl->type == tools_cJSON_String && cl->valuestring != NULL) {
+        if (0 == strcasecmp(cl->valuestring, "none")) {
+            superTable->csv_compress_level = CSV_COMPRESS_NONE;
+        } else if (0 == strcasecmp(cl->valuestring, "fast")) {
+            superTable->csv_compress_level = CSV_COMPRESS_FAST;
+        } else if (0 == strcasecmp(cl->valuestring, "balance")) {
+            superTable->csv_compress_level = CSV_COMPRESS_BALANCE;
+        } else if (0 == strcasecmp(cl->valuestring, "best")) {
+            superTable->csv_compress_level = CSV_COMPRESS_BEST;
+        }
+    } else {
+        superTable->csv_compress_level = CSV_COMPRESS_NONE;
+    }
     }
     return 0;
 }
@@ -1595,65 +1654,6 @@ static int getMetaFromCommonJsonFile(tools_cJSON *json) {
(void)mkdir(g_arguments->output_path, 0775); - // csv file prefix - tools_cJSON* csv_fp = tools_cJSON_GetObjectItem(json, "csv_file_prefix"); - if (csv_fp && csv_fp->type == tools_cJSON_String && csv_fp->valuestring != NULL) { - g_arguments->csv_file_prefix = csv_fp->valuestring; - } else { - g_arguments->csv_file_prefix = "data"; - } - - // csv timestamp format - tools_cJSON* csv_tf = tools_cJSON_GetObjectItem(json, "csv_ts_format"); - if (csv_tf && csv_tf->type == tools_cJSON_String && csv_tf->valuestring != NULL) { - g_arguments->csv_ts_format = csv_tf->valuestring; - } else { - g_arguments->csv_ts_format = NULL; - } - - // csv timestamp format - tools_cJSON* csv_ti = tools_cJSON_GetObjectItem(json, "csv_ts_interval"); - if (csv_ti && csv_ti->type == tools_cJSON_String && csv_ti->valuestring != NULL) { - g_arguments->csv_ts_interval = csv_ti->valuestring; - } else { - g_arguments->csv_ts_interval = "1d"; - } - - // csv output header - g_arguments->csv_output_header = true; - tools_cJSON* oph = tools_cJSON_GetObjectItem(json, "csv_output_header"); - if (oph && oph->type == tools_cJSON_String && oph->valuestring != NULL) { - if (0 == strcasecmp(oph->valuestring, "yes") || 0 == strcasecmp(oph->valuestring, "true")) { - g_arguments->csv_output_header = true; - } else if (0 == strcasecmp(oph->valuestring, "no") || 0 == strcasecmp(oph->valuestring, "false")) { - g_arguments->csv_output_header = false; - } - } - - // csv tbname alias - tools_cJSON* tba = tools_cJSON_GetObjectItem(json, "csv_tbname_alias"); - if (tba && tba->type == tools_cJSON_String && tba->valuestring != NULL) { - g_arguments->csv_tbname_alias = tba->valuestring; - } else { - g_arguments->csv_tbname_alias = "device_id"; - } - - // csv compression level - tools_cJSON* cl = tools_cJSON_GetObjectItem(json, "csv_compress_level"); - if (cl && cl->type == tools_cJSON_String && cl->valuestring != NULL) { - if (0 == strcasecmp(cl->valuestring, "none")) { - g_arguments->csv_compress_level = CSV_COMPRESS_NONE; - } else if (0 == strcasecmp(cl->valuestring, "fast")) { - g_arguments->csv_compress_level = CSV_COMPRESS_FAST; - } else if (0 == strcasecmp(cl->valuestring, "balance")) { - g_arguments->csv_compress_level = CSV_COMPRESS_BALANCE; - } else if (0 == strcasecmp(cl->valuestring, "best")) { - g_arguments->csv_compress_level = CSV_COMPRESS_BEST; - } - } else { - g_arguments->csv_compress_level = CSV_COMPRESS_NONE; - } - code = 0; return code; } From 1b2afe31edeb61051e7a58f510c29feb5d9a3e82 Mon Sep 17 00:00:00 2001 From: Yaming Pei Date: Wed, 5 Mar 2025 18:15:02 +0800 Subject: [PATCH 24/34] fix: fix bug in time slice window calculation --- tools/taos-tools/inc/benchCsv.h | 2 + tools/taos-tools/src/benchCsv.c | 89 ++++++++++++++++++++++++++------- 2 files changed, 74 insertions(+), 17 deletions(-) diff --git a/tools/taos-tools/inc/benchCsv.h b/tools/taos-tools/inc/benchCsv.h index e80f73bcda..624bcadedc 100644 --- a/tools/taos-tools/inc/benchCsv.h +++ b/tools/taos-tools/inc/benchCsv.h @@ -76,6 +76,8 @@ typedef struct { uint64_t total_rows; time_t start_secs; time_t end_secs; + int64_t start_ts; + int64_t end_ts; size_t thread_id; bool output_header; int tags_buf_size; diff --git a/tools/taos-tools/src/benchCsv.c b/tools/taos-tools/src/benchCsv.c index dd6ce3360a..b498214468 100644 --- a/tools/taos-tools/src/benchCsv.c +++ b/tools/taos-tools/src/benchCsv.c @@ -136,7 +136,43 @@ static int csvParseStbParameter(SSuperTable* stb) { } -static time_t csvGetStartSeconds(int precision, int64_t start_ts) { +static time_t csvAlignTimestamp(time_t 
seconds, const char* ts_format) { + struct tm aligned_tm; +#ifdef _WIN32 + localtime_s(&aligned_tm, &seconds); +#else + localtime_r(&seconds, &aligned_tm); +#endif + + int has_Y = 0, has_m = 0, has_d = 0, has_H = 0, has_M = 0, has_S = 0; + const char* p = ts_format; + while (*p) { + if (*p == '%') { + p++; + switch (*p) { + case 'Y': has_Y = 1; break; + case 'm': has_m = 1; break; + case 'd': has_d = 1; break; + case 'H': has_H = 1; break; + case 'M': has_M = 1; break; + case 'S': has_S = 1; break; + } + } + p++; + } + + if (!has_S) aligned_tm.tm_sec = 0; + if (!has_M) aligned_tm.tm_min = 0; + if (!has_H) aligned_tm.tm_hour = 0; + if (!has_d) aligned_tm.tm_mday = 1; + if (!has_m) aligned_tm.tm_mon = 0; + if (!has_Y) aligned_tm.tm_year = 0; + + return mktime(&aligned_tm); +} + + +static time_t csvGetStartSeconds(int precision, int64_t start_ts, const char* csv_ts_format) { time_t start_seconds = 0; if (precision == TSDB_TIME_PRECISION_MICRO) { @@ -146,17 +182,17 @@ static time_t csvGetStartSeconds(int precision, int64_t start_ts) { } else { start_seconds = start_ts / 1000L; } - return start_seconds; + return csvAlignTimestamp(start_seconds, csv_ts_format); } static void csvConvertTime2String(time_t time_value, char* ts_format, char* time_buf, size_t buf_size) { struct tm tm_result; - char *old_locale = setlocale(LC_TIME, "C"); + char* old_locale = setlocale(LC_TIME, "C"); #ifdef _WIN32 - gmtime_s(&tm_result, &time_value); + localtime_s(&tm_result, &time_value); #else - gmtime_r(&time_value, &tm_result); + localtime_r(&time_value, &tm_result); #endif strftime(time_buf, buf_size, ts_format, &tm_result); if (old_locale) { @@ -183,17 +219,29 @@ static CsvNamingType csvGetFileNamingType(SSuperTable* stb) { } -static void csvCalcTimestampStep(CsvWriteMeta* write_meta) { - time_t ts_step = 0; +static time_t csvCalcTimestampFromSeconds(int precision, time_t secs) { + time_t ts = 0; - if (write_meta->db->precision == TSDB_TIME_PRECISION_MICRO) { - ts_step = write_meta->stb->csv_ts_intv_secs * 1000000L; - } else if (write_meta->db->precision == TSDB_TIME_PRECISION_NANO) { - ts_step = write_meta->stb->csv_ts_intv_secs * 1000000000L; + if (precision == TSDB_TIME_PRECISION_MICRO) { + ts = secs * 1000000L; + } else if (precision == TSDB_TIME_PRECISION_NANO) { + ts = secs * 1000000000L; } else { - ts_step = write_meta->stb->csv_ts_intv_secs * 1000L; + ts = secs * 1000L; } - write_meta->ts_step = ts_step; + return ts; +} + + +static void csvCalcTimestampStep(CsvWriteMeta* write_meta) { + write_meta->ts_step = csvCalcTimestampFromSeconds(write_meta->db->precision, write_meta->stb->csv_ts_intv_secs); + return; +} + + +static void csvCalcSliceTimestamp(CsvWriteMeta* write_meta, CsvThreadMeta* thread_meta) { + thread_meta->start_ts = csvCalcTimestampFromSeconds(write_meta->db->precision, thread_meta->start_secs); + thread_meta->end_ts = csvCalcTimestampFromSeconds(write_meta->db->precision, thread_meta->end_secs); return; } @@ -624,6 +672,8 @@ static void csvInitThreadMeta(CsvWriteMeta* write_meta, uint32_t thread_id, CsvT thread_meta->ctb_count = 0; thread_meta->start_secs = 0; thread_meta->end_secs = 0; + thread_meta->start_ts = write_meta->start_ts; + thread_meta->end_ts = write_meta->end_ts; thread_meta->thread_id = thread_id; thread_meta->output_header = false; thread_meta->tags_buf_size = 0; @@ -639,8 +689,9 @@ static void csvInitThreadMeta(CsvWriteMeta* write_meta, uint32_t thread_id, CsvT } case CSV_NAMING_I_TIME_SLICE: case CSV_NAMING_B_THREAD_TIME_SLICE: { - thread_meta->start_secs = 
csvGetStartSeconds(db->precision, stb->startTimestamp);
+            thread_meta->start_secs = csvGetStartSeconds(db->precision, stb->startTimestamp, stb->csv_ts_format);
             thread_meta->end_secs = thread_meta->start_secs + write_meta->stb->csv_ts_intv_secs;
+            csvCalcSliceTimestamp(write_meta, thread_meta);
             break;
         }
         default: {
@@ -654,6 +705,7 @@ static void csvInitThreadMeta(CsvWriteMeta* write_meta, uint32_t thread_id, CsvT
 
 static void csvUpdateSliceRange(CsvWriteMeta* write_meta, CsvThreadMeta* thread_meta, int64_t last_end_ts) {
     SDataBase* db = write_meta->db;
+    SSuperTable* stb = write_meta->stb;
 
     switch (write_meta->naming_type) {
         case CSV_NAMING_I_SINGLE:
@@ -662,8 +714,9 @@ static void csvUpdateSliceRange(CsvWriteMeta* write_meta, CsvThreadMeta* thread_
         }
         case CSV_NAMING_I_TIME_SLICE:
         case CSV_NAMING_B_THREAD_TIME_SLICE: {
-            thread_meta->start_secs = csvGetStartSeconds(db->precision, last_end_ts);
+            thread_meta->start_secs = csvGetStartSeconds(db->precision, last_end_ts, stb->csv_ts_format);
             thread_meta->end_secs = thread_meta->start_secs + write_meta->stb->csv_ts_intv_secs;
+            csvCalcSliceTimestamp(write_meta, thread_meta);
             break;
         }
         default: {
@@ -1063,7 +1116,8 @@ static void* csvGenStbThread(void* arg) {
     thread_meta->cols_buf = &cols_buf;
 
     start_print_ts = toolsGetTimestampMs();
-    for (cur_ts = write_meta->start_ts; cur_ts < write_meta->end_ts; cur_ts += write_meta->ts_step) {
+    cur_ts = write_meta->start_ts;
+    while (cur_ts < write_meta->end_ts) {
         // get filename
         ret = csvGetFileFullname(write_meta, thread_meta, fullname, sizeof(fullname));
         if (ret < 0) {
@@ -1083,7 +1137,7 @@ static void* csvGenStbThread(void* arg) {
 
         thread_meta->output_header = stb->csv_output_header;
         slice_cur_ts = cur_ts;
-        slice_end_ts = MIN(cur_ts + write_meta->ts_step, write_meta->end_ts);
+        slice_end_ts = MIN(thread_meta->end_ts, write_meta->end_ts);
         file_rows = 0;
         pre_print_ts = toolsGetTimestampMs();
 
@@ -1129,6 +1183,7 @@ static void* csvGenStbThread(void* arg) {
         }
 
         csvClose(fhdl);
+        cur_ts = thread_meta->end_ts;
         csvUpdateSliceRange(write_meta, thread_meta, slice_end_ts);
     }
 
From 59ef7cd3be5e1b07eefa241c2698786f7fd02b54 Mon Sep 17 00:00:00 2001
From: Yaming Pei
Date: Thu, 6 Mar 2025 09:01:02 +0800
Subject: [PATCH 25/34] test: add csv interlace case

---
 tests/army/tools/benchmark/basic/exportCsv.py | 193 ++++++++++++++----
 1 file changed, 153 insertions(+), 40 deletions(-)

diff --git a/tests/army/tools/benchmark/basic/exportCsv.py b/tests/army/tools/benchmark/basic/exportCsv.py
index b8b3828ea6..fececcf7f2 100644
--- a/tests/army/tools/benchmark/basic/exportCsv.py
+++ b/tests/army/tools/benchmark/basic/exportCsv.py
@@ -13,6 +13,7 @@
 import os
 import json
 import csv
+import datetime
+import shutil
 import frame
 import frame.etool
@@ -26,80 +27,192 @@ from frame import *
 class TDTestCase(TBase):
     def caseDescription(self):
         """
-        [TD-11510] taosBenchmark test cases
+        [TS-5089] taosBenchmark support exporting csv
         """
+
+
+    def clear_directory(self, target_dir: str = 'csv'):
+        try:
+            if not os.path.exists(target_dir):
+                return
+            for entry in os.listdir(target_dir):
+                entry_path = os.path.join(target_dir, entry)
+                if os.path.isfile(entry_path) or os.path.islink(entry_path):
+                    os.unlink(entry_path)
+                else:
+                    shutil.rmtree(entry_path)
+
+            tdLog.debug("clear succ, dir: %s " % (target_dir))
+        except OSError as e:
+            tdLog.exit("clear fail, dir: %s " % (target_dir))
+
+
+    def 
convert_timestamp(self, ts, ts_format):
+        dt_object = datetime.datetime.fromtimestamp(ts / 1000)
+        formatted_time = dt_object.strftime(ts_format)
+        return formatted_time
+
+
+    def calc_time_slice_partitions(self, total_start_ts, total_end_ts, ts_step, childs, ts_format, ts_interval):
+        interval_days = int(ts_interval[:-1])
+        n_days_millis = interval_days * 24 * 60 * 60 * 1000
+
+        dt_start = datetime.datetime.fromtimestamp(total_start_ts / 1000.0)
+        formatted_str = dt_start.strftime(ts_format)
+        s0_dt = datetime.datetime.strptime(formatted_str, ts_format)
+        s0 = int(s0_dt.timestamp() * 1000)
+
+        partitions = []
+        current_s = s0
+
+        while current_s <= total_end_ts:
+            current_end = current_s + n_days_millis
+            start_actual = max(current_s, total_start_ts)
+            end_actual = min(current_end, total_end_ts)
+
+            if start_actual >= end_actual:
+                count = 0
+            else:
+                delta = end_actual - start_actual
+                if delta % ts_step:
+                    count = delta // ts_step + 1
+                else:
+                    count = delta // ts_step
+
+                count *= childs
+
+            partitions.append({
+                "start_ts": current_s,
+                "end_ts": current_end,
+                "start_time": self.convert_timestamp(current_s, ts_format),
+                "end_time": self.convert_timestamp(current_end, ts_format),
+                "count": count
+            })
+
+            current_s += n_days_millis
+
+        # partitions = [p for p in partitions if p['count'] > 0]
+        return partitions
+
+
+    def check_stb_csv_correct(self, csv_file_name, all_rows, interlace_rows):
+        # open as csv
+        tbname_idx = 14
+        count = 0
+        batch = 0
+        name = ""
+        header = True
+        with open(csv_file_name) as file:
+            rows = csv.reader(file)
+            for row in rows:
+                if header:
+                    header = False
+                    continue
+
+                # interlace_rows
+                if name == "":
+                    name = row[tbname_idx]
+                    batch = 1
+                else:
+                    if name == row[tbname_idx]:
+                        batch += 1
+                    else:
+                        # switch to another child table
+                        if batch != interlace_rows:
+                            tdLog.exit(f"interlace_rows invalid. tbName={name} actual={batch} expected={interlace_rows} i={count} csv_file_name={csv_file_name}")
+                        batch = 1
+                        name = row[tbname_idx]
+                # count ++
+                count += 1
+        # batch
+        if batch != interlace_rows:
+            tdLog.exit(f"interlace_rows invalid. tbName={name} actual={batch} expected={interlace_rows} i={count} csv_file_name={csv_file_name}")
+
+        # check all rows
+        if count != all_rows:
+            tdLog.exit(f"all_rows invalid. actual={count} expected={all_rows} csv_file_name={csv_file_name}")
+
+        tdLog.info(f"Check generate csv file successfully. 
csv_file_name={csv_file_name} count={count} interlace_rows={batch}") + + + # check correct + def check_stb_correct(self, data, db, stb): + filepath = data["output_path"] + stbName = stb["name"] + childs = stb["childtable_to"] - stb["childtable_from"] + insert_rows = stb["insert_rows"] + interlace_rows = stb["interlace_rows"] + csv_file_prefix = stb["csv_file_prefix"] + csv_ts_format = stb.get("csv_ts_format", None) + csv_ts_interval = stb.get("csv_ts_interval", None) + + ts_step = stb["timestamp_step"] + total_start_ts = stb["start_timestamp"] + total_end_ts = total_start_ts + ts_step * insert_rows + + + all_rows = childs * insert_rows + if interlace_rows > 0: + # interlace + + if not csv_ts_format: + # normal + csv_file_name = f"{filepath}{csv_file_prefix}.csv" + self.check_stb_csv_correct(csv_file_name, all_rows, interlace_rows) + else: + # time slice + partitions = self.calc_time_slice_partitions(total_start_ts, total_end_ts, ts_step, childs, csv_ts_format, csv_ts_interval) + for part in partitions: + csv_file_name = f"{filepath}{csv_file_prefix}_{part['start_time']}_{part['end_time']}.csv" + self.check_stb_csv_correct(csv_file_name, part['count'], interlace_rows) + else: + # batch + interlace_rows = insert_rows + if not csv_ts_format: + # normal + pass + else: + # time slice + pass + # check result - def checResult(self, jsonFile): + def check_result(self, jsonFile): # csv with open(jsonFile) as file: data = json.load(file) - # read json + # read json database = data["databases"][0] - out = data["csvPath"] - dbName = database["dbinfo"]["name"] stables = database["super_tables"] - for stable in stables: - stbName = stable["name"] - childs = stable["childtable_count"] - insertRows = stable["insert_rows"] - interlaceRows = stable["interlace_rows"] - csvFile = f"{out}{dbName}-{stbName}.csv" - rows = childs * insertRows - if interlaceRows == 0: - interlaceRows = insertRows - # check csv context correct - self.checkCorrect(csvFile, rows, interlaceRows) - def checkExportCsv(self, benchmark, jsonFile, options=""): + for stable in stables: + # check csv context correct + self.check_stb_correct(data, database, stable) + + + def check_export_csv(self, benchmark, jsonFile, options=""): + # clear + self.clear_directory() + # exec cmd = f"{benchmark} {options} -f {jsonFile}" os.system(cmd) # check result - self.checResult(jsonFile) + self.check_result(jsonFile) + def run(self): # path benchmark = etool.benchMarkFile() - # do check - json = "tools/benchmark/basic/json/exportCsv.json" - self.checkExportCsv(benchmark, json) + # do check interlace normal + json = "tools/benchmark/basic/json/csv-interlace-normal.json" + self.check_export_csv(benchmark, json) def stop(self): tdSql.close() From 2c29c1a3fd4aa60389063a04b6d154361e607c43 Mon Sep 17 00:00:00 2001 From: Yaming Pei Date: Thu, 6 Mar 2025 09:55:57 +0800 Subject: [PATCH 26/34] test: add csv exporting batch case --- .../basic/{exportCsv.py => csv-export.py} | 37 +++++++++++++------ tests/parallel_test/cases.task | 3 +- 2 files changed, 27 insertions(+), 13 deletions(-) rename tests/army/tools/benchmark/basic/{exportCsv.py => csv-export.py} (82%) diff --git a/tests/army/tools/benchmark/basic/exportCsv.py b/tests/army/tools/benchmark/basic/csv-export.py similarity index 82% rename from tests/army/tools/benchmark/basic/exportCsv.py rename to tests/army/tools/benchmark/basic/csv-export.py index fececcf7f2..6b71d2c238 100644 --- a/tests/army/tools/benchmark/basic/exportCsv.py +++ b/tests/army/tools/benchmark/basic/csv-export.py @@ -53,7 +53,7 @@ class 
TDTestCase(TBase): return formatted_time - def calc_time_slice_partitions(self, total_start_ts, total_end_ts, ts_step, childs, ts_format, ts_interval): + def calc_time_slice_partitions(self, total_start_ts, total_end_ts, ts_step, ts_format, ts_interval): interval_days = int(ts_interval[:-1]) n_days_millis = interval_days * 24 * 60 * 60 * 1000 @@ -82,8 +82,6 @@ class TDTestCase(TBase): else: count = delta // ts_step - count *= childs - partitions.append({ "start_ts": current_s, "end_ts": current_end, @@ -142,7 +140,7 @@ class TDTestCase(TBase): def check_stb_correct(self, data, db, stb): filepath = data["output_path"] stbName = stb["name"] - childs = stb["childtable_to"] - stb["childtable_from"] + child_count = stb["childtable_to"] - stb["childtable_from"] insert_rows = stb["insert_rows"] interlace_rows = stb["interlace_rows"] csv_file_prefix = stb["csv_file_prefix"] @@ -154,7 +152,7 @@ class TDTestCase(TBase): total_end_ts = total_start_ts + ts_step * insert_rows - all_rows = childs * insert_rows + all_rows = child_count * insert_rows if interlace_rows > 0: # interlace @@ -164,20 +162,35 @@ class TDTestCase(TBase): self.check_stb_csv_correct(csv_file_name, all_rows, interlace_rows) else: # time slice - partitions = self.calc_time_slice_partitions(total_start_ts, total_end_ts, ts_step, childs, csv_ts_format, csv_ts_interval) + partitions = self.calc_time_slice_partitions(total_start_ts, total_end_ts, ts_step, csv_ts_format, csv_ts_interval) for part in partitions: csv_file_name = f"{filepath}{csv_file_prefix}_{part['start_time']}_{part['end_time']}.csv" - self.check_stb_csv_correct(csv_file_name, part['count'], interlace_rows) + self.check_stb_csv_correct(csv_file_name, part['count'] * child_count, interlace_rows) else: # batch - interlace_rows = insert_rows + thread_count = stb["thread_count"] + interlace_rows = insert_rows if not csv_ts_format: # normal - pass + for i in range(thread_count): + csv_file_name = f"{filepath}{csv_file_prefix}_{i + 1}.csv" + if i < child_count % thread_count: + self.check_stb_csv_correct(csv_file_name, insert_rows * (child_count // thread_count + 1), interlace_rows) + else: + self.check_stb_csv_correct(csv_file_name, insert_rows * (child_count // thread_count), interlace_rows) else: # time slice - pass - + for i in range(thread_count): + partitions = self.calc_time_slice_partitions(total_start_ts, total_end_ts, ts_step, csv_ts_format, csv_ts_interval) + for part in partitions: + csv_file_name = f"{filepath}{csv_file_prefix}_{i + 1}_{part['start_time']}_{part['end_time']}.csv" + if i < child_count % thread_count: + slice_rows = part['count'] * (child_count // thread_count + 1) + else: + slice_rows = part['count'] * (child_count // thread_count) + + self.check_stb_csv_correct(csv_file_name, slice_rows, part['count']) + # check result def check_result(self, jsonFile): @@ -211,7 +224,7 @@ class TDTestCase(TBase): benchmark = etool.benchMarkFile() # do check interlace normal - json = "tools/benchmark/basic/json/csv-interlace-normal.json" + json = "tools/benchmark/basic/json/csv-export.json" self.check_export_csv(benchmark, json) def stop(self): diff --git a/tests/parallel_test/cases.task b/tests/parallel_test/cases.task index 8f986ad445..29d8a1b147 100644 --- a/tests/parallel_test/cases.task +++ b/tests/parallel_test/cases.task @@ -91,7 +91,8 @@ ,,y,army,./pytest.sh python3 ./test.py -f tools/benchmark/basic/default_json.py ,,y,army,./pytest.sh python3 ./test.py -f tools/benchmark/basic/demo.py -,,y,army,./pytest.sh python3 ./test.py -f 
tools/benchmark/basic/exportCsv.py +,,y,army,./pytest.sh python3 ./test.py -f tools/benchmark/basic/csv-export.py +# ,,y,army,./pytest.sh python3 ./test.py -f tools/benchmark/basic/csv-import.py ,,y,army,./pytest.sh python3 ./test.py -f tools/benchmark/basic/from-to.py ,,y,army,./pytest.sh python3 ./test.py -f tools/benchmark/basic/from-to-continue.py From cc220ca3aee7192125df15f8e0a633a54e5bdcea Mon Sep 17 00:00:00 2001 From: Yaming Pei Date: Thu, 6 Mar 2025 14:10:56 +0800 Subject: [PATCH 27/34] feat: change the output path parameter to output_dir --- tests/army/tools/benchmark/basic/csv-export.py | 6 +++--- tools/taos-tools/src/benchCsv.c | 8 ++++---- tools/taos-tools/src/benchJsonOpt.c | 2 +- 3 files changed, 8 insertions(+), 8 deletions(-) diff --git a/tests/army/tools/benchmark/basic/csv-export.py b/tests/army/tools/benchmark/basic/csv-export.py index 6b71d2c238..702490d6ed 100644 --- a/tests/army/tools/benchmark/basic/csv-export.py +++ b/tests/army/tools/benchmark/basic/csv-export.py @@ -122,7 +122,7 @@ class TDTestCase(TBase): if batch != interlace_rows: tdLog.exit(f"interlace_rows invalid. tbName={name} actual={batch} expected={interlace_rows} i={count} csv_file_name={csv_file_name}") batch = 1 - name = row[tbname_idx] + name = row[tbname_idx] # count ++ count += 1 # batch @@ -136,9 +136,9 @@ class TDTestCase(TBase): tdLog.info(f"Check generate csv file successfully. csv_file_name={csv_file_name} count={count} interlace_rows={batch}") - # check correct + # check correct def check_stb_correct(self, data, db, stb): - filepath = data["output_path"] + filepath = data["output_dir"] stbName = stb["name"] child_count = stb["childtable_to"] - stb["childtable_from"] insert_rows = stb["insert_rows"] diff --git a/tools/taos-tools/src/benchCsv.c b/tools/taos-tools/src/benchCsv.c index b498214468..0bb47b0888 100644 --- a/tools/taos-tools/src/benchCsv.c +++ b/tools/taos-tools/src/benchCsv.c @@ -371,7 +371,7 @@ int csvGenCreateDbSql(SDataBase* db, char* buf, int size) { static int csvExportCreateDbSql(CsvWriteMeta* write_meta, FILE* fp) { - char buf[LARGE_BUFF_LEN] = {}; + char buf[LARGE_BUFF_LEN] = {0}; int ret = 0; int length = 0; @@ -547,7 +547,7 @@ int csvGenCreateStbSql(SDataBase* db, SSuperTable* stb, char* buf, int size) { static int csvExportCreateStbSql(CsvWriteMeta* write_meta, FILE* fp) { - char buf[4096] = {}; + char buf[4096] = {0}; int ret = 0; int length = 0; @@ -572,7 +572,7 @@ static int csvExportCreateStbSql(CsvWriteMeta* write_meta, FILE* fp) { static int csvExportCreateSql(CsvWriteMeta* write_meta) { - char fullname[MAX_PATH_LEN] = {}; + char fullname[MAX_PATH_LEN] = {0}; int ret = 0; int length = 0; FILE* fp = NULL; @@ -1078,7 +1078,7 @@ static void* csvGenStbThread(void* arg) { uint64_t ctb_idx = 0; int ret = 0; CsvFileHandle* fhdl = NULL; - char fullname[MAX_PATH_LEN] = {}; + char fullname[MAX_PATH_LEN] = {0}; uint64_t total_rows = 0; uint64_t pre_total_rows = 0; diff --git a/tools/taos-tools/src/benchJsonOpt.c b/tools/taos-tools/src/benchJsonOpt.c index 83edc5c6ef..49b5a6529d 100644 --- a/tools/taos-tools/src/benchJsonOpt.c +++ b/tools/taos-tools/src/benchJsonOpt.c @@ -1646,7 +1646,7 @@ static int getMetaFromCommonJsonFile(tools_cJSON *json) { } // output dir - tools_cJSON* opp = tools_cJSON_GetObjectItem(json, "output_path"); + tools_cJSON* opp = tools_cJSON_GetObjectItem(json, "output_dir"); if (opp && opp->type == tools_cJSON_String && opp->valuestring != NULL) { g_arguments->output_path = opp->valuestring; } else { From 31376ba71494e4a5bedcf4a7d4a7508db797fa14 Mon Sep 
17 00:00:00 2001
From: Yaming Pei
Date: Thu, 6 Mar 2025 15:17:43 +0800
Subject: [PATCH 28/34] docs: csv-related parameters descriptions

---
 .../14-reference/02-tools/10-taosbenchmark.md | 26 +++++++++++++++-
 .../14-reference/02-tools/10-taosbenchmark.md | 31 +++++++++++++++++--
 2 files changed, 54 insertions(+), 3 deletions(-)

diff --git a/docs/en/14-reference/02-tools/10-taosbenchmark.md b/docs/en/14-reference/02-tools/10-taosbenchmark.md
index cfc92b4e0b..3c1401de68 100644
--- a/docs/en/14-reference/02-tools/10-taosbenchmark.md
+++ b/docs/en/14-reference/02-tools/10-taosbenchmark.md
@@ -188,9 +188,12 @@ taosBenchmark -A INT,DOUBLE,NCHAR,BINARY\(16\)
 
 The parameters listed in this section apply to all functional modes.
 
-- **filetype**: The function to test, possible values are `insert`, `query`, and `subscribe`. Corresponding to insert, query, and subscribe functions. Only one can be specified in each configuration file.
+- **filetype**: The function to test, possible values are `insert`, `query`, `subscribe`, and `csvfile`, corresponding to the insert, query, subscribe, and CSV file generation functions. Only one can be specified in each configuration file.
+
 - **cfgdir**: Directory where the TDengine client configuration file is located, default path is /etc/taos.
 
+- **output_dir**: The directory for output files. When `filetype` is `csvfile`, it is the directory where the generated csv files are saved. The default value is ./output/.
+
 - **host**: Specifies the FQDN of the TDengine server to connect to, default value is localhost.
 
 - **port**: The port number of the TDengine server to connect to, default value is 6030.
@@ -283,6 +286,27 @@ Parameters related to supertable creation are configured in the `super_tables` s
 - **repeat_ts_max** : Numeric type, when composite primary key is enabled, specifies the maximum number of records with the same timestamp to be generated
 - **sqls** : Array of strings type, specifies the array of sql to be executed after the supertable is successfully created, the table name specified in sql must be prefixed with the database name, otherwise an unspecified database error will occur
 
+- **csv_file_prefix**: String type, sets the prefix for the names of the generated csv files. Default value is "data".
+
+- **csv_ts_format**: String type, sets the format of the time string in the names of the generated csv files, following the `strftime` format standard. If not set, files will not be split by time intervals. Supported patterns include:
+  - %Y: Year as a four-digit number (e.g., 2025)
+  - %m: Month as a two-digit number (01 to 12)
+  - %d: Day of the month as a two-digit number (01 to 31)
+  - %H: Hour in 24-hour format as a two-digit number (00 to 23)
+  - %M: Minute as a two-digit number (00 to 59)
+  - %S: Second as a two-digit number (00 to 59)
+
+- **csv_ts_interval**: String type, sets the time interval for splitting generated csv file names. Supports daily, hourly, minute, and second intervals such as 1d/2h/30m/40s. The default value is "1d".
+
+- **csv_output_header**: String type, sets whether the generated csv files should contain column header descriptions. The default value is "true".
+
+- **csv_tbname_alias**: String type, sets the alias for the tbname field in the column header descriptions of csv files. The default value is "device_id".
+
+- **csv_compress_level**: String type, sets the compression level when generating csv files and automatically compressing them into gzip format. 
Possible values are: + - none: No compression + - fast: gzip level 1 compression + - balance: gzip level 6 compression + - best: gzip level 9 compression #### Tag and Data Columns diff --git a/docs/zh/14-reference/02-tools/10-taosbenchmark.md b/docs/zh/14-reference/02-tools/10-taosbenchmark.md index 56f9e5b122..9902fa56c9 100644 --- a/docs/zh/14-reference/02-tools/10-taosbenchmark.md +++ b/docs/zh/14-reference/02-tools/10-taosbenchmark.md @@ -93,14 +93,17 @@ taosBenchmark -f 本节所列参数适用于所有功能模式。 -- **filetype**:功能分类,可选值为 `insert`、`query` 和 `subscribe`。分别对应插入、查询和订阅功能。每个配置文件中只能指定其中之一。 +- **filetype**:功能分类,可选值为 `insert`、`query`、`subscribe` 和 `csvfile`。分别对应插入、查询、订阅和生成csv文件功能。每个配置文件中只能指定其中之一。 + - **cfgdir**:TDengine 客户端配置文件所在的目录,默认路径是 /etc/taos 。 +- **output_dir**:指定输出文件的目录,当功能分类是 `csvfile` 时,指生成的 csv 文件的保存目录,默认值为 ./output/ 。 + - **host**:指定要连接的 TDengine 服务端的 FQDN,默认值为 localhost 。 - **port**:要连接的 TDengine 服务器的端口号,默认值为 6030 。 -- **user**:用于连接 TDengine 服务端的用户名,默认为 root 。 +- **user**:用于连接 TDengine 服务端的用户名,默认值为 root 。 - **password**:用于连接 TDengine 服务端的密码,默认值为 taosdata。 @@ -184,10 +187,34 @@ taosBenchmark -f - **tags_file**:仅当 insert_mode 为 taosc,rest 的模式下生效。最终的 tag 的数值与 childtable_count 有关,如果 csv 文件内的 tag 数据行小于给定的子表数量,那么会循环读取 csv 文件数据直到生成 childtable_count 指定的子表数量;否则则只会读取 childtable_count 行 tag 数据。也即最终生成的子表数量为二者取小。 - **primary_key**:指定超级表是否有复合主键,取值 1 和 0,复合主键列只能是超级表的第二列,指定生成复合主键后要确保第二列符合复合主键的数据类型,否则会报错。 + - **repeat_ts_min**:数值类型,复合主键开启情况下指定生成相同时间戳记录的最小个数,生成相同时间戳记录的个数是在范围[repeat_ts_min, repeat_ts_max] 内的随机值,最小值等于最大值时为固定个数。 + - **repeat_ts_max**:数值类型,复合主键开启情况下指定生成相同时间戳记录的最大个数。 + - **sqls**:字符串数组类型,指定超级表创建成功后要执行的 sql 数组,sql 中指定表名前面要带数据库名,否则会报未指定数据库错误。 +- **csv_file_prefix**:字符串类型,设置生成的 csv 文件名称的前缀,默认值为 data 。 + +- **csv_ts_format**:字符串类型,设置生成的 csv 文件名称中时间字符串的格式,格式遵循 `strftime` 格式标准,如果没有设置表示不按照时间段切分文件。支持的模式有: + - %Y: 年份,四位数表示(例如:2025) + - %m: 月份,两位数表示(01到12) + - %d: 一个月中的日子,两位数表示(01到31) + - %H: 小时,24小时制,两位数表示(00到23) + - %M: 分钟,两位数表示(00到59) + - %S: 秒,两位数表示(00到59) + +- **csv_ts_interval**:字符串类型,设置生成的 csv 文件名称中时间段间隔,支持天、小时、分钟、秒级间隔,如 1d/2h/30m/40s,默认值为 1d 。 + +- **csv_output_header**:字符串类型,设置生成的 csv 文件是否包含列头描述,默认值为 true 。 + +- **csv_tbname_alias**:字符串类型,设置 csv 文件列头描述中 tbname 字段的别名,默认值为 device_id 。 + +- **csv_compress_level**:字符串类型,设置生成 csv 并自动压缩成 gzip 格式文件的压缩等级。可选值为: + - none:不压缩 + - fast:gzip 1级压缩 + - balance:gzip 6级压缩 + - best:gzip 9级压缩 #### 标签列与数据列 From 77a4b818fb6a3adf0afacbd0fe9f3e554da45cea Mon Sep 17 00:00:00 2001 From: Yaming Pei Date: Thu, 6 Mar 2025 16:06:11 +0800 Subject: [PATCH 29/34] test: add csv test config file --- .../benchmark/basic/json/csv-export.json | 172 ++++++++++++++++++ .../tools/benchmark/basic/json/exportCsv.json | 78 -------- 2 files changed, 172 insertions(+), 78 deletions(-) create mode 100644 tests/army/tools/benchmark/basic/json/csv-export.json delete mode 100644 tests/army/tools/benchmark/basic/json/exportCsv.json diff --git a/tests/army/tools/benchmark/basic/json/csv-export.json b/tests/army/tools/benchmark/basic/json/csv-export.json new file mode 100644 index 0000000000..2dbe2300a8 --- /dev/null +++ b/tests/army/tools/benchmark/basic/json/csv-export.json @@ -0,0 +1,172 @@ +{ + "filetype": "csvfile", + "output_path": "./csv/", + "databases": [ + { + "dbinfo": { + "name": "csvdb", + "precision": "ms" + }, + "super_tables": [ + { + "name": "interlace-normal", + "childtable_count": 1010, + "insert_rows": 1000, + "interlace_rows": 1, + "childtable_prefix": "d", + "timestamp_step": 1000000, + "start_timestamp":1700000000000, + "childtable_from": 1000, + "childtable_to": 1010, + 
"csv_file_prefix": "data", + "csv_output_header": "true", + "csv_tbname_alias": "device_id", + "csv_compress_level": "none", + "columns": [ + { "type": "bool", "name": "bc"}, + { "type": "float", "name": "fc", "min": 1}, + { "type": "double", "name": "dc", "min":10, "max":10}, + { "type": "tinyint", "name": "ti"}, + { "type": "smallint", "name": "si"}, + { "type": "int", "name": "ic", "fillNull":"false"}, + { "type": "bigint", "name": "bi"}, + { "type": "utinyint", "name": "uti"}, + { "type": "usmallint", "name": "usi", "min":100, "max":120}, + { "type": "uint", "name": "ui"}, + { "type": "ubigint", "name": "ubi"}, + { "type": "binary", "name": "bin", "len": 16}, + { "type": "nchar", "name": "nch", "len": 16} + ], + "tags": [ + {"type": "tinyint", "name": "groupid","max": 10,"min": 1}, + {"type": "binary", "name": "location", "len": 16, + "values": ["San Francisco", "Los Angles", "San Diego", + "San Jose", "Palo Alto", "Campbell", "Mountain View", + "Sunnyvale", "Santa Clara", "Cupertino"] + } + ] + }, + { + "name": "interlace-timeslice", + "childtable_count": 1010, + "insert_rows": 1000, + "interlace_rows": 1, + "childtable_prefix": "d", + "timestamp_step": 1000000, + "start_timestamp":1700000000000, + "childtable_from": 1000, + "childtable_to": 1010, + "csv_file_prefix": "data", + "csv_ts_format": "%Y%m%d", + "csv_ts_interval": "1d", + "csv_output_header": "true", + "csv_tbname_alias": "device_id", + "csv_compress_level": "none", + "columns": [ + { "type": "bool", "name": "bc"}, + { "type": "float", "name": "fc", "min": 1}, + { "type": "double", "name": "dc", "min":10, "max":10}, + { "type": "tinyint", "name": "ti"}, + { "type": "smallint", "name": "si"}, + { "type": "int", "name": "ic", "fillNull":"false"}, + { "type": "bigint", "name": "bi"}, + { "type": "utinyint", "name": "uti"}, + { "type": "usmallint", "name": "usi", "min":100, "max":120}, + { "type": "uint", "name": "ui"}, + { "type": "ubigint", "name": "ubi"}, + { "type": "binary", "name": "bin", "len": 16}, + { "type": "nchar", "name": "nch", "len": 16} + ], + "tags": [ + {"type": "tinyint", "name": "groupid","max": 10,"min": 1}, + {"type": "binary", "name": "location", "len": 16, + "values": ["San Francisco", "Los Angles", "San Diego", + "San Jose", "Palo Alto", "Campbell", "Mountain View", + "Sunnyvale", "Santa Clara", "Cupertino"] + } + ] + }, + { + "name": "batch-normal", + "childtable_count": 1010, + "insert_rows": 1000, + "interlace_rows": 0, + "thread_count": 8, + "childtable_prefix": "d", + "timestamp_step": 1000000, + "start_timestamp":1700000000000, + "childtable_from": 1000, + "childtable_to": 1010, + "csv_file_prefix": "data", + "csv_output_header": "true", + "csv_tbname_alias": "device_id", + "csv_compress_level": "none", + "columns": [ + { "type": "bool", "name": "bc"}, + { "type": "float", "name": "fc", "min": 1}, + { "type": "double", "name": "dc", "min":10, "max":10}, + { "type": "tinyint", "name": "ti"}, + { "type": "smallint", "name": "si"}, + { "type": "int", "name": "ic", "fillNull":"false"}, + { "type": "bigint", "name": "bi"}, + { "type": "utinyint", "name": "uti"}, + { "type": "usmallint", "name": "usi", "min":100, "max":120}, + { "type": "uint", "name": "ui"}, + { "type": "ubigint", "name": "ubi"}, + { "type": "binary", "name": "bin", "len": 16}, + { "type": "nchar", "name": "nch", "len": 16} + ], + "tags": [ + {"type": "tinyint", "name": "groupid","max": 10,"min": 1}, + {"type": "binary", "name": "location", "len": 16, + "values": ["San Francisco", "Los Angles", "San Diego", + "San Jose", "Palo Alto", 
"Campbell", "Mountain View", + "Sunnyvale", "Santa Clara", "Cupertino"] + } + ] + }, + { + "name": "batch-timeslice", + "childtable_count": 1010, + "insert_rows": 1000, + "interlace_rows": 0, + "thread_count": 8, + "childtable_prefix": "d", + "timestamp_step": 1000000, + "start_timestamp":1700000000000, + "childtable_from": 1000, + "childtable_to": 1010, + "csv_file_prefix": "data", + "csv_ts_format": "%Y%m%d", + "csv_ts_interval": "1d", + "csv_output_header": "true", + "csv_tbname_alias": "device_id", + "csv_compress_level": "none", + "columns": [ + { "type": "bool", "name": "bc"}, + { "type": "float", "name": "fc", "min": 1}, + { "type": "double", "name": "dc", "min":10, "max":10}, + { "type": "tinyint", "name": "ti"}, + { "type": "smallint", "name": "si"}, + { "type": "int", "name": "ic", "fillNull":"false"}, + { "type": "bigint", "name": "bi"}, + { "type": "utinyint", "name": "uti"}, + { "type": "usmallint", "name": "usi", "min":100, "max":120}, + { "type": "uint", "name": "ui"}, + { "type": "ubigint", "name": "ubi"}, + { "type": "binary", "name": "bin", "len": 16}, + { "type": "nchar", "name": "nch", "len": 16} + ], + "tags": [ + {"type": "tinyint", "name": "groupid","max": 10,"min": 1}, + {"type": "binary", "name": "location", "len": 16, + "values": ["San Francisco", "Los Angles", "San Diego", + "San Jose", "Palo Alto", "Campbell", "Mountain View", + "Sunnyvale", "Santa Clara", "Cupertino"] + } + ] + } + ] + } + ] +} diff --git a/tests/army/tools/benchmark/basic/json/exportCsv.json b/tests/army/tools/benchmark/basic/json/exportCsv.json deleted file mode 100644 index 05a7341eb6..0000000000 --- a/tests/army/tools/benchmark/basic/json/exportCsv.json +++ /dev/null @@ -1,78 +0,0 @@ -{ - "filetype": "csvfile", - "csvPath": "./csv/", - "num_of_records_per_req": 10000, - "databases": [ - { - "dbinfo": { - "name": "csvdb" - }, - "super_tables": [ - { - "name": "batchTable", - "childtable_count": 5, - "insert_rows": 100, - "interlace_rows": 0, - "childtable_prefix": "d", - "timestamp_step": 10, - "start_timestamp":1600000000000, - "columns": [ - { "type": "bool", "name": "bc"}, - { "type": "float", "name": "fc", "min": 1}, - { "type": "double", "name": "dc", "min":10, "max":10}, - { "type": "tinyint", "name": "ti"}, - { "type": "smallint", "name": "si"}, - { "type": "int", "name": "ic", "fillNull":"false"}, - { "type": "bigint", "name": "bi"}, - { "type": "utinyint", "name": "uti"}, - { "type": "usmallint", "name": "usi", "min":100, "max":120}, - { "type": "uint", "name": "ui"}, - { "type": "ubigint", "name": "ubi"}, - { "type": "binary", "name": "bin", "len": 16}, - { "type": "nchar", "name": "nch", "len": 16} - ], - "tags": [ - {"type": "tinyint", "name": "groupid","max": 10,"min": 1}, - {"type": "binary", "name": "location", "len": 16, - "values": ["San Francisco", "Los Angles", "San Diego", - "San Jose", "Palo Alto", "Campbell", "Mountain View", - "Sunnyvale", "Santa Clara", "Cupertino"] - } - ] - }, - { - "name": "interlaceTable", - "childtable_count": 5, - "insert_rows": 100, - "interlace_rows": 10, - "childtable_prefix": "d", - "timestamp_step": 1000, - "start_timestamp":1700000000000, - "columns": [ - { "type": "bool", "name": "bc"}, - { "type": "float", "name": "fc", "min":16}, - { "type": "double", "name": "dc", "min":16}, - { "type": "tinyint", "name": "ti"}, - { "type": "smallint", "name": "si"}, - { "type": "int", "name": "ic", "fillNull":"false"}, - { "type": "bigint", "name": "bi"}, - { "type": "utinyint", "name": "uti"}, - { "type": "usmallint", "name": "usi"}, - { "type": 
"uint", "name": "ui"}, - { "type": "ubigint", "name": "ubi"}, - { "type": "binary", "name": "bin", "len": 32}, - { "type": "nchar", "name": "nch", "len": 64} - ], - "tags": [ - {"type": "tinyint", "name": "groupid","max": 10,"min": 1}, - {"type": "binary", "name": "location", "len": 16, - "values": ["San Francisco", "Los Angles", "San Diego", - "San Jose", "Palo Alto", "Campbell", "Mountain View", - "Sunnyvale", "Santa Clara", "Cupertino"] - } - ] - } - ] - } - ] -} From beb8d55337c7b39a246d709c2f2caee6b18ac0e9 Mon Sep 17 00:00:00 2001 From: Yaming Pei Date: Thu, 6 Mar 2025 16:29:05 +0800 Subject: [PATCH 30/34] fix: add missing header file --- tools/taos-tools/inc/benchLog.h | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/taos-tools/inc/benchLog.h b/tools/taos-tools/inc/benchLog.h index 961a037e3c..ab74aaff75 100644 --- a/tools/taos-tools/inc/benchLog.h +++ b/tools/taos-tools/inc/benchLog.h @@ -17,6 +17,7 @@ #define INC_BENCHLOG_H_ #include +#include // // suport thread safe log module From 53b9743ac4e87b191755b211cdb398e4382180c4 Mon Sep 17 00:00:00 2001 From: Yaming Pei Date: Thu, 6 Mar 2025 20:03:13 +0800 Subject: [PATCH 31/34] feat: enhanced time format verification --- .../tools/benchmark/basic/json/csv-export.json | 2 +- tools/taos-tools/src/benchCsv.c | 18 ++++++++++++++++++ 2 files changed, 19 insertions(+), 1 deletion(-) diff --git a/tests/army/tools/benchmark/basic/json/csv-export.json b/tests/army/tools/benchmark/basic/json/csv-export.json index 2dbe2300a8..2d6f7b7022 100644 --- a/tests/army/tools/benchmark/basic/json/csv-export.json +++ b/tests/army/tools/benchmark/basic/json/csv-export.json @@ -1,6 +1,6 @@ { "filetype": "csvfile", - "output_path": "./csv/", + "output_dir": "./csv/", "databases": [ { "dbinfo": { diff --git a/tools/taos-tools/src/benchCsv.c b/tools/taos-tools/src/benchCsv.c index 0bb47b0888..39f1a7983f 100644 --- a/tools/taos-tools/src/benchCsv.c +++ b/tools/taos-tools/src/benchCsv.c @@ -60,6 +60,24 @@ static int csvValidateParamTsFormat(const char* csv_ts_format) { return -1; } + int has_Y = 0, has_m = 0, has_d = 0; + const char* p = csv_ts_format; + while (*p) { + if (*p == '%') { + p++; + switch (*p) { + case 'Y': has_Y = 1; break; + case 'm': has_m = 1; break; + case 'd': has_d = 1; break; + } + } + p++; + } + + if (has_Y == 0 || has_m == 0 || has_d == 0) { + return -1; + } + return 0; } From 4e79e8f489890e3bb39dd69a9f0f64b1a163d357 Mon Sep 17 00:00:00 2001 From: Yaming Pei Date: Thu, 6 Mar 2025 20:18:49 +0800 Subject: [PATCH 32/34] feat: zlib compression is supported only on linux and mac platforms --- tools/taos-tools/inc/benchCsv.h | 5 +++++ tools/taos-tools/src/CMakeLists.txt | 4 +++- tools/taos-tools/src/benchCsv.c | 17 ++++++++++++++--- 3 files changed, 22 insertions(+), 4 deletions(-) diff --git a/tools/taos-tools/inc/benchCsv.h b/tools/taos-tools/inc/benchCsv.h index 624bcadedc..f944600ecb 100644 --- a/tools/taos-tools/inc/benchCsv.h +++ b/tools/taos-tools/inc/benchCsv.h @@ -16,7 +16,10 @@ #ifndef INC_BENCHCSV_H_ #define INC_BENCHCSV_H_ +#ifndef _WIN32 #include +#endif + #include "bench.h" @@ -38,7 +41,9 @@ typedef struct { CsvCompressionLevel compress_level; CsvIoError result; union { +#ifndef _WIN32 gzFile gf; +#endif FILE* fp; } handle; } CsvFileHandle; diff --git a/tools/taos-tools/src/CMakeLists.txt b/tools/taos-tools/src/CMakeLists.txt index 5bc2703165..93b1530020 100644 --- a/tools/taos-tools/src/CMakeLists.txt +++ b/tools/taos-tools/src/CMakeLists.txt @@ -316,6 +316,9 @@ IF (${CMAKE_SYSTEM_NAME} MATCHES "Linux" OR ${CMAKE_SYSTEM_NAME} 
MATCHES "Darwin ENDIF () ENDIF () + + target_link_libraries(taosBenchmark z) + ELSE () ADD_DEFINITIONS(-DWINDOWS) SET(CMAKE_C_STANDARD 11) @@ -364,4 +367,3 @@ ELSE () TARGET_LINK_LIBRARIES(taosBenchmark taos msvcregex pthread toolscJson ${WEBSOCKET_LINK_FLAGS}) ENDIF () -target_link_libraries(taosBenchmark z) diff --git a/tools/taos-tools/src/benchCsv.c b/tools/taos-tools/src/benchCsv.c index 39f1a7983f..d08b9d19b0 100644 --- a/tools/taos-tools/src/benchCsv.c +++ b/tools/taos-tools/src/benchCsv.c @@ -953,12 +953,15 @@ static CsvFileHandle* csvOpen(const char* filename, CsvCompressionLevel compress if (compress_level == CSV_COMPRESS_NONE) { fhdl->handle.fp = fopen(filename, "w"); failed = (!fhdl->handle.fp); - } else { + } +#ifndef _WIN32 + else { char mode[TINY_BUFF_LEN]; (void)snprintf(mode, sizeof(mode), "wb%d", compress_level); fhdl->handle.gf = gzopen(filename, mode); failed = (!fhdl->handle.gf); } +#endif if (failed) { tmfree(fhdl); @@ -986,7 +989,9 @@ static CsvIoError csvWrite(CsvFileHandle* fhdl, const char* buf, size_t size) { fhdl->result = CSV_ERR_WRITE_FAILED; return CSV_ERR_WRITE_FAILED; } - } else { + } +#ifndef _WIN32 + else { int ret = gzwrite(fhdl->handle.gf, buf, size); if (ret != size) { errorPrint("Failed to write csv file: %s. expected written %zu but %d.\n", @@ -998,6 +1003,8 @@ static CsvIoError csvWrite(CsvFileHandle* fhdl, const char* buf, size_t size) { return CSV_ERR_WRITE_FAILED; } } +#endif + return CSV_ERR_OK; } @@ -1012,12 +1019,16 @@ static void csvClose(CsvFileHandle* fhdl) { fclose(fhdl->handle.fp); fhdl->handle.fp = NULL; } - } else { + } +#ifndef _WIN32 + else { if (fhdl->handle.gf) { gzclose(fhdl->handle.gf); fhdl->handle.gf = NULL; } } +#endif + tmfree(fhdl); } From 1c2072eb80307cc4d286d7128256a7d78a620556 Mon Sep 17 00:00:00 2001 From: Yaming Pei Date: Fri, 7 Mar 2025 13:53:05 +0800 Subject: [PATCH 33/34] feat: support zlib compression on windows platform --- tools/taos-tools/README-CN.md | 2 +- tools/taos-tools/inc/benchCsv.h | 4 ---- tools/taos-tools/src/CMakeLists.txt | 6 +++++- tools/taos-tools/src/benchCsv.c | 15 +++------------ 4 files changed, 9 insertions(+), 18 deletions(-) diff --git a/tools/taos-tools/README-CN.md b/tools/taos-tools/README-CN.md index 3def035f68..da14e81cd1 100644 --- a/tools/taos-tools/README-CN.md +++ b/tools/taos-tools/README-CN.md @@ -18,7 +18,7 @@ taosdump 是用于备份 TDengine 数据到本地目录和从本地目录恢复 #### 对于 Ubuntu/Debian 系统 ```shell -sudo apt install libjansson-dev libsnappy-dev liblzma-dev libz-dev zlib1g pkg-config libssl-dev +sudo apt install libjansson-dev libsnappy-dev liblzma-dev libz-dev zlib1g zlib1g-dev pkg-config libssl-dev ``` #### 对于 CentOS 7/RHEL 系统 diff --git a/tools/taos-tools/inc/benchCsv.h b/tools/taos-tools/inc/benchCsv.h index f944600ecb..6bf531cf14 100644 --- a/tools/taos-tools/inc/benchCsv.h +++ b/tools/taos-tools/inc/benchCsv.h @@ -16,9 +16,7 @@ #ifndef INC_BENCHCSV_H_ #define INC_BENCHCSV_H_ -#ifndef _WIN32 #include -#endif #include "bench.h" @@ -41,9 +39,7 @@ typedef struct { CsvCompressionLevel compress_level; CsvIoError result; union { -#ifndef _WIN32 gzFile gf; -#endif FILE* fp; } handle; } CsvFileHandle; diff --git a/tools/taos-tools/src/CMakeLists.txt b/tools/taos-tools/src/CMakeLists.txt index 93b1530020..320fb1f413 100644 --- a/tools/taos-tools/src/CMakeLists.txt +++ b/tools/taos-tools/src/CMakeLists.txt @@ -317,7 +317,7 @@ IF (${CMAKE_SYSTEM_NAME} MATCHES "Linux" OR ${CMAKE_SYSTEM_NAME} MATCHES "Darwin ENDIF () - target_link_libraries(taosBenchmark z) + TARGET_LINK_LIBRARIES(taosBenchmark z) ELSE () 
ADD_DEFINITIONS(-DWINDOWS) @@ -334,6 +334,7 @@ ELSE () ADD_DEPENDENCIES(taosdump deps-snappy) ADD_DEPENDENCIES(taosdump deps-libargp) ADD_DEPENDENCIES(taosdump apache-avro) + ADD_DEPENDENCIES(taosBenchmark tools-zlib) IF (${WEBSOCKET}) INCLUDE_DIRECTORIES(/usr/local/include/) @@ -365,5 +366,8 @@ ELSE () ENDIF () TARGET_LINK_LIBRARIES(taosBenchmark taos msvcregex pthread toolscJson ${WEBSOCKET_LINK_FLAGS}) + + TARGET_LINK_LIBRARIES(taosBenchmark zlibstatic) + ENDIF () diff --git a/tools/taos-tools/src/benchCsv.c b/tools/taos-tools/src/benchCsv.c index d08b9d19b0..f8c43dbb97 100644 --- a/tools/taos-tools/src/benchCsv.c +++ b/tools/taos-tools/src/benchCsv.c @@ -953,15 +953,12 @@ static CsvFileHandle* csvOpen(const char* filename, CsvCompressionLevel compress if (compress_level == CSV_COMPRESS_NONE) { fhdl->handle.fp = fopen(filename, "w"); failed = (!fhdl->handle.fp); - } -#ifndef _WIN32 - else { + } else { char mode[TINY_BUFF_LEN]; (void)snprintf(mode, sizeof(mode), "wb%d", compress_level); fhdl->handle.gf = gzopen(filename, mode); failed = (!fhdl->handle.gf); } -#endif if (failed) { tmfree(fhdl); @@ -989,9 +986,7 @@ static CsvIoError csvWrite(CsvFileHandle* fhdl, const char* buf, size_t size) { fhdl->result = CSV_ERR_WRITE_FAILED; return CSV_ERR_WRITE_FAILED; } - } -#ifndef _WIN32 - else { + } else { int ret = gzwrite(fhdl->handle.gf, buf, size); if (ret != size) { errorPrint("Failed to write csv file: %s. expected written %zu but %d.\n", @@ -1003,7 +998,6 @@ static CsvIoError csvWrite(CsvFileHandle* fhdl, const char* buf, size_t size) { return CSV_ERR_WRITE_FAILED; } } -#endif return CSV_ERR_OK; } @@ -1019,15 +1013,12 @@ static void csvClose(CsvFileHandle* fhdl) { fclose(fhdl->handle.fp); fhdl->handle.fp = NULL; } - } -#ifndef _WIN32 - else { + } else { if (fhdl->handle.gf) { gzclose(fhdl->handle.gf); fhdl->handle.gf = NULL; } } -#endif tmfree(fhdl); } From 8292b8df252f7bbbd2a9e2b0ad12cd72a24327d1 Mon Sep 17 00:00:00 2001 From: Yaming Pei Date: Fri, 7 Mar 2025 18:19:57 +0800 Subject: [PATCH 34/34] docs: optimize document description --- .../14-reference/02-tools/10-taosbenchmark.md | 15 +++++- .../14-reference/02-tools/10-taosbenchmark.md | 17 ++++-- .../army/tools/benchmark/basic/csv-export.py | 3 +- .../benchmark/basic/json/csv-export.json | 8 +-- tools/taos-tools/example/csv-export.json | 54 +++++++++++++++++++ tools/taos-tools/src/benchJsonOpt.c | 4 +- 6 files changed, 89 insertions(+), 12 deletions(-) create mode 100644 tools/taos-tools/example/csv-export.json diff --git a/docs/en/14-reference/02-tools/10-taosbenchmark.md b/docs/en/14-reference/02-tools/10-taosbenchmark.md index 3c1401de68..19f498eab1 100644 --- a/docs/en/14-reference/02-tools/10-taosbenchmark.md +++ b/docs/en/14-reference/02-tools/10-taosbenchmark.md @@ -298,11 +298,11 @@ Parameters related to supertable creation are configured in the `super_tables` s - **csv_ts_interval**: String type, sets the time interval for splitting generated csv file names. Supports daily, hourly, minute, and second intervals such as 1d/2h/30m/40s. The default value is "1d". -- **csv_output_header**: String type, sets whether the generated csv files should contain column header descriptions. The default value is "true". +- **csv_output_header**: String type, sets whether the generated csv files should contain column header descriptions. The default value is "yes". - **csv_tbname_alias**: String type, sets the alias for the tbname field in the column header descriptions of csv files. The default value is "device_id". 
-- **csv_compress_level**: String type, sets the compression level when generating csv files and automatically compressing them into gzip format. Possible values are:
+- **csv_compress_level**: String type, sets the compression level for generating csv-encoded data and automatically compressing it into a gzip file. This process directly encodes and compresses the data, rather than first generating a csv file and then compressing it. Possible values are:
   - none: No compression
   - fast: gzip level 1 compression
   - balance: gzip level 6 compression
   - best: gzip level 9 compression
@@ -502,6 +502,17 @@ Note: Data types in the taosBenchmark configuration file must be in lowercase to
 
+### Export CSV File Example
+
+<details>
+<summary>csv-export.json</summary>
+
+```json
+{{#include /TDengine/tools/taos-tools/example/csv-export.json}}
+```
+
+</details>
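+
+For reference, with the example configuration above (file prefix `data`, `csv_ts_format` `%Y%m%d`, `csv_ts_interval` `1d`, start timestamp 2020-10-01), interlaced generation writes one file per day whose name carries the slice boundaries, for example `data_20201001_20201002.csv`, then `data_20201002_20201003.csv`, and so on; slice boundaries are aligned to the unit implied by the format (here, midnight), and a `.gz` suffix is appended when `csv_compress_level` is not `none`. The names shown are an illustrative sketch derived from the parameters above, not captured tool output.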
+
 Other json examples see [here](https://github.com/taosdata/TDengine/tree/main/tools/taos-tools/example)
 
 ## Output Performance Indicators
 
diff --git a/docs/zh/14-reference/02-tools/10-taosbenchmark.md b/docs/zh/14-reference/02-tools/10-taosbenchmark.md
index 9902fa56c9..1f97b0702a 100644
--- a/docs/zh/14-reference/02-tools/10-taosbenchmark.md
+++ b/docs/zh/14-reference/02-tools/10-taosbenchmark.md
@@ -93,7 +93,7 @@ taosBenchmark -f
 
 本节所列参数适用于所有功能模式。
 
-- **filetype**:功能分类,可选值为 `insert`、`query`、`subscribe` 和 `csvfile`。分别对应插入、查询、订阅和生成csv文件功能。每个配置文件中只能指定其中之一。
+- **filetype**:功能分类,可选值为 `insert`、`query`、`subscribe` 和 `csvfile`。分别对应插入、查询、订阅和生成 csv 文件功能。每个配置文件中只能指定其中之一。
 
 - **cfgdir**:TDengine 客户端配置文件所在的目录,默认路径是 /etc/taos 。
 
@@ -206,11 +206,11 @@ taosBenchmark -f
 
 - **csv_ts_interval**:字符串类型,设置生成的 csv 文件名称中时间段间隔,支持天、小时、分钟、秒级间隔,如 1d/2h/30m/40s,默认值为 1d 。
 
-- **csv_output_header**:字符串类型,设置生成的 csv 文件是否包含列头描述,默认值为 true 。
+- **csv_output_header**:字符串类型,设置生成的 csv 文件是否包含列头描述,默认值为 yes 。
 
 - **csv_tbname_alias**:字符串类型,设置 csv 文件列头描述中 tbname 字段的别名,默认值为 device_id 。
 
-- **csv_compress_level**:字符串类型,设置生成 csv 并自动压缩成 gzip 格式文件的压缩等级。可选值为:
+- **csv_compress_level**:字符串类型,设置生成 csv 编码数据并自动压缩成 gzip 格式文件的压缩等级。此过程直接编码并压缩,而非先生成 csv 文件再压缩。可选值为:
   - none:不压缩
   - fast:gzip 1级压缩
   - balance:gzip 6级压缩
   - best:gzip 9级压缩
@@ -410,6 +410,17 @@ interval 控制休眠时间,避免持续查询慢查询消耗 CPU,单位为
 
+### 生成 CSV 文件 JSON 示例
+
+<details>
+<summary>csv-export.json</summary>
+
+```json
+{{#include /TDengine/tools/taos-tools/example/csv-export.json}}
+```
+
+</details>
+
 查看更多 json 配置文件示例可 [点击这里](https://github.com/taosdata/TDengine/tree/main/tools/taos-tools/example)
 
 ## 输出性能指标
 
diff --git a/tests/army/tools/benchmark/basic/csv-export.py b/tests/army/tools/benchmark/basic/csv-export.py
index 702490d6ed..65ffb3e541 100644
--- a/tests/army/tools/benchmark/basic/csv-export.py
+++ b/tests/army/tools/benchmark/basic/csv-export.py
@@ -16,6 +16,7 @@ import csv
 import datetime
 import shutil
 import frame
+import frame.eos
 import frame.etool
 from frame.log import *
 from frame.cases import *
@@ -213,7 +214,7 @@ class TDTestCase(TBase):
 
         # exec
         cmd = f"{benchmark} {options} -f {jsonFile}"
-        os.system(cmd)
+        eos.exe(cmd)
 
         # check result
         self.check_result(jsonFile)
diff --git a/tests/army/tools/benchmark/basic/json/csv-export.json b/tests/army/tools/benchmark/basic/json/csv-export.json
index 2d6f7b7022..88beab0de1 100644
--- a/tests/army/tools/benchmark/basic/json/csv-export.json
+++ b/tests/army/tools/benchmark/basic/json/csv-export.json
@@ -19,7 +19,7 @@
                     "childtable_from": 1000,
                     "childtable_to": 1010,
                     "csv_file_prefix": "data",
-                    "csv_output_header": "true",
+                    "csv_output_header": "yes",
                     "csv_tbname_alias": "device_id",
                     "csv_compress_level": "none",
                     "columns": [
@@ -59,7 +59,7 @@
                     "csv_file_prefix": "data",
                     "csv_ts_format": "%Y%m%d",
                     "csv_ts_interval": "1d",
-                    "csv_output_header": "true",
+                    "csv_output_header": "yes",
                     "csv_tbname_alias": "device_id",
                     "csv_compress_level": "none",
                     "columns": [
@@ -98,7 +98,7 @@
                     "childtable_from": 1000,
                     "childtable_to": 1010,
                     "csv_file_prefix": "data",
-                    "csv_output_header": "true",
+                    "csv_output_header": "yes",
                     "csv_tbname_alias": "device_id",
                     "csv_compress_level": "none",
                     "columns": [
@@ -139,7 +139,7 @@
                     "csv_file_prefix": "data",
                     "csv_ts_format": "%Y%m%d",
                     "csv_ts_interval": "1d",
-                    "csv_output_header": "true",
+                    "csv_output_header": "yes",
                     "csv_tbname_alias": "device_id",
                     "csv_compress_level": "none",
                     "columns": [
diff --git a/tools/taos-tools/example/csv-export.json b/tools/taos-tools/example/csv-export.json
new file mode 100644
index 0000000000..7fa3e96f2f
--- /dev/null
+++ b/tools/taos-tools/example/csv-export.json
@@ -0,0 +1,54 @@
+{
+    "filetype": "csvfile",
+    "output_dir": "./csv/",
+    "databases": [
+        {
+            "dbinfo": {
+                "name": "csvdb",
+                "precision": "ms"
+            },
+            "super_tables": [
+                {
+                    "name": "table",
+                    "childtable_count": 1010,
+                    "insert_rows": 1000,
+                    "interlace_rows": 1,
+                    "childtable_prefix": "d",
+                    "timestamp_step": 1000000,
+                    "start_timestamp": "2020-10-01 00:00:00.000",
+                    "childtable_from": 1000,
+                    "childtable_to": 1010,
+                    "csv_file_prefix": "data",
+                    "csv_ts_format": "%Y%m%d",
+                    "csv_ts_interval": "1d",
+                    "csv_output_header": "yes",
+                    "csv_tbname_alias": "device_id",
+                    "csv_compress_level": "none",
+                    "columns": [
+                        { "type": "bool", "name": "bc"},
+                        { "type": "float", "name": "fc", "min": 1},
+                        { "type": "double", "name": "dc", "min":10, "max":10},
+                        { "type": "tinyint", "name": "ti"},
+                        { "type": "smallint", "name": "si"},
+                        { "type": "int", "name": "ic", "fillNull":"false"},
+                        { "type": "bigint", "name": "bi"},
+                        { "type": "utinyint", "name": "uti"},
+                        { "type": "usmallint", "name": "usi", "min":100, "max":120},
+                        { "type": "uint", "name": "ui"},
+                        { "type": "ubigint", "name": "ubi"},
+                        { "type": "binary", "name": "bin", "len": 16},
+                        { "type": "nchar", "name": "nch", "len": 16}
+                    ],
+                    "tags": [
+                        {"type": "tinyint", "name": "groupid","max": 10,"min": 1},
+                        {"type": "binary", "name": "location", "len": 16,
+                            "values": ["San Francisco", "Los Angles", "San Diego",
+                        "San Jose", "Palo Alto", "Campbell", "Mountain View",
+                        "Sunnyvale", 
"Santa Clara", "Cupertino"] + } + ] + } + ] + } + ] +} diff --git a/tools/taos-tools/src/benchJsonOpt.c b/tools/taos-tools/src/benchJsonOpt.c index 49b5a6529d..5b992b388e 100644 --- a/tools/taos-tools/src/benchJsonOpt.c +++ b/tools/taos-tools/src/benchJsonOpt.c @@ -1434,9 +1434,9 @@ static int getStableInfo(tools_cJSON *dbinfos, int index) { superTable->csv_output_header = true; tools_cJSON* oph = tools_cJSON_GetObjectItem(stbInfo, "csv_output_header"); if (oph && oph->type == tools_cJSON_String && oph->valuestring != NULL) { - if (0 == strcasecmp(oph->valuestring, "yes") || 0 == strcasecmp(oph->valuestring, "true")) { + if (0 == strcasecmp(oph->valuestring, "yes")) { superTable->csv_output_header = true; - } else if (0 == strcasecmp(oph->valuestring, "no") || 0 == strcasecmp(oph->valuestring, "false")) { + } else if (0 == strcasecmp(oph->valuestring, "no")) { superTable->csv_output_header = false; } }