diff --git a/docs/en/13-operation/10-monitor.md b/docs/en/13-operation/10-monitor.md index 197dda20ee..c1c6ac3c4c 100644 --- a/docs/en/13-operation/10-monitor.md +++ b/docs/en/13-operation/10-monitor.md @@ -214,19 +214,6 @@ The data of tdinsight dashboard is stored in `log` database (default. You can ch |dnode\_ep|NCHAR|TAG|dnode endpoint| |cluster\_id|NCHAR|TAG|cluster id| -### logs table - -`logs` table contains login information records. - -|field|type|is\_tag|comment| -|:----|:---|:-----|:------| -|ts|TIMESTAMP||timestamp| -|level|VARCHAR||log level| -|content|NCHAR||log content| -|dnode\_id|INT|TAG|dnode id| -|dnode\_ep|NCHAR|TAG|dnode endpoint| -|cluster\_id|NCHAR|TAG|cluster id| - ### log\_summary table `log_summary` table contains log summary information records. diff --git a/docs/en/14-reference/03-connector/07-python.mdx b/docs/en/14-reference/03-connector/07-python.mdx index f0a59842fe..831e79eeb7 100644 --- a/docs/en/14-reference/03-connector/07-python.mdx +++ b/docs/en/14-reference/03-connector/07-python.mdx @@ -1007,13 +1007,12 @@ consumer.close() ### Other sample programs | Example program links | Example program content | -| ------------------------------------------------------------------------------------------------------------- | ------------------- ---- | -| [bind_multi.py](https://github.com/taosdata/taos-connector-python/blob/main/examples/bind-multi.py) | parameter binding, -bind multiple rows at once | -| [bind_row.py](https://github.com/taosdata/taos-connector-python/blob/main/examples/bind-row.py) | bind_row.py +|-----------------------|-------------------------| +| [bind_multi.py](https://github.com/taosdata/taos-connector-python/blob/main/examples/bind-multi.py) | parameter binding, bind multiple rows at once | +| [bind_row.py](https://github.com/taosdata/taos-connector-python/blob/main/examples/bind-row.py) | parameter binding, bind one row at once | | 
[insert_lines.py](https://github.com/taosdata/taos-connector-python/blob/main/examples/insert-lines.py) | InfluxDB line protocol writing | | [json_tag.py](https://github.com/taosdata/taos-connector-python/blob/main/examples/json-tag.py) | Use JSON type tags | -| [tmq.py](https://github.com/taosdata/taos-connector-python/blob/main/examples/tmq.py) | TMQ subscription | +| [tmq_consumer.py](https://github.com/taosdata/taos-connector-python/blob/main/examples/tmq_consumer.py) | TMQ subscription | ## Other notes diff --git a/docs/zh/17-operation/10-monitor.md b/docs/zh/17-operation/10-monitor.md index 50da505808..4f8dccc78d 100644 --- a/docs/zh/17-operation/10-monitor.md +++ b/docs/zh/17-operation/10-monitor.md @@ -210,19 +210,6 @@ TDinsight dashboard 数据来源于 log 库(存放监控数据的默认db, |dnode\_ep|NCHAR|TAG|dnode endpoint| |cluster\_id|NCHAR|TAG|cluster id| -### logs 表 - -`logs` 表记录登录信息。 - -|field|type|is\_tag|comment| -|:----|:---|:-----|:------| -|ts|TIMESTAMP||timestamp| -|level|VARCHAR||log level| -|content|NCHAR||log content,长度不超过1024字节| -|dnode\_id|INT|TAG|dnode id| -|dnode\_ep|NCHAR|TAG|dnode endpoint| -|cluster\_id|NCHAR|TAG|cluster id| - ### log\_summary 表 `log_summary` 记录日志统计信息。 diff --git a/include/common/tglobal.h b/include/common/tglobal.h index d6c552b3f6..4f2ed2b065 100644 --- a/include/common/tglobal.h +++ b/include/common/tglobal.h @@ -85,8 +85,14 @@ extern int64_t tsVndCommitMaxIntervalMs; extern int64_t tsMndSdbWriteDelta; extern int64_t tsMndLogRetention; extern int8_t tsGrant; +extern int32_t tsMndGrantMode; extern bool tsMndSkipGrant; +// dnode +extern int64_t tsDndStart; +extern int64_t tsDndStartOsUptime; +extern int64_t tsDndUpTime; + // monitor extern bool tsEnableMonitor; extern int32_t tsMonitorInterval; diff --git a/include/os/os.h b/include/os/os.h index 309a977ff6..ac1a750b78 100644 --- a/include/os/os.h +++ b/include/os/os.h @@ -53,6 +53,7 @@ extern "C" { #else #include #include +#include #if defined(_TD_X86_) #include #endif diff --git 
a/include/os/osSysinfo.h b/include/os/osSysinfo.h index b5309178ae..a6a3655a55 100644 --- a/include/os/osSysinfo.h +++ b/include/os/osSysinfo.h @@ -35,6 +35,7 @@ typedef struct { bool taosCheckSystemIsLittleEnd(); void taosGetSystemInfo(); +int64_t taosGetOsUptime(); int32_t taosGetEmail(char *email, int32_t maxLen); int32_t taosGetOsReleaseName(char *releaseName, char* sName, char* ver, int32_t maxLen); int32_t taosGetCpuInfo(char *cpuModel, int32_t maxLen, float *numOfCores); diff --git a/include/util/tarray.h b/include/util/tarray.h index a93c695370..f56c9e3a17 100644 --- a/include/util/tarray.h +++ b/include/util/tarray.h @@ -22,7 +22,7 @@ extern "C" { #endif -#define TARRAY_MIN_SIZE 8 +#define TARRAY_MIN_SIZE 4 #define TARRAY_GET_ELEM(array, index) ((void*)((char*)((array)->pData) + (index) * (array)->elemSize)) #define TARRAY_ELEM_IDX(array, ele) (POINTER_DISTANCE(ele, (array)->pData) / (array)->elemSize) @@ -138,7 +138,7 @@ size_t taosArrayGetSize(const SArray* pArray); * @param index * @param pData */ -void* taosArrayInsert(SArray* pArray, size_t index, void* pData); +void* taosArrayInsert(SArray* pArray, size_t index, const void* pData); /** * set data in array @@ -204,9 +204,9 @@ void taosArrayClearEx(SArray* pArray, void (*fp)(void*)); void* taosArrayDestroy(SArray* pArray); -void taosArrayDestroyP(SArray* pArray, FDelete fp); +void taosArrayDestroyP(SArray* pArray, FDelete fp); -void taosArrayDestroyEx(SArray* pArray, FDelete fp); +void taosArrayDestroyEx(SArray* pArray, FDelete fp); void taosArraySwap(SArray* a, SArray* b); diff --git a/include/util/tarray2.h b/include/util/tarray2.h new file mode 100644 index 0000000000..cd49e64789 --- /dev/null +++ b/include/util/tarray2.h @@ -0,0 +1,172 @@ +/* + * Copyright (c) 2019 TAOS Data, Inc. 
+ * + * This program is free software: you can use, redistribute, and/or modify + * it under the terms of the GNU Affero General Public License, version 3 + * or later ("AGPL"), as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +#include "talgo.h" + +#ifndef _TD_UTIL_TARRAY2_H_ +#define _TD_UTIL_TARRAY2_H_ + +#ifdef __cplusplus +extern "C" { +#endif + +// a: array +// e: element +// ep: element pointer +// cmp: compare function +// idx: index +// cb: callback function + +#define TARRAY2(TYPE) \ + struct { \ + int32_t size; \ + int32_t capacity; \ + TYPE *data; \ + } + +typedef void (*TArray2Cb)(void *); + +#define TARRAY2_SIZE(a) ((a)->size) +#define TARRAY2_CAPACITY(a) ((a)->capacity) +#define TARRAY2_DATA(a) ((a)->data) +#define TARRAY2_GET(a, i) ((a)->data[i]) +#define TARRAY2_GET_PTR(a, i) ((a)->data + (i)) +#define TARRAY2_FIRST(a) ((a)->data[0]) +#define TARRAY2_LAST(a) ((a)->data[(a)->size - 1]) +#define TARRAY2_DATA_LEN(a) ((a)->size * sizeof(((a)->data[0]))) + +static FORCE_INLINE int32_t tarray2_make_room(void *arr, int32_t expSize, int32_t eleSize) { + TARRAY2(void) *a = arr; + + int32_t capacity = (a->capacity > 0) ? 
(a->capacity << 1) : 32; + while (capacity < expSize) { + capacity <<= 1; + } + void *p = taosMemoryRealloc(a->data, capacity * eleSize); + if (p == NULL) return TSDB_CODE_OUT_OF_MEMORY; + a->capacity = capacity; + a->data = p; + return 0; +} + +static FORCE_INLINE int32_t tarray2InsertBatch(void *arr, int32_t idx, const void *elePtr, int32_t numEle, + int32_t eleSize) { + TARRAY2(uint8_t) *a = arr; + + int32_t ret = 0; + if (a->size + numEle > a->capacity) { + ret = tarray2_make_room(a, a->size + numEle, eleSize); + } + if (ret == 0) { + if (idx < a->size) { + memmove(a->data + (idx + numEle) * eleSize, a->data + idx * eleSize, (a->size - idx) * eleSize); + } + memcpy(a->data + idx * eleSize, elePtr, numEle * eleSize); + a->size += numEle; + } + return ret; +} + +static FORCE_INLINE void *tarray2Search(void *arr, const void *elePtr, int32_t eleSize, __compar_fn_t compar, + int32_t flag) { + TARRAY2(void) *a = arr; + return taosbsearch(elePtr, a->data, a->size, eleSize, compar, flag); +} + +static FORCE_INLINE int32_t tarray2SearchIdx(void *arr, const void *elePtr, int32_t eleSize, __compar_fn_t compar, + int32_t flag) { + TARRAY2(void) *a = arr; + void *p = taosbsearch(elePtr, a->data, a->size, eleSize, compar, flag); + if (p == NULL) { + return -1; + } else { + return (int32_t)(((uint8_t *)p - (uint8_t *)a->data) / eleSize); + } +} + +static FORCE_INLINE int32_t tarray2SortInsert(void *arr, const void *elePtr, int32_t eleSize, __compar_fn_t compar) { + TARRAY2(void) *a = arr; + int32_t idx = tarray2SearchIdx(arr, elePtr, eleSize, compar, TD_GT); + return tarray2InsertBatch(arr, idx < 0 ? 
a->size : idx, elePtr, 1, eleSize); +} + +#define TARRAY2_INIT_EX(a, size_, capacity_, data_) \ + do { \ + (a)->size = (size_); \ + (a)->capacity = (capacity_); \ + (a)->data = (data_); \ + } while (0) + +#define TARRAY2_INIT(a) TARRAY2_INIT_EX(a, 0, 0, NULL) + +#define TARRAY2_CLEAR(a, cb) \ + do { \ + if ((cb) && (a)->size > 0) { \ + TArray2Cb cb_ = (TArray2Cb)(cb); \ + for (int32_t i_ = 0; i_ < (a)->size; ++i_) { \ + cb_((a)->data + i_); \ + } \ + } \ + (a)->size = 0; \ + } while (0) + +#define TARRAY2_DESTROY(a, cb) \ + do { \ + TARRAY2_CLEAR(a, cb); \ + if ((a)->data) { \ + taosMemoryFree((a)->data); \ + (a)->data = NULL; \ + } \ + (a)->capacity = 0; \ + } while (0) + +#define TARRAY2_INSERT_PTR(a, idx, ep) tarray2InsertBatch(a, idx, ep, 1, sizeof((a)->data[0])) +#define TARRAY2_APPEND_PTR(a, ep) tarray2InsertBatch(a, (a)->size, ep, 1, sizeof((a)->data[0])) +#define TARRAY2_APPEND_BATCH(a, ep, n) tarray2InsertBatch(a, (a)->size, ep, n, sizeof((a)->data[0])) +#define TARRAY2_APPEND(a, e) TARRAY2_APPEND_PTR(a, &(e)) + +// return (TYPE *) +#define TARRAY2_SEARCH(a, ep, cmp, flag) tarray2Search(a, ep, sizeof(((a)->data[0])), (__compar_fn_t)(cmp), flag) + +#define TARRAY2_SEARCH_IDX(a, ep, cmp, flag) tarray2SearchIdx(a, ep, sizeof(((a)->data[0])), (__compar_fn_t)(cmp), flag) + +#define TARRAY2_SORT_INSERT(a, e, cmp) tarray2SortInsert(a, &(e), sizeof(((a)->data[0])), (__compar_fn_t)(cmp)) +#define TARRAY2_SORT_INSERT_P(a, ep, cmp) tarray2SortInsert(a, ep, sizeof(((a)->data[0])), (__compar_fn_t)(cmp)) + +#define TARRAY2_REMOVE(a, idx, cb) \ + do { \ + if ((idx) < (a)->size) { \ + if (cb) { \ + TArray2Cb cb_ = (TArray2Cb)(cb); \ + cb_((a)->data + (idx)); \ + } \ + if ((idx) < (a)->size - 1) { \ + memmove((a)->data + (idx), (a)->data + (idx) + 1, sizeof((*(a)->data)) * ((a)->size - (idx)-1)); \ + } \ + (a)->size--; \ + } \ + } while (0) + +#define TARRAY2_FOREACH(a, e) for (int32_t __i = 0; __i < (a)->size && ((e) = (a)->data[__i], 1); __i++) +#define TARRAY2_FOREACH_REVERSE(a, e) 
for (int32_t __i = (a)->size - 1; __i >= 0 && ((e) = (a)->data[__i], 1); __i--) +#define TARRAY2_FOREACH_PTR(a, ep) for (int32_t __i = 0; __i < (a)->size && ((ep) = &(a)->data[__i], 1); __i++) +#define TARRAY2_FOREACH_PTR_REVERSE(a, ep) \ + for (int32_t __i = (a)->size - 1; __i >= 0 && ((ep) = &(a)->data[__i], 1); __i--) + +#ifdef __cplusplus +} +#endif + +#endif /*_TD_UTIL_TARRAY2_H_*/ diff --git a/include/util/tdef.h b/include/util/tdef.h index a4fd098e18..8b27e039fc 100644 --- a/include/util/tdef.h +++ b/include/util/tdef.h @@ -191,16 +191,16 @@ typedef enum ELogicConditionType { #define TSDB_MAX_COLUMNS 4096 #define TSDB_MIN_COLUMNS 2 // PRIMARY COLUMN(timestamp) + other columns -#define TSDB_NODE_NAME_LEN 64 -#define TSDB_TABLE_NAME_LEN 193 // it is a null-terminated string -#define TSDB_TOPIC_NAME_LEN 193 // it is a null-terminated string -#define TSDB_CGROUP_LEN 193 // it is a null-terminated string -#define TSDB_OFFSET_LEN 64 // it is a null-terminated string -#define TSDB_USER_CGROUP_LEN (TSDB_USER_LEN + TSDB_CGROUP_LEN) // it is a null-terminated string -#define TSDB_STREAM_NAME_LEN 193 // it is a null-terminated string -#define TSDB_DB_NAME_LEN 65 -#define TSDB_DB_FNAME_LEN (TSDB_ACCT_ID_LEN + TSDB_DB_NAME_LEN + TSDB_NAME_DELIMITER_LEN) -#define TSDB_PRIVILEDGE_CONDITION_LEN 200 +#define TSDB_NODE_NAME_LEN 64 +#define TSDB_TABLE_NAME_LEN 193 // it is a null-terminated string +#define TSDB_TOPIC_NAME_LEN 193 // it is a null-terminated string +#define TSDB_CGROUP_LEN 193 // it is a null-terminated string +#define TSDB_OFFSET_LEN 64 // it is a null-terminated string +#define TSDB_USER_CGROUP_LEN (TSDB_USER_LEN + TSDB_CGROUP_LEN) // it is a null-terminated string +#define TSDB_STREAM_NAME_LEN 193 // it is a null-terminated string +#define TSDB_DB_NAME_LEN 65 +#define TSDB_DB_FNAME_LEN (TSDB_ACCT_ID_LEN + TSDB_DB_NAME_LEN + TSDB_NAME_DELIMITER_LEN) +#define TSDB_PRIVILEDGE_CONDITION_LEN 200 #define TSDB_FUNC_NAME_LEN 65 #define TSDB_FUNC_COMMENT_LEN 1024 * 
1024 @@ -249,15 +249,15 @@ typedef enum ELogicConditionType { #define TSDB_LABEL_LEN 8 #define TSDB_JOB_STATUS_LEN 32 -#define TSDB_CLUSTER_ID_LEN 40 -#define TSDB_FQDN_LEN 128 -#define TSDB_EP_LEN (TSDB_FQDN_LEN + 6) -#define TSDB_IPv4ADDR_LEN 16 -#define TSDB_FILENAME_LEN 128 -#define TSDB_SHOW_SQL_LEN 2048 +#define TSDB_CLUSTER_ID_LEN 40 +#define TSDB_FQDN_LEN 128 +#define TSDB_EP_LEN (TSDB_FQDN_LEN + 6) +#define TSDB_IPv4ADDR_LEN 16 +#define TSDB_FILENAME_LEN 128 +#define TSDB_SHOW_SQL_LEN 2048 #define TSDB_SHOW_SCHEMA_JSON_LEN TSDB_MAX_COLUMNS * 256 -#define TSDB_SLOW_QUERY_SQL_LEN 512 -#define TSDB_SHOW_SUBQUERY_LEN 1000 +#define TSDB_SLOW_QUERY_SQL_LEN 512 +#define TSDB_SHOW_SUBQUERY_LEN 1000 #define TSDB_TRANS_STAGE_LEN 12 #define TSDB_TRANS_TYPE_LEN 16 @@ -370,7 +370,7 @@ typedef enum ELogicConditionType { #define TSDB_DEFAULT_DB_SCHEMALESS TSDB_DB_SCHEMALESS_OFF #define TSDB_MIN_STT_TRIGGER 1 #define TSDB_MAX_STT_TRIGGER 16 -#define TSDB_DEFAULT_SST_TRIGGER 1 +#define TSDB_DEFAULT_SST_TRIGGER 2 #define TSDB_MIN_HASH_PREFIX (2 - TSDB_TABLE_NAME_LEN) #define TSDB_MAX_HASH_PREFIX (TSDB_TABLE_NAME_LEN - 2) #define TSDB_DEFAULT_HASH_PREFIX 0 @@ -410,10 +410,10 @@ typedef enum ELogicConditionType { #define TSDB_EXPLAIN_RESULT_ROW_SIZE (16 * 1024) #define TSDB_EXPLAIN_RESULT_COLUMN_NAME "QUERY_PLAN" -#define TSDB_MAX_FIELD_LEN 65519 // 16384:65519 -#define TSDB_MAX_BINARY_LEN TSDB_MAX_FIELD_LEN // 16384-8:65519 -#define TSDB_MAX_NCHAR_LEN TSDB_MAX_FIELD_LEN // 16384-8:65519 -#define TSDB_MAX_GEOMETRY_LEN TSDB_MAX_FIELD_LEN // 16384-8:65519 +#define TSDB_MAX_FIELD_LEN 65519 // 16384:65519 +#define TSDB_MAX_BINARY_LEN TSDB_MAX_FIELD_LEN // 16384-8:65519 +#define TSDB_MAX_NCHAR_LEN TSDB_MAX_FIELD_LEN // 16384-8:65519 +#define TSDB_MAX_GEOMETRY_LEN TSDB_MAX_FIELD_LEN // 16384-8:65519 #define PRIMARYKEY_TIMESTAMP_COL_ID 1 #define COL_REACH_END(colId, maxColId) ((colId) > (maxColId)) diff --git a/include/util/tlist.h b/include/util/tlist.h index c684e90a33..0924c133b9 
100644 --- a/include/util/tlist.h +++ b/include/util/tlist.h @@ -241,6 +241,54 @@ void tdListNodeGetData(SList *list, SListNode *node, void *target); void tdListInitIter(SList *list, SListIter *pIter, TD_LIST_DIRECTION_T direction); SListNode *tdListNext(SListIter *pIter); +// macros ==================================================================================== + +// q: for queue +// n: for node +// m: for member + +#define LISTD(TYPE) \ + struct { \ + TYPE *next, *prev; \ + } + +#define LISTD_NEXT(n, m) ((n)->m.next) +#define LISTD_PREV(n, m) ((n)->m.prev) +#define LISTD_INIT(q, m) (LISTD_NEXT(q, m) = LISTD_PREV(q, m) = (q)) +#define LISTD_HEAD(q, m) (LISTD_NEXT(q, m)) +#define LISTD_TAIL(q, m) (LISTD_PREV(q, m)) +#define LISTD_PREV_NEXT(n, m) (LISTD_NEXT(LISTD_PREV(n, m), m)) +#define LISTD_NEXT_PREV(n, m) (LISTD_PREV(LISTD_NEXT(n, m), m)) + +#define LISTD_INSERT_HEAD(q, n, m) \ + do { \ + LISTD_NEXT(n, m) = LISTD_NEXT(q, m); \ + LISTD_PREV(n, m) = (q); \ + LISTD_NEXT_PREV(n, m) = (n); \ + LISTD_NEXT(q, m) = (n); \ + } while (0) + +#define LISTD_INSERT_TAIL(q, n, m) \ + do { \ + LISTD_NEXT(n, m) = (q); \ + LISTD_PREV(n, m) = LISTD_PREV(q, m); \ + LISTD_PREV_NEXT(n, m) = (n); \ + LISTD_PREV(q, m) = (n); \ + } while (0) + +#define LISTD_REMOVE(n, m) \ + do { \ + LISTD_PREV_NEXT(n, m) = LISTD_NEXT(n, m); \ + LISTD_NEXT_PREV(n, m) = LISTD_PREV(n, m); \ + } while (0) + +#define LISTD_FOREACH(q, n, m) for ((n) = LISTD_HEAD(q, m); (n) != (q); (n) = LISTD_NEXT(n, m)) +#define LISTD_FOREACH_REVERSE(q, n, m) for ((n) = LISTD_TAIL(q, m); (n) != (q); (n) = LISTD_PREV(n, m)) +#define LISTD_FOREACH_SAFE(q, n, t, m) \ + for ((n) = LISTD_HEAD(q, m), (t) = LISTD_NEXT(n, m); (n) != (q); (n) = (t), (t) = LISTD_NEXT(n, m)) +#define LISTD_FOREACH_REVERSE_SAFE(q, n, t, m) \ + for ((n) = LISTD_TAIL(q, m), (t) = LISTD_PREV(n, m); (n) != (q); (n) = (t), (t) = LISTD_PREV(n, m)) + #ifdef __cplusplus } #endif diff --git a/include/util/trbtree.h b/include/util/trbtree.h index 
e226419440..8353a91f0a 100644 --- a/include/util/trbtree.h +++ b/include/util/trbtree.h @@ -39,7 +39,7 @@ void tRBTreeDrop(SRBTree *pTree, SRBTreeNode *z); SRBTreeNode *tRBTreeDropByKey(SRBTree *pTree, void *pKey); SRBTreeNode *tRBTreeDropMin(SRBTree *pTree); SRBTreeNode *tRBTreeDropMax(SRBTree *pTree); -SRBTreeNode *tRBTreeGet(SRBTree *pTree, const SRBTreeNode *pKeyNode); +SRBTreeNode *tRBTreeGet(const SRBTree *pTree, const SRBTreeNode *pKeyNode); // SRBTreeIter ============================================= #define tRBTreeIterCreate(tree, ascend) \ @@ -67,9 +67,9 @@ struct SRBTree { }; struct SRBTreeIter { - int8_t asc; - SRBTree *pTree; - SRBTreeNode *pNode; + int8_t asc; + const SRBTree *pTree; + SRBTreeNode *pNode; }; #ifdef __cplusplus diff --git a/include/util/tutil.h b/include/util/tutil.h index 7a59aa170a..a2cfa4cfe5 100644 --- a/include/util/tutil.h +++ b/include/util/tutil.h @@ -29,7 +29,7 @@ extern "C" { int32_t strdequote(char *src); size_t strtrim(char *src); char *strnchr(const char *haystack, char needle, int32_t len, bool skipquote); -TdUcs4* wcsnchr(const TdUcs4* haystack, TdUcs4 needle, size_t len); +TdUcs4 *wcsnchr(const TdUcs4 *haystack, TdUcs4 needle, size_t len); char **strsplit(char *src, const char *delim, int32_t *num); char *strtolower(char *dst, const char *src); @@ -37,11 +37,11 @@ char *strntolower(char *dst, const char *src, int32_t n); char *strntolower_s(char *dst, const char *src, int32_t n); int64_t strnatoi(char *num, int32_t len); -size_t tstrncspn(const char *str, size_t ssize, const char *reject, size_t rsize); -size_t twcsncspn(const TdUcs4 *wcs, size_t size, const TdUcs4 *reject, size_t rsize); +size_t tstrncspn(const char *str, size_t ssize, const char *reject, size_t rsize); +size_t twcsncspn(const TdUcs4 *wcs, size_t size, const TdUcs4 *reject, size_t rsize); -char *strbetween(char *string, char *begin, char *end); -char *paGetToken(char *src, char **token, int32_t *tokenLen); +char *strbetween(char *string, char *begin, 
char *end); +char *paGetToken(char *src, char **token, int32_t *tokenLen); int32_t taosByteArrayToHexStr(char bytes[], int32_t len, char hexstr[]); int32_t taosHexStrToByteArray(char hexstr[], char bytes[]); @@ -81,12 +81,13 @@ static FORCE_INLINE void taosEncryptPass_c(uint8_t *inBuf, size_t len, char *tar static FORCE_INLINE int32_t taosGetTbHashVal(const char *tbname, int32_t tblen, int32_t method, int32_t prefix, int32_t suffix) { - if ((prefix == 0 && suffix == 0) || (tblen <= (prefix + suffix)) || (tblen <= -1 * (prefix + suffix)) || prefix * suffix < 0) { + if ((prefix == 0 && suffix == 0) || (tblen <= (prefix + suffix)) || (tblen <= -1 * (prefix + suffix)) || + prefix * suffix < 0) { return MurmurHash3_32(tbname, tblen); } else if (prefix > 0 || suffix > 0) { return MurmurHash3_32(tbname + prefix, tblen - prefix - suffix); } else { - char tbName[TSDB_TABLE_FNAME_LEN]; + char tbName[TSDB_TABLE_FNAME_LEN]; int32_t offset = 0; if (prefix < 0) { offset = -1 * prefix; @@ -94,20 +95,35 @@ static FORCE_INLINE int32_t taosGetTbHashVal(const char *tbname, int32_t tblen, } if (suffix < 0) { strncpy(tbName + offset, tbname + tblen + suffix, -1 * suffix); - offset += -1 *suffix; + offset += -1 * suffix; } return MurmurHash3_32(tbName, offset); } } #define TSDB_CHECK_CODE(CODE, LINO, LABEL) \ - if (CODE) { \ - LINO = __LINE__; \ - goto LABEL; \ + do { \ + if ((CODE)) { \ + LINO = __LINE__; \ + goto LABEL; \ + } \ + } while (0) + +#define TSDB_CHECK_NULL(ptr, CODE, LINO, LABEL, ERRNO) \ + do { \ + if ((ptr) == NULL) { \ + (CODE) = (ERRNO); \ + (LINO) = __LINE__; \ + goto LABEL; \ - } + } \ + } while (0) +#define ARRAY_SIZE(a) (sizeof(a) / sizeof((a)[0])) + #define VND_CHECK_CODE(CODE, LINO, LABEL) TSDB_CHECK_CODE(CODE, LINO, LABEL) +#define TCONTAINER_OF(ptr, type, member) ((type *)((char *)(ptr)-offsetof(type, member))) + #ifdef __cplusplus } #endif diff --git a/source/common/src/tdataformat.c b/source/common/src/tdataformat.c index 0b12177754..7c6939635a 100644 --- a/source/common/src/tdataformat.c 
+++ b/source/common/src/tdataformat.c @@ -2245,15 +2245,18 @@ static int32_t tColDataUpdateValue72(SColData *pColData, uint8_t *pData, uint32_ } return 0; } +static FORCE_INLINE int32_t tColDataUpdateNothing(SColData *pColData, uint8_t *pData, uint32_t nData, bool forward) { + return 0; +} static int32_t (*tColDataUpdateValueImpl[8][3])(SColData *pColData, uint8_t *pData, uint32_t nData, bool forward) = { - {NULL, NULL, NULL}, // 0 - {tColDataUpdateValue10, NULL, tColDataUpdateValue12}, // HAS_NONE - {tColDataUpdateValue20, NULL, NULL}, // HAS_NULL - {tColDataUpdateValue30, NULL, tColDataUpdateValue32}, // HAS_NULL|HAS_NONE - {tColDataUpdateValue40, NULL, tColDataUpdateValue42}, // HAS_VALUE - {tColDataUpdateValue50, NULL, tColDataUpdateValue52}, // HAS_VALUE|HAS_NONE - {tColDataUpdateValue60, NULL, tColDataUpdateValue62}, // HAS_VALUE|HAS_NULL - {tColDataUpdateValue70, NULL, tColDataUpdateValue72}, // HAS_VALUE|HAS_NULL|HAS_NONE + {NULL, NULL, NULL}, // 0 + {tColDataUpdateValue10, tColDataUpdateNothing, tColDataUpdateValue12}, // HAS_NONE + {tColDataUpdateValue20, tColDataUpdateNothing, tColDataUpdateNothing}, // HAS_NULL + {tColDataUpdateValue30, tColDataUpdateNothing, tColDataUpdateValue32}, // HAS_NULL|HAS_NONE + {tColDataUpdateValue40, tColDataUpdateNothing, tColDataUpdateValue42}, // HAS_VALUE + {tColDataUpdateValue50, tColDataUpdateNothing, tColDataUpdateValue52}, // HAS_VALUE|HAS_NONE + {tColDataUpdateValue60, tColDataUpdateNothing, tColDataUpdateValue62}, // HAS_VALUE|HAS_NULL + {tColDataUpdateValue70, tColDataUpdateNothing, tColDataUpdateValue72}, // HAS_VALUE|HAS_NULL|HAS_NONE // VALUE NONE NULL }; diff --git a/source/common/src/tglobal.c b/source/common/src/tglobal.c index 04b4d187e9..6ad592751a 100644 --- a/source/common/src/tglobal.c +++ b/source/common/src/tglobal.c @@ -77,8 +77,14 @@ int64_t tsVndCommitMaxIntervalMs = 600 * 1000; int64_t tsMndSdbWriteDelta = 200; int64_t tsMndLogRetention = 2000; int8_t tsGrant = 1; +int32_t tsMndGrantMode = 0; bool 
tsMndSkipGrant = false; +// dnode +int64_t tsDndStart = 0; +int64_t tsDndStartOsUptime = 0; +int64_t tsDndUpTime = 0; + // monitor bool tsEnableMonitor = true; int32_t tsMonitorInterval = 30; @@ -506,6 +512,7 @@ static int32_t taosAddServerCfg(SConfig *pCfg) { if (cfgAddInt64(pCfg, "mndSdbWriteDelta", tsMndSdbWriteDelta, 20, 10000, CFG_SCOPE_SERVER) != 0) return -1; if (cfgAddInt64(pCfg, "mndLogRetention", tsMndLogRetention, 500, 10000, CFG_SCOPE_SERVER) != 0) return -1; + if (cfgAddInt32(pCfg, "grantMode", tsMndGrantMode, 0, 10000, CFG_SCOPE_SERVER) != 0) return -1; if (cfgAddBool(pCfg, "skipGrant", tsMndSkipGrant, CFG_SCOPE_SERVER) != 0) return -1; if (cfgAddBool(pCfg, "monitor", tsEnableMonitor, CFG_SCOPE_SERVER) != 0) return -1; @@ -915,6 +922,7 @@ static int32_t taosSetServerCfg(SConfig *pCfg) { tsMndSdbWriteDelta = cfgGetItem(pCfg, "mndSdbWriteDelta")->i64; tsMndLogRetention = cfgGetItem(pCfg, "mndLogRetention")->i64; tsMndSkipGrant = cfgGetItem(pCfg, "skipGrant")->bval; + tsMndGrantMode = cfgGetItem(pCfg, "grantMode")->i32; tsStartUdfd = cfgGetItem(pCfg, "udf")->bval; tstrncpy(tsUdfdResFuncs, cfgGetItem(pCfg, "udfdResFuncs")->str, sizeof(tsUdfdResFuncs)); diff --git a/source/dnode/mgmt/exe/dmMain.c b/source/dnode/mgmt/exe/dmMain.c index e1b8a57684..3c08714218 100644 --- a/source/dnode/mgmt/exe/dmMain.c +++ b/source/dnode/mgmt/exe/dmMain.c @@ -373,6 +373,8 @@ int mainWindows(int argc, char **argv) { dInfo("start to init service"); dmSetSignalHandle(); + tsDndStart = taosGetTimestampMs(); + tsDndStartOsUptime = taosGetOsUptime(); int32_t code = dmRun(); dInfo("shutting down the service"); diff --git a/source/dnode/mgmt/mgmt_dnode/src/dmWorker.c b/source/dnode/mgmt/mgmt_dnode/src/dmWorker.c index 89c394fdd0..76cb65b53a 100644 --- a/source/dnode/mgmt/mgmt_dnode/src/dmWorker.c +++ b/source/dnode/mgmt/mgmt_dnode/src/dmWorker.c @@ -24,12 +24,16 @@ static void *dmStatusThreadFp(void *param) { const static int16_t TRIM_FREQ = 30; int32_t trimCount = 0; + int32_t 
upTimeCount = 0; + int64_t upTime = 0; + while (1) { taosMsleep(200); if (pMgmt->pData->dropped || pMgmt->pData->stopped) break; int64_t curTime = taosGetTimestampMs(); - float interval = (curTime - lastTime) / 1000.0f; + if (curTime < lastTime) lastTime = curTime; + float interval = (curTime - lastTime) / 1000.0f; if (interval >= tsStatusInterval) { dmSendStatusReq(pMgmt); lastTime = curTime; @@ -38,6 +42,11 @@ static void *dmStatusThreadFp(void *param) { if (trimCount == 0) { taosMemoryTrim(0); } + + if ((upTimeCount = ((upTimeCount + 1) & 63)) == 0) { + upTime = taosGetOsUptime() - tsDndStartOsUptime; + tsDndUpTime = TMAX(tsDndUpTime, upTime); + } } } @@ -54,7 +63,8 @@ static void *dmMonitorThreadFp(void *param) { if (pMgmt->pData->dropped || pMgmt->pData->stopped) break; int64_t curTime = taosGetTimestampMs(); - float interval = (curTime - lastTime) / 1000.0f; + if (curTime < lastTime) lastTime = curTime; + float interval = (curTime - lastTime) / 1000.0f; if (interval >= tsMonitorInterval) { (*pMgmt->sendMonitorReportFp)(); lastTime = curTime; diff --git a/source/dnode/mgmt/node_mgmt/src/dmTransport.c b/source/dnode/mgmt/node_mgmt/src/dmTransport.c index 5d6d16ccf8..df54f8abba 100644 --- a/source/dnode/mgmt/node_mgmt/src/dmTransport.c +++ b/source/dnode/mgmt/node_mgmt/src/dmTransport.c @@ -290,6 +290,7 @@ int32_t dmInitClient(SDnode *pDnode) { rpcInit.cfp = (RpcCfp)dmProcessRpcMsg; rpcInit.sessions = 1024; rpcInit.connType = TAOS_CONN_CLIENT; + rpcInit.user = TSDB_DEFAULT_USER; rpcInit.idleTime = tsShellActivityTimer * 1000; rpcInit.parent = pDnode; rpcInit.rfp = rpcRfp; diff --git a/source/dnode/mnode/impl/inc/mndCluster.h b/source/dnode/mnode/impl/inc/mndCluster.h index 2cb41edd7c..e33ffdb372 100644 --- a/source/dnode/mnode/impl/inc/mndCluster.h +++ b/source/dnode/mnode/impl/inc/mndCluster.h @@ -27,7 +27,7 @@ void mndCleanupCluster(SMnode *pMnode); int32_t mndGetClusterName(SMnode *pMnode, char *clusterName, int32_t len); int64_t mndGetClusterId(SMnode 
*pMnode); int64_t mndGetClusterCreateTime(SMnode *pMnode); -float mndGetClusterUpTime(SMnode *pMnode); +int64_t mndGetClusterUpTime(SMnode *pMnode); #ifdef __cplusplus } diff --git a/source/dnode/mnode/impl/src/mndCluster.c b/source/dnode/mnode/impl/src/mndCluster.c index 8ea98242f9..d20350b094 100644 --- a/source/dnode/mnode/impl/src/mndCluster.c +++ b/source/dnode/mnode/impl/src/mndCluster.c @@ -123,7 +123,7 @@ static int32_t mndGetClusterUpTimeImp(SClusterObj *pCluster) { #endif } -float mndGetClusterUpTime(SMnode *pMnode) { +int64_t mndGetClusterUpTime(SMnode *pMnode) { int64_t upTime = 0; void *pIter = NULL; SClusterObj *pCluster = mndAcquireCluster(pMnode, &pIter); @@ -132,7 +132,7 @@ float mndGetClusterUpTime(SMnode *pMnode) { mndReleaseCluster(pMnode, pCluster, pIter); } - return upTime / 86400.0f; + return upTime; } static SSdbRaw *mndClusterActionEncode(SClusterObj *pCluster) { diff --git a/source/dnode/mnode/impl/src/mndDnode.c b/source/dnode/mnode/impl/src/mndDnode.c index 81b591e8b5..cfd026634c 100644 --- a/source/dnode/mnode/impl/src/mndDnode.c +++ b/source/dnode/mnode/impl/src/mndDnode.c @@ -655,6 +655,7 @@ static int32_t mndConfigDnode(SMnode *pMnode, SRpcMsg *pReq, SMCfgDnodeReq *pCfg STrans *pTrans = NULL; SDnodeObj *pDnode = NULL; bool cfgAll = pCfgReq->dnodeId == -1; + int32_t iter = 0; SSdb *pSdb = pMnode->pSdb; void *pIter = NULL; @@ -662,7 +663,8 @@ static int32_t mndConfigDnode(SMnode *pMnode, SRpcMsg *pReq, SMCfgDnodeReq *pCfg if (cfgAll) { pIter = sdbFetch(pSdb, SDB_DNODE, pIter, (void **)&pDnode); if (pIter == NULL) break; - } else if(!(pDnode = mndAcquireDnode(pMnode, pCfgReq->dnodeId))) { + ++iter; + } else if (!(pDnode = mndAcquireDnode(pMnode, pCfgReq->dnodeId))) { goto _OVER; } @@ -699,7 +701,7 @@ static int32_t mndConfigDnode(SMnode *pMnode, SRpcMsg *pReq, SMCfgDnodeReq *pCfg } if (pTrans && mndTransPrepare(pMnode, pTrans) != 0) goto _OVER; - + tsGrantHBInterval = TMIN(TMAX(5, iter / 2), 30); terrno = 0; _OVER: @@ -863,7 +865,7 @@ 
static int32_t mndProcessCreateDnodeReq(SRpcMsg *pReq) { code = mndCreateDnode(pMnode, pReq, &createReq); if (code == 0) code = TSDB_CODE_ACTION_IN_PROGRESS; - + tsGrantHBInterval = 5; _OVER: if (code != 0 && code != TSDB_CODE_ACTION_IN_PROGRESS) { mError("dnode:%s:%d, failed to create since %s", createReq.fqdn, createReq.port, terrstr()); diff --git a/source/dnode/mnode/impl/src/mndMain.c b/source/dnode/mnode/impl/src/mndMain.c index 381b1e64ed..79abc57a39 100644 --- a/source/dnode/mnode/impl/src/mndMain.c +++ b/source/dnode/mnode/impl/src/mndMain.c @@ -804,7 +804,7 @@ int32_t mndGetMonitorInfo(SMnode *pMnode, SMonClusterInfo *pClusterInfo, SMonVgr if (pObj->id == pMnode->selfDnodeId) { pClusterInfo->first_ep_dnode_id = pObj->id; tstrncpy(pClusterInfo->first_ep, pObj->pDnode->ep, sizeof(pClusterInfo->first_ep)); - pClusterInfo->master_uptime = mndGetClusterUpTime(pMnode); + pClusterInfo->master_uptime = (float)mndGetClusterUpTime(pMnode) / 86400.0f; // pClusterInfo->master_uptime = (ms - pObj->stateStartTime) / (86400000.0f); tstrncpy(desc.role, syncStr(TAOS_SYNC_STATE_LEADER), sizeof(desc.role)); } else { diff --git a/source/dnode/vnode/CMakeLists.txt b/source/dnode/vnode/CMakeLists.txt index a3ccc720d9..194ffa16f6 100644 --- a/source/dnode/vnode/CMakeLists.txt +++ b/source/dnode/vnode/CMakeLists.txt @@ -1,21 +1,18 @@ # vnode add_library(vnode STATIC "") -target_sources( - vnode - PRIVATE - - # vnode - "src/vnd/vnodeOpen.c" - "src/vnd/vnodeBufPool.c" - "src/vnd/vnodeCfg.c" - "src/vnd/vnodeCommit.c" - "src/vnd/vnodeQuery.c" - "src/vnd/vnodeModule.c" - "src/vnd/vnodeSvr.c" - "src/vnd/vnodeSync.c" - "src/vnd/vnodeSnapshot.c" - "src/vnd/vnodeRetention.c" - "src/vnd/vnodeInitApi.c" +set( + VNODE_SOURCE_FILES + "src/vnd/vnodeOpen.c" + "src/vnd/vnodeBufPool.c" + "src/vnd/vnodeCfg.c" + "src/vnd/vnodeCommit.c" + "src/vnd/vnodeQuery.c" + "src/vnd/vnodeModule.c" + "src/vnd/vnodeSvr.c" + "src/vnd/vnodeSync.c" + "src/vnd/vnodeSnapshot.c" + "src/vnd/vnodeRetention.c" + 
"src/vnd/vnodeInitApi.c" # meta "src/meta/metaOpen.c" @@ -38,23 +35,23 @@ target_sources( "src/sma/smaSnapshot.c" "src/sma/smaTimeRange.c" - # tsdb - "src/tsdb/tsdbCommit.c" - "src/tsdb/tsdbFile.c" - "src/tsdb/tsdbFS.c" - "src/tsdb/tsdbOpen.c" - "src/tsdb/tsdbMemTable.c" - "src/tsdb/tsdbRead.c" - "src/tsdb/tsdbCache.c" - "src/tsdb/tsdbWrite.c" - "src/tsdb/tsdbReaderWriter.c" - "src/tsdb/tsdbUtil.c" - "src/tsdb/tsdbSnapshot.c" - "src/tsdb/tsdbCacheRead.c" - "src/tsdb/tsdbRetention.c" - "src/tsdb/tsdbDiskData.c" - "src/tsdb/tsdbMergeTree.c" - "src/tsdb/tsdbDataIter.c" + # # tsdb + # "src/tsdb/tsdbCommit.c" + # "src/tsdb/tsdbFile.c" + # "src/tsdb/tsdbFS.c" + # "src/tsdb/tsdbOpen.c" + # "src/tsdb/tsdbMemTable.c" + # "src/tsdb/tsdbRead.c" + # "src/tsdb/tsdbCache.c" + # "src/tsdb/tsdbWrite.c" + # "src/tsdb/tsdbReaderWriter.c" + # "src/tsdb/tsdbUtil.c" + # "src/tsdb/tsdbSnapshot.c" + # "src/tsdb/tsdbCacheRead.c" + # "src/tsdb/tsdbRetention.c" + # "src/tsdb/tsdbDiskData.c" + # "src/tsdb/tsdbMergeTree.c" + # "src/tsdb/tsdbDataIter.c" # tq "src/tq/tq.c" @@ -71,6 +68,19 @@ target_sources( "src/tq/tqOffsetSnapshot.c" ) +aux_source_directory("src/tsdb/" TSDB_SOURCE_FILES) +list( + APPEND + VNODE_SOURCE_FILES + ${TSDB_SOURCE_FILES} +) + +target_sources( + vnode + PRIVATE + ${VNODE_SOURCE_FILES} +) + IF (TD_VNODE_PLUGINS) target_sources( vnode diff --git a/source/dnode/vnode/inc/vnode.h b/source/dnode/vnode/inc/vnode.h index 38216e1414..f60cc2f406 100644 --- a/source/dnode/vnode/inc/vnode.h +++ b/source/dnode/vnode/inc/vnode.h @@ -168,6 +168,27 @@ uint64_t tsdbGetReaderMaxVersion(STsdbReader *pReader); void tsdbReaderSetCloseFlag(STsdbReader *pReader); int64_t tsdbGetLastTimestamp(SVnode *pVnode, void *pTableList, int32_t numOfTables, const char *pIdStr); +//====================================================================================================================== +int32_t tsdbReaderOpen2(void *pVnode, SQueryTableDataCond *pCond, void *pTableList, int32_t numOfTables, 
+ SSDataBlock *pResBlock, void **ppReader, const char *idstr, bool countOnly, + SHashObj **pIgnoreTables); +int32_t tsdbSetTableList2(STsdbReader *pReader, const void *pTableList, int32_t num); +void tsdbReaderSetId2(STsdbReader *pReader, const char *idstr); +void tsdbReaderClose2(STsdbReader *pReader); +int32_t tsdbNextDataBlock2(STsdbReader *pReader, bool *hasNext); +int32_t tsdbRetrieveDatablockSMA2(STsdbReader *pReader, SSDataBlock *pDataBlock, bool *allHave, bool *hasNullSMA); +void tsdbReleaseDataBlock2(STsdbReader *pReader); +SSDataBlock *tsdbRetrieveDataBlock2(STsdbReader *pTsdbReadHandle, SArray *pColumnIdList); +int32_t tsdbReaderReset2(STsdbReader *pReader, SQueryTableDataCond *pCond); +int32_t tsdbGetFileBlocksDistInfo2(STsdbReader *pReader, STableBlockDistInfo *pTableBlockInfo); +int64_t tsdbGetNumOfRowsInMemTable2(STsdbReader *pHandle); +void *tsdbGetIdx2(SMeta *pMeta); +void *tsdbGetIvtIdx2(SMeta *pMeta); +uint64_t tsdbGetReaderMaxVersion2(STsdbReader *pReader); +void tsdbReaderSetCloseFlag2(STsdbReader *pReader); +int64_t tsdbGetLastTimestamp2(SVnode *pVnode, void *pTableList, int32_t numOfTables, const char *pIdStr); +//====================================================================================================================== + int32_t tsdbReuseCacherowsReader(void *pReader, void *pTableIdList, int32_t numOfTables); int32_t tsdbCacherowsReaderOpen(void *pVnode, int32_t type, void *pTableIdList, int32_t numOfTables, int32_t numOfCols, SArray *pCidList, int32_t *pSlotIds, uint64_t suid, void **pReader, const char *idstr); diff --git a/source/dnode/vnode/src/inc/tsdb.h b/source/dnode/vnode/src/inc/tsdb.h index 71af169752..75c8eea83a 100644 --- a/source/dnode/vnode/src/inc/tsdb.h +++ b/source/dnode/vnode/src/inc/tsdb.h @@ -16,6 +16,9 @@ #ifndef _TD_VNODE_TSDB_H_ #define _TD_VNODE_TSDB_H_ +// #include "../tsdb/tsdbFile2.h" +// #include "../tsdb/tsdbMerge.h" +// #include "../tsdb/tsdbSttFileRW.h" #include "tsimplehash.h" #include "vnodeInt.h" 
@@ -75,9 +78,8 @@ typedef struct STsdbFilterInfo STsdbFilterInfo; #define TSDBROW_ROW_FMT ((int8_t)0x0) #define TSDBROW_COL_FMT ((int8_t)0x1) -#define TSDB_FILE_DLMT ((uint32_t)0xF00AFA0F) -#define TSDB_MAX_SUBBLOCKS 8 -#define TSDB_FHDR_SIZE 512 +#define TSDB_FILE_DLMT ((uint32_t)0xF00AFA0F) +#define TSDB_FHDR_SIZE 512 #define VERSION_MIN 0 #define VERSION_MAX INT64_MAX @@ -165,6 +167,7 @@ void tBlockDataDestroy(SBlockData *pBlockData); int32_t tBlockDataInit(SBlockData *pBlockData, TABLEID *pId, STSchema *pTSchema, int16_t *aCid, int32_t nCid); void tBlockDataReset(SBlockData *pBlockData); int32_t tBlockDataAppendRow(SBlockData *pBlockData, TSDBROW *pRow, STSchema *pTSchema, int64_t uid); +int32_t tBlockDataUpdateRow(SBlockData *pBlockData, TSDBROW *pRow, STSchema *pTSchema); int32_t tBlockDataTryUpsertRow(SBlockData *pBlockData, TSDBROW *pRow, int64_t uid); int32_t tBlockDataUpsertRow(SBlockData *pBlockData, TSDBROW *pRow, STSchema *pTSchema, int64_t uid); void tBlockDataClear(SBlockData *pBlockData); @@ -198,7 +201,7 @@ int32_t tMapDataToArray(SMapData *pMapData, int32_t itemSize, int32_t (*tGetItem // other int32_t tsdbKeyFid(TSKEY key, int32_t minutes, int8_t precision); void tsdbFidKeyRange(int32_t fid, int32_t minutes, int8_t precision, TSKEY *minKey, TSKEY *maxKey); -int32_t tsdbFidLevel(int32_t fid, STsdbKeepCfg *pKeepCfg, int64_t now); +int32_t tsdbFidLevel(int32_t fid, STsdbKeepCfg *pKeepCfg, int64_t nowSec); int32_t tsdbBuildDeleteSkyline(SArray *aDelData, int32_t sidx, int32_t eidx, SArray *aSkyline); int32_t tPutColumnDataAgg(uint8_t *p, SColumnDataAgg *pColAgg); int32_t tGetColumnDataAgg(uint8_t *p, SColumnDataAgg *pColAgg); @@ -302,8 +305,11 @@ int32_t tsdbReadDelIdx(SDelFReader *pReader, SArray *aDelIdx); // tsdbRead.c ============================================================================================== int32_t tsdbTakeReadSnap(STsdbReader *pReader, _query_reseek_func_t reseek, STsdbReadSnap **ppSnap); void tsdbUntakeReadSnap(STsdbReader 
*pReader, STsdbReadSnap *pSnap, bool proactive); + +int32_t tsdbTakeReadSnap2(STsdbReader *pReader, _query_reseek_func_t reseek, STsdbReadSnap **ppSnap); +void tsdbUntakeReadSnap2(STsdbReader *pReader, STsdbReadSnap *pSnap, bool proactive); // tsdbMerge.c ============================================================================================== -int32_t tsdbMerge(STsdb *pTsdb); +int32_t tsdbMerge(void *arg); // tsdbDiskData ============================================================================================== int32_t tDiskDataBuilderCreate(SDiskDataBuilder **ppBuilder); @@ -362,19 +368,20 @@ typedef struct { } SCacheFlushState; struct STsdb { - char *path; - SVnode *pVnode; - STsdbKeepCfg keepCfg; - TdThreadRwlock rwLock; - SMemTable *mem; - SMemTable *imem; - STsdbFS fs; - SLRUCache *lruCache; - SCacheFlushState flushState; - TdThreadMutex lruMutex; - SLRUCache *biCache; - TdThreadMutex biMutex; - SRocksCache rCache; + char *path; + SVnode *pVnode; + STsdbKeepCfg keepCfg; + TdThreadRwlock rwLock; + SMemTable *mem; + SMemTable *imem; + STsdbFS fs; // old + SLRUCache *lruCache; + SCacheFlushState flushState; + TdThreadMutex lruMutex; + SLRUCache *biCache; + TdThreadMutex biMutex; + struct STFileSystem *pFS; // new + SRocksCache rCache; }; struct TSDBKEY { @@ -410,6 +417,7 @@ struct STbData { SDelData *pTail; SMemSkipList sl; STbData *next; + SRBTreeNode rbtn[1]; }; struct SMemTable { @@ -423,11 +431,10 @@ struct SMemTable { TSKEY maxKey; int64_t nRow; int64_t nDel; - struct { - int32_t nTbData; - int32_t nBucket; - STbData **aBucket; - }; + int32_t nTbData; + int32_t nBucket; + STbData **aBucket; + SRBTree tbDataTree[1]; }; struct TSDBROW { @@ -500,7 +507,7 @@ struct SDataBlk { int32_t nRow; int8_t hasDup; int8_t nSubBlock; - SBlockInfo aSubBlock[TSDB_MAX_SUBBLOCKS]; + SBlockInfo aSubBlock[1]; SSmaInfo smaInfo; }; @@ -652,12 +659,19 @@ struct SDelFWriter { uint8_t *aBuf[1]; }; +#include "tarray2.h" +//#include "tsdbFS2.h" +// struct STFileSet; +typedef 
struct STFileSet STFileSet; +typedef TARRAY2(STFileSet *) TFileSetArray; + struct STsdbReadSnap { - SMemTable *pMem; - SQueryNode *pNode; - SMemTable *pIMem; - SQueryNode *pINode; - STsdbFS fs; + SMemTable *pMem; + SQueryNode *pNode; + SMemTable *pIMem; + SQueryNode *pINode; + TFileSetArray *pfSetArray; + STsdbFS fs; }; struct SDataFWriter { @@ -696,6 +710,7 @@ typedef struct { typedef struct SSttBlockLoadInfo { SBlockData blockData[2]; + void *pSttStatisBlkArray; SArray *aSttBlk; int32_t blockIndex[2]; // to denote the loaded block in the corresponding position. int32_t currentLoadBlockIndex; @@ -704,10 +719,9 @@ typedef struct SSttBlockLoadInfo { STSchema *pSchema; int16_t *colIds; int32_t numOfCols; - bool checkRemainingRow; + bool checkRemainingRow; // todo: no assign value? bool isLast; bool sttBlockLoaded; - int32_t numOfStt; // keep the last access position, this position may be used to reduce the binary times for // starting last block data for a new table @@ -766,60 +780,107 @@ struct SDiskDataBuilder { }; typedef struct SLDataIter { - SRBTreeNode node; - SSttBlk *pSttBlk; - SDataFReader *pReader; - int32_t iStt; - int8_t backward; - int32_t iSttBlk; - int32_t iRow; - SRowInfo rInfo; - uint64_t uid; - STimeWindow timeWindow; - SVersionRange verRange; - SSttBlockLoadInfo *pBlockLoadInfo; - bool ignoreEarlierTs; + SRBTreeNode node; + SSttBlk *pSttBlk; + int32_t iStt; // for debug purpose + int8_t backward; + int32_t iSttBlk; + int32_t iRow; + SRowInfo rInfo; + uint64_t uid; + STimeWindow timeWindow; + SVersionRange verRange; + SSttBlockLoadInfo *pBlockLoadInfo; + bool ignoreEarlierTs; + struct SSttFileReader *pReader; } SLDataIter; #define tMergeTreeGetRow(_t) (&((_t)->pIter->rInfo.row)) int32_t tMergeTreeOpen(SMergeTree *pMTree, int8_t backward, SDataFReader *pFReader, uint64_t suid, uint64_t uid, STimeWindow *pTimeWindow, SVersionRange *pVerRange, SSttBlockLoadInfo *pBlockLoadInfo, bool destroyLoadInfo, const char *idStr, bool strictTimeRange, SLDataIter 
*pLDataIter); -void tMergeTreeAddIter(SMergeTree *pMTree, SLDataIter *pIter); -bool tMergeTreeNext(SMergeTree *pMTree); -bool tMergeTreeIgnoreEarlierTs(SMergeTree *pMTree); -void tMergeTreeClose(SMergeTree *pMTree); + +struct SSttFileReader; +typedef int32_t (*_load_tomb_fn)(STsdbReader *pReader, struct SSttFileReader *pSttFileReader, + SSttBlockLoadInfo *pLoadInfo); + +typedef struct { + int8_t backward; + STsdb *pTsdb; + uint64_t suid; + uint64_t uid; + STimeWindow timewindow; + SVersionRange verRange; + bool strictTimeRange; + SArray *pSttFileBlockIterArray; + void *pCurrentFileset; + STSchema *pSchema; + int16_t *pCols; + int32_t numOfCols; + _load_tomb_fn loadTombFn; + void *pReader; + void *idstr; +} SMergeTreeConf; +int32_t tMergeTreeOpen2(SMergeTree *pMTree, SMergeTreeConf *pConf); + +void tMergeTreeAddIter(SMergeTree *pMTree, SLDataIter *pIter); +bool tMergeTreeNext(SMergeTree *pMTree); +bool tMergeTreeIgnoreEarlierTs(SMergeTree *pMTree); +void tMergeTreeClose(SMergeTree *pMTree); SSttBlockLoadInfo *tCreateLastBlockLoadInfo(STSchema *pSchema, int16_t *colList, int32_t numOfCols, int32_t numOfStt); +SSttBlockLoadInfo *tCreateOneLastBlockLoadInfo(STSchema *pSchema, int16_t *colList, int32_t numOfCols); void resetLastBlockLoadInfo(SSttBlockLoadInfo *pLoadInfo); void getLastBlockLoadInfo(SSttBlockLoadInfo *pLoadInfo, int64_t *blocks, double *el); void *destroyLastBlockLoadInfo(SSttBlockLoadInfo *pLoadInfo); +void *destroySttBlockReader(SArray *pLDataIterArray, int64_t *blocks, double *el); // tsdbCache ============================================================================================== +typedef enum { + READ_MODE_COUNT_ONLY = 0x1, + READ_MODE_ALL, +} EReadMode; + +typedef struct STsdbReaderInfo { + uint64_t suid; + STSchema *pSchema; + EReadMode readMode; + uint64_t rowsNum; + STimeWindow window; + SVersionRange verRange; + int16_t order; +} STsdbReaderInfo; + +typedef struct { + SArray *pTombData; +} STableLoadInfo; + +struct SDataFileReader; + 
typedef struct SCacheRowsReader { - STsdb *pTsdb; - SVersionRange verRange; - TdThreadMutex readerMutex; - SVnode *pVnode; - STSchema *pSchema; - STSchema *pCurrSchema; - uint64_t uid; - uint64_t suid; - char **transferBuf; // todo remove it soon - int32_t numOfCols; - SArray *pCidList; - int32_t *pSlotIds; - int32_t type; - int32_t tableIndex; // currently returned result tables - STableKeyInfo *pTableList; // table id list - int32_t numOfTables; - SSttBlockLoadInfo *pLoadInfo; - SLDataIter *pDataIter; - STsdbReadSnap *pReadSnap; - SDataFReader *pDataFReader; - SDataFReader *pDataFReaderLast; - const char *idstr; - int64_t lastTs; + STsdb *pTsdb; + STsdbReaderInfo info; + TdThreadMutex readerMutex; + SVnode *pVnode; + STSchema *pSchema; + STSchema *pCurrSchema; + uint64_t uid; + char **transferBuf; // todo remove it soon + int32_t numOfCols; + SArray *pCidList; + int32_t *pSlotIds; + int32_t type; + int32_t tableIndex; // currently returned result tables + STableKeyInfo *pTableList; // table id list + int32_t numOfTables; + uint64_t *uidList; + SSHashObj *pTableMap; + SArray *pLDataIterArray; + struct SDataFileReader *pFileReader; + STFileSet *pCurFileSet; + STsdbReadSnap *pReadSnap; + char *idstr; + int64_t lastTs; } SCacheRowsReader; typedef struct { diff --git a/source/dnode/vnode/src/inc/vnd.h b/source/dnode/vnode/src/inc/vnd.h index 5e7d522fb9..85ef384ea9 100644 --- a/source/dnode/vnode/src/inc/vnd.h +++ b/source/dnode/vnode/src/inc/vnd.h @@ -49,7 +49,8 @@ int32_t vnodeEncodeConfig(const void* pObj, SJson* pJson); int32_t vnodeDecodeConfig(const SJson* pJson, void* pObj); // vnodeModule.c -int32_t vnodeScheduleTask(int32_t (*execute)(void*), void* arg); +int vnodeScheduleTask(int (*execute)(void*), void* arg); +int vnodeScheduleTaskEx(int tpid, int (*execute)(void*), void* arg); // vnodeBufPool.c typedef struct SVBufPoolNode SVBufPoolNode; diff --git a/source/dnode/vnode/src/inc/vnodeInt.h b/source/dnode/vnode/src/inc/vnodeInt.h index 77e4a48249..08d8b1d06c 
100644 --- a/source/dnode/vnode/src/inc/vnodeInt.h +++ b/source/dnode/vnode/src/inc/vnodeInt.h @@ -180,8 +180,8 @@ SArray* metaGetSmaTbUids(SMeta* pMeta); void* metaGetIdx(SMeta* pMeta); void* metaGetIvtIdx(SMeta* pMeta); -int64_t metaGetTbNum(SMeta *pMeta); -void metaReaderDoInit(SMetaReader *pReader, SMeta *pMeta, int32_t flags); +int64_t metaGetTbNum(SMeta* pMeta); +void metaReaderDoInit(SMetaReader* pReader, SMeta* pMeta, int32_t flags); int32_t metaCreateTSma(SMeta* pMeta, int64_t version, SSmaCfg* pCfg); int32_t metaDropTSma(SMeta* pMeta, int64_t indexUid); @@ -198,12 +198,12 @@ int32_t metaGetInfo(SMeta* pMeta, int64_t uid, SMetaInfo* pInfo, SMetaReader* pR int tsdbOpen(SVnode* pVnode, STsdb** ppTsdb, const char* dir, STsdbKeepCfg* pKeepCfg, int8_t rollback); int tsdbClose(STsdb** pTsdb); int32_t tsdbBegin(STsdb* pTsdb); -int32_t tsdbPrepareCommit(STsdb* pTsdb); -int32_t tsdbCommit(STsdb* pTsdb, SCommitInfo* pInfo); +// int32_t tsdbPrepareCommit(STsdb* pTsdb); +// int32_t tsdbCommit(STsdb* pTsdb, SCommitInfo* pInfo); int32_t tsdbCacheCommit(STsdb* pTsdb); int32_t tsdbCompact(STsdb* pTsdb, SCompactInfo* pInfo); -int32_t tsdbFinishCommit(STsdb* pTsdb); -int32_t tsdbRollbackCommit(STsdb* pTsdb); +// int32_t tsdbFinishCommit(STsdb* pTsdb); +// int32_t tsdbRollbackCommit(STsdb* pTsdb); int tsdbScanAndConvertSubmitMsg(STsdb* pTsdb, SSubmitReq2* pMsg); int tsdbInsertData(STsdb* pTsdb, int64_t version, SSubmitReq2* pMsg, SSubmitRsp2* pRsp); int32_t tsdbInsertTableData(STsdb* pTsdb, int64_t version, SSubmitTbData* pSubmitTbData, int32_t* affectedRows); diff --git a/source/dnode/vnode/src/sma/smaCommit.c b/source/dnode/vnode/src/sma/smaCommit.c index d1c4314091..c26157f4b7 100644 --- a/source/dnode/vnode/src/sma/smaCommit.c +++ b/source/dnode/vnode/src/sma/smaCommit.c @@ -103,15 +103,16 @@ _exit: return code; } -int32_t smaFinishCommit(SSma *pSma) { +extern int32_t tsdbCommitCommit(STsdb *tsdb); +int32_t smaFinishCommit(SSma *pSma) { int32_t code = 0; int32_t lino = 
0; SVnode *pVnode = pSma->pVnode; - if (VND_RSMA1(pVnode) && (code = tsdbFinishCommit(VND_RSMA1(pVnode))) < 0) { + if (VND_RSMA1(pVnode) && (code = tsdbCommitCommit(VND_RSMA1(pVnode))) < 0) { TSDB_CHECK_CODE(code, lino, _exit); } - if (VND_RSMA2(pVnode) && (code = tsdbFinishCommit(VND_RSMA2(pVnode))) < 0) { + if (VND_RSMA2(pVnode) && (code = tsdbCommitCommit(VND_RSMA2(pVnode))) < 0) { TSDB_CHECK_CODE(code, lino, _exit); } _exit: @@ -130,6 +131,7 @@ _exit: * @param isCommit * @return int32_t */ +extern int32_t tsdbPreCommit(STsdb *tsdb); static int32_t tdProcessRSmaAsyncPreCommitImpl(SSma *pSma, bool isCommit) { int32_t code = 0; int32_t lino = 0; @@ -186,11 +188,11 @@ static int32_t tdProcessRSmaAsyncPreCommitImpl(SSma *pSma, bool isCommit) { // all rsma results are written completely STsdb *pTsdb = NULL; if ((pTsdb = VND_RSMA1(pSma->pVnode))) { - code = tsdbPrepareCommit(pTsdb); + code = tsdbPreCommit(pTsdb); TSDB_CHECK_CODE(code, lino, _exit); } if ((pTsdb = VND_RSMA2(pSma->pVnode))) { - code = tsdbPrepareCommit(pTsdb); + code = tsdbPreCommit(pTsdb); TSDB_CHECK_CODE(code, lino, _exit); } @@ -207,6 +209,7 @@ _exit: * @param pSma * @return int32_t */ +extern int32_t tsdbCommitBegin(STsdb *tsdb, SCommitInfo *info); static int32_t tdProcessRSmaAsyncCommitImpl(SSma *pSma, SCommitInfo *pInfo) { int32_t code = 0; int32_t lino = 0; @@ -217,10 +220,10 @@ static int32_t tdProcessRSmaAsyncCommitImpl(SSma *pSma, SCommitInfo *pInfo) { goto _exit; } - code = tsdbCommit(VND_RSMA1(pVnode), pInfo); + code = tsdbCommitBegin(VND_RSMA1(pVnode), pInfo); TSDB_CHECK_CODE(code, lino, _exit); - code = tsdbCommit(VND_RSMA2(pVnode), pInfo); + code = tsdbCommitBegin(VND_RSMA2(pVnode), pInfo); TSDB_CHECK_CODE(code, lino, _exit); _exit: diff --git a/source/dnode/vnode/src/sma/smaRollup.c b/source/dnode/vnode/src/sma/smaRollup.c index 490bcd1238..9fd4938448 100644 --- a/source/dnode/vnode/src/sma/smaRollup.c +++ b/source/dnode/vnode/src/sma/smaRollup.c @@ -264,7 +264,7 @@ static int32_t 
tdSetRSmaInfoItemParams(SSma *pSma, SRSmaParam *param, SRSmaStat return TSDB_CODE_FAILED; } - SReadHandle handle = { .vnode = pVnode, .initTqReader = 1, .pStateBackend = pStreamState }; + SReadHandle handle = {.vnode = pVnode, .initTqReader = 1, .pStateBackend = pStreamState}; initStorageAPI(&handle.api); pRSmaInfo->taskInfo[idx] = qCreateStreamExecTaskInfo(param->qmsg[idx], &handle, TD_VID(pVnode)); @@ -572,8 +572,8 @@ int32_t smaDoRetention(SSma *pSma, int64_t now) { for (int32_t i = 0; i < TSDB_RETENTION_L2; ++i) { if (pSma->pRSmaTsdb[i]) { - code = tsdbDoRetention(pSma->pRSmaTsdb[i], now); - if (code) goto _end; + // code = tsdbDoRetention(pSma->pRSmaTsdb[i], now); + // if (code) goto _end; } } @@ -612,7 +612,6 @@ static int32_t tdRSmaExecAndSubmitResult(SSma *pSma, qTaskInfo_t taskInfo, SRSma blockDebugShowDataBlocks(pResList, flag); #endif for (int32_t i = 0; i < taosArrayGetSize(pResList); ++i) { - output = taosArrayGetP(pResList, i); smaDebug("vgId:%d, result block, uid:%" PRIu64 ", groupid:%" PRIu64 ", rows:%" PRIi64, SMA_VID(pSma), output->info.id.uid, output->info.id.groupId, output->info.rows); @@ -1114,8 +1113,8 @@ static void tdRSmaFetchTrigger(void *param, void *tmrId) { } if (!(pStat = (SRSmaStat *)tdAcquireSmaRef(smaMgmt.rsetId, pRSmaRef->refId))) { - smaWarn("rsma fetch task not start since rsma stat already destroyed, rsetId:%d refId:%" PRIi64 ")", - smaMgmt.rsetId, pRSmaRef->refId); // pRSmaRef freed in taosHashRemove + smaWarn("rsma fetch task not start since rsma stat already destroyed, rsetId:%d refId:%" PRIi64 ")", smaMgmt.rsetId, + pRSmaRef->refId); // pRSmaRef freed in taosHashRemove taosHashRemove(smaMgmt.refHash, ¶m, POINTER_BYTES); return; } diff --git a/source/dnode/vnode/src/tsdb/tsdbCache.c b/source/dnode/vnode/src/tsdb/tsdbCache.c index d0986b25f0..a01c6a8a9e 100644 --- a/source/dnode/vnode/src/tsdb/tsdbCache.c +++ b/source/dnode/vnode/src/tsdb/tsdbCache.c @@ -13,6 +13,8 @@ * along with this program. If not, see . 
*/ #include "tsdb.h" +#include "tsdbDataFileRW.h" +#include "tsdbReadUtil.h" #include "vnd.h" #define ROCKS_BATCH_SIZE (4096) @@ -1689,188 +1691,188 @@ _err: } return code; } -/* -static int32_t getTableDelIdx(SDelFReader *pDelFReader, tb_uid_t suid, tb_uid_t uid, SDelIdx *pDelIdx) { - int32_t code = 0; - SArray *pDelIdxArray = NULL; - // SMapData delIdxMap; - pDelIdxArray = taosArrayInit(32, sizeof(SDelIdx)); - SDelIdx idx = {.suid = suid, .uid = uid}; +static int32_t loadTombFromBlk(const TTombBlkArray *pTombBlkArray, SCacheRowsReader *pReader, void *pFileReader, + bool isFile) { + int32_t code = 0; + uint64_t *uidList = pReader->uidList; + int32_t numOfTables = pReader->numOfTables; + int64_t suid = pReader->info.suid; - // tMapDataReset(&delIdxMap); - code = tsdbReadDelIdx(pDelFReader, pDelIdxArray); - if (code) goto _err; - - // code = tMapDataSearch(&delIdxMap, &idx, tGetDelIdx, tCmprDelIdx, pDelIdx); - SDelIdx *pIdx = taosArraySearch(pDelIdxArray, &idx, tCmprDelIdx, TD_EQ); - - *pDelIdx = *pIdx; - -_err: - if (pDelIdxArray) { - taosArrayDestroy(pDelIdxArray); - } - return code; -} -*/ -typedef enum { - SFSLASTNEXTROW_FS, - SFSLASTNEXTROW_FILESET, - SFSLASTNEXTROW_BLOCKDATA, - SFSLASTNEXTROW_BLOCKROW -} SFSLASTNEXTROWSTATES; - -typedef struct { - SFSLASTNEXTROWSTATES state; // [input] - STsdb *pTsdb; // [input] - STSchema *pTSchema; // [input] - tb_uid_t suid; - tb_uid_t uid; - int32_t nFileSet; - int32_t iFileSet; - SArray *aDFileSet; - SDataFReader **pDataFReader; - TSDBROW row; - - bool checkRemainingRow; - SMergeTree mergeTree; - SMergeTree *pMergeTree; - SSttBlockLoadInfo *pLoadInfo; - SLDataIter *pDataIter; - int64_t lastTs; -} SFSLastNextRowIter; - -static int32_t getNextRowFromFSLast(void *iter, TSDBROW **ppRow, bool *pIgnoreEarlierTs, bool isLast, int16_t *aCols, - int nCols) { - SFSLastNextRowIter *state = (SFSLastNextRowIter *)iter; - int32_t code = 0; - bool checkRemainingRow = true; - - switch (state->state) { - case SFSLASTNEXTROW_FS: - 
state->nFileSet = taosArrayGetSize(state->aDFileSet); - state->iFileSet = state->nFileSet; - - case SFSLASTNEXTROW_FILESET: { - SDFileSet *pFileSet = NULL; - _next_fileset: - if (state->pMergeTree != NULL) { - tMergeTreeClose(state->pMergeTree); - state->pMergeTree = NULL; - } - - if (--state->iFileSet >= 0) { - pFileSet = (SDFileSet *)taosArrayGet(state->aDFileSet, state->iFileSet); - } else { - *ppRow = NULL; - return code; - } - - if (*state->pDataFReader == NULL || (*state->pDataFReader)->pSet->fid != pFileSet->fid) { - if (*state->pDataFReader != NULL) { - tsdbDataFReaderClose(state->pDataFReader); - - resetLastBlockLoadInfo(state->pLoadInfo); - } - - code = tsdbDataFReaderOpen(state->pDataFReader, state->pTsdb, pFileSet); - if (code) goto _err; - } - - int nTmpCols = nCols; - bool hasTs = false; - if (aCols[0] == PRIMARYKEY_TIMESTAMP_COL_ID) { - --nTmpCols; - hasTs = true; - } - for (int i = 0; i < state->pLoadInfo->numOfStt; ++i) { - state->pLoadInfo[i].colIds = hasTs ? aCols + 1 : aCols; - state->pLoadInfo[i].numOfCols = nTmpCols; - state->pLoadInfo[i].isLast = isLast; - } - tMergeTreeOpen(&state->mergeTree, 1, *state->pDataFReader, state->suid, state->uid, - &(STimeWindow){.skey = state->lastTs, .ekey = TSKEY_MAX}, - &(SVersionRange){.minVer = 0, .maxVer = UINT64_MAX}, state->pLoadInfo, false, NULL, true, - state->pDataIter); - state->pMergeTree = &state->mergeTree; - state->state = SFSLASTNEXTROW_BLOCKROW; + for (int i = 0, j = 0; i < pTombBlkArray->size && j < numOfTables; ++i) { + STombBlk *pTombBlk = &pTombBlkArray->data[i]; + if (pTombBlk->maxTbid.suid < suid || (pTombBlk->maxTbid.suid == suid && pTombBlk->maxTbid.uid < uidList[0])) { + continue; } - case SFSLASTNEXTROW_BLOCKROW: { - if (nCols != state->pLoadInfo->numOfCols) { - for (int i = 0; i < state->pLoadInfo->numOfStt; ++i) { - state->pLoadInfo[i].numOfCols = nCols; - state->pLoadInfo[i].checkRemainingRow = state->checkRemainingRow; - } - } - bool hasVal = tMergeTreeNext(&state->mergeTree); - 
if (!hasVal) { - if (tMergeTreeIgnoreEarlierTs(&state->mergeTree)) { - *pIgnoreEarlierTs = true; - *ppRow = NULL; - return code; - } - state->state = SFSLASTNEXTROW_FILESET; - goto _next_fileset; - } - state->row = *tMergeTreeGetRow(&state->mergeTree); - *ppRow = &state->row; + if (pTombBlk->minTbid.suid > suid || + (pTombBlk->minTbid.suid == suid && pTombBlk->minTbid.uid > uidList[numOfTables - 1])) { + break; + } - if (TSDBROW_TS(&state->row) <= state->lastTs) { - *pIgnoreEarlierTs = true; - *ppRow = NULL; - return code; - } - - *pIgnoreEarlierTs = false; - /* - if (!hasVal) { - state->state = SFSLASTNEXTROW_FILESET; - } - */ - if (!state->checkRemainingRow) { - state->checkRemainingRow = true; - } + STombBlock block = {0}; + code = isFile ? tsdbDataFileReadTombBlock(pFileReader, &pTombBlkArray->data[i], &block) + : tsdbSttFileReadTombBlock(pFileReader, &pTombBlkArray->data[i], &block); + if (code != TSDB_CODE_SUCCESS) { + return code; + } + + uint64_t uid = uidList[j]; + STableLoadInfo *pInfo = *(STableLoadInfo **)tSimpleHashGet(pReader->pTableMap, &uid, sizeof(uid)); + if (pInfo->pTombData == NULL) { + pInfo->pTombData = taosArrayInit(4, sizeof(SDelData)); + } + + STombRecord record = {0}; + bool finished = false; + for (int32_t k = 0; k < TARRAY2_SIZE(block.suid); ++k) { + code = tTombBlockGet(&block, k, &record); + if (code != TSDB_CODE_SUCCESS) { + finished = true; + break; + } + + if (record.suid < suid) { + continue; + } + if (record.suid > suid) { + finished = true; + break; + } + + bool newTable = false; + if (uid < record.uid) { + while (j < numOfTables && uidList[j] < record.uid) { + ++j; + newTable = true; + } + + if (j >= numOfTables) { + finished = true; + break; + } + + uid = uidList[j]; + } + + if (record.uid < uid) { + continue; + } + + if (newTable) { + pInfo = *(STableLoadInfo **)tSimpleHashGet(pReader->pTableMap, &uid, sizeof(uid)); + if (pInfo->pTombData == NULL) { + pInfo->pTombData = taosArrayInit(4, sizeof(SDelData)); + } + } + + if 
(record.version <= pReader->info.verRange.maxVer) { + SDelData delData = {.version = record.version, .sKey = record.skey, .eKey = record.ekey}; + taosArrayPush(pInfo->pTombData, &delData); + } + } + + tTombBlockDestroy(&block); + + if (finished) { return code; } - default: - ASSERT(0); - break; } -_err: - /*if (state->pDataFReader) { - tsdbDataFReaderClose(&state->pDataFReader); - state->pDataFReader = NULL; - }*/ - if (state->pMergeTree != NULL) { - tMergeTreeClose(state->pMergeTree); - state->pMergeTree = NULL; + return TSDB_CODE_SUCCESS; +} + +static int32_t loadDataTomb(SCacheRowsReader *pReader, SDataFileReader *pFileReader) { + int32_t code = 0; + + const TTombBlkArray *pBlkArray = NULL; + code = tsdbDataFileReadTombBlk(pFileReader, &pBlkArray); + if (code != TSDB_CODE_SUCCESS) { + return code; } - *ppRow = NULL; + return loadTombFromBlk(pBlkArray, pReader, pFileReader, true); +} + +static int32_t loadSttTomb(STsdbReader *pTsdbReader, SSttFileReader *pSttFileReader, SSttBlockLoadInfo *pLoadInfo) { + int32_t code = 0; + + SCacheRowsReader *pReader = (SCacheRowsReader *)pTsdbReader; + + const TTombBlkArray *pBlkArray = NULL; + code = tsdbSttFileReadTombBlk(pSttFileReader, &pBlkArray); + if (code != TSDB_CODE_SUCCESS) { + return code; + } + + return loadTombFromBlk(pBlkArray, pReader, pSttFileReader, false); +} + +typedef struct { + SMergeTree mergeTree; + SMergeTree *pMergeTree; +} SFSLastIter; + +static int32_t lastIterOpen(SFSLastIter *iter, STFileSet *pFileSet, STsdb *pTsdb, STSchema *pTSchema, tb_uid_t suid, + tb_uid_t uid, SCacheRowsReader *pr, int64_t lastTs, int16_t *aCols, int nCols) { + int32_t code = 0; + + int64_t loadBlocks = 0; + double elapse = 0; + pr->pLDataIterArray = destroySttBlockReader(pr->pLDataIterArray, &loadBlocks, &elapse); + pr->pLDataIterArray = taosArrayInit(4, POINTER_BYTES); + + SMergeTreeConf conf = { + .uid = uid, + .suid = suid, + .pTsdb = pTsdb, + .timewindow = (STimeWindow){.skey = lastTs, .ekey = TSKEY_MAX}, + .verRange = 
(SVersionRange){.minVer = 0, .maxVer = UINT64_MAX}, + .strictTimeRange = false, + .pSchema = pTSchema, + .pCurrentFileset = pFileSet, + .backward = 1, + .pSttFileBlockIterArray = pr->pLDataIterArray, + .pCols = aCols, + .numOfCols = nCols, + .loadTombFn = loadSttTomb, + .pReader = pr, + .idstr = pr->idstr, + }; + + code = tMergeTreeOpen2(&iter->mergeTree, &conf); + if (code != TSDB_CODE_SUCCESS) { + return -1; + } + + iter->pMergeTree = &iter->mergeTree; return code; } -int32_t clearNextRowFromFSLast(void *iter) { - SFSLastNextRowIter *state = (SFSLastNextRowIter *)iter; - int32_t code = 0; +static int32_t lastIterClose(SFSLastIter **iter) { + int32_t code = 0; - if (!state) { + if ((*iter)->pMergeTree) { + tMergeTreeClose((*iter)->pMergeTree); + (*iter)->pMergeTree = NULL; + } + + *iter = NULL; + + return code; +} + +static int32_t lastIterNext(SFSLastIter *iter, TSDBROW **ppRow) { + int32_t code = 0; + + bool hasVal = tMergeTreeNext(iter->pMergeTree); + if (!hasVal) { + *ppRow = NULL; return code; } - /* - if (state->pDataFReader) { - tsdbDataFReaderClose(&state->pDataFReader); - state->pDataFReader = NULL; - } - */ - if (state->pMergeTree != NULL) { - tMergeTreeClose(state->pMergeTree); - state->pMergeTree = NULL; - } + + *ppRow = tMergeTreeGetRow(iter->pMergeTree); return code; } @@ -1878,347 +1880,348 @@ int32_t clearNextRowFromFSLast(void *iter) { typedef enum SFSNEXTROWSTATES { SFSNEXTROW_FS, SFSNEXTROW_FILESET, + SFSNEXTROW_INDEXLIST, + SFSNEXTROW_BRINBLOCK, + SFSNEXTROW_BRINRECORD, SFSNEXTROW_BLOCKDATA, - SFSNEXTROW_BLOCKROW + SFSNEXTROW_BLOCKROW, + SFSNEXTROW_NEXTSTTROW } SFSNEXTROWSTATES; +struct CacheNextRowIter; + typedef struct SFSNextRowIter { - SFSNEXTROWSTATES state; // [input] - STsdb *pTsdb; // [input] - SBlockIdx *pBlockIdxExp; // [input] - STSchema *pTSchema; // [input] - tb_uid_t suid; - tb_uid_t uid; - int32_t nFileSet; - int32_t iFileSet; - SArray *aDFileSet; - SDataFReader **pDataFReader; - SArray *aBlockIdx; - LRUHandle *aBlockIdxHandle; - 
SBlockIdx *pBlockIdx; - SMapData blockMap; - int32_t nBlock; - int32_t iBlock; - SDataBlk block; - SBlockData blockData; - SBlockData *pBlockData; - int32_t nRow; - int32_t iRow; - TSDBROW row; - SSttBlockLoadInfo *pLoadInfo; - int64_t lastTs; + SFSNEXTROWSTATES state; // [input] + STsdb *pTsdb; // [input] + SBlockIdx *pBlockIdxExp; // [input] + STSchema *pTSchema; // [input] + tb_uid_t suid; + tb_uid_t uid; + int32_t nFileSet; + int32_t iFileSet; + STFileSet *pFileSet; + TFileSetArray *aDFileSet; + SArray *pIndexList; + int32_t iBrinIndex; + SBrinBlock brinBlock; + int32_t iBrinRecord; + SBrinRecord brinRecord; + SBlockData blockData; + SBlockData *pBlockData; + int32_t nRow; + int32_t iRow; + TSDBROW row; + int64_t lastTs; + SFSLastIter lastIter; + SFSLastIter *pLastIter; + int8_t lastEmpty; + TSDBROW *pLastRow; + SRow *pTSRow; + SRowMerger rowMerger; + SCacheRowsReader *pr; + struct CacheNextRowIter *pRowIter; } SFSNextRowIter; +static void clearLastFileSet(SFSNextRowIter *state); + static int32_t getNextRowFromFS(void *iter, TSDBROW **ppRow, bool *pIgnoreEarlierTs, bool isLast, int16_t *aCols, int nCols) { SFSNextRowIter *state = (SFSNextRowIter *)iter; int32_t code = 0; - bool checkRemainingRow = true; - switch (state->state) { - case SFSNEXTROW_FS: - // state->aDFileSet = state->pTsdb->pFS->cState->aDFileSet; - state->nFileSet = taosArrayGetSize(state->aDFileSet); - state->iFileSet = state->nFileSet; + if (SFSNEXTROW_FS == state->state) { + state->nFileSet = TARRAY2_SIZE(state->aDFileSet); + state->iFileSet = state->nFileSet; - state->pBlockData = NULL; + state->state = SFSNEXTROW_FILESET; + } - case SFSNEXTROW_FILESET: { - SDFileSet *pFileSet = NULL; - _next_fileset: - if (--state->iFileSet >= 0) { - pFileSet = (SDFileSet *)taosArrayGet(state->aDFileSet, state->iFileSet); - } else { - // tBlockDataDestroy(&state->blockData, 1); - if (state->pBlockData) { - tBlockDataDestroy(state->pBlockData); - state->pBlockData = NULL; + if (SFSNEXTROW_FILESET == 
state->state) { + _next_fileset: + if (--state->iFileSet < 0) { + clearLastFileSet(state); + + *ppRow = NULL; + return code; + } else { + state->pFileSet = TARRAY2_GET(state->aDFileSet, state->iFileSet); + } + + STFileObj **pFileObj = state->pFileSet->farr; + if (pFileObj[0] != NULL || pFileObj[3] != NULL) { + if (state->pFileSet != state->pr->pCurFileSet) { + SDataFileReaderConfig conf = {.tsdb = state->pTsdb, .szPage = state->pTsdb->pVnode->config.tsdbPageSize}; + const char *filesName[4] = {0}; + if (pFileObj[0] != NULL) { + conf.files[0].file = *pFileObj[0]->f; + conf.files[0].exist = true; + filesName[0] = pFileObj[0]->fname; + + conf.files[1].file = *pFileObj[1]->f; + conf.files[1].exist = true; + filesName[1] = pFileObj[1]->fname; + + conf.files[2].file = *pFileObj[2]->f; + conf.files[2].exist = true; + filesName[2] = pFileObj[2]->fname; } - *ppRow = NULL; - return code; - } - - if (*state->pDataFReader == NULL || (*state->pDataFReader)->pSet->fid != pFileSet->fid) { - if (*state->pDataFReader != NULL) { - tsdbDataFReaderClose(state->pDataFReader); - - // resetLastBlockLoadInfo(state->pLoadInfo); + if (pFileObj[3] != NULL) { + conf.files[3].exist = true; + conf.files[3].file = *pFileObj[3]->f; + filesName[3] = pFileObj[3]->fname; } - code = tsdbDataFReaderOpen(state->pDataFReader, state->pTsdb, pFileSet); - if (code) goto _err; + code = tsdbDataFileReaderOpen(filesName, &conf, &state->pr->pFileReader); + if (code != TSDB_CODE_SUCCESS) { + goto _err; + } + + loadDataTomb(state->pr, state->pr->pFileReader); + + state->pr->pCurFileSet = state->pFileSet; } - // tMapDataReset(&state->blockIdxMap); - /* - if (!state->aBlockIdx) { - state->aBlockIdx = taosArrayInit(0, sizeof(SBlockIdx)); + if (!state->pIndexList) { + state->pIndexList = taosArrayInit(1, sizeof(SBrinBlk)); } else { - taosArrayClear(state->aBlockIdx); + taosArrayClear(state->pIndexList); } - code = tsdbReadBlockIdx(*state->pDataFReader, state->aBlockIdx); - if (code) goto _err; - */ - int32_t code = 
tsdbCacheGetBlockIdx(state->pTsdb->biCache, *state->pDataFReader, &state->aBlockIdxHandle); - if (code != TSDB_CODE_SUCCESS || state->aBlockIdxHandle == NULL) { + const TBrinBlkArray *pBlkArray = NULL; + + int32_t code = tsdbDataFileReadBrinBlk(state->pr->pFileReader, &pBlkArray); + if (code != TSDB_CODE_SUCCESS) { goto _err; } - state->aBlockIdx = (SArray *)taosLRUCacheValue(state->pTsdb->biCache, state->aBlockIdxHandle); - /* if (state->pBlockIdx) { */ - /* } */ - /* code = tMapDataSearch(&state->blockIdxMap, state->pBlockIdxExp, tGetBlockIdx, tCmprBlockIdx, - * &state->blockIdx); - */ - state->pBlockIdx = taosArraySearch(state->aBlockIdx, state->pBlockIdxExp, tCmprBlockIdx, TD_EQ); - if (!state->pBlockIdx) { - tsdbBICacheRelease(state->pTsdb->biCache, state->aBlockIdxHandle); + for (int i = TARRAY2_SIZE(pBlkArray) - 1; i >= 0; --i) { + SBrinBlk *pBrinBlk = &pBlkArray->data[i]; + if (state->suid >= pBrinBlk->minTbid.suid && state->suid <= pBrinBlk->maxTbid.suid) { + if (state->uid >= pBrinBlk->minTbid.uid && state->uid <= pBrinBlk->maxTbid.uid) { + taosArrayPush(state->pIndexList, pBrinBlk); + } + } else if (state->suid > pBrinBlk->maxTbid.suid || + (state->suid == pBrinBlk->maxTbid.suid && state->uid > pBrinBlk->maxTbid.uid)) { + break; + } + } - state->aBlockIdxHandle = NULL; - state->aBlockIdx = NULL; - /* - tsdbDataFReaderClose(state->pDataFReader); - *state->pDataFReader = NULL; - resetLastBlockLoadInfo(state->pLoadInfo);*/ + int indexSize = TARRAY_SIZE(state->pIndexList); + if (indexSize <= 0) { + clearLastFileSet(state); + state->state = SFSNEXTROW_FILESET; goto _next_fileset; } - tMapDataReset(&state->blockMap); - /* - if (state->blockMap.pData != NULL) { - tMapDataClear(&state->blockMap); + state->state = SFSNEXTROW_INDEXLIST; + state->iBrinIndex = indexSize; + } + + code = lastIterOpen(&state->lastIter, state->pFileSet, state->pTsdb, state->pTSchema, state->suid, state->uid, + state->pr, state->lastTs, aCols, nCols); + if (code != TSDB_CODE_SUCCESS) { + 
goto _err; + } + + code = lastIterNext(&state->lastIter, &state->pLastRow); + if (code != TSDB_CODE_SUCCESS) { + goto _err; + } + + if (!state->pLastRow) { + state->lastEmpty = 1; + + if (SFSNEXTROW_INDEXLIST != state->state) { + clearLastFileSet(state); + goto _next_fileset; } - */ - code = tsdbReadDataBlk(*state->pDataFReader, state->pBlockIdx, &state->blockMap); - if (code) goto _err; + } else { + state->lastEmpty = 0; - state->nBlock = state->blockMap.nItem; - state->iBlock = state->nBlock - 1; + if (SFSNEXTROW_INDEXLIST != state->state) { + state->state = SFSNEXTROW_NEXTSTTROW; - if (!state->pBlockData) { - state->pBlockData = &state->blockData; - - code = tBlockDataCreate(&state->blockData); - if (code) goto _err; + *ppRow = state->pLastRow; + state->pLastRow = NULL; + return code; } } - case SFSNEXTROW_BLOCKDATA: - _next_datablock: - if (state->iBlock >= 0) { - SDataBlk block = {0}; - bool skipBlock = true; - int inputColIndex = 0; - tDataBlkReset(&block); - tBlockDataReset(state->pBlockData); + state->pLastIter = &state->lastIter; + } - tMapDataGetItemByIdx(&state->blockMap, state->iBlock, &block, tGetDataBlk); - if (block.maxKey.ts <= state->lastTs) { - *pIgnoreEarlierTs = true; + if (SFSNEXTROW_NEXTSTTROW == state->state) { + code = lastIterNext(&state->lastIter, &state->pLastRow); + if (code != TSDB_CODE_SUCCESS) { + goto _err; + } - tBlockDataDestroy(state->pBlockData); - state->pBlockData = NULL; - - *ppRow = NULL; - return code; - } - *pIgnoreEarlierTs = false; - tBlockDataReset(state->pBlockData); - TABLEID tid = {.suid = state->suid, .uid = state->uid}; - int nTmpCols = nCols; - bool hasTs = false; - if (aCols[0] == PRIMARYKEY_TIMESTAMP_COL_ID) { - --nTmpCols; - skipBlock = false; - hasTs = true; - } - code = tBlockDataInit(state->pBlockData, &tid, state->pTSchema, hasTs ? 
aCols + 1 : aCols, nTmpCols); - if (code) goto _err; - - code = tsdbReadDataBlock(*state->pDataFReader, &block, state->pBlockData); - if (code) goto _err; - - for (int colIndex = 0; colIndex < state->pBlockData->nColData; ++colIndex) { - SColData *pColData = &state->pBlockData->aColData[colIndex]; - - if (isLast && (pColData->flag & HAS_VALUE)) { - skipBlock = false; - break; - } /*else if (pColData->flag & (HAS_VALUE | HAS_NULL)) { - skipBlock = false; - break; - }*/ - } - - if (!isLast) { - skipBlock = false; - } - - if (skipBlock) { - if (--state->iBlock < 0) { - tsdbDataFReaderClose(state->pDataFReader); - *state->pDataFReader = NULL; - // resetLastBlockLoadInfo(state->pLoadInfo); - - if (state->aBlockIdx) { - // taosArrayDestroy(state->aBlockIdx); - tsdbBICacheRelease(state->pTsdb->biCache, state->aBlockIdxHandle); - - state->aBlockIdxHandle = NULL; - state->aBlockIdx = NULL; - } - - state->state = SFSNEXTROW_FILESET; - goto _next_fileset; - } else { - goto _next_datablock; - } - } - - state->nRow = state->blockData.nRow; - state->iRow = state->nRow - 1; - - state->state = SFSNEXTROW_BLOCKROW; - checkRemainingRow = false; - } - case SFSNEXTROW_BLOCKROW: { - if (checkRemainingRow) { - bool skipBlock = true; - int inputColIndex = 0; - if (aCols[0] == PRIMARYKEY_TIMESTAMP_COL_ID) { - ++inputColIndex; - } - for (int colIndex = 0; colIndex < state->pBlockData->nColData; ++colIndex) { - SColData *pColData = &state->pBlockData->aColData[colIndex]; - int16_t cid = pColData->cid; - - if (inputColIndex < nCols && cid == aCols[inputColIndex]) { - if (isLast && (pColData->flag & HAS_VALUE)) { - skipBlock = false; - break; - } /*else if (pColData->flag & (HAS_VALUE | HAS_NULL)) { - skipBlock = false; - break; - }*/ - - ++inputColIndex; - } - } - - if (!isLast) { - skipBlock = false; - } - - if (skipBlock) { - if (--state->iBlock < 0) { - tsdbDataFReaderClose(state->pDataFReader); - *state->pDataFReader = NULL; - // resetLastBlockLoadInfo(state->pLoadInfo); - - if 
(state->aBlockIdx) { - // taosArrayDestroy(state->aBlockIdx); - tsdbBICacheRelease(state->pTsdb->biCache, state->aBlockIdxHandle); - - state->aBlockIdxHandle = NULL; - state->aBlockIdx = NULL; - } - - state->state = SFSNEXTROW_FILESET; - goto _next_fileset; - } else { - goto _next_datablock; - } - } + if (!state->pLastRow) { + if (state->pLastIter) { + lastIterClose(&state->pLastIter); } - if (state->iRow >= 0) { - state->row = tsdbRowFromBlockData(state->pBlockData, state->iRow); - *ppRow = &state->row; + clearLastFileSet(state); + state->state = SFSNEXTROW_FILESET; + goto _next_fileset; + } else { + *ppRow = state->pLastRow; + state->pLastRow = NULL; + return code; + } + } - if (--state->iRow < 0) { - state->state = SFSNEXTROW_BLOCKDATA; - if (--state->iBlock < 0) { - tsdbDataFReaderClose(state->pDataFReader); - *state->pDataFReader = NULL; - // resetLastBlockLoadInfo(state->pLoadInfo); + if (SFSNEXTROW_INDEXLIST == state->state) { + SBrinBlk *pBrinBlk = NULL; + _next_brinindex: + if (--state->iBrinIndex < 0) { // no index left, goto next fileset + clearLastFileSet(state); + goto _next_fileset; + } else { + pBrinBlk = taosArrayGet(state->pIndexList, state->iBrinIndex); + } - if (state->aBlockIdx) { - // taosArrayDestroy(state->aBlockIdx); - tsdbBICacheRelease(state->pTsdb->biCache, state->aBlockIdxHandle); + code = tsdbDataFileReadBrinBlock(state->pr->pFileReader, pBrinBlk, &state->brinBlock); + if (code != TSDB_CODE_SUCCESS) { + goto _err; + } - state->aBlockIdxHandle = NULL; - state->aBlockIdx = NULL; - } + state->iBrinRecord = BRIN_BLOCK_SIZE(&state->brinBlock) - 1; + state->state = SFSNEXTROW_BRINBLOCK; + } - state->state = SFSNEXTROW_FILESET; - } - } + if (SFSNEXTROW_BRINBLOCK == state->state) { + _next_brinrecord: + if (state->iBrinRecord < 0) { // empty brin block, goto _next_brinindex + tBrinBlockClear(&state->brinBlock); + goto _next_brinindex; + } + code = tBrinBlockGet(&state->brinBlock, state->iBrinRecord, &state->brinRecord); + if (code != 
TSDB_CODE_SUCCESS) { + goto _err; + } + + SBrinRecord *pRecord = &state->brinRecord; + if (pRecord->uid != state->uid) { + // TODO: goto next brin block early + --state->iBrinRecord; + goto _next_brinrecord; + } + + state->state = SFSNEXTROW_BRINRECORD; + } + + if (SFSNEXTROW_BRINRECORD == state->state) { + SBrinRecord *pRecord = &state->brinRecord; + + if (!state->pBlockData) { + state->pBlockData = &state->blockData; + code = tBlockDataCreate(&state->blockData); + if (code) goto _err; + } else { + tBlockDataReset(state->pBlockData); + } + + if (aCols[0] == PRIMARYKEY_TIMESTAMP_COL_ID) { + --nCols; + ++aCols; + } + code = tsdbDataFileReadBlockDataByColumn(state->pr->pFileReader, pRecord, state->pBlockData, state->pTSchema, aCols, + nCols); + if (code != TSDB_CODE_SUCCESS) { + goto _err; + } + + state->nRow = state->blockData.nRow; + state->iRow = state->nRow - 1; + + state->state = SFSNEXTROW_BLOCKROW; + } + + if (SFSNEXTROW_BLOCKROW == state->state) { + if (state->iRow < 0) { + --state->iBrinRecord; + goto _next_brinrecord; + } + + state->row = tsdbRowFromBlockData(state->pBlockData, state->iRow); + if (!state->pLastIter) { + *ppRow = &state->row; + --state->iRow; + return code; + } + + if (!state->pLastRow) { + // get next row from fslast and process with fs row, --state->Row if select fs row + code = lastIterNext(&state->lastIter, &state->pLastRow); + if (code != TSDB_CODE_SUCCESS) { + goto _err; } + } + + if (!state->pLastRow) { + if (state->pLastIter) { + lastIterClose(&state->pLastIter); + } + + *ppRow = &state->row; + --state->iRow; + return code; + } + + // process state->pLastRow & state->row + TSKEY rowTs = TSDBROW_TS(&state->row); + TSKEY lastRowTs = TSDBROW_TS(state->pLastRow); + if (lastRowTs > rowTs) { + *ppRow = state->pLastRow; + state->pLastRow = NULL; + return code; + } else if (lastRowTs < rowTs) { + *ppRow = &state->row; + --state->iRow; + return code; + } else { + // TODO: merge rows and *ppRow = mergedRow + SRowMerger *pMerger = 
&state->rowMerger; + tsdbRowMergerInit(pMerger, state->pTSchema); + + code = tsdbRowMergerAdd(pMerger, &state->row, state->pTSchema); + if (code != TSDB_CODE_SUCCESS) { + goto _err; + } + code = tsdbRowMergerAdd(pMerger, state->pLastRow, state->pTSchema); + if (code != TSDB_CODE_SUCCESS) { + goto _err; + } + + if (state->pTSRow) { + taosMemoryFree(state->pTSRow); + state->pTSRow = NULL; + } + + code = tsdbRowMergerGetRow(pMerger, &state->pTSRow); + if (code != TSDB_CODE_SUCCESS) { + goto _err; + } + + state->row = tsdbRowFromTSRow(TSDBROW_VERSION(&state->row), state->pTSRow); + *ppRow = &state->row; + --state->iRow; + + tsdbRowMergerClear(pMerger); return code; } - default: - ASSERT(0); - break; } _err: - /* - if (*state->pDataFReader) { - tsdbDataFReaderClose(state->pDataFReader); - *state->pDataFReader = NULL; - resetLastBlockLoadInfo(state->pLoadInfo); - }*/ - if (state->aBlockIdx) { - // taosArrayDestroy(state->aBlockIdx); - tsdbBICacheRelease(state->pTsdb->biCache, state->aBlockIdxHandle); - - state->aBlockIdxHandle = NULL; - state->aBlockIdx = NULL; - } - if (state->pBlockData) { - tBlockDataDestroy(state->pBlockData); - state->pBlockData = NULL; - } + clearLastFileSet(state); *ppRow = NULL; return code; } -int32_t clearNextRowFromFS(void *iter) { - int32_t code = 0; - - SFSNextRowIter *state = (SFSNextRowIter *)iter; - if (!state) { - return code; - } - /* - if (state->pDataFReader) { - tsdbDataFReaderClose(&state->pDataFReader); - state->pDataFReader = NULL; - }*/ - if (state->aBlockIdx) { - // taosArrayDestroy(state->aBlockIdx); - tsdbBICacheRelease(state->pTsdb->biCache, state->aBlockIdxHandle); - - state->aBlockIdxHandle = NULL; - state->aBlockIdx = NULL; - } - if (state->pBlockData) { - // tBlockDataDestroy(&state->blockData, 1); - tBlockDataDestroy(state->pBlockData); - state->pBlockData = NULL; - } - - if (state->blockMap.pData != NULL) { - tMapDataClear(&state->blockMap); - } - - return code; -} - typedef enum SMEMNEXTROWSTATES { SMEMNEXTROW_ENTER, 
SMEMNEXTROW_NEXT, @@ -2229,8 +2232,6 @@ typedef struct SMemNextRowIter { STbData *pMem; // [input] STbDataIter iter; // mem buffer skip list iterator int64_t lastTs; - // bool iterOpened; - // TSDBROW *curRow; } SMemNextRowIter; static int32_t getNextRowFromMem(void *iter, TSDBROW **ppRow, bool *pIgnoreEarlierTs, bool isLast, int16_t *aCols, @@ -2281,45 +2282,6 @@ _err: return code; } -/* static int32_t tsRowFromTsdbRow(STSchema *pTSchema, TSDBROW *pRow, STSRow **ppRow) { */ -/* int32_t code = 0; */ - -/* SColVal *pColVal = &(SColVal){0}; */ - -/* if (pRow->type == 0) { */ -/* *ppRow = tdRowDup(pRow->pTSRow); */ -/* } else { */ -/* SArray *pArray = taosArrayInit(pTSchema->numOfCols, sizeof(SColVal)); */ -/* if (pArray == NULL) { */ -/* code = TSDB_CODE_OUT_OF_MEMORY; */ -/* goto _exit; */ -/* } */ - -/* TSDBKEY key = TSDBROW_KEY(pRow); */ -/* STColumn *pTColumn = &pTSchema->columns[0]; */ -/* *pColVal = COL_VAL_VALUE(pTColumn->colId, pTColumn->type, (SValue){.ts = key.ts}); */ - -/* if (taosArrayPush(pArray, pColVal) == NULL) { */ -/* code = TSDB_CODE_OUT_OF_MEMORY; */ -/* goto _exit; */ -/* } */ - -/* for (int16_t iCol = 1; iCol < pTSchema->numOfCols; iCol++) { */ -/* tsdbRowGetColVal(pRow, pTSchema, iCol, pColVal); */ -/* if (taosArrayPush(pArray, pColVal) == NULL) { */ -/* code = TSDB_CODE_OUT_OF_MEMORY; */ -/* goto _exit; */ -/* } */ -/* } */ - -/* code = tdSTSRowNew(pArray, pTSchema, ppRow); */ -/* if (code) goto _exit; */ -/* } */ - -/* _exit: */ -/* return code; */ -/* } */ - static bool tsdbKeyDeleted(TSDBKEY *key, SArray *pSkyline, int64_t *iSkyline) { bool deleted = false; while (*iSkyline > 0) { @@ -2365,24 +2327,88 @@ typedef struct { _next_row_clear_fn_t nextRowClearFn; } TsdbNextRowState; -typedef struct { - SArray *pSkyline; - int64_t iSkyline; - - SBlockIdx idx; - SMemNextRowIter memState; - SMemNextRowIter imemState; - SFSLastNextRowIter fsLastState; - SFSNextRowIter fsState; - TSDBROW memRow, imemRow, fsLastRow, fsRow; - - TsdbNextRowState 
input[4]; - STsdb *pTsdb; +typedef struct CacheNextRowIter { + SArray *pMemDelData; + SArray *pSkyline; + int64_t iSkyline; + SBlockIdx idx; + SMemNextRowIter memState; + SMemNextRowIter imemState; + SFSNextRowIter fsState; + TSDBROW memRow, imemRow, fsLastRow, fsRow; + TsdbNextRowState input[3]; + SCacheRowsReader *pr; + STsdb *pTsdb; } CacheNextRowIter; +int32_t clearNextRowFromFS(void *iter) { + int32_t code = 0; + + SFSNextRowIter *state = (SFSNextRowIter *)iter; + if (!state) { + return code; + } + + if (state->pLastIter) { + lastIterClose(&state->pLastIter); + } + + if (state->pBlockData) { + tBlockDataDestroy(state->pBlockData); + state->pBlockData = NULL; + } + + if (state->pTSRow) { + taosMemoryFree(state->pTSRow); + state->pTSRow = NULL; + } + + if (state->pRowIter->pSkyline) { + taosArrayDestroy(state->pRowIter->pSkyline); + state->pRowIter->pSkyline = NULL; + } + + return code; +} + +static void clearLastFileSet(SFSNextRowIter *state) { + if (state->pLastIter) { + lastIterClose(&state->pLastIter); + } + + if (state->pBlockData) { + tBlockDataDestroy(state->pBlockData); + state->pBlockData = NULL; + } + + if (state->pr->pFileReader) { + tsdbDataFileReaderClose(&state->pr->pFileReader); + state->pr->pFileReader = NULL; + + state->pr->pCurFileSet = NULL; + } + + if (state->pTSRow) { + taosMemoryFree(state->pTSRow); + state->pTSRow = NULL; + } + + if (state->pRowIter->pSkyline) { + taosArrayDestroy(state->pRowIter->pSkyline); + state->pRowIter->pSkyline = NULL; + + void *pe = NULL; + int32_t iter = 0; + while ((pe = tSimpleHashIterate(state->pr->pTableMap, pe, &iter)) != NULL) { + STableLoadInfo *pInfo = *(STableLoadInfo **)pe; + pInfo->pTombData = taosArrayDestroy(pInfo->pTombData); + } + } +} + static int32_t nextRowIterOpen(CacheNextRowIter *pIter, tb_uid_t uid, STsdb *pTsdb, STSchema *pTSchema, tb_uid_t suid, - SSttBlockLoadInfo *pLoadInfo, SLDataIter *pLDataIter, STsdbReadSnap *pReadSnap, - SDataFReader **pDataFReader, SDataFReader **pDataFReaderLast, 
int64_t lastTs) { + SArray *pLDataIterArray, STsdbReadSnap *pReadSnap, int64_t lastTs, + SCacheRowsReader *pr) { int code = 0; STbData *pMem = NULL; @@ -2397,71 +2423,26 @@ static int32_t nextRowIterOpen(CacheNextRowIter *pIter, tb_uid_t uid, STsdb *pTs pIter->pTsdb = pTsdb; - pIter->pSkyline = taosArrayInit(32, sizeof(TSDBKEY)); + pIter->pMemDelData = NULL; - SDelFile *pDelFile = pReadSnap->fs.pDelFile; - if (pDelFile) { - SDelFReader *pDelFReader; - - code = tsdbDelFReaderOpen(&pDelFReader, pDelFile, pTsdb); - if (code) goto _err; - - SArray *pDelIdxArray = taosArrayInit(32, sizeof(SDelIdx)); - - code = tsdbReadDelIdx(pDelFReader, pDelIdxArray); - if (code) { - taosArrayDestroy(pDelIdxArray); - tsdbDelFReaderClose(&pDelFReader); - goto _err; - } - - SDelIdx *delIdx = taosArraySearch(pDelIdxArray, &(SDelIdx){.suid = suid, .uid = uid}, tCmprDelIdx, TD_EQ); - - code = getTableDelSkyline(pMem, pIMem, pDelFReader, delIdx, pIter->pSkyline); - if (code) { - taosArrayDestroy(pDelIdxArray); - tsdbDelFReaderClose(&pDelFReader); - goto _err; - } - - taosArrayDestroy(pDelIdxArray); - tsdbDelFReaderClose(&pDelFReader); - } else { - code = getTableDelSkyline(pMem, pIMem, NULL, NULL, pIter->pSkyline); - if (code) goto _err; - } - - pIter->iSkyline = taosArrayGetSize(pIter->pSkyline) - 1; + loadMemTombData(&pIter->pMemDelData, pMem, pIMem, pr->info.verRange.maxVer); pIter->idx = (SBlockIdx){.suid = suid, .uid = uid}; - pIter->fsLastState.state = (SFSLASTNEXTROWSTATES)SFSNEXTROW_FS; - pIter->fsLastState.pTsdb = pTsdb; - pIter->fsLastState.aDFileSet = pReadSnap->fs.aDFileSet; - pIter->fsLastState.pTSchema = pTSchema; - pIter->fsLastState.suid = suid; - pIter->fsLastState.uid = uid; - pIter->fsLastState.pLoadInfo = pLoadInfo; - pIter->fsLastState.pDataFReader = pDataFReaderLast; - pIter->fsLastState.lastTs = lastTs; - pIter->fsLastState.pDataIter = pLDataIter; - + pIter->fsState.pRowIter = pIter; pIter->fsState.state = SFSNEXTROW_FS; pIter->fsState.pTsdb = pTsdb; - 
pIter->fsState.aDFileSet = pReadSnap->fs.aDFileSet; + pIter->fsState.aDFileSet = pReadSnap->pfSetArray; pIter->fsState.pBlockIdxExp = &pIter->idx; pIter->fsState.pTSchema = pTSchema; pIter->fsState.suid = suid; pIter->fsState.uid = uid; - pIter->fsState.pLoadInfo = pLoadInfo; - pIter->fsState.pDataFReader = pDataFReader; pIter->fsState.lastTs = lastTs; + pIter->fsState.pr = pr; pIter->input[0] = (TsdbNextRowState){&pIter->memRow, true, false, false, &pIter->memState, getNextRowFromMem, NULL}; pIter->input[1] = (TsdbNextRowState){&pIter->imemRow, true, false, false, &pIter->imemState, getNextRowFromMem, NULL}; - pIter->input[2] = (TsdbNextRowState){ - &pIter->fsLastRow, false, true, false, &pIter->fsLastState, getNextRowFromFSLast, clearNextRowFromFSLast}; - pIter->input[3] = + pIter->input[2] = (TsdbNextRowState){&pIter->fsRow, false, true, false, &pIter->fsState, getNextRowFromFS, clearNextRowFromFS}; if (pMem) { @@ -2480,7 +2461,7 @@ static int32_t nextRowIterOpen(CacheNextRowIter *pIter, tb_uid_t uid, STsdb *pTs pIter->input[1].next = true; } - return code; + pIter->pr = pr; _err: return code; } @@ -2488,7 +2469,7 @@ _err: static int32_t nextRowIterClose(CacheNextRowIter *pIter) { int code = 0; - for (int i = 0; i < 4; ++i) { + for (int i = 0; i < 3; ++i) { if (pIter->input[i].nextRowClearFn) { pIter->input[i].nextRowClearFn(pIter->input[i].iter); } @@ -2498,6 +2479,10 @@ static int32_t nextRowIterClose(CacheNextRowIter *pIter) { taosArrayDestroy(pIter->pSkyline); } + if (pIter->pMemDelData) { + taosArrayDestroy(pIter->pMemDelData); + } + _err: return code; } @@ -2507,7 +2492,7 @@ static int32_t nextRowIterGet(CacheNextRowIter *pIter, TSDBROW **ppRow, bool *pI int16_t *aCols, int nCols) { int code = 0; for (;;) { - for (int i = 0; i < 4; ++i) { + for (int i = 0; i < 3; ++i) { if (pIter->input[i].next && !pIter->input[i].stop) { code = pIter->input[i].nextRowFn(pIter->input[i].iter, &pIter->input[i].pRow, &pIter->input[i].ignoreEarlierTs, isLast, aCols, nCols); 
@@ -2520,10 +2505,10 @@ static int32_t nextRowIterGet(CacheNextRowIter *pIter, TSDBROW **ppRow, bool *pI } } - if (pIter->input[0].stop && pIter->input[1].stop && pIter->input[2].stop && pIter->input[3].stop) { + if (pIter->input[0].stop && pIter->input[1].stop && pIter->input[2].stop) { *ppRow = NULL; - *pIgnoreEarlierTs = (pIter->input[0].ignoreEarlierTs || pIter->input[1].ignoreEarlierTs || - pIter->input[2].ignoreEarlierTs || pIter->input[3].ignoreEarlierTs); + *pIgnoreEarlierTs = + (pIter->input[0].ignoreEarlierTs || pIter->input[1].ignoreEarlierTs || pIter->input[2].ignoreEarlierTs); return code; } @@ -2533,7 +2518,7 @@ static int32_t nextRowIterGet(CacheNextRowIter *pIter, TSDBROW **ppRow, bool *pI int nMax = 0; TSKEY maxKey = TSKEY_MIN; - for (int i = 0; i < 4; ++i) { + for (int i = 0; i < 3; ++i) { if (!pIter->input[i].stop && pIter->input[i].pRow != NULL) { TSDBKEY key = TSDBROW_KEY(pIter->input[i].pRow); @@ -2559,6 +2544,21 @@ static int32_t nextRowIterGet(CacheNextRowIter *pIter, TSDBROW **ppRow, bool *pI for (int i = 0; i < nMax; ++i) { TSDBKEY maxKey1 = TSDBROW_KEY(max[i]); + if (!pIter->pSkyline) { + pIter->pSkyline = taosArrayInit(32, sizeof(TSDBKEY)); + + uint64_t uid = pIter->idx.uid; + STableLoadInfo *pInfo = *(STableLoadInfo **)tSimpleHashGet(pIter->pr->pTableMap, &uid, sizeof(uid)); + SArray *pTombData = pInfo->pTombData; + if (pTombData) { + taosArrayAddAll(pTombData, pIter->pMemDelData); + + code = tsdbBuildDeleteSkyline(pTombData, 0, (int32_t)(TARRAY_SIZE(pTombData) - 1), pIter->pSkyline); + } + + pIter->iSkyline = taosArrayGetSize(pIter->pSkyline) - 1; + } + bool deleted = tsdbKeyDeleted(&maxKey1, pIter->pSkyline, &pIter->iSkyline); if (!deleted) { iMerge[nMerge] = iMax[i]; @@ -2629,322 +2629,7 @@ static int32_t updateTSchema(int32_t sversion, SCacheRowsReader *pReader, uint64 } taosMemoryFreeClear(pReader->pCurrSchema); - return metaGetTbTSchemaEx(pReader->pTsdb->pVnode->pMeta, pReader->suid, uid, sversion, &pReader->pCurrSchema); -} - 
-static int32_t mergeLastRow(tb_uid_t uid, STsdb *pTsdb, bool *dup, SArray **ppColArray, SCacheRowsReader *pr) { - STSchema *pTSchema = pr->pSchema; // metaGetTbTSchema(pTsdb->pVnode->pMeta, uid, -1, 1); - int16_t nLastCol = pTSchema->numOfCols; - int16_t iCol = 0; - int16_t noneCol = 0; - bool setNoneCol = false; - bool hasRow = false; - bool ignoreEarlierTs = false; - SArray *pColArray = NULL; - SColVal *pColVal = &(SColVal){0}; - - int32_t code = initLastColArray(pTSchema, &pColArray); - if (TSDB_CODE_SUCCESS != code) { - return code; - } - - TSKEY lastRowTs = TSKEY_MAX; - - CacheNextRowIter iter = {0}; - nextRowIterOpen(&iter, uid, pTsdb, pTSchema, pr->suid, pr->pLoadInfo, pr->pDataIter, pr->pReadSnap, &pr->pDataFReader, - &pr->pDataFReaderLast, pr->lastTs); - - do { - TSDBROW *pRow = NULL; - nextRowIterGet(&iter, &pRow, &ignoreEarlierTs, false, NULL, 0); - - if (!pRow) { - break; - } - - hasRow = true; - - int32_t sversion = TSDBROW_SVERSION(pRow); - if (sversion != -1) { - code = updateTSchema(sversion, pr, uid); - if (TSDB_CODE_SUCCESS != code) { - goto _err; - } - pTSchema = pr->pCurrSchema; - } - int16_t nCol = pTSchema->numOfCols; - - TSKEY rowTs = TSDBROW_TS(pRow); - - if (lastRowTs == TSKEY_MAX) { - lastRowTs = rowTs; - STColumn *pTColumn = &pTSchema->columns[0]; - - *pColVal = COL_VAL_VALUE(pTColumn->colId, pTColumn->type, (SValue){.val = lastRowTs}); - taosArraySet(pColArray, 0, &(SLastCol){.ts = lastRowTs, .colVal = *pColVal}); - - for (iCol = 1; iCol < nCol; ++iCol) { - if (iCol >= nLastCol) { - break; - } - SLastCol *pCol = taosArrayGet(pColArray, iCol); - if (pCol->colVal.cid != pTSchema->columns[iCol].colId) { - continue; - } - tsdbRowGetColVal(pRow, pTSchema, iCol, pColVal); - - *pCol = (SLastCol){.ts = lastRowTs, .colVal = *pColVal}; - if (IS_VAR_DATA_TYPE(pColVal->type) && pColVal->value.nData > 0) { - pCol->colVal.value.pData = taosMemoryMalloc(pCol->colVal.value.nData); - if (pCol->colVal.value.pData == NULL) { - terrno = 
TSDB_CODE_OUT_OF_MEMORY; - code = TSDB_CODE_OUT_OF_MEMORY; - goto _err; - } - memcpy(pCol->colVal.value.pData, pColVal->value.pData, pColVal->value.nData); - } - - if (COL_VAL_IS_NONE(pColVal) && !setNoneCol) { - noneCol = iCol; - setNoneCol = true; - } - } - if (!setNoneCol) { - // done, goto return pColArray - break; - } else { - continue; - } - } - - if ((rowTs < lastRowTs)) { - // done, goto return pColArray - break; - } - - // merge into pColArray - setNoneCol = false; - for (iCol = noneCol; iCol < nCol; ++iCol) { - // high version's column value - SColVal *tColVal = (SColVal *)taosArrayGet(pColArray, iCol); - - tsdbRowGetColVal(pRow, pTSchema, iCol, pColVal); - if (COL_VAL_IS_NONE(tColVal) && !COL_VAL_IS_NONE(pColVal)) { - SLastCol lastCol = {.ts = rowTs, .colVal = *pColVal}; - if (IS_VAR_DATA_TYPE(pColVal->type) && pColVal->value.nData > 0) { - SLastCol *pLastCol = (SLastCol *)taosArrayGet(pColArray, iCol); - taosMemoryFree(pLastCol->colVal.value.pData); - - lastCol.colVal.value.pData = taosMemoryMalloc(lastCol.colVal.value.nData); - if (lastCol.colVal.value.pData == NULL) { - terrno = TSDB_CODE_OUT_OF_MEMORY; - code = TSDB_CODE_OUT_OF_MEMORY; - goto _err; - } - memcpy(lastCol.colVal.value.pData, pColVal->value.pData, pColVal->value.nData); - } - - taosArraySet(pColArray, iCol, &lastCol); - } else if (COL_VAL_IS_NONE(tColVal) && COL_VAL_IS_NONE(pColVal) && !setNoneCol) { - noneCol = iCol; - setNoneCol = true; - } - } - } while (setNoneCol); - - // build the result ts row here - *dup = false; - // if (taosArrayGetSize(pColArray) != nCol) { - //*ppColArray = NULL; - // taosArrayDestroy(pColArray); - //} else { - if (!hasRow) { - if (ignoreEarlierTs) { - taosArrayDestroy(pColArray); - pColArray = NULL; - } else { - taosArrayClear(pColArray); - } - } - *ppColArray = pColArray; - //} - - nextRowIterClose(&iter); - // taosMemoryFreeClear(pTSchema); - return code; - -_err: - nextRowIterClose(&iter); - taosArrayDestroy(pColArray); - // taosMemoryFreeClear(pTSchema); 
- return code; -} - -static int32_t mergeLast(tb_uid_t uid, STsdb *pTsdb, SArray **ppLastArray, SCacheRowsReader *pr) { - STSchema *pTSchema = pr->pSchema; // metaGetTbTSchema(pTsdb->pVnode->pMeta, uid, -1, 1); - int16_t nLastCol = pTSchema->numOfCols; - int16_t noneCol = 0; - bool setNoneCol = false; - bool hasRow = false; - bool ignoreEarlierTs = false; - SArray *pColArray = NULL; - SColVal *pColVal = &(SColVal){0}; - int16_t nCols = nLastCol; - - int32_t code = initLastColArray(pTSchema, &pColArray); - if (TSDB_CODE_SUCCESS != code) { - return code; - } - SArray *aColArray = taosArrayInit(nCols, sizeof(int16_t)); - if (NULL == aColArray) { - taosArrayDestroy(pColArray); - - return TSDB_CODE_OUT_OF_MEMORY; - } - for (int i = 1; i < pTSchema->numOfCols; ++i) { - taosArrayPush(aColArray, &pTSchema->columns[i].colId); - } - - TSKEY lastRowTs = TSKEY_MAX; - - CacheNextRowIter iter = {0}; - nextRowIterOpen(&iter, uid, pTsdb, pTSchema, pr->suid, pr->pLoadInfo, pr->pDataIter, pr->pReadSnap, &pr->pDataFReader, - &pr->pDataFReaderLast, pr->lastTs); - - do { - TSDBROW *pRow = NULL; - nextRowIterGet(&iter, &pRow, &ignoreEarlierTs, true, TARRAY_DATA(aColArray), TARRAY_SIZE(aColArray)); - - if (!pRow) { - break; - } - - hasRow = true; - - int32_t sversion = TSDBROW_SVERSION(pRow); - if (sversion != -1) { - code = updateTSchema(sversion, pr, uid); - if (TSDB_CODE_SUCCESS != code) { - goto _err; - } - pTSchema = pr->pCurrSchema; - } - int16_t nCol = pTSchema->numOfCols; - - TSKEY rowTs = TSDBROW_TS(pRow); - - if (lastRowTs == TSKEY_MAX) { - lastRowTs = rowTs; - STColumn *pTColumn = &pTSchema->columns[0]; - - *pColVal = COL_VAL_VALUE(pTColumn->colId, pTColumn->type, (SValue){.val = lastRowTs}); - taosArraySet(pColArray, 0, &(SLastCol){.ts = lastRowTs, .colVal = *pColVal}); - - for (int16_t iCol = 1; iCol < nCol; ++iCol) { - if (iCol >= nLastCol) { - break; - } - SLastCol *pCol = taosArrayGet(pColArray, iCol); - if (pCol->colVal.cid != pTSchema->columns[iCol].colId) { - continue; 
- } - tsdbRowGetColVal(pRow, pTSchema, iCol, pColVal); - - *pCol = (SLastCol){.ts = lastRowTs, .colVal = *pColVal}; - if (IS_VAR_DATA_TYPE(pColVal->type) && pColVal->value.nData > 0) { - pCol->colVal.value.pData = taosMemoryMalloc(pCol->colVal.value.nData); - if (pCol->colVal.value.pData == NULL) { - terrno = TSDB_CODE_OUT_OF_MEMORY; - code = TSDB_CODE_OUT_OF_MEMORY; - goto _err; - } - memcpy(pCol->colVal.value.pData, pColVal->value.pData, pColVal->value.nData); - } - - if (!COL_VAL_IS_VALUE(pColVal)) { - if (!setNoneCol) { - noneCol = iCol; - setNoneCol = true; - } - } else { - int32_t aColIndex = taosArraySearchIdx(aColArray, &pColVal->cid, compareInt16Val, TD_EQ); - taosArrayRemove(aColArray, aColIndex); - } - } - if (!setNoneCol) { - // done, goto return pColArray - break; - } else { - continue; - } - } - - // merge into pColArray - setNoneCol = false; - for (int16_t iCol = noneCol; iCol < nCol; ++iCol) { - if (iCol >= nLastCol) { - break; - } - // high version's column value - SLastCol *lastColVal = (SLastCol *)taosArrayGet(pColArray, iCol); - if (lastColVal->colVal.cid != pTSchema->columns[iCol].colId) { - continue; - } - SColVal *tColVal = &lastColVal->colVal; - - tsdbRowGetColVal(pRow, pTSchema, iCol, pColVal); - if (!COL_VAL_IS_VALUE(tColVal) && COL_VAL_IS_VALUE(pColVal)) { - SLastCol lastCol = {.ts = rowTs, .colVal = *pColVal}; - if (IS_VAR_DATA_TYPE(pColVal->type) && pColVal->value.nData > 0) { - SLastCol *pLastCol = (SLastCol *)taosArrayGet(pColArray, iCol); - taosMemoryFree(pLastCol->colVal.value.pData); - - lastCol.colVal.value.pData = taosMemoryMalloc(lastCol.colVal.value.nData); - if (lastCol.colVal.value.pData == NULL) { - terrno = TSDB_CODE_OUT_OF_MEMORY; - code = TSDB_CODE_OUT_OF_MEMORY; - goto _err; - } - memcpy(lastCol.colVal.value.pData, pColVal->value.pData, pColVal->value.nData); - } - - taosArraySet(pColArray, iCol, &lastCol); - int32_t aColIndex = taosArraySearchIdx(aColArray, &lastCol.colVal.cid, compareInt16Val, TD_EQ); - 
taosArrayRemove(aColArray, aColIndex); - } else if (!COL_VAL_IS_VALUE(tColVal) && !COL_VAL_IS_VALUE(pColVal) && !setNoneCol) { - noneCol = iCol; - setNoneCol = true; - } - } - } while (setNoneCol); - - // if (taosArrayGetSize(pColArray) <= 0) { - //*ppLastArray = NULL; - // taosArrayDestroy(pColArray); - //} else { - if (!hasRow) { - if (ignoreEarlierTs) { - taosArrayDestroy(pColArray); - pColArray = NULL; - } else { - taosArrayClear(pColArray); - } - } - *ppLastArray = pColArray; - //} - - nextRowIterClose(&iter); - taosArrayDestroy(aColArray); - // taosMemoryFreeClear(pTSchema); - return code; - -_err: - nextRowIterClose(&iter); - // taosMemoryFreeClear(pTSchema); - *ppLastArray = NULL; - taosArrayDestroy(pColArray); - taosArrayDestroy(aColArray); - return code; + return metaGetTbTSchemaEx(pReader->pTsdb->pVnode->pMeta, pReader->info.suid, uid, sversion, &pReader->pCurrSchema); } static int32_t mergeLastCid(tb_uid_t uid, STsdb *pTsdb, SArray **ppLastArray, SCacheRowsReader *pr, int16_t *aCols, @@ -2976,8 +2661,7 @@ static int32_t mergeLastCid(tb_uid_t uid, STsdb *pTsdb, SArray **ppLastArray, SC TSKEY lastRowTs = TSKEY_MAX; CacheNextRowIter iter = {0}; - nextRowIterOpen(&iter, uid, pTsdb, pTSchema, pr->suid, pr->pLoadInfo, pr->pDataIter, pr->pReadSnap, &pr->pDataFReader, - &pr->pDataFReaderLast, pr->lastTs); + nextRowIterOpen(&iter, uid, pTsdb, pTSchema, pr->info.suid, pr->pLDataIterArray, pr->pReadSnap, pr->lastTs, pr); do { TSDBROW *pRow = NULL; @@ -3146,8 +2830,7 @@ static int32_t mergeLastRowCid(tb_uid_t uid, STsdb *pTsdb, SArray **ppLastArray, TSKEY lastRowTs = TSKEY_MAX; CacheNextRowIter iter = {0}; - nextRowIterOpen(&iter, uid, pTsdb, pTSchema, pr->suid, pr->pLoadInfo, pr->pDataIter, pr->pReadSnap, &pr->pDataFReader, - &pr->pDataFReaderLast, pr->lastTs); + nextRowIterOpen(&iter, uid, pTsdb, pTSchema, pr->info.suid, pr->pLDataIterArray, pr->pReadSnap, pr->lastTs, pr); do { TSDBROW *pRow = NULL; @@ -3236,92 +2919,6 @@ _err: return code; } -int32_t 
tsdbCacheGetLastrowH(SLRUCache *pCache, tb_uid_t uid, SCacheRowsReader *pr, LRUHandle **handle) { - int32_t code = 0; - char key[32] = {0}; - int keyLen = 0; - - // getTableCacheKeyS(uid, "lr", key, &keyLen); - getTableCacheKey(uid, 0, key, &keyLen); - LRUHandle *h = taosLRUCacheLookup(pCache, key, keyLen); - if (!h) { - STsdb *pTsdb = pr->pVnode->pTsdb; - taosThreadMutexLock(&pTsdb->lruMutex); - - h = taosLRUCacheLookup(pCache, key, keyLen); - if (!h) { - SArray *pArray = NULL; - bool dup = false; // which is always false for now - code = mergeLastRow(uid, pTsdb, &dup, &pArray, pr); - // if table's empty or error or ignore ignore earlier ts, set handle NULL and return - if (code < 0 || pArray == NULL) { - if (!dup && pArray) { - taosArrayDestroy(pArray); - } - - taosThreadMutexUnlock(&pTsdb->lruMutex); - - *handle = NULL; - - return 0; - } - - size_t charge = pArray->capacity * pArray->elemSize + sizeof(*pArray); - _taos_lru_deleter_t deleter = deleteTableCacheLast; - LRUStatus status = - taosLRUCacheInsert(pCache, key, keyLen, pArray, charge, deleter, &h, TAOS_LRU_PRIORITY_LOW, NULL); - if (status != TAOS_LRU_STATUS_OK) { - code = -1; - } - } - taosThreadMutexUnlock(&pTsdb->lruMutex); - } - - *handle = h; - - return code; -} - -int32_t tsdbCacheGetLastH(SLRUCache *pCache, tb_uid_t uid, SCacheRowsReader *pr, LRUHandle **handle) { - int32_t code = 0; - char key[32] = {0}; - int keyLen = 0; - - // getTableCacheKeyS(uid, "l", key, &keyLen); - getTableCacheKey(uid, 1, key, &keyLen); - LRUHandle *h = taosLRUCacheLookup(pCache, key, keyLen); - if (!h) { - STsdb *pTsdb = pr->pVnode->pTsdb; - taosThreadMutexLock(&pTsdb->lruMutex); - - h = taosLRUCacheLookup(pCache, key, keyLen); - if (!h) { - SArray *pLastArray = NULL; - code = mergeLast(uid, pTsdb, &pLastArray, pr); - // if table's empty or error or ignore ignore earlier ts, set handle NULL and return - if (code < 0 || pLastArray == NULL) { - taosThreadMutexUnlock(&pTsdb->lruMutex); - - *handle = NULL; - return 0; - } - 
- size_t charge = pLastArray->capacity * pLastArray->elemSize + sizeof(*pLastArray); - _taos_lru_deleter_t deleter = deleteTableCacheLast; - LRUStatus status = - taosLRUCacheInsert(pCache, key, keyLen, pLastArray, charge, deleter, &h, TAOS_LRU_PRIORITY_LOW, NULL); - if (status != TAOS_LRU_STATUS_OK) { - code = -1; - } - } - taosThreadMutexUnlock(&pTsdb->lruMutex); - } - - *handle = h; - - return code; -} - int32_t tsdbCacheRelease(SLRUCache *pCache, LRUHandle *h) { int32_t code = 0; diff --git a/source/dnode/vnode/src/tsdb/tsdbCacheRead.c b/source/dnode/vnode/src/tsdb/tsdbCacheRead.c index 6138b1f7b4..f17041e98b 100644 --- a/source/dnode/vnode/src/tsdb/tsdbCacheRead.c +++ b/source/dnode/vnode/src/tsdb/tsdbCacheRead.c @@ -17,6 +17,7 @@ #include "tarray.h" #include "tcommon.h" #include "tsdb.h" +#include "tsdbDataFileRW.h" #define HASTYPE(_type, _t) (((_type) & (_t)) == (_t)) @@ -124,11 +125,29 @@ int32_t tsdbReuseCacherowsReader(void* reader, void* pTableIdList, int32_t numOf pReader->numOfTables = numOfTables; pReader->lastTs = INT64_MIN; - resetLastBlockLoadInfo(pReader->pLoadInfo); + int64_t blocks; + double elapse; + pReader->pLDataIterArray = destroySttBlockReader(pReader->pLDataIterArray, &blocks, &elapse); + pReader->pLDataIterArray = taosArrayInit(4, POINTER_BYTES); return TSDB_CODE_SUCCESS; } +static int32_t uidComparFunc(const void* p1, const void* p2) { + uint64_t pu1 = *(uint64_t*)p1; + uint64_t pu2 = *(uint64_t*)p2; + if (pu1 == pu2) { + return 0; + } else { + return (pu1 < pu2) ? 
-1 : 1; + } +} + +static void freeTableInfoFunc(void* param) { + void** p = (void**)param; + taosMemoryFreeClear(*p); +} + int32_t tsdbCacherowsReaderOpen(void* pVnode, int32_t type, void* pTableIdList, int32_t numOfTables, int32_t numOfCols, SArray* pCidList, int32_t* pSlotIds, uint64_t suid, void** pReader, const char* idstr) { *pReader = NULL; @@ -140,11 +159,11 @@ int32_t tsdbCacherowsReaderOpen(void* pVnode, int32_t type, void* pTableIdList, p->type = type; p->pVnode = pVnode; p->pTsdb = p->pVnode->pTsdb; - p->verRange = (SVersionRange){.minVer = 0, .maxVer = UINT64_MAX}; + p->info.verRange = (SVersionRange){.minVer = 0, .maxVer = UINT64_MAX}; + p->info.suid = suid; p->numOfCols = numOfCols; p->pCidList = pCidList; p->pSlotIds = pSlotIds; - p->suid = suid; if (numOfTables == 0) { *pReader = p; @@ -154,6 +173,27 @@ int32_t tsdbCacherowsReaderOpen(void* pVnode, int32_t type, void* pTableIdList, p->pTableList = pTableIdList; p->numOfTables = numOfTables; + p->pTableMap = tSimpleHashInit(numOfTables, taosGetDefaultHashFunction(TSDB_DATA_TYPE_BIGINT)); + if (p->pTableMap == NULL) { + tsdbCacherowsReaderClose(p); + return TSDB_CODE_OUT_OF_MEMORY; + } + p->uidList = taosMemoryMalloc(numOfTables * sizeof(uint64_t)); + if (p->uidList == NULL) { + tsdbCacherowsReaderClose(p); + return TSDB_CODE_OUT_OF_MEMORY; + } + for (int32_t i = 0; i < numOfTables; ++i) { + uint64_t uid = p->pTableList[i].uid; + p->uidList[i] = uid; + STableLoadInfo* pInfo = taosMemoryCalloc(1, sizeof(STableLoadInfo)); + tSimpleHashPut(p->pTableMap, &uid, sizeof(uint64_t), &pInfo, POINTER_BYTES); + } + + tSimpleHashSetFreeFp(p->pTableMap, freeTableInfoFunc); + + taosSort(p->uidList, numOfTables, sizeof(uint64_t), uidComparFunc); + int32_t code = setTableSchema(p, suid, idstr); if (code != TSDB_CODE_SUCCESS) { tsdbCacherowsReaderClose(p); @@ -178,14 +218,8 @@ int32_t tsdbCacherowsReaderOpen(void* pVnode, int32_t type, void* pTableIdList, SVnodeCfg* pCfg = &((SVnode*)pVnode)->config; int32_t numOfStt = 
pCfg->sttTrigger; - p->pLoadInfo = tCreateLastBlockLoadInfo(p->pSchema, NULL, 0, numOfStt); - if (p->pLoadInfo == NULL) { - tsdbCacherowsReaderClose(p); - return TSDB_CODE_OUT_OF_MEMORY; - } - - p->pDataIter = taosMemoryCalloc(pCfg->sttTrigger, sizeof(SLDataIter)); - if (p->pDataIter == NULL) { + p->pLDataIterArray = taosArrayInit(4, POINTER_BYTES); + if (p->pLDataIterArray == NULL) { tsdbCacherowsReaderClose(p); return TSDB_CODE_OUT_OF_MEMORY; } @@ -214,14 +248,34 @@ void* tsdbCacherowsReaderClose(void* pReader) { taosMemoryFree(p->pSchema); } - taosMemoryFree(p->pDataIter); taosMemoryFree(p->pCurrSchema); - destroyLastBlockLoadInfo(p->pLoadInfo); + int64_t loadBlocks = 0; + double elapse = 0; + destroySttBlockReader(p->pLDataIterArray, &loadBlocks, &elapse); + + if (p->pFileReader) { + tsdbDataFileReaderClose(&p->pFileReader); + p->pFileReader = NULL; + } taosMemoryFree((void*)p->idstr); taosThreadMutexDestroy(&p->readerMutex); + if (p->pTableMap) { + void* pe = NULL; + int32_t iter = 0; + while ((pe = tSimpleHashIterate(p->pTableMap, pe, &iter)) != NULL) { + STableLoadInfo* pInfo = *(STableLoadInfo**)pe; + pInfo->pTombData = taosArrayDestroy(pInfo->pTombData); + } + + tSimpleHashCleanup(p->pTableMap); + } + if (p->uidList) { + taosMemoryFree(p->uidList); + } + taosMemoryFree(pReader); return NULL; } @@ -298,12 +352,10 @@ int32_t tsdbRetrieveCacheRows(void* pReader, SSDataBlock* pResBlock, const int32 } taosThreadMutexLock(&pr->readerMutex); - code = tsdbTakeReadSnap((STsdbReader*)pr, tsdbCacheQueryReseek, &pr->pReadSnap); + code = tsdbTakeReadSnap2((STsdbReader*)pr, tsdbCacheQueryReseek, &pr->pReadSnap); if (code != TSDB_CODE_SUCCESS) { goto _end; } - pr->pDataFReader = NULL; - pr->pDataFReaderLast = NULL; int8_t ltype = (pr->type & CACHESCAN_RETRIEVE_LAST) >> 3; @@ -424,11 +476,13 @@ int32_t tsdbRetrieveCacheRows(void* pReader, SSDataBlock* pResBlock, const int32 } _end: - tsdbDataFReaderClose(&pr->pDataFReaderLast); - tsdbDataFReaderClose(&pr->pDataFReader); + 
tsdbUntakeReadSnap2((STsdbReader*)pr, pr->pReadSnap, true); + + int64_t loadBlocks = 0; + double elapse = 0; + pr->pLDataIterArray = destroySttBlockReader(pr->pLDataIterArray, &loadBlocks, &elapse); + pr->pLDataIterArray = taosArrayInit(4, POINTER_BYTES); - resetLastBlockLoadInfo(pr->pLoadInfo); - tsdbUntakeReadSnap((STsdbReader*)pr, pr->pReadSnap, true); taosThreadMutexUnlock(&pr->readerMutex); if (pRes != NULL) { diff --git a/source/dnode/vnode/src/tsdb/tsdbCommit2.c b/source/dnode/vnode/src/tsdb/tsdbCommit2.c new file mode 100644 index 0000000000..ed05d7a6ca --- /dev/null +++ b/source/dnode/vnode/src/tsdb/tsdbCommit2.c @@ -0,0 +1,609 @@ +/* + * Copyright (c) 2019 TAOS Data, Inc. + * + * This program is free software: you can use, redistribute, and/or modify + * it under the terms of the GNU Affero General Public License, version 3 + * or later ("AGPL"), as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see . 
+ */ + +#include "tsdbCommit2.h" + +// extern dependencies +typedef struct { + STsdb *tsdb; + TFileSetArray *fsetArr; + TFileOpArray fopArray[1]; + + // SSkmInfo skmTb[1]; + // SSkmInfo skmRow[1]; + + int32_t minutes; + int8_t precision; + int32_t minRow; + int32_t maxRow; + int8_t cmprAlg; + int32_t sttTrigger; + int32_t szPage; + int64_t compactVersion; + + struct { + int64_t cid; + int64_t now; + TSKEY nextKey; + int32_t fid; + int32_t expLevel; + SDiskID did; + TSKEY minKey; + TSKEY maxKey; + STFileSet *fset; + TABLEID tbid[1]; + bool hasTSData; + } ctx[1]; + + // reader + SSttFileReader *sttReader; + + // iter + TTsdbIterArray dataIterArray[1]; + SIterMerger *dataIterMerger; + TTsdbIterArray tombIterArray[1]; + SIterMerger *tombIterMerger; + + // writer + SFSetWriter *writer; +} SCommitter2; + +static int32_t tsdbCommitOpenWriter(SCommitter2 *committer) { + int32_t code = 0; + int32_t lino = 0; + + SFSetWriterConfig config = { + .tsdb = committer->tsdb, + .toSttOnly = true, + .compactVersion = committer->compactVersion, + .minRow = committer->minRow, + .maxRow = committer->maxRow, + .szPage = committer->szPage, + .cmprAlg = committer->cmprAlg, + .fid = committer->ctx->fid, + .cid = committer->ctx->cid, + .did = committer->ctx->did, + .level = 0, + }; + + if (committer->sttTrigger == 1) { + config.toSttOnly = false; + + if (committer->ctx->fset) { + for (int32_t ftype = TSDB_FTYPE_MIN; ftype < TSDB_FTYPE_MAX; ftype++) { + if (committer->ctx->fset->farr[ftype] != NULL) { + config.files[ftype].exist = true; + config.files[ftype].file = committer->ctx->fset->farr[ftype]->f[0]; + } + } + } + } + + code = tsdbFSetWriterOpen(&config, &committer->writer); + TSDB_CHECK_CODE(code, lino, _exit); + +_exit: + if (code) { + TSDB_ERROR_LOG(TD_VID(committer->tsdb->pVnode), lino, code); + } + return code; +} + +static int32_t tsdbCommitCloseWriter(SCommitter2 *committer) { + return tsdbFSetWriterClose(&committer->writer, 0, committer->fopArray); +} + +static int32_t 
tsdbCommitTSData(SCommitter2 *committer) { + int32_t code = 0; + int32_t lino = 0; + int64_t numOfRow = 0; + SMetaInfo info; + + committer->ctx->hasTSData = false; + + committer->ctx->tbid->suid = 0; + committer->ctx->tbid->uid = 0; + for (SRowInfo *row; (row = tsdbIterMergerGetData(committer->dataIterMerger)) != NULL;) { + if (row->uid != committer->ctx->tbid->uid) { + committer->ctx->tbid->suid = row->suid; + committer->ctx->tbid->uid = row->uid; + + if (metaGetInfo(committer->tsdb->pVnode->pMeta, row->uid, &info, NULL) != 0) { + code = tsdbIterMergerSkipTableData(committer->dataIterMerger, committer->ctx->tbid); + TSDB_CHECK_CODE(code, lino, _exit); + continue; + } + } + + int64_t ts = TSDBROW_TS(&row->row); + if (ts > committer->ctx->maxKey) { + committer->ctx->nextKey = TMIN(committer->ctx->nextKey, ts); + code = tsdbIterMergerSkipTableData(committer->dataIterMerger, committer->ctx->tbid); + TSDB_CHECK_CODE(code, lino, _exit); + continue; + } + + committer->ctx->hasTSData = true; + numOfRow++; + + code = tsdbFSetWriteRow(committer->writer, row); + TSDB_CHECK_CODE(code, lino, _exit); + + code = tsdbIterMergerNext(committer->dataIterMerger); + TSDB_CHECK_CODE(code, lino, _exit); + } + +_exit: + if (code) { + TSDB_ERROR_LOG(TD_VID(committer->tsdb->pVnode), lino, code); + } else { + tsdbDebug("vgId:%d fid:%d commit %" PRId64 " rows", TD_VID(committer->tsdb->pVnode), committer->ctx->fid, numOfRow); + } + return code; +} + +static int32_t tsdbCommitTombData(SCommitter2 *committer) { + int32_t code = 0; + int32_t lino = 0; + int64_t numRecord = 0; + SMetaInfo info; + + if (committer->ctx->fset == NULL && !committer->ctx->hasTSData) { + return 0; + } + + committer->ctx->tbid->suid = 0; + committer->ctx->tbid->uid = 0; + for (STombRecord *record; (record = tsdbIterMergerGetTombRecord(committer->tombIterMerger));) { + if (record->uid != committer->ctx->tbid->uid) { + committer->ctx->tbid->suid = record->suid; + committer->ctx->tbid->uid = record->uid; + + if 
(metaGetInfo(committer->tsdb->pVnode->pMeta, record->uid, &info, NULL) != 0) { + code = tsdbIterMergerSkipTableData(committer->dataIterMerger, committer->ctx->tbid); + TSDB_CHECK_CODE(code, lino, _exit); + continue; + } + } + + if (record->ekey < committer->ctx->minKey) { + goto _next; + } else if (record->skey > committer->ctx->maxKey) { + committer->ctx->maxKey = TMIN(record->skey, committer->ctx->maxKey); + goto _next; + } + + if (record->ekey > committer->ctx->maxKey) { + committer->ctx->maxKey = committer->ctx->maxKey + 1; + } + + if (record->ekey > committer->ctx->maxKey) { + committer->ctx->nextKey = record->ekey; + } + + record->skey = TMAX(record->skey, committer->ctx->minKey); + record->ekey = TMIN(record->ekey, committer->ctx->maxKey); + + numRecord++; + code = tsdbFSetWriteTombRecord(committer->writer, record); + TSDB_CHECK_CODE(code, lino, _exit); + + _next: + code = tsdbIterMergerNext(committer->tombIterMerger); + TSDB_CHECK_CODE(code, lino, _exit); + } + +_exit: + if (code) { + TSDB_ERROR_LOG(TD_VID(committer->tsdb->pVnode), lino, code); + } else { + tsdbDebug("vgId:%d fid:%d commit %" PRId64 " tomb records", TD_VID(committer->tsdb->pVnode), committer->ctx->fid, + numRecord); + } + return code; +} + +static int32_t tsdbCommitOpenReader(SCommitter2 *committer) { + int32_t code = 0; + int32_t lino = 0; + + ASSERT(committer->sttReader == NULL); + + if (committer->ctx->fset == NULL // + || committer->sttTrigger > 1 // + || TARRAY2_SIZE(committer->ctx->fset->lvlArr) == 0 // + ) { + return 0; + } + + ASSERT(TARRAY2_SIZE(committer->ctx->fset->lvlArr) == 1); + + SSttLvl *lvl = TARRAY2_FIRST(committer->ctx->fset->lvlArr); + + ASSERT(lvl->level == 0); + + if (TARRAY2_SIZE(lvl->fobjArr) == 0) { + return 0; + } + + ASSERT(TARRAY2_SIZE(lvl->fobjArr) == 1); + + STFileObj *fobj = TARRAY2_FIRST(lvl->fobjArr); + + SSttFileReaderConfig config = { + .tsdb = committer->tsdb, + .szPage = committer->szPage, + .file = fobj->f[0], + }; + code = 
tsdbSttFileReaderOpen(fobj->fname, &config, &committer->sttReader); + TSDB_CHECK_CODE(code, lino, _exit); + + STFileOp op = { + .optype = TSDB_FOP_REMOVE, + .fid = fobj->f->fid, + .of = fobj->f[0], + }; + + code = TARRAY2_APPEND(committer->fopArray, op); + TSDB_CHECK_CODE(code, lino, _exit); + +_exit: + if (code) { + TSDB_ERROR_LOG(TD_VID(committer->tsdb->pVnode), lino, code); + } + return code; +} + +static int32_t tsdbCommitCloseReader(SCommitter2 *committer) { return tsdbSttFileReaderClose(&committer->sttReader); } + +static int32_t tsdbCommitOpenIter(SCommitter2 *committer) { + int32_t code = 0; + int32_t lino = 0; + + ASSERT(TARRAY2_SIZE(committer->dataIterArray) == 0); + ASSERT(committer->dataIterMerger == NULL); + ASSERT(TARRAY2_SIZE(committer->tombIterArray) == 0); + ASSERT(committer->tombIterMerger == NULL); + + STsdbIter *iter; + STsdbIterConfig config = {0}; + + // mem data iter + config.type = TSDB_ITER_TYPE_MEMT; + config.memt = committer->tsdb->imem; + config.from->ts = committer->ctx->minKey; + config.from->version = VERSION_MIN; + + code = tsdbIterOpen(&config, &iter); + TSDB_CHECK_CODE(code, lino, _exit); + + code = TARRAY2_APPEND(committer->dataIterArray, iter); + TSDB_CHECK_CODE(code, lino, _exit); + + // mem tomb iter + config.type = TSDB_ITER_TYPE_MEMT_TOMB; + config.memt = committer->tsdb->imem; + + code = tsdbIterOpen(&config, &iter); + TSDB_CHECK_CODE(code, lino, _exit); + + code = TARRAY2_APPEND(committer->tombIterArray, iter); + TSDB_CHECK_CODE(code, lino, _exit); + + // STT + if (committer->sttReader) { + // data iter + config.type = TSDB_ITER_TYPE_STT; + config.sttReader = committer->sttReader; + + code = tsdbIterOpen(&config, &iter); + TSDB_CHECK_CODE(code, lino, _exit); + + code = TARRAY2_APPEND(committer->dataIterArray, iter); + TSDB_CHECK_CODE(code, lino, _exit); + + // tomb iter + config.type = TSDB_ITER_TYPE_STT_TOMB; + config.sttReader = committer->sttReader; + + code = tsdbIterOpen(&config, &iter); + TSDB_CHECK_CODE(code, lino, 
_exit); + + code = TARRAY2_APPEND(committer->tombIterArray, iter); + TSDB_CHECK_CODE(code, lino, _exit); + } + + // open merger + code = tsdbIterMergerOpen(committer->dataIterArray, &committer->dataIterMerger, false); + TSDB_CHECK_CODE(code, lino, _exit); + + code = tsdbIterMergerOpen(committer->tombIterArray, &committer->tombIterMerger, true); + TSDB_CHECK_CODE(code, lino, _exit); + +_exit: + if (code) { + TSDB_ERROR_LOG(TD_VID(committer->tsdb->pVnode), lino, code); + } + return code; +} + +static int32_t tsdbCommitCloseIter(SCommitter2 *committer) { + tsdbIterMergerClose(&committer->tombIterMerger); + tsdbIterMergerClose(&committer->dataIterMerger); + TARRAY2_CLEAR(committer->tombIterArray, tsdbIterClose); + TARRAY2_CLEAR(committer->dataIterArray, tsdbIterClose); + return 0; +} + +static int32_t tsdbCommitFileSetBegin(SCommitter2 *committer) { + int32_t code = 0; + int32_t lino = 0; + STsdb *tsdb = committer->tsdb; + + committer->ctx->fid = tsdbKeyFid(committer->ctx->nextKey, committer->minutes, committer->precision); + committer->ctx->expLevel = tsdbFidLevel(committer->ctx->fid, &tsdb->keepCfg, committer->ctx->now); + tsdbFidKeyRange(committer->ctx->fid, committer->minutes, committer->precision, &committer->ctx->minKey, + &committer->ctx->maxKey); + code = tfsAllocDisk(committer->tsdb->pVnode->pTfs, committer->ctx->expLevel, &committer->ctx->did); + TSDB_CHECK_CODE(code, lino, _exit); + tfsMkdirRecurAt(committer->tsdb->pVnode->pTfs, committer->tsdb->path, committer->ctx->did); + STFileSet fset = {.fid = committer->ctx->fid}; + committer->ctx->fset = &fset; + STFileSet **fsetPtr = TARRAY2_SEARCH(committer->fsetArr, &committer->ctx->fset, tsdbTFileSetCmprFn, TD_EQ); + committer->ctx->fset = (fsetPtr == NULL) ? 
NULL : *fsetPtr; + committer->ctx->tbid->suid = 0; + committer->ctx->tbid->uid = 0; + + ASSERT(TARRAY2_SIZE(committer->dataIterArray) == 0); + ASSERT(committer->dataIterMerger == NULL); + ASSERT(committer->writer == NULL); + + code = tsdbCommitOpenReader(committer); + TSDB_CHECK_CODE(code, lino, _exit); + + code = tsdbCommitOpenIter(committer); + TSDB_CHECK_CODE(code, lino, _exit); + + code = tsdbCommitOpenWriter(committer); + TSDB_CHECK_CODE(code, lino, _exit); + + // reset nextKey + committer->ctx->nextKey = TSKEY_MAX; + +_exit: + if (code) { + TSDB_ERROR_LOG(TD_VID(tsdb->pVnode), lino, code); + } else { + tsdbDebug("vgId:%d %s done, fid:%d minKey:%" PRId64 " maxKey:%" PRId64 " expLevel:%d", TD_VID(tsdb->pVnode), + __func__, committer->ctx->fid, committer->ctx->minKey, committer->ctx->maxKey, committer->ctx->expLevel); + } + return 0; +} + +static int32_t tsdbCommitFileSetEnd(SCommitter2 *committer) { + int32_t code = 0; + int32_t lino = 0; + + code = tsdbCommitCloseWriter(committer); + TSDB_CHECK_CODE(code, lino, _exit); + + code = tsdbCommitCloseIter(committer); + TSDB_CHECK_CODE(code, lino, _exit); + + code = tsdbCommitCloseReader(committer); + TSDB_CHECK_CODE(code, lino, _exit); + +_exit: + if (code) { + TSDB_ERROR_LOG(TD_VID(committer->tsdb->pVnode), lino, code); + } else { + tsdbDebug("vgId:%d %s done, fid:%d", TD_VID(committer->tsdb->pVnode), __func__, committer->ctx->fid); + } + return code; +} + +static int32_t tsdbCommitFileSet(SCommitter2 *committer) { + int32_t code = 0; + int32_t lino = 0; + + // fset commit start + code = tsdbCommitFileSetBegin(committer); + TSDB_CHECK_CODE(code, lino, _exit); + + // commit fset + code = tsdbCommitTSData(committer); + TSDB_CHECK_CODE(code, lino, _exit); + + code = tsdbCommitTombData(committer); + TSDB_CHECK_CODE(code, lino, _exit); + + // fset commit end + code = tsdbCommitFileSetEnd(committer); + TSDB_CHECK_CODE(code, lino, _exit); + +_exit: + if (code) { + TSDB_ERROR_LOG(TD_VID(committer->tsdb->pVnode), lino, 
code); + } else { + tsdbDebug("vgId:%d %s done, fid:%d", TD_VID(committer->tsdb->pVnode), __func__, committer->ctx->fid); + } + return code; +} + +static int32_t tsdbOpenCommitter(STsdb *tsdb, SCommitInfo *info, SCommitter2 *committer) { + int32_t code = 0; + int32_t lino = 0; + + memset(committer, 0, sizeof(committer[0])); + + committer->tsdb = tsdb; + code = tsdbFSCreateCopySnapshot(tsdb->pFS, &committer->fsetArr); + TSDB_CHECK_CODE(code, lino, _exit); + committer->minutes = tsdb->keepCfg.days; + committer->precision = tsdb->keepCfg.precision; + committer->minRow = info->info.config.tsdbCfg.minRows; + committer->maxRow = info->info.config.tsdbCfg.maxRows; + committer->cmprAlg = info->info.config.tsdbCfg.compression; + committer->sttTrigger = info->info.config.sttTrigger; + committer->szPage = info->info.config.tsdbPageSize; + committer->compactVersion = INT64_MAX; + committer->ctx->cid = tsdbFSAllocEid(tsdb->pFS); + committer->ctx->now = taosGetTimestampSec(); + + committer->ctx->nextKey = tsdb->imem->minKey; + if (tsdb->imem->nDel > 0) { + SRBTreeIter iter[1] = {tRBTreeIterCreate(tsdb->imem->tbDataTree, 1)}; + + for (SRBTreeNode *node = tRBTreeIterNext(iter); node; node = tRBTreeIterNext(iter)) { + STbData *tbData = TCONTAINER_OF(node, STbData, rbtn); + + for (SDelData *delData = tbData->pHead; delData; delData = delData->pNext) { + if (delData->sKey < committer->ctx->nextKey) { + committer->ctx->nextKey = delData->sKey; + } + } + } + } + +_exit: + if (code) { + TSDB_ERROR_LOG(TD_VID(tsdb->pVnode), lino, code); + } else { + tsdbDebug("vgId:%d %s done", TD_VID(tsdb->pVnode), __func__); + } + return code; +} + +static int32_t tsdbCloseCommitter(SCommitter2 *committer, int32_t eno) { + int32_t code = 0; + int32_t lino = 0; + + if (eno == 0) { + code = tsdbFSEditBegin(committer->tsdb->pFS, committer->fopArray, TSDB_FEDIT_COMMIT); + TSDB_CHECK_CODE(code, lino, _exit); + } else { + // TODO + ASSERT(0); + } + + ASSERT(committer->writer == NULL); + 
ASSERT(committer->dataIterMerger == NULL); + ASSERT(committer->tombIterMerger == NULL); + TARRAY2_DESTROY(committer->dataIterArray, NULL); + TARRAY2_DESTROY(committer->tombIterArray, NULL); + TARRAY2_DESTROY(committer->fopArray, NULL); + tsdbFSDestroyCopySnapshot(&committer->fsetArr); + +_exit: + if (code) { + tsdbError("vgId:%d %s failed at line %d since %s, eid:%" PRId64, TD_VID(committer->tsdb->pVnode), __func__, lino, + tstrerror(code), committer->ctx->cid); + } else { + tsdbDebug("vgId:%d %s done, eid:%" PRId64, TD_VID(committer->tsdb->pVnode), __func__, committer->ctx->cid); + } + return code; +} + +int32_t tsdbPreCommit(STsdb *tsdb) { + taosThreadRwlockWrlock(&tsdb->rwLock); + ASSERT(tsdb->imem == NULL); + tsdb->imem = tsdb->mem; + tsdb->mem = NULL; + taosThreadRwlockUnlock(&tsdb->rwLock); + return 0; +} + +int32_t tsdbCommitBegin(STsdb *tsdb, SCommitInfo *info) { + if (!tsdb) return 0; + + int32_t code = 0; + int32_t lino = 0; + + SMemTable *imem = tsdb->imem; + int64_t nRow = imem->nRow; + int64_t nDel = imem->nDel; + + if (nRow == 0 && nDel == 0) { + taosThreadRwlockWrlock(&tsdb->rwLock); + tsdb->imem = NULL; + taosThreadRwlockUnlock(&tsdb->rwLock); + tsdbUnrefMemTable(imem, NULL, true); + } else { + SCommitter2 committer[1]; + + code = tsdbOpenCommitter(tsdb, info, committer); + TSDB_CHECK_CODE(code, lino, _exit); + + while (committer->ctx->nextKey != TSKEY_MAX) { + code = tsdbCommitFileSet(committer); + TSDB_CHECK_CODE(code, lino, _exit); + } + + code = tsdbCloseCommitter(committer, code); + TSDB_CHECK_CODE(code, lino, _exit); + } + +_exit: + if (code) { + TSDB_ERROR_LOG(TD_VID(tsdb->pVnode), lino, code); + } else { + tsdbInfo("vgId:%d %s done, nRow:%" PRId64 " nDel:%" PRId64, TD_VID(tsdb->pVnode), __func__, nRow, nDel); + } + return code; +} + +int32_t tsdbCommitCommit(STsdb *tsdb) { + int32_t code = 0; + int32_t lino = 0; + + if (tsdb->imem == NULL) goto _exit; + + SMemTable *pMemTable = tsdb->imem; + taosThreadRwlockWrlock(&tsdb->rwLock); + code = 
tsdbFSEditCommit(tsdb->pFS); + if (code) { + taosThreadRwlockUnlock(&tsdb->rwLock); + TSDB_CHECK_CODE(code, lino, _exit); + } + tsdb->imem = NULL; + taosThreadRwlockUnlock(&tsdb->rwLock); + tsdbUnrefMemTable(pMemTable, NULL, true); + +_exit: + if (code) { + TSDB_ERROR_LOG(TD_VID(tsdb->pVnode), lino, code); + } else { + tsdbInfo("vgId:%d %s done", TD_VID(tsdb->pVnode), __func__); + } + return code; +} + +int32_t tsdbCommitAbort(STsdb *pTsdb) { + int32_t code = 0; + int32_t lino = 0; + + if (pTsdb->imem == NULL) goto _exit; + + code = tsdbFSEditAbort(pTsdb->pFS); + TSDB_CHECK_CODE(code, lino, _exit); + +_exit: + if (code) { + tsdbError("vgId:%d, %s failed at line %d since %s", TD_VID(pTsdb->pVnode), __func__, lino, tstrerror(code)); + } else { + tsdbInfo("vgId:%d %s done", TD_VID(pTsdb->pVnode), __func__); + } + return code; +} diff --git a/source/dnode/vnode/src/tsdb/tsdbCommit2.h b/source/dnode/vnode/src/tsdb/tsdbCommit2.h new file mode 100644 index 0000000000..41f72f345b --- /dev/null +++ b/source/dnode/vnode/src/tsdb/tsdbCommit2.h @@ -0,0 +1,33 @@ +/* + * Copyright (c) 2019 TAOS Data, Inc. + * + * This program is free software: you can use, redistribute, and/or modify + * it under the terms of the GNU Affero General Public License, version 3 + * or later ("AGPL"), as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see . 
+ */ + +#include "tsdbDataFileRW.h" +#include "tsdbFS2.h" +#include "tsdbFSetRW.h" +#include "tsdbIter.h" +#include "tsdbSttFileRW.h" + +#ifndef _TSDB_COMMIT_H_ +#define _TSDB_COMMIT_H_ + +#ifdef __cplusplus +extern "C" { +#endif + +#ifdef __cplusplus +} +#endif + +#endif /*_TSDB_COMMIT_H_*/ \ No newline at end of file diff --git a/source/dnode/vnode/src/tsdb/tsdbDataFileRW.c b/source/dnode/vnode/src/tsdb/tsdbDataFileRW.c new file mode 100644 index 0000000000..9d07bcb446 --- /dev/null +++ b/source/dnode/vnode/src/tsdb/tsdbDataFileRW.c @@ -0,0 +1,1686 @@ +/* + * Copyright (c) 2019 TAOS Data, Inc. + * + * This program is free software: you can use, redistribute, and/or modify + * it under the terms of the GNU Affero General Public License, version 3 + * or later ("AGPL"), as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see . 
+ */ + +#include "tsdbDataFileRW.h" + +extern int32_t tsdbFileWriteTombBlock(STsdbFD *fd, STombBlock *tombBlock, int8_t cmprAlg, int64_t *fileSize, + TTombBlkArray *tombBlkArray, uint8_t **bufArr); +extern int32_t tsdbFileWriteTombBlk(STsdbFD *fd, const TTombBlkArray *tombBlkArray, SFDataPtr *ptr, int64_t *fileSize); + +// SDataFileReader ============================================= +struct SDataFileReader { + SDataFileReaderConfig config[1]; + + uint8_t *bufArr[5]; + + struct { + bool headFooterLoaded; + bool tombFooterLoaded; + bool brinBlkLoaded; + bool tombBlkLoaded; + } ctx[1]; + + STsdbFD *fd[TSDB_FTYPE_MAX]; + + SHeadFooter headFooter[1]; + STombFooter tombFooter[1]; + TBrinBlkArray brinBlkArray[1]; + TTombBlkArray tombBlkArray[1]; +}; + +static int32_t tsdbDataFileReadHeadFooter(SDataFileReader *reader) { + if (reader->ctx->headFooterLoaded) return 0; + + int32_t code = 0; + int32_t lino = 0; + + int32_t ftype = TSDB_FTYPE_HEAD; + if (reader->fd[ftype]) { + code = tsdbReadFile(reader->fd[ftype], reader->config->files[ftype].file.size - sizeof(SHeadFooter), + (uint8_t *)reader->headFooter, sizeof(SHeadFooter)); + TSDB_CHECK_CODE(code, lino, _exit); + } + + reader->ctx->headFooterLoaded = true; + +_exit: + if (code) { + TSDB_ERROR_LOG(TD_VID(reader->config->tsdb->pVnode), lino, code); + } + return code; +} + +static int32_t tsdbDataFileReadTombFooter(SDataFileReader *reader) { + if (reader->ctx->tombFooterLoaded) return 0; + + int32_t code = 0; + int32_t lino = 0; + + int32_t ftype = TSDB_FTYPE_TOMB; + if (reader->fd[ftype]) { + code = tsdbReadFile(reader->fd[ftype], reader->config->files[ftype].file.size - sizeof(STombFooter), + (uint8_t *)reader->tombFooter, sizeof(STombFooter)); + TSDB_CHECK_CODE(code, lino, _exit); + } + reader->ctx->tombFooterLoaded = true; + +_exit: + if (code) { + TSDB_ERROR_LOG(TD_VID(reader->config->tsdb->pVnode), lino, code); + } + return code; +} + +int32_t tsdbDataFileReaderOpen(const char *fname[], const SDataFileReaderConfig 
*config, SDataFileReader **reader) { + int32_t code = 0; + int32_t lino = 0; + + reader[0] = taosMemoryCalloc(1, sizeof(**reader)); + if (reader[0] == NULL) { + code = TSDB_CODE_OUT_OF_MEMORY; + TSDB_CHECK_CODE(code, lino, _exit); + } + + reader[0]->config[0] = config[0]; + if (reader[0]->config->bufArr == NULL) { + reader[0]->config->bufArr = reader[0]->bufArr; + } + + if (fname) { + for (int32_t i = 0; i < TSDB_FTYPE_MAX; ++i) { + if (fname[i]) { + code = tsdbOpenFile(fname[i], config->szPage, TD_FILE_READ, &reader[0]->fd[i]); + TSDB_CHECK_CODE(code, lino, _exit); + } + } + } else { + for (int32_t i = 0; i < TSDB_FTYPE_MAX; ++i) { + if (config->files[i].exist) { + char fname1[TSDB_FILENAME_LEN]; + tsdbTFileName(config->tsdb, &config->files[i].file, fname1); + code = tsdbOpenFile(fname1, config->szPage, TD_FILE_READ, &reader[0]->fd[i]); + TSDB_CHECK_CODE(code, lino, _exit); + } + } + } + +_exit: + if (code) { + TSDB_ERROR_LOG(TD_VID(config->tsdb->pVnode), lino, code); + } + return code; +} + +int32_t tsdbDataFileReaderClose(SDataFileReader **reader) { + if (reader[0] == NULL) return 0; + + TARRAY2_DESTROY(reader[0]->tombBlkArray, NULL); + TARRAY2_DESTROY(reader[0]->brinBlkArray, NULL); + +#if 0 + TARRAY2_DESTROY(reader[0]->dataBlkArray, NULL); + TARRAY2_DESTROY(reader[0]->blockIdxArray, NULL); +#endif + + for (int32_t i = 0; i < TSDB_FTYPE_MAX; ++i) { + if (reader[0]->fd[i]) { + tsdbCloseFile(&reader[0]->fd[i]); + } + } + + for (int32_t i = 0; i < ARRAY_SIZE(reader[0]->bufArr); ++i) { + tFree(reader[0]->bufArr[i]); + } + + taosMemoryFree(reader[0]); + reader[0] = NULL; + return 0; +} + +int32_t tsdbDataFileReadBrinBlk(SDataFileReader *reader, const TBrinBlkArray **brinBlkArray) { + int32_t code = 0; + int32_t lino = 0; + + if (!reader->ctx->brinBlkLoaded) { + code = tsdbDataFileReadHeadFooter(reader); + TSDB_CHECK_CODE(code, lino, _exit); + + if (reader->headFooter->brinBlkPtr->size > 0) { + void *data = taosMemoryMalloc(reader->headFooter->brinBlkPtr->size); + if 
(data == NULL) { + code = TSDB_CODE_OUT_OF_MEMORY; + TSDB_CHECK_CODE(code, lino, _exit); + } + + code = tsdbReadFile(reader->fd[TSDB_FTYPE_HEAD], reader->headFooter->brinBlkPtr->offset, data, + reader->headFooter->brinBlkPtr->size); + if (code) { + taosMemoryFree(data); + TSDB_CHECK_CODE(code, lino, _exit); + } + + int32_t size = reader->headFooter->brinBlkPtr->size / sizeof(SBrinBlk); + TARRAY2_INIT_EX(reader->brinBlkArray, size, size, data); + } else { + TARRAY2_INIT(reader->brinBlkArray); + } + + reader->ctx->brinBlkLoaded = true; + } + brinBlkArray[0] = reader->brinBlkArray; + +_exit: + if (code) { + TSDB_ERROR_LOG(TD_VID(reader->config->tsdb->pVnode), lino, code); + } + return code; +} + +int32_t tsdbDataFileReadBrinBlock(SDataFileReader *reader, const SBrinBlk *brinBlk, SBrinBlock *brinBlock) { + int32_t code = 0; + int32_t lino = 0; + + code = tRealloc(&reader->config->bufArr[0], brinBlk->dp->size); + TSDB_CHECK_CODE(code, lino, _exit); + + code = tsdbReadFile(reader->fd[TSDB_FTYPE_HEAD], brinBlk->dp->offset, reader->config->bufArr[0], brinBlk->dp->size); + TSDB_CHECK_CODE(code, lino, _exit); + + int32_t size = 0; + tBrinBlockClear(brinBlock); + for (int32_t i = 0; i < ARRAY_SIZE(brinBlock->dataArr1); i++) { + code = tsdbDecmprData(reader->config->bufArr[0] + size, brinBlk->size[i], TSDB_DATA_TYPE_BIGINT, brinBlk->cmprAlg, + &reader->config->bufArr[1], brinBlk->numRec * sizeof(int64_t), &reader->config->bufArr[2]); + TSDB_CHECK_CODE(code, lino, _exit); + + code = TARRAY2_APPEND_BATCH(&brinBlock->dataArr1[i], reader->config->bufArr[1], brinBlk->numRec); + TSDB_CHECK_CODE(code, lino, _exit); + + size += brinBlk->size[i]; + } + + for (int32_t i = 0, j = ARRAY_SIZE(brinBlock->dataArr1); i < ARRAY_SIZE(brinBlock->dataArr2); i++, j++) { + code = tsdbDecmprData(reader->config->bufArr[0] + size, brinBlk->size[j], TSDB_DATA_TYPE_INT, brinBlk->cmprAlg, + &reader->config->bufArr[1], brinBlk->numRec * sizeof(int32_t), &reader->config->bufArr[2]); + TSDB_CHECK_CODE(code, 
lino, _exit); + + code = TARRAY2_APPEND_BATCH(&brinBlock->dataArr2[i], reader->config->bufArr[1], brinBlk->numRec); + TSDB_CHECK_CODE(code, lino, _exit); + + size += brinBlk->size[j]; + } + +_exit: + if (code) { + TSDB_ERROR_LOG(TD_VID(reader->config->tsdb->pVnode), lino, code); + } + return code; +} + +int32_t tsdbDataFileReadBlockData(SDataFileReader *reader, const SBrinRecord *record, SBlockData *bData) { + int32_t code = 0; + int32_t lino = 0; + + code = tRealloc(&reader->config->bufArr[0], record->blockSize); + TSDB_CHECK_CODE(code, lino, _exit); + + code = tsdbReadFile(reader->fd[TSDB_FTYPE_DATA], record->blockOffset, reader->config->bufArr[0], record->blockSize); + TSDB_CHECK_CODE(code, lino, _exit); + + code = tDecmprBlockData(reader->config->bufArr[0], record->blockSize, bData, &reader->config->bufArr[1]); + TSDB_CHECK_CODE(code, lino, _exit); + +_exit: + if (code) { + TSDB_ERROR_LOG(TD_VID(reader->config->tsdb->pVnode), lino, code); + } + return code; +} + +int32_t tsdbDataFileReadBlockDataByColumn(SDataFileReader *reader, const SBrinRecord *record, SBlockData *bData, + STSchema *pTSchema, int16_t cids[], int32_t ncid) { + int32_t code = 0; + int32_t lino = 0; + + code = tBlockDataInit(bData, (TABLEID *)record, pTSchema, cids, ncid); + TSDB_CHECK_CODE(code, lino, _exit); + + // uid + version + tskey + code = tRealloc(&reader->config->bufArr[0], record->blockKeySize); + TSDB_CHECK_CODE(code, lino, _exit); + + code = + tsdbReadFile(reader->fd[TSDB_FTYPE_DATA], record->blockOffset, reader->config->bufArr[0], record->blockKeySize); + TSDB_CHECK_CODE(code, lino, _exit); + + // hdr + SDiskDataHdr hdr[1]; + int32_t size = 0; + + size += tGetDiskDataHdr(reader->config->bufArr[0] + size, hdr); + + ASSERT(hdr->delimiter == TSDB_FILE_DLMT); + ASSERT(record->uid == hdr->uid); + + bData->nRow = hdr->nRow; + + // uid + ASSERT(hdr->uid); + + // version + code = tsdbDecmprData(reader->config->bufArr[0] + size, hdr->szVer, TSDB_DATA_TYPE_BIGINT, hdr->cmprAlg, + (uint8_t 
**)&bData->aVersion, sizeof(int64_t) * hdr->nRow, &reader->config->bufArr[1]); + TSDB_CHECK_CODE(code, lino, _exit); + size += hdr->szVer; + + // ts + code = tsdbDecmprData(reader->config->bufArr[0] + size, hdr->szKey, TSDB_DATA_TYPE_TIMESTAMP, hdr->cmprAlg, + (uint8_t **)&bData->aTSKEY, sizeof(TSKEY) * hdr->nRow, &reader->config->bufArr[1]); + TSDB_CHECK_CODE(code, lino, _exit); + size += hdr->szKey; + + ASSERT(size == record->blockKeySize); + + // other columns + if (bData->nColData > 0) { + if (hdr->szBlkCol > 0) { + code = tRealloc(&reader->config->bufArr[0], hdr->szBlkCol); + TSDB_CHECK_CODE(code, lino, _exit); + + code = tsdbReadFile(reader->fd[TSDB_FTYPE_DATA], record->blockOffset + record->blockKeySize, + reader->config->bufArr[0], hdr->szBlkCol); + TSDB_CHECK_CODE(code, lino, _exit); + } + + SBlockCol bc[1] = {{.cid = 0}}; + SBlockCol *blockCol = bc; + + size = 0; + for (int32_t i = 0; i < bData->nColData; i++) { + SColData *colData = tBlockDataGetColDataByIdx(bData, i); + + while (blockCol && blockCol->cid < colData->cid) { + if (size < hdr->szBlkCol) { + size += tGetBlockCol(reader->config->bufArr[0] + size, blockCol); + } else { + ASSERT(size == hdr->szBlkCol); + blockCol = NULL; + } + } + + if (blockCol == NULL || blockCol->cid > colData->cid) { + for (int32_t iRow = 0; iRow < hdr->nRow; iRow++) { + code = tColDataAppendValue(colData, &COL_VAL_NONE(colData->cid, colData->type)); + TSDB_CHECK_CODE(code, lino, _exit); + } + } else { + ASSERT(blockCol->type == colData->type); + ASSERT(blockCol->flag && blockCol->flag != HAS_NONE); + + if (blockCol->flag == HAS_NULL) { + for (int32_t iRow = 0; iRow < hdr->nRow; iRow++) { + code = tColDataAppendValue(colData, &COL_VAL_NULL(blockCol->cid, blockCol->type)); + TSDB_CHECK_CODE(code, lino, _exit); + } + } else { + int32_t size1 = blockCol->szBitmap + blockCol->szOffset + blockCol->szValue; + + code = tRealloc(&reader->config->bufArr[1], size1); + TSDB_CHECK_CODE(code, lino, _exit); + + code = 
tsdbReadFile(reader->fd[TSDB_FTYPE_DATA], + record->blockOffset + record->blockKeySize + hdr->szBlkCol + blockCol->offset, + reader->config->bufArr[1], size1); + TSDB_CHECK_CODE(code, lino, _exit); + + code = tsdbDecmprColData(reader->config->bufArr[1], blockCol, hdr->cmprAlg, hdr->nRow, colData, + &reader->config->bufArr[2]); + TSDB_CHECK_CODE(code, lino, _exit); + } + } + } + } + +_exit: + if (code) { + TSDB_ERROR_LOG(TD_VID(reader->config->tsdb->pVnode), lino, code); + } + return code; +} + +int32_t tsdbDataFileReadBlockSma(SDataFileReader *reader, const SBrinRecord *record, + TColumnDataAggArray *columnDataAggArray) { + int32_t code = 0; + int32_t lino = 0; + + TARRAY2_CLEAR(columnDataAggArray, NULL); + if (record->smaSize > 0) { + code = tRealloc(&reader->config->bufArr[0], record->smaSize); + TSDB_CHECK_CODE(code, lino, _exit); + + code = tsdbReadFile(reader->fd[TSDB_FTYPE_SMA], record->smaOffset, reader->config->bufArr[0], record->smaSize); + TSDB_CHECK_CODE(code, lino, _exit); + + // decode sma data + int32_t size = 0; + while (size < record->smaSize) { + SColumnDataAgg sma[1]; + + size += tGetColumnDataAgg(reader->config->bufArr[0] + size, sma); + + code = TARRAY2_APPEND_PTR(columnDataAggArray, sma); + TSDB_CHECK_CODE(code, lino, _exit); + } + ASSERT(size == record->smaSize); + } + +_exit: + if (code) { + TSDB_ERROR_LOG(TD_VID(reader->config->tsdb->pVnode), lino, code); + } + return code; +} + +int32_t tsdbDataFileReadTombBlk(SDataFileReader *reader, const TTombBlkArray **tombBlkArray) { + int32_t code = 0; + int32_t lino = 0; + + if (!reader->ctx->tombBlkLoaded) { + code = tsdbDataFileReadTombFooter(reader); + TSDB_CHECK_CODE(code, lino, _exit); + + if (reader->tombFooter->tombBlkPtr->size > 0) { + void *data = taosMemoryMalloc(reader->tombFooter->tombBlkPtr->size); + if (data == NULL) { + code = TSDB_CODE_OUT_OF_MEMORY; + TSDB_CHECK_CODE(code, lino, _exit); + } + + code = tsdbReadFile(reader->fd[TSDB_FTYPE_TOMB], reader->tombFooter->tombBlkPtr->offset, 
data, + reader->tombFooter->tombBlkPtr->size); + if (code) { + taosMemoryFree(data); + TSDB_CHECK_CODE(code, lino, _exit); + } + + int32_t size = reader->tombFooter->tombBlkPtr->size / sizeof(STombBlk); + TARRAY2_INIT_EX(reader->tombBlkArray, size, size, data); + } else { + TARRAY2_INIT(reader->tombBlkArray); + } + + reader->ctx->tombBlkLoaded = true; + } + tombBlkArray[0] = reader->tombBlkArray; + +_exit: + if (code) { + TSDB_ERROR_LOG(TD_VID(reader->config->tsdb->pVnode), lino, code); + } + return code; +} + +int32_t tsdbDataFileReadTombBlock(SDataFileReader *reader, const STombBlk *tombBlk, STombBlock *tData) { + int32_t code = 0; + int32_t lino = 0; + + code = tRealloc(&reader->config->bufArr[0], tombBlk->dp->size); + TSDB_CHECK_CODE(code, lino, _exit); + + code = tsdbReadFile(reader->fd[TSDB_FTYPE_TOMB], tombBlk->dp->offset, reader->config->bufArr[0], tombBlk->dp->size); + TSDB_CHECK_CODE(code, lino, _exit); + + int32_t size = 0; + tTombBlockClear(tData); + for (int32_t i = 0; i < ARRAY_SIZE(tData->dataArr); ++i) { + code = tsdbDecmprData(reader->config->bufArr[0] + size, tombBlk->size[i], TSDB_DATA_TYPE_BIGINT, tombBlk->cmprAlg, + &reader->config->bufArr[1], sizeof(int64_t) * tombBlk->numRec, &reader->config->bufArr[2]); + TSDB_CHECK_CODE(code, lino, _exit); + + code = TARRAY2_APPEND_BATCH(&tData->dataArr[i], reader->config->bufArr[1], tombBlk->numRec); + TSDB_CHECK_CODE(code, lino, _exit); + + size += tombBlk->size[i]; + } + ASSERT(size == tombBlk->dp->size); + +_exit: + if (code) { + TSDB_ERROR_LOG(TD_VID(reader->config->tsdb->pVnode), lino, code); + } + return code; +} + +// SDataFileWriter ============================================= +struct SDataFileWriter { + SDataFileWriterConfig config[1]; + + SSkmInfo skmTb[1]; + SSkmInfo skmRow[1]; + uint8_t *bufArr[5]; + + struct { + bool opened; + SDataFileReader *reader; + + // for ts data + TABLEID tbid[1]; + bool tbHasOldData; + + const TBrinBlkArray *brinBlkArray; + int32_t brinBlkArrayIdx; + SBrinBlock 
brinBlock[1]; + int32_t brinBlockIdx; + SBlockData blockData[1]; + int32_t blockDataIdx; + // for tomb data + bool hasOldTomb; + const TTombBlkArray *tombBlkArray; + int32_t tombBlkArrayIdx; + STombBlock tombBlock[1]; + int32_t tombBlockIdx; + } ctx[1]; + + STFile files[TSDB_FTYPE_MAX]; + STsdbFD *fd[TSDB_FTYPE_MAX]; + + SHeadFooter headFooter[1]; + STombFooter tombFooter[1]; + + TBrinBlkArray brinBlkArray[1]; + SBrinBlock brinBlock[1]; + SBlockData blockData[1]; + + TTombBlkArray tombBlkArray[1]; + STombBlock tombBlock[1]; +}; + +static int32_t tsdbDataFileWriterCloseAbort(SDataFileWriter *writer) { + ASSERT(0); + return 0; +} + +static int32_t tsdbDataFileWriterDoClose(SDataFileWriter *writer) { + if (writer->ctx->reader) { + tsdbDataFileReaderClose(&writer->ctx->reader); + } + + tTombBlockDestroy(writer->tombBlock); + TARRAY2_DESTROY(writer->tombBlkArray, NULL); + tBlockDataDestroy(writer->blockData); + tBrinBlockDestroy(writer->brinBlock); + TARRAY2_DESTROY(writer->brinBlkArray, NULL); + + tTombBlockDestroy(writer->ctx->tombBlock); + tBlockDataDestroy(writer->ctx->blockData); + tBrinBlockDestroy(writer->ctx->brinBlock); + + for (int32_t i = 0; i < ARRAY_SIZE(writer->bufArr); ++i) { + tFree(writer->bufArr[i]); + } + + tDestroyTSchema(writer->skmRow->pTSchema); + tDestroyTSchema(writer->skmTb->pTSchema); + return 0; +} + +static int32_t tsdbDataFileWriterDoOpenReader(SDataFileWriter *writer) { + int32_t code = 0; + int32_t lino = 0; + + for (int32_t i = 0; i < TSDB_FTYPE_MAX; ++i) { + if (writer->config->files[i].exist) { + SDataFileReaderConfig config[1] = {{ + .tsdb = writer->config->tsdb, + .szPage = writer->config->szPage, + .bufArr = writer->config->bufArr, + }}; + + for (int32_t i = 0; i < TSDB_FTYPE_MAX; ++i) { + config->files[i].exist = writer->config->files[i].exist; + if (config->files[i].exist) { + config->files[i].file = writer->config->files[i].file; + } + } + + code = tsdbDataFileReaderOpen(NULL, config, &writer->ctx->reader); + 
TSDB_CHECK_CODE(code, lino, _exit); + break; + } + } + +_exit: + if (code) { + TSDB_ERROR_LOG(TD_VID(writer->config->tsdb->pVnode), lino, code); + } + return code; +} + +static int32_t tsdbDataFileWriterDoOpen(SDataFileWriter *writer) { + int32_t code = 0; + int32_t lino = 0; + int32_t ftype; + + if (!writer->config->skmTb) writer->config->skmTb = writer->skmTb; + if (!writer->config->skmRow) writer->config->skmRow = writer->skmRow; + if (!writer->config->bufArr) writer->config->bufArr = writer->bufArr; + + // open reader + code = tsdbDataFileWriterDoOpenReader(writer); + TSDB_CHECK_CODE(code, lino, _exit); + + // .head + ftype = TSDB_FTYPE_HEAD; + writer->files[ftype] = (STFile){ + .type = ftype, + .did = writer->config->did, + .fid = writer->config->fid, + .cid = writer->config->cid, + .size = 0, + }; + + // .data + ftype = TSDB_FTYPE_DATA; + if (writer->config->files[ftype].exist) { + writer->files[ftype] = writer->config->files[ftype].file; + } else { + writer->files[ftype] = (STFile){ + .type = ftype, + .did = writer->config->did, + .fid = writer->config->fid, + .cid = writer->config->cid, + .size = 0, + }; + } + + // .sma + ftype = TSDB_FTYPE_SMA; + if (writer->config->files[ftype].exist) { + writer->files[ftype] = writer->config->files[ftype].file; + } else { + writer->files[ftype] = (STFile){ + .type = ftype, + .did = writer->config->did, + .fid = writer->config->fid, + .cid = writer->config->cid, + .size = 0, + }; + } + + // .tomb + ftype = TSDB_FTYPE_TOMB; + writer->files[ftype] = (STFile){ + .type = ftype, + .did = writer->config->did, + .fid = writer->config->fid, + .cid = writer->config->cid, + .size = 0, + }; + + writer->ctx->opened = true; + +_exit: + if (code) { + TSDB_ERROR_LOG(TD_VID(writer->config->tsdb->pVnode), lino, code); + } + return code; +} + +int32_t tsdbFileWriteBrinBlock(STsdbFD *fd, SBrinBlock *brinBlock, int8_t cmprAlg, int64_t *fileSize, + TBrinBlkArray *brinBlkArray, uint8_t **bufArr) { + if (BRIN_BLOCK_SIZE(brinBlock) == 0) return 
0;
+
+  int32_t code;
+
+  // get SBrinBlk: the index entry for this brin block. It starts at the current end of file
+  // (*fileSize); dp->size starts at 0 and grows below as each compressed column is written.
+  SBrinBlk brinBlk[1] = {
+      {
+          .dp[0] =
+              {
+                  .offset = *fileSize,
+                  .size = 0,
+              },
+          .minTbid =
+              {
+                  .suid = TARRAY2_FIRST(brinBlock->suid),
+                  .uid = TARRAY2_FIRST(brinBlock->uid),
+              },
+          .maxTbid =
+              {
+                  .suid = TARRAY2_LAST(brinBlock->suid),
+                  .uid = TARRAY2_LAST(brinBlock->uid),
+              },
+          .minVer = TARRAY2_FIRST(brinBlock->minVer),
+          // Seed from record 0's maxVer (previously minVer): the scan below starts at index 1,
+          // so record 0's maxVer would otherwise never be taken into account.
+          .maxVer = TARRAY2_FIRST(brinBlock->maxVer),
+          .numRec = BRIN_BLOCK_SIZE(brinBlock),
+          .cmprAlg = cmprAlg,
+      },
+  };
+
+  // widen [minVer, maxVer] over the remaining records (record 0 is already in the seed values)
+  for (int32_t i = 1; i < BRIN_BLOCK_SIZE(brinBlock); i++) {
+    if (brinBlk->minVer > TARRAY2_GET(brinBlock->minVer, i)) {
+      brinBlk->minVer = TARRAY2_GET(brinBlock->minVer, i);
+    }
+    if (brinBlk->maxVer < TARRAY2_GET(brinBlock->maxVer, i)) {
+      brinBlk->maxVer = TARRAY2_GET(brinBlock->maxVer, i);
+    }
+  }
+
+  // write to file: dataArr1 columns are compressed as BIGINT; size[i] records each compressed length
+  for (int32_t i = 0; i < ARRAY_SIZE(brinBlock->dataArr1); i++) {
+    code = tsdbCmprData((uint8_t *)TARRAY2_DATA(brinBlock->dataArr1 + i), TARRAY2_DATA_LEN(brinBlock->dataArr1 + i),
+                        TSDB_DATA_TYPE_BIGINT, brinBlk->cmprAlg, &bufArr[0], 0, &brinBlk->size[i], &bufArr[1]);
+    if (code) return code;
+
+    code = tsdbWriteFile(fd, *fileSize, bufArr[0], brinBlk->size[i]);
+    if (code) return code;
+
+    brinBlk->dp->size += brinBlk->size[i];
+    *fileSize += brinBlk->size[i];
+  }
+
+  // dataArr2 columns are compressed as INT; j continues the size[] index after the dataArr1 entries
+  for (int32_t i = 0, j = ARRAY_SIZE(brinBlock->dataArr1); i < ARRAY_SIZE(brinBlock->dataArr2); i++, j++) {
+    code = tsdbCmprData((uint8_t *)TARRAY2_DATA(brinBlock->dataArr2 + i), TARRAY2_DATA_LEN(brinBlock->dataArr2 + i),
+                        TSDB_DATA_TYPE_INT, brinBlk->cmprAlg, &bufArr[0], 0, &brinBlk->size[j], &bufArr[1]);
+    if (code) return code;
+
+    code = tsdbWriteFile(fd, *fileSize, bufArr[0], brinBlk->size[j]);
+    if (code) return code;
+
+    brinBlk->dp->size += brinBlk->size[j];
+    *fileSize += brinBlk->size[j];
+  }
+
+#if 0
+  SBrinRecord record;
+  for (int32_t i = 0; i < BRIN_BLOCK_SIZE(brinBlock); i++) {
+    tBrinBlockGet(brinBlock, i, &record);
+    tsdbInfo("write brin block, block num:%04d, idx:%04d 
suid:%ld, uid:%ld, offset:%ld, numRow:%d, count:%d", + TARRAY2_SIZE(brinBlkArray), i, record.suid, record.uid, record.blockOffset, record.numRow, record.count); + } +#endif + + // append to brinBlkArray + code = TARRAY2_APPEND_PTR(brinBlkArray, brinBlk); + if (code) return code; + + tBrinBlockClear(brinBlock); + + return 0; +} + +static int32_t tsdbDataFileWriteBrinBlock(SDataFileWriter *writer) { + if (BRIN_BLOCK_SIZE(writer->brinBlock) == 0) return 0; + + int32_t code = 0; + int32_t lino = 0; + + code = tsdbFileWriteBrinBlock(writer->fd[TSDB_FTYPE_HEAD], writer->brinBlock, writer->config->cmprAlg, + &writer->files[TSDB_FTYPE_HEAD].size, writer->brinBlkArray, writer->config->bufArr); + TSDB_CHECK_CODE(code, lino, _exit); + +_exit: + if (code) { + TSDB_ERROR_LOG(TD_VID(writer->config->tsdb->pVnode), lino, code); + } + return code; +} + +static int32_t tsdbDataFileWriteBrinRecord(SDataFileWriter *writer, const SBrinRecord *record) { + int32_t code = 0; + int32_t lino = 0; + + code = tBrinBlockPut(writer->brinBlock, record); + TSDB_CHECK_CODE(code, lino, _exit); + + if (BRIN_BLOCK_SIZE(writer->brinBlock) >= writer->config->maxRow) { + code = tsdbDataFileWriteBrinBlock(writer); + TSDB_CHECK_CODE(code, lino, _exit); + } + +_exit: + if (code) { + TSDB_ERROR_LOG(TD_VID(writer->config->tsdb->pVnode), lino, code); + } + return code; +} + +static int32_t tsdbDataFileDoWriteBlockData(SDataFileWriter *writer, SBlockData *bData) { + if (bData->nRow == 0) return 0; + + ASSERT(bData->uid); + + int32_t code = 0; + int32_t lino = 0; + + SBrinRecord record[1] = {{ + .suid = bData->suid, + .uid = bData->uid, + .firstKey = bData->aTSKEY[0], + .firstKeyVer = bData->aVersion[0], + .lastKey = bData->aTSKEY[bData->nRow - 1], + .lastKeyVer = bData->aVersion[bData->nRow - 1], + .minVer = bData->aVersion[0], + .maxVer = bData->aVersion[0], + .blockOffset = writer->files[TSDB_FTYPE_DATA].size, + .smaOffset = writer->files[TSDB_FTYPE_SMA].size, + .blockSize = 0, + .blockKeySize = 0, + 
.smaSize = 0, + .numRow = bData->nRow, + .count = 1, + }}; + + for (int32_t i = 1; i < bData->nRow; ++i) { + if (bData->aTSKEY[i] != bData->aTSKEY[i - 1]) { + record->count++; + } + if (bData->aVersion[i] < record->minVer) { + record->minVer = bData->aVersion[i]; + } + if (bData->aVersion[i] > record->maxVer) { + record->maxVer = bData->aVersion[i]; + } + } + + // to .data file + int32_t sizeArr[5] = {0}; + + code = tCmprBlockData(bData, writer->config->cmprAlg, NULL, NULL, writer->config->bufArr, sizeArr); + TSDB_CHECK_CODE(code, lino, _exit); + + record->blockKeySize = sizeArr[3] + sizeArr[2]; + record->blockSize = sizeArr[0] + sizeArr[1] + record->blockKeySize; + + for (int32_t i = 3; i >= 0; --i) { + if (sizeArr[i]) { + code = tsdbWriteFile(writer->fd[TSDB_FTYPE_DATA], writer->files[TSDB_FTYPE_DATA].size, writer->config->bufArr[i], + sizeArr[i]); + TSDB_CHECK_CODE(code, lino, _exit); + writer->files[TSDB_FTYPE_DATA].size += sizeArr[i]; + } + } + + // to .sma file + for (int32_t i = 0; i < bData->nColData; ++i) { + SColData *colData = bData->aColData + i; + if ((!colData->smaOn) || ((colData->flag & HAS_VALUE) == 0)) continue; + + SColumnDataAgg sma[1] = {{.colId = colData->cid}}; + tColDataCalcSMA[colData->type](colData, &sma->sum, &sma->max, &sma->min, &sma->numOfNull); + + int32_t size = tPutColumnDataAgg(NULL, sma); + + code = tRealloc(&writer->config->bufArr[0], record->smaSize + size); + TSDB_CHECK_CODE(code, lino, _exit); + + tPutColumnDataAgg(writer->config->bufArr[0] + record->smaSize, sma); + record->smaSize += size; + } + + if (record->smaSize > 0) { + code = tsdbWriteFile(writer->fd[TSDB_FTYPE_SMA], record->smaOffset, writer->config->bufArr[0], record->smaSize); + TSDB_CHECK_CODE(code, lino, _exit); + writer->files[TSDB_FTYPE_SMA].size += record->smaSize; + } + + // append SBrinRecord + code = tsdbDataFileWriteBrinRecord(writer, record); + TSDB_CHECK_CODE(code, lino, _exit); + + tBlockDataClear(bData); + +_exit: + if (code) { + 
TSDB_ERROR_LOG(TD_VID(writer->config->tsdb->pVnode), lino, code); + } + return code; +} + +static int32_t tsdbDataFileWriteDataBlk(SDataFileWriter *writer, const TDataBlkArray *dataBlkArray) { + if (TARRAY2_SIZE(dataBlkArray) == 0) return 0; + + int32_t code = 0; + int32_t lino = 0; + + int32_t ftype = TSDB_FTYPE_HEAD; + SBlockIdx blockIdx[1] = {{ + .suid = writer->ctx->tbid->suid, + .uid = writer->ctx->tbid->uid, + .offset = writer->files[ftype].size, + .size = TARRAY2_DATA_LEN(dataBlkArray), + }}; + + code = + tsdbWriteFile(writer->fd[ftype], blockIdx->offset, (const uint8_t *)TARRAY2_DATA(dataBlkArray), blockIdx->size); + TSDB_CHECK_CODE(code, lino, _exit); + writer->files[ftype].size += blockIdx->size; + +_exit: + if (code) { + TSDB_ERROR_LOG(TD_VID(writer->config->tsdb->pVnode), lino, code); + } + return code; +} + +static int32_t tsdbDataFileDoWriteTSRow(SDataFileWriter *writer, TSDBROW *row) { + int32_t code = 0; + int32_t lino = 0; + + // update/append + if (row->type == TSDBROW_ROW_FMT) { + code = tsdbUpdateSkmRow(writer->config->tsdb, writer->ctx->tbid, TSDBROW_SVERSION(row), writer->config->skmRow); + TSDB_CHECK_CODE(code, lino, _exit); + } + + TSDBKEY key[1]; + if (row->type == TSDBROW_ROW_FMT) { + key->ts = row->pTSRow->ts; + key->version = row->version; + } else { + key->ts = row->pBlockData->aTSKEY[row->iRow]; + key->version = row->pBlockData->aVersion[row->iRow]; + } + if (key->version <= writer->config->compactVersion // + && writer->blockData->nRow > 0 // + && writer->blockData->aTSKEY[writer->blockData->nRow - 1] == key->ts // + ) { + code = tBlockDataUpdateRow(writer->blockData, row, writer->config->skmRow->pTSchema); + TSDB_CHECK_CODE(code, lino, _exit); + } else { + if (writer->blockData->nRow >= writer->config->maxRow) { + code = tsdbDataFileDoWriteBlockData(writer, writer->blockData); + TSDB_CHECK_CODE(code, lino, _exit); + } + + code = tBlockDataAppendRow(writer->blockData, row, writer->config->skmRow->pTSchema, writer->ctx->tbid->uid); + 
TSDB_CHECK_CODE(code, lino, _exit); + } + +_exit: + if (code) { + TSDB_ERROR_LOG(TD_VID(writer->config->tsdb->pVnode), lino, code); + } + return code; +} + +static int32_t tsdbDataFileDoWriteTableOldData(SDataFileWriter *writer, const TSDBKEY *key) { + if (writer->ctx->tbHasOldData == false) return 0; + + int32_t code = 0; + int32_t lino = 0; + + for (;;) { + for (;;) { + // SBlockData + for (; writer->ctx->blockDataIdx < writer->ctx->blockData->nRow; writer->ctx->blockDataIdx++) { + if (key->ts < writer->ctx->blockData->aTSKEY[writer->ctx->blockDataIdx] // + || (key->ts == writer->ctx->blockData->aTSKEY[writer->ctx->blockDataIdx] && + key->version < writer->ctx->blockData->aVersion[writer->ctx->blockDataIdx])) { + goto _exit; + } else { + TSDBROW row = tsdbRowFromBlockData(writer->ctx->blockData, writer->ctx->blockDataIdx); + code = tsdbDataFileDoWriteTSRow(writer, &row); + TSDB_CHECK_CODE(code, lino, _exit); + } + } + + // SBrinBlock + if (writer->ctx->brinBlockIdx >= BRIN_BLOCK_SIZE(writer->ctx->brinBlock)) { + break; + } + + for (; writer->ctx->brinBlockIdx < BRIN_BLOCK_SIZE(writer->ctx->brinBlock); writer->ctx->brinBlockIdx++) { + if (TARRAY2_GET(writer->ctx->brinBlock->uid, writer->ctx->brinBlockIdx) != writer->ctx->tbid->uid) { + writer->ctx->tbHasOldData = false; + goto _exit; + } + + if (key->ts < TARRAY2_GET(writer->ctx->brinBlock->firstKey, writer->ctx->brinBlockIdx) // + || (key->ts == TARRAY2_GET(writer->ctx->brinBlock->firstKey, writer->ctx->brinBlockIdx) && + key->version < TARRAY2_GET(writer->ctx->brinBlock->firstKeyVer, writer->ctx->brinBlockIdx))) { + goto _exit; + } else { + SBrinRecord record[1]; + tBrinBlockGet(writer->ctx->brinBlock, writer->ctx->brinBlockIdx, record); + if (key->ts > record->lastKey || (key->ts == record->lastKey && key->version > record->maxVer)) { + if (writer->blockData->nRow > 0) { + code = tsdbDataFileDoWriteBlockData(writer, writer->blockData); + TSDB_CHECK_CODE(code, lino, _exit); + } + + code = 
tsdbDataFileWriteBrinRecord(writer, record); + TSDB_CHECK_CODE(code, lino, _exit); + } else { + code = tsdbDataFileReadBlockData(writer->ctx->reader, record, writer->ctx->blockData); + TSDB_CHECK_CODE(code, lino, _exit); + + writer->ctx->blockDataIdx = 0; + writer->ctx->brinBlockIdx++; + break; + } + } + } + } + + // SBrinBlk + if (writer->ctx->brinBlkArrayIdx >= TARRAY2_SIZE(writer->ctx->brinBlkArray)) { + writer->ctx->brinBlkArray = NULL; + writer->ctx->tbHasOldData = false; + goto _exit; + } + + for (; writer->ctx->brinBlkArrayIdx < TARRAY2_SIZE(writer->ctx->brinBlkArray); writer->ctx->brinBlkArrayIdx++) { + const SBrinBlk *brinBlk = TARRAY2_GET_PTR(writer->ctx->brinBlkArray, writer->ctx->brinBlkArrayIdx); + + if (brinBlk->minTbid.uid != writer->ctx->tbid->uid) { + writer->ctx->tbHasOldData = false; + goto _exit; + } + + code = tsdbDataFileReadBrinBlock(writer->ctx->reader, brinBlk, writer->ctx->brinBlock); + TSDB_CHECK_CODE(code, lino, _exit); + + writer->ctx->brinBlockIdx = 0; + writer->ctx->brinBlkArrayIdx++; + break; + } + } + +_exit: + if (code) { + TSDB_ERROR_LOG(TD_VID(writer->config->tsdb->pVnode), lino, code); + } + return code; +} + +static int32_t tsdbDataFileDoWriteTSData(SDataFileWriter *writer, TSDBROW *row) { + int32_t code = 0; + int32_t lino = 0; + + if (writer->ctx->tbHasOldData) { + TSDBKEY key[1]; + if (row->type == TSDBROW_ROW_FMT) { + key->ts = row->pTSRow->ts; + key->version = row->version; + } else { + key->ts = row->pBlockData->aTSKEY[row->iRow]; + key->version = row->pBlockData->aVersion[row->iRow]; + } + + code = tsdbDataFileDoWriteTableOldData(writer, key); + TSDB_CHECK_CODE(code, lino, _exit); + } + + code = tsdbDataFileDoWriteTSRow(writer, row); + TSDB_CHECK_CODE(code, lino, _exit); + +_exit: + if (code) { + TSDB_ERROR_LOG(TD_VID(writer->config->tsdb->pVnode), lino, code); + } + return code; +} + +static int32_t tsdbDataFileWriteTableDataEnd(SDataFileWriter *writer) { + if (writer->ctx->tbid->uid == 0) return 0; + + int32_t code = 
0; + int32_t lino = 0; + + if (writer->ctx->tbHasOldData) { + TSDBKEY key = { + .ts = TSKEY_MAX, + .version = VERSION_MAX, + }; + + code = tsdbDataFileDoWriteTableOldData(writer, &key); + TSDB_CHECK_CODE(code, lino, _exit); + + ASSERT(writer->ctx->tbHasOldData == false); + } + + code = tsdbDataFileDoWriteBlockData(writer, writer->blockData); + TSDB_CHECK_CODE(code, lino, _exit); + +_exit: + if (code) { + TSDB_ERROR_LOG(TD_VID(writer->config->tsdb->pVnode), lino, code); + } + return code; +} + +static int32_t tsdbDataFileWriteTableDataBegin(SDataFileWriter *writer, const TABLEID *tbid) { + int32_t code = 0; + int32_t lino = 0; + + ASSERT(writer->ctx->blockDataIdx == writer->ctx->blockData->nRow); + ASSERT(writer->blockData->nRow == 0); + + SMetaInfo info; + bool drop = false; + TABLEID tbid1[1]; + writer->ctx->tbHasOldData = false; + while (writer->ctx->brinBlkArray) { // skip data of previous table + for (; writer->ctx->brinBlockIdx < BRIN_BLOCK_SIZE(writer->ctx->brinBlock); writer->ctx->brinBlockIdx++) { + TABLEID tbid2[1] = {{ + .suid = TARRAY2_GET(writer->ctx->brinBlock->suid, writer->ctx->brinBlockIdx), + .uid = TARRAY2_GET(writer->ctx->brinBlock->uid, writer->ctx->brinBlockIdx), + }}; + + if (tbid2->uid == tbid->uid) { + writer->ctx->tbHasOldData = true; + goto _begin; + } else if (tbid2->suid > tbid->suid || (tbid2->suid == tbid->suid && tbid2->uid > tbid->uid)) { + goto _begin; + } else { + if (tbid2->uid != writer->ctx->tbid->uid) { + if (drop && tbid1->uid == tbid2->uid) { + continue; + } else if (metaGetInfo(writer->config->tsdb->pVnode->pMeta, tbid2->uid, &info, NULL) != 0) { + drop = true; + *tbid1 = *tbid2; + continue; + } else { + drop = false; + writer->ctx->tbid[0] = *tbid2; + } + } + + SBrinRecord record[1]; + tBrinBlockGet(writer->ctx->brinBlock, writer->ctx->brinBlockIdx, record); + + code = tsdbDataFileWriteBrinRecord(writer, record); + TSDB_CHECK_CODE(code, lino, _exit); + } + } + + if (writer->ctx->brinBlkArrayIdx >= 
TARRAY2_SIZE(writer->ctx->brinBlkArray)) { + writer->ctx->brinBlkArray = NULL; + break; + } + + for (; writer->ctx->brinBlkArrayIdx < TARRAY2_SIZE(writer->ctx->brinBlkArray); writer->ctx->brinBlkArrayIdx++) { + const SBrinBlk *brinBlk = TARRAY2_GET_PTR(writer->ctx->brinBlkArray, writer->ctx->brinBlkArrayIdx); + + code = tsdbDataFileReadBrinBlock(writer->ctx->reader, brinBlk, writer->ctx->brinBlock); + TSDB_CHECK_CODE(code, lino, _exit); + + writer->ctx->brinBlockIdx = 0; + writer->ctx->brinBlkArrayIdx++; + break; + } + } + +_begin: + writer->ctx->tbid[0] = *tbid; + + if (tbid->uid == INT64_MAX) goto _exit; + + code = tsdbUpdateSkmTb(writer->config->tsdb, tbid, writer->config->skmTb); + TSDB_CHECK_CODE(code, lino, _exit); + + code = tBlockDataInit(writer->blockData, writer->ctx->tbid, writer->config->skmTb->pTSchema, NULL, 0); + TSDB_CHECK_CODE(code, lino, _exit); + +_exit: + if (code) { + TSDB_ERROR_LOG(TD_VID(writer->config->tsdb->pVnode), lino, code); + } + return code; +} + +int32_t tsdbFileWriteHeadFooter(STsdbFD *fd, int64_t *fileSize, const SHeadFooter *footer) { + int32_t code = tsdbWriteFile(fd, *fileSize, (const uint8_t *)footer, sizeof(*footer)); + if (code) return code; + *fileSize += sizeof(*footer); + return 0; +} + +static int32_t tsdbDataFileWriteHeadFooter(SDataFileWriter *writer) { + int32_t code = 0; + int32_t lino = 0; + + code = tsdbFileWriteHeadFooter(writer->fd[TSDB_FTYPE_HEAD], &writer->files[TSDB_FTYPE_HEAD].size, writer->headFooter); + TSDB_CHECK_CODE(code, lino, _exit); + +_exit: + if (code) { + TSDB_ERROR_LOG(TD_VID(writer->config->tsdb->pVnode), lino, code); + } + return code; +} + +static int32_t tsdbDataFileDoWriteTombBlock(SDataFileWriter *writer) { + if (TOMB_BLOCK_SIZE(writer->tombBlock) == 0) return 0; + + int32_t code = 0; + int32_t lino = 0; + + code = tsdbFileWriteTombBlock(writer->fd[TSDB_FTYPE_TOMB], writer->tombBlock, writer->config->cmprAlg, + &writer->files[TSDB_FTYPE_TOMB].size, writer->tombBlkArray, 
writer->config->bufArr);
+  TSDB_CHECK_CODE(code, lino, _exit);
+
+_exit:
+  if (code) {
+    TSDB_ERROR_LOG(TD_VID(writer->config->tsdb->pVnode), lino, code);
+  }
+  return code;
+}
+
+// Write the accumulated tomb block index (writer->tombBlkArray) to the .tomb file through
+// tsdbFileWriteTombBlk, passing the footer's tombBlkPtr and the running file size.
+// Asserts that the index holds at least one entry.
+static int32_t tsdbDataFileDoWriteTombBlk(SDataFileWriter *writer) {
+  ASSERT(TARRAY2_SIZE(writer->tombBlkArray) > 0);
+
+  int32_t code = 0;
+  int32_t lino = 0;
+
+  code = tsdbFileWriteTombBlk(writer->fd[TSDB_FTYPE_TOMB], writer->tombBlkArray, writer->tombFooter->tombBlkPtr,
+                              &writer->files[TSDB_FTYPE_TOMB].size);
+  TSDB_CHECK_CODE(code, lino, _exit);
+
+_exit:
+  if (code) {
+    TSDB_ERROR_LOG(TD_VID(writer->config->tsdb->pVnode), lino, code);
+  }
+  return code;
+}
+
+// Write the raw footer struct at offset *fileSize and, on success, advance *fileSize by sizeof(*footer).
+// Returns the tsdbWriteFile error code unchanged on failure.
+int32_t tsdbFileWriteTombFooter(STsdbFD *fd, const STombFooter *footer, int64_t *fileSize) {
+  int32_t code = tsdbWriteFile(fd, *fileSize, (const uint8_t *)footer, sizeof(*footer));
+  if (code) return code;
+  *fileSize += sizeof(*footer);
+  return 0;
+}
+
+// Writer-level wrapper: write writer->tombFooter to the .tomb fd and log any failure.
+static int32_t tsdbDataFileWriteTombFooter(SDataFileWriter *writer) {
+  int32_t code = 0;
+  int32_t lino = 0;
+
+  code = tsdbFileWriteTombFooter(writer->fd[TSDB_FTYPE_TOMB], writer->tombFooter, &writer->files[TSDB_FTYPE_TOMB].size);
+  TSDB_CHECK_CODE(code, lino, _exit);
+
+_exit:
+  if (code) {
+    TSDB_ERROR_LOG(TD_VID(writer->config->tsdb->pVnode), lino, code);
+  }
+  return code;
+}
+
+// Merge one new tomb record into the output tomb block (writer->tombBlock).
+//
+// While old tomb data remains (ctx->hasOldTomb), old records are read one by one from
+// ctx->tombBlock and compared with `record` using tTombRecordCompare(record, old):
+//   c > 0 : the old record is copied to the output first, and scanning continues;
+//   c < 0 : scanning stops and `record` is written now; ctx->tombBlockIdx is NOT advanced, so
+//           the same old record is compared again on the next call;
+//   c == 0: not expected (ASSERT).
+// When the current old block is exhausted, the next one is loaded from ctx->tombBlkArray; when
+// there are no more, hasOldTomb is cleared.
+// A record with suid == INT64_MAX is not written itself (see the check after the _write label).
+// Returns 0 on success or the first error code encountered.
+static int32_t tsdbDataFileDoWriteTombRecord(SDataFileWriter *writer, const STombRecord *record) {
+  int32_t code = 0;
+  int32_t lino = 0;
+
+  while (writer->ctx->hasOldTomb) {
+    for (; writer->ctx->tombBlockIdx < TOMB_BLOCK_SIZE(writer->ctx->tombBlock); writer->ctx->tombBlockIdx++) {
+      STombRecord record1[1];
+      tTombBlockGet(writer->ctx->tombBlock, writer->ctx->tombBlockIdx, record1);
+
+      int32_t c = tTombRecordCompare(record, record1);
+      if (c < 0) {
+        // Was `break`, which only left this inner loop and fell through to loading the next old
+        // tomb block (or clearing hasOldTomb), so the not-yet-consumed records of the current
+        // block were skipped. Jump straight to writing `record` instead.
+        goto _write;
+      } else if (c > 0) {
+        code = tTombBlockPut(writer->tombBlock, record1);
+        TSDB_CHECK_CODE(code, lino, _exit);
+
+        if (TOMB_BLOCK_SIZE(writer->tombBlock) >= writer->config->maxRow) {
+          code = tsdbDataFileDoWriteTombBlock(writer);
+          
TSDB_CHECK_CODE(code, lino, _exit); + } + } else { + ASSERT(0); + } + } + + if (writer->ctx->tombBlkArrayIdx >= TARRAY2_SIZE(writer->ctx->tombBlkArray)) { + writer->ctx->hasOldTomb = false; + break; + } + + for (; writer->ctx->tombBlkArrayIdx < TARRAY2_SIZE(writer->ctx->tombBlkArray); ++writer->ctx->tombBlkArrayIdx) { + const STombBlk *tombBlk = TARRAY2_GET_PTR(writer->ctx->tombBlkArray, writer->ctx->tombBlkArrayIdx); + + code = tsdbDataFileReadTombBlock(writer->ctx->reader, tombBlk, writer->ctx->tombBlock); + TSDB_CHECK_CODE(code, lino, _exit); + + writer->ctx->tombBlockIdx = 0; + writer->ctx->tombBlkArrayIdx++; + break; + } + } + +_write: + if (record->suid == INT64_MAX) goto _exit; + + code = tTombBlockPut(writer->tombBlock, record); + TSDB_CHECK_CODE(code, lino, _exit); + + if (TOMB_BLOCK_SIZE(writer->tombBlock) >= writer->config->maxRow) { + code = tsdbDataFileDoWriteTombBlock(writer); + TSDB_CHECK_CODE(code, lino, _exit); + } + +_exit: + if (code) { + TSDB_ERROR_LOG(TD_VID(writer->config->tsdb->pVnode), lino, code); + } + return code; +} + +int32_t tsdbFileWriteBrinBlk(STsdbFD *fd, TBrinBlkArray *brinBlkArray, SFDataPtr *ptr, int64_t *fileSize) { + ASSERT(TARRAY2_SIZE(brinBlkArray) > 0); + ptr->offset = *fileSize; + ptr->size = TARRAY2_DATA_LEN(brinBlkArray); + + int32_t code = tsdbWriteFile(fd, ptr->offset, (uint8_t *)TARRAY2_DATA(brinBlkArray), ptr->size); + if (code) return code; + + *fileSize += ptr->size; + return 0; +} + +static int32_t tsdbDataFileWriteBrinBlk(SDataFileWriter *writer) { + int32_t code = 0; + int32_t lino = 0; + + code = tsdbFileWriteBrinBlk(writer->fd[TSDB_FTYPE_HEAD], writer->brinBlkArray, writer->headFooter->brinBlkPtr, + &writer->files[TSDB_FTYPE_HEAD].size); + TSDB_CHECK_CODE(code, lino, _exit); + +_exit: + if (code) { + TSDB_ERROR_LOG(TD_VID(writer->config->tsdb->pVnode), lino, code); + } + return code; +} + +static int32_t tsdbDataFileWriterCloseCommit(SDataFileWriter *writer, TFileOpArray *opArr) { + int32_t code = 0; + int32_t 
lino = 0; + + int32_t ftype; + STFileOp op; + + if (writer->fd[TSDB_FTYPE_HEAD]) { + TABLEID tbid[1] = {{ + .suid = INT64_MAX, + .uid = INT64_MAX, + }}; + + code = tsdbDataFileWriteTableDataEnd(writer); + TSDB_CHECK_CODE(code, lino, _exit); + + code = tsdbDataFileWriteTableDataBegin(writer, tbid); + TSDB_CHECK_CODE(code, lino, _exit); + + code = tsdbDataFileWriteBrinBlock(writer); + TSDB_CHECK_CODE(code, lino, _exit); + + code = tsdbDataFileWriteBrinBlk(writer); + TSDB_CHECK_CODE(code, lino, _exit); + + code = tsdbDataFileWriteHeadFooter(writer); + TSDB_CHECK_CODE(code, lino, _exit); + + // .head + ftype = TSDB_FTYPE_HEAD; + if (writer->config->files[ftype].exist) { + op = (STFileOp){ + .optype = TSDB_FOP_REMOVE, + .fid = writer->config->fid, + .of = writer->config->files[ftype].file, + }; + code = TARRAY2_APPEND(opArr, op); + TSDB_CHECK_CODE(code, lino, _exit); + } + op = (STFileOp){ + .optype = TSDB_FOP_CREATE, + .fid = writer->config->fid, + .nf = writer->files[ftype], + }; + code = TARRAY2_APPEND(opArr, op); + TSDB_CHECK_CODE(code, lino, _exit); + + // .data + ftype = TSDB_FTYPE_DATA; + if (!writer->config->files[ftype].exist) { + op = (STFileOp){ + .optype = TSDB_FOP_CREATE, + .fid = writer->config->fid, + .nf = writer->files[ftype], + }; + code = TARRAY2_APPEND(opArr, op); + TSDB_CHECK_CODE(code, lino, _exit); + } else if (writer->config->files[ftype].file.size != writer->files[ftype].size) { + op = (STFileOp){ + .optype = TSDB_FOP_MODIFY, + .fid = writer->config->fid, + .of = writer->config->files[ftype].file, + .nf = writer->files[ftype], + }; + code = TARRAY2_APPEND(opArr, op); + TSDB_CHECK_CODE(code, lino, _exit); + } + + // .sma + ftype = TSDB_FTYPE_SMA; + if (!writer->config->files[ftype].exist) { + op = (STFileOp){ + .optype = TSDB_FOP_CREATE, + .fid = writer->config->fid, + .nf = writer->files[ftype], + }; + code = TARRAY2_APPEND(opArr, op); + TSDB_CHECK_CODE(code, lino, _exit); + } else if (writer->config->files[ftype].file.size != 
writer->files[ftype].size) { + op = (STFileOp){ + .optype = TSDB_FOP_MODIFY, + .fid = writer->config->fid, + .of = writer->config->files[ftype].file, + .nf = writer->files[ftype], + }; + code = TARRAY2_APPEND(opArr, op); + TSDB_CHECK_CODE(code, lino, _exit); + } + } + + if (writer->fd[TSDB_FTYPE_TOMB]) { + STombRecord record[1] = {{ + .suid = INT64_MAX, + .uid = INT64_MAX, + .version = INT64_MAX, + }}; + + code = tsdbDataFileDoWriteTombRecord(writer, record); + TSDB_CHECK_CODE(code, lino, _exit); + + code = tsdbDataFileDoWriteTombBlock(writer); + TSDB_CHECK_CODE(code, lino, _exit); + + code = tsdbDataFileDoWriteTombBlk(writer); + TSDB_CHECK_CODE(code, lino, _exit); + + code = tsdbDataFileWriteTombFooter(writer); + TSDB_CHECK_CODE(code, lino, _exit); + + ftype = TSDB_FTYPE_TOMB; + if (writer->config->files[ftype].exist) { + op = (STFileOp){ + .optype = TSDB_FOP_REMOVE, + .fid = writer->config->fid, + .of = writer->config->files[ftype].file, + }; + code = TARRAY2_APPEND(opArr, op); + TSDB_CHECK_CODE(code, lino, _exit); + } + op = (STFileOp){ + .optype = TSDB_FOP_CREATE, + .fid = writer->config->fid, + .nf = writer->files[ftype], + }; + code = TARRAY2_APPEND(opArr, op); + TSDB_CHECK_CODE(code, lino, _exit); + } + + for (int32_t i = 0; i < TSDB_FTYPE_MAX; ++i) { + if (writer->fd[i]) { + code = tsdbFsyncFile(writer->fd[i]); + TSDB_CHECK_CODE(code, lino, _exit); + tsdbCloseFile(&writer->fd[i]); + } + } + +_exit: + if (code) { + TSDB_ERROR_LOG(TD_VID(writer->config->tsdb->pVnode), lino, code); + } + return code; +} + +static int32_t tsdbDataFileWriterOpenDataFD(SDataFileWriter *writer) { + int32_t code = 0; + int32_t lino = 0; + + int32_t ftypes[] = {TSDB_FTYPE_HEAD, TSDB_FTYPE_DATA, TSDB_FTYPE_SMA}; + + for (int32_t i = 0; i < ARRAY_SIZE(ftypes); ++i) { + int32_t ftype = ftypes[i]; + + char fname[TSDB_FILENAME_LEN]; + int32_t flag = TD_FILE_READ | TD_FILE_WRITE; + + if (writer->files[ftype].size == 0) { + flag |= (TD_FILE_CREATE | TD_FILE_TRUNC); + } + + 
tsdbTFileName(writer->config->tsdb, &writer->files[ftype], fname); + code = tsdbOpenFile(fname, writer->config->szPage, flag, &writer->fd[ftype]); + TSDB_CHECK_CODE(code, lino, _exit); + + if (writer->files[ftype].size == 0) { + uint8_t hdr[TSDB_FHDR_SIZE] = {0}; + + code = tsdbWriteFile(writer->fd[ftype], 0, hdr, TSDB_FHDR_SIZE); + TSDB_CHECK_CODE(code, lino, _exit); + + writer->files[ftype].size += TSDB_FHDR_SIZE; + } + } + + if (writer->ctx->reader) { + code = tsdbDataFileReadBrinBlk(writer->ctx->reader, &writer->ctx->brinBlkArray); + TSDB_CHECK_CODE(code, lino, _exit); + } + +_exit: + if (code) { + TSDB_ERROR_LOG(TD_VID(writer->config->tsdb->pVnode), lino, code); + } + return code; +} + +int32_t tsdbDataFileWriterOpen(const SDataFileWriterConfig *config, SDataFileWriter **writer) { + writer[0] = taosMemoryCalloc(1, sizeof(*writer[0])); + if (!writer[0]) return TSDB_CODE_OUT_OF_MEMORY; + + writer[0]->config[0] = config[0]; + return 0; +} + +int32_t tsdbDataFileWriterClose(SDataFileWriter **writer, bool abort, TFileOpArray *opArr) { + if (writer[0] == NULL) return 0; + + int32_t code = 0; + int32_t lino = 0; + + if (writer[0]->ctx->opened) { + if (abort) { + code = tsdbDataFileWriterCloseAbort(writer[0]); + TSDB_CHECK_CODE(code, lino, _exit); + } else { + code = tsdbDataFileWriterCloseCommit(writer[0], opArr); + TSDB_CHECK_CODE(code, lino, _exit); + } + tsdbDataFileWriterDoClose(writer[0]); + } + taosMemoryFree(writer[0]); + writer[0] = NULL; + +_exit: + if (code) { + TSDB_ERROR_LOG(TD_VID(writer[0]->config->tsdb->pVnode), lino, code); + } + return code; +} + +int32_t tsdbDataFileWriteRow(SDataFileWriter *writer, SRowInfo *row) { + int32_t code = 0; + int32_t lino = 0; + + if (!writer->ctx->opened) { + code = tsdbDataFileWriterDoOpen(writer); + TSDB_CHECK_CODE(code, lino, _exit); + } + + if (writer->fd[TSDB_FTYPE_HEAD] == NULL) { + code = tsdbDataFileWriterOpenDataFD(writer); + TSDB_CHECK_CODE(code, lino, _exit); + } + + if (row->uid != writer->ctx->tbid->uid) { 
+ code = tsdbDataFileWriteTableDataEnd(writer); + TSDB_CHECK_CODE(code, lino, _exit); + + code = tsdbDataFileWriteTableDataBegin(writer, (TABLEID *)row); + TSDB_CHECK_CODE(code, lino, _exit); + } + + code = tsdbDataFileDoWriteTSData(writer, &row->row); + TSDB_CHECK_CODE(code, lino, _exit); + +_exit: + if (code) { + TSDB_ERROR_LOG(TD_VID(writer->config->tsdb->pVnode), lino, code); + } + return code; +} + +int32_t tsdbDataFileWriteBlockData(SDataFileWriter *writer, SBlockData *bData) { + if (bData->nRow == 0) return 0; + + int32_t code = 0; + int32_t lino = 0; + + ASSERT(bData->uid); + + if (!writer->ctx->opened) { + code = tsdbDataFileWriterDoOpen(writer); + TSDB_CHECK_CODE(code, lino, _exit); + } + + if (writer->fd[TSDB_FTYPE_DATA] == NULL) { + code = tsdbDataFileWriterOpenDataFD(writer); + TSDB_CHECK_CODE(code, lino, _exit); + } + + if (bData->uid != writer->ctx->tbid->uid) { + code = tsdbDataFileWriteTableDataEnd(writer); + TSDB_CHECK_CODE(code, lino, _exit); + + code = tsdbDataFileWriteTableDataBegin(writer, (TABLEID *)bData); + TSDB_CHECK_CODE(code, lino, _exit); + } + + if (writer->ctx->tbHasOldData) { + TSDBKEY key = { + .ts = bData->aTSKEY[0], + .version = bData->aVersion[0], + }; + + code = tsdbDataFileDoWriteTableOldData(writer, &key); + TSDB_CHECK_CODE(code, lino, _exit); + } + + if (!writer->ctx->tbHasOldData // + && writer->blockData->nRow == 0 // + ) { + code = tsdbDataFileDoWriteBlockData(writer, bData); + TSDB_CHECK_CODE(code, lino, _exit); + } else { + for (int32_t i = 0; i < bData->nRow; ++i) { + TSDBROW row[1] = {tsdbRowFromBlockData(bData, i)}; + code = tsdbDataFileDoWriteTSData(writer, row); + TSDB_CHECK_CODE(code, lino, _exit); + } + } + +_exit: + if (code) { + TSDB_ERROR_LOG(TD_VID(writer->config->tsdb->pVnode), lino, code); + } + return code; +} + +int32_t tsdbDataFileFlush(SDataFileWriter *writer) { + ASSERT(writer->ctx->opened); + + if (writer->blockData->nRow == 0) return 0; + if (writer->ctx->tbHasOldData) return 0; + + return 
tsdbDataFileDoWriteBlockData(writer, writer->blockData); +} + +static int32_t tsdbDataFileWriterOpenTombFD(SDataFileWriter *writer) { + int32_t code = 0; + int32_t lino = 0; + + char fname[TSDB_FILENAME_LEN]; + int32_t ftype = TSDB_FTYPE_TOMB; + + ASSERT(writer->files[ftype].size == 0); + + int32_t flag = (TD_FILE_READ | TD_FILE_WRITE | TD_FILE_CREATE | TD_FILE_TRUNC); + + tsdbTFileName(writer->config->tsdb, writer->files + ftype, fname); + code = tsdbOpenFile(fname, writer->config->szPage, flag, &writer->fd[ftype]); + TSDB_CHECK_CODE(code, lino, _exit); + + uint8_t hdr[TSDB_FHDR_SIZE] = {0}; + code = tsdbWriteFile(writer->fd[ftype], 0, hdr, TSDB_FHDR_SIZE); + TSDB_CHECK_CODE(code, lino, _exit); + writer->files[ftype].size += TSDB_FHDR_SIZE; + + if (writer->ctx->reader) { + code = tsdbDataFileReadTombBlk(writer->ctx->reader, &writer->ctx->tombBlkArray); + TSDB_CHECK_CODE(code, lino, _exit); + + if (TARRAY2_SIZE(writer->ctx->tombBlkArray) > 0) { + writer->ctx->hasOldTomb = true; + } + + writer->ctx->tombBlkArrayIdx = 0; + tTombBlockClear(writer->ctx->tombBlock); + writer->ctx->tombBlockIdx = 0; + } + +_exit: + if (code) { + TSDB_ERROR_LOG(TD_VID(writer->config->tsdb->pVnode), lino, code); + } + return code; +} + +int32_t tsdbDataFileWriteTombRecord(SDataFileWriter *writer, const STombRecord *record) { + int32_t code = 0; + int32_t lino = 0; + + if (!writer->ctx->opened) { + code = tsdbDataFileWriterDoOpen(writer); + TSDB_CHECK_CODE(code, lino, _exit); + } + + if (writer->fd[TSDB_FTYPE_TOMB] == NULL) { + code = tsdbDataFileWriterOpenTombFD(writer); + TSDB_CHECK_CODE(code, lino, _exit); + } + + code = tsdbDataFileDoWriteTombRecord(writer, record); + TSDB_CHECK_CODE(code, lino, _exit); + +_exit: + if (code) { + TSDB_ERROR_LOG(TD_VID(writer->config->tsdb->pVnode), lino, code); + } + return code; +} \ No newline at end of file diff --git a/source/dnode/vnode/src/tsdb/tsdbDataFileRW.h b/source/dnode/vnode/src/tsdb/tsdbDataFileRW.h new file mode 100644 index 
0000000000..827b58fb4a --- /dev/null +++ b/source/dnode/vnode/src/tsdb/tsdbDataFileRW.h @@ -0,0 +1,104 @@ +/* + * Copyright (c) 2019 TAOS Data, Inc. + * + * This program is free software: you can use, redistribute, and/or modify + * it under the terms of the GNU Affero General Public License, version 3 + * or later ("AGPL"), as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see . + */ + +#include "tsdbDef.h" +#include "tsdbFSet2.h" +#include "tsdbSttFileRW.h" +#include "tsdbUtil2.h" + +#ifndef _TSDB_DATA_FILE_RW_H +#define _TSDB_DATA_FILE_RW_H + +#ifdef __cplusplus +extern "C" { +#endif + +typedef TARRAY2(SBlockIdx) TBlockIdxArray; +typedef TARRAY2(SDataBlk) TDataBlkArray; +typedef TARRAY2(SColumnDataAgg) TColumnDataAggArray; + +typedef struct { + SFDataPtr brinBlkPtr[1]; + SFDataPtr rsrvd[2]; +} SHeadFooter; + +typedef struct { + SFDataPtr tombBlkPtr[1]; + SFDataPtr rsrvd[2]; +} STombFooter; + +// SDataFileReader ============================================= +typedef struct SDataFileReader SDataFileReader; +typedef struct SDataFileReaderConfig { + STsdb *tsdb; + int32_t szPage; + struct { + bool exist; + STFile file; + } files[TSDB_FTYPE_MAX]; + uint8_t **bufArr; +} SDataFileReaderConfig; + +int32_t tsdbDataFileReaderOpen(const char *fname[/* TSDB_FTYPE_MAX */], const SDataFileReaderConfig *config, + SDataFileReader **reader); +int32_t tsdbDataFileReaderClose(SDataFileReader **reader); +// .head +int32_t tsdbDataFileReadBrinBlk(SDataFileReader *reader, const TBrinBlkArray **brinBlkArray); +int32_t tsdbDataFileReadBrinBlock(SDataFileReader *reader, const SBrinBlk *brinBlk, SBrinBlock *brinBlock); +// .data +int32_t tsdbDataFileReadBlockData(SDataFileReader 
*reader, const SBrinRecord *record, SBlockData *bData); +int32_t tsdbDataFileReadBlockDataByColumn(SDataFileReader *reader, const SBrinRecord *record, SBlockData *bData, + STSchema *pTSchema, int16_t cids[], int32_t ncid); +// .sma +int32_t tsdbDataFileReadBlockSma(SDataFileReader *reader, const SBrinRecord *record, + TColumnDataAggArray *columnDataAggArray); +// .tomb +int32_t tsdbDataFileReadTombBlk(SDataFileReader *reader, const TTombBlkArray **tombBlkArray); +int32_t tsdbDataFileReadTombBlock(SDataFileReader *reader, const STombBlk *tombBlk, STombBlock *tData); + +// SDataFileWriter ============================================= +typedef struct SDataFileWriter SDataFileWriter; +typedef struct SDataFileWriterConfig { + STsdb *tsdb; + int8_t cmprAlg; + int32_t maxRow; + int32_t szPage; + int32_t fid; + int64_t cid; + SDiskID did; + int64_t compactVersion; + struct { + bool exist; + STFile file; + } files[TSDB_FTYPE_MAX]; + SSkmInfo *skmTb; + SSkmInfo *skmRow; + uint8_t **bufArr; +} SDataFileWriterConfig; + +int32_t tsdbDataFileWriterOpen(const SDataFileWriterConfig *config, SDataFileWriter **writer); +int32_t tsdbDataFileWriterClose(SDataFileWriter **writer, bool abort, TFileOpArray *opArr); + +int32_t tsdbDataFileWriteRow(SDataFileWriter *writer, SRowInfo *row); +int32_t tsdbDataFileWriteBlockData(SDataFileWriter *writer, SBlockData *bData); +int32_t tsdbDataFileFlush(SDataFileWriter *writer); + +int32_t tsdbDataFileWriteTombRecord(SDataFileWriter *writer, const STombRecord *record); + +#ifdef __cplusplus +} +#endif + +#endif /*_TSDB_DATA_FILE_RW_H*/ \ No newline at end of file diff --git a/source/dnode/vnode/src/tsdb/tsdbDef.h b/source/dnode/vnode/src/tsdb/tsdbDef.h new file mode 100644 index 0000000000..e768f68b15 --- /dev/null +++ b/source/dnode/vnode/src/tsdb/tsdbDef.h @@ -0,0 +1,44 @@ +/* + * Copyright (c) 2019 TAOS Data, Inc. 
+ * + * This program is free software: you can use, redistribute, and/or modify + * it under the terms of the GNU Affero General Public License, version 3 + * or later ("AGPL"), as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see . + */ + +#include "tarray2.h" +#include "tsdb.h" + +#ifndef _TD_TSDB_DEF_H_ +#define _TD_TSDB_DEF_H_ + +#ifdef __cplusplus +extern "C" { +#endif + +#define TSDB_ERROR_LOG(vid, lino, code) \ + tsdbError("vgId:%d %s failed at line %d since %s", vid, __func__, lino, tstrerror(code)) + +typedef struct SFDataPtr { + int64_t offset; + int64_t size; +} SFDataPtr; + +extern int32_t tsdbOpenFile(const char *path, int32_t szPage, int32_t flag, STsdbFD **ppFD); +extern void tsdbCloseFile(STsdbFD **ppFD); +extern int32_t tsdbWriteFile(STsdbFD *pFD, int64_t offset, const uint8_t *pBuf, int64_t size); +extern int32_t tsdbReadFile(STsdbFD *pFD, int64_t offset, uint8_t *pBuf, int64_t size); +extern int32_t tsdbFsyncFile(STsdbFD *pFD); + +#ifdef __cplusplus +} +#endif + +#endif /*_TD_TSDB_DEF_H_*/ \ No newline at end of file diff --git a/source/dnode/vnode/src/tsdb/tsdbFS.c b/source/dnode/vnode/src/tsdb/tsdbFS.c index 41fdd05741..ec116c717e 100644 --- a/source/dnode/vnode/src/tsdb/tsdbFS.c +++ b/source/dnode/vnode/src/tsdb/tsdbFS.c @@ -181,10 +181,10 @@ static int32_t tsdbScanAndTryFixFS(STsdb *pTsdb) { TSDB_CHECK_CODE(code, lino, _exit); } - if (size != tsdbLogicToFileSize(pTsdb->fs.pDelFile->size, pTsdb->pVnode->config.tsdbPageSize)) { - code = TSDB_CODE_FILE_CORRUPTED; - TSDB_CHECK_CODE(code, lino, _exit); - } + // if (size != tsdbLogicToFileSize(pTsdb->fs.pDelFile->size, pTsdb->pVnode->config.tsdbPageSize)) { + // code = 
TSDB_CODE_FILE_CORRUPTED; + // TSDB_CHECK_CODE(code, lino, _exit); + // } } // SArray @@ -199,10 +199,10 @@ static int32_t tsdbScanAndTryFixFS(STsdb *pTsdb) { code = TAOS_SYSTEM_ERROR(errno); TSDB_CHECK_CODE(code, lino, _exit); } - if (size != tsdbLogicToFileSize(pSet->pHeadF->size, pTsdb->pVnode->config.tsdbPageSize)) { - code = TSDB_CODE_FILE_CORRUPTED; - TSDB_CHECK_CODE(code, lino, _exit); - } + // if (size != tsdbLogicToFileSize(pSet->pHeadF->size, pTsdb->pVnode->config.tsdbPageSize)) { + // code = TSDB_CODE_FILE_CORRUPTED; + // TSDB_CHECK_CODE(code, lino, _exit); + // } // data ========= tsdbDataFileName(pTsdb, pSet->diskId, pSet->fid, pSet->pDataF, fname); @@ -210,10 +210,10 @@ static int32_t tsdbScanAndTryFixFS(STsdb *pTsdb) { code = TAOS_SYSTEM_ERROR(errno); TSDB_CHECK_CODE(code, lino, _exit); } - if (size < tsdbLogicToFileSize(pSet->pDataF->size, pTsdb->pVnode->config.tsdbPageSize)) { - code = TSDB_CODE_FILE_CORRUPTED; - TSDB_CHECK_CODE(code, lino, _exit); - } + // if (size < tsdbLogicToFileSize(pSet->pDataF->size, pTsdb->pVnode->config.tsdbPageSize)) { + // code = TSDB_CODE_FILE_CORRUPTED; + // TSDB_CHECK_CODE(code, lino, _exit); + // } // else if (size > tsdbLogicToFileSize(pSet->pDataF->size, pTsdb->pVnode->config.tsdbPageSize)) { // code = tsdbDFileRollback(pTsdb, pSet, TSDB_DATA_FILE); // TSDB_CHECK_CODE(code, lino, _exit); @@ -225,10 +225,10 @@ static int32_t tsdbScanAndTryFixFS(STsdb *pTsdb) { code = TAOS_SYSTEM_ERROR(errno); TSDB_CHECK_CODE(code, lino, _exit); } - if (size < tsdbLogicToFileSize(pSet->pSmaF->size, pTsdb->pVnode->config.tsdbPageSize)) { - code = TSDB_CODE_FILE_CORRUPTED; - TSDB_CHECK_CODE(code, lino, _exit); - } + // if (size < tsdbLogicToFileSize(pSet->pSmaF->size, pTsdb->pVnode->config.tsdbPageSize)) { + // code = TSDB_CODE_FILE_CORRUPTED; + // TSDB_CHECK_CODE(code, lino, _exit); + // } // else if (size > tsdbLogicToFileSize(pSet->pSmaF->size, pTsdb->pVnode->config.tsdbPageSize)) { // code = tsdbDFileRollback(pTsdb, pSet, 
TSDB_SMA_FILE); // TSDB_CHECK_CODE(code, lino, _exit); @@ -241,10 +241,10 @@ static int32_t tsdbScanAndTryFixFS(STsdb *pTsdb) { code = TAOS_SYSTEM_ERROR(errno); TSDB_CHECK_CODE(code, lino, _exit); } - if (size != tsdbLogicToFileSize(pSet->aSttF[iStt]->size, pTsdb->pVnode->config.tsdbPageSize)) { - code = TSDB_CODE_FILE_CORRUPTED; - TSDB_CHECK_CODE(code, lino, _exit); - } + // if (size != tsdbLogicToFileSize(pSet->aSttF[iStt]->size, pTsdb->pVnode->config.tsdbPageSize)) { + // code = TSDB_CODE_FILE_CORRUPTED; + // TSDB_CHECK_CODE(code, lino, _exit); + // } } } @@ -270,7 +270,7 @@ int32_t tDFileSetCmprFn(const void *p1, const void *p2) { return 0; } -static void tsdbGetCurrentFName(STsdb *pTsdb, char *current, char *current_t) { +void tsdbGetCurrentFName(STsdb *pTsdb, char *current, char *current_t) { SVnode *pVnode = pTsdb->pVnode; int32_t offset = 0; @@ -289,7 +289,7 @@ static void tsdbGetCurrentFName(STsdb *pTsdb, char *current, char *current_t) { } } -static int32_t tsdbLoadFSFromFile(const char *fname, STsdbFS *pFS) { +static int32_t load_fs(const char *fname, STsdbFS *pFS) { int32_t code = 0; int32_t lino = 0; uint8_t *pData = NULL; @@ -666,7 +666,7 @@ static int32_t tsdbFSApplyChange(STsdb *pTsdb, STsdbFS *pFS) { taosArrayRemove(pTsdb->fs.aDFileSet, iOld); } else { code = tsdbNewFileSet(pTsdb, &fSet, pSetNew); - TSDB_CHECK_CODE(code, lino, _exit) + TSDB_CHECK_CODE(code, lino, _exit); if (taosArrayInsert(pTsdb->fs.aDFileSet, iOld, &fSet) == NULL) { code = TSDB_CODE_OUT_OF_MEMORY; @@ -682,7 +682,7 @@ static int32_t tsdbFSApplyChange(STsdb *pTsdb, STsdbFS *pFS) { taosArrayRemove(pTsdb->fs.aDFileSet, iOld); } else { code = tsdbNewFileSet(pTsdb, &fSet, pSetNew); - TSDB_CHECK_CODE(code, lino, _exit) + TSDB_CHECK_CODE(code, lino, _exit); if (taosArrayInsert(pTsdb->fs.aDFileSet, iOld, &fSet) == NULL) { code = TSDB_CODE_OUT_OF_MEMORY; @@ -723,7 +723,7 @@ int32_t tsdbFSCommit(STsdb *pTsdb) { code = tsdbFSCreate(&fs); TSDB_CHECK_CODE(code, lino, _exit); - code = 
tsdbLoadFSFromFile(current, &fs); + code = load_fs(current, &fs); TSDB_CHECK_CODE(code, lino, _exit); // apply file change @@ -768,7 +768,7 @@ int32_t tsdbFSOpen(STsdb *pTsdb, int8_t rollback) { tsdbGetCurrentFName(pTsdb, current, current_t); if (taosCheckExistFile(current)) { - code = tsdbLoadFSFromFile(current, &pTsdb->fs); + code = load_fs(current, &pTsdb->fs); TSDB_CHECK_CODE(code, lino, _exit); if (taosCheckExistFile(current_t)) { diff --git a/source/dnode/vnode/src/tsdb/tsdbFS2.c b/source/dnode/vnode/src/tsdb/tsdbFS2.c new file mode 100644 index 0000000000..1a0104945f --- /dev/null +++ b/source/dnode/vnode/src/tsdb/tsdbFS2.c @@ -0,0 +1,885 @@ +/* + * Copyright (c) 2019 TAOS Data, Inc. + * + * This program is free software: you can use, redistribute, and/or modify + * it under the terms of the GNU Affero General Public License, version 3 + * or later ("AGPL"), as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see . 
+ */ + +#include "tsdbFS2.h" +#include "tsdbUpgrade.h" +#include "vnd.h" + +extern int vnodeScheduleTask(int (*execute)(void *), void *arg); +extern int vnodeScheduleTaskEx(int tpid, int (*execute)(void *), void *arg); + +#define TSDB_FS_EDIT_MIN TSDB_FEDIT_COMMIT +#define TSDB_FS_EDIT_MAX (TSDB_FEDIT_MERGE + 1) + +enum { + TSDB_FS_STATE_NONE = 0, + TSDB_FS_STATE_OPEN, + TSDB_FS_STATE_EDIT, + TSDB_FS_STATE_CLOSE, +}; + +static const char *gCurrentFname[] = { + [TSDB_FCURRENT] = "current.json", + [TSDB_FCURRENT_C] = "current.c.json", + [TSDB_FCURRENT_M] = "current.m.json", +}; + +static int32_t create_fs(STsdb *pTsdb, STFileSystem **fs) { + fs[0] = taosMemoryCalloc(1, sizeof(*fs[0])); + if (fs[0] == NULL) return TSDB_CODE_OUT_OF_MEMORY; + + fs[0]->tsdb = pTsdb; + tsem_init(&fs[0]->canEdit, 0, 1); + fs[0]->state = TSDB_FS_STATE_NONE; + fs[0]->neid = 0; + TARRAY2_INIT(fs[0]->fSetArr); + TARRAY2_INIT(fs[0]->fSetArrTmp); + + // background task queue + taosThreadMutexInit(fs[0]->mutex, NULL); + fs[0]->bgTaskQueue->next = fs[0]->bgTaskQueue; + fs[0]->bgTaskQueue->prev = fs[0]->bgTaskQueue; + + return 0; +} + +static int32_t destroy_fs(STFileSystem **fs) { + if (fs[0] == NULL) return 0; + taosThreadMutexDestroy(fs[0]->mutex); + + ASSERT(fs[0]->bgTaskNum == 0); + + TARRAY2_DESTROY(fs[0]->fSetArr, NULL); + TARRAY2_DESTROY(fs[0]->fSetArrTmp, NULL); + tsem_destroy(&fs[0]->canEdit); + taosMemoryFree(fs[0]); + fs[0] = NULL; + return 0; +} + +int32_t current_fname(STsdb *pTsdb, char *fname, EFCurrentT ftype) { + int32_t offset = 0; + + vnodeGetPrimaryDir(pTsdb->path, pTsdb->pVnode->diskPrimary, pTsdb->pVnode->pTfs, fname, TSDB_FILENAME_LEN); + offset = strlen(fname); + snprintf(fname + offset, TSDB_FILENAME_LEN - offset - 1, "%s%s", TD_DIRSEP, gCurrentFname[ftype]); + + return 0; +} + +static int32_t save_json(const cJSON *json, const char *fname) { + int32_t code = 0; + + char *data = cJSON_PrintUnformatted(json); + if (data == NULL) return TSDB_CODE_OUT_OF_MEMORY; + + TdFilePtr 
fp = taosOpenFile(fname, TD_FILE_WRITE | TD_FILE_CREATE | TD_FILE_TRUNC); + if (fp == NULL) { + code = TAOS_SYSTEM_ERROR(code); + goto _exit; + } + + if (taosWriteFile(fp, data, strlen(data)) < 0) { + code = TAOS_SYSTEM_ERROR(code); + goto _exit; + } + + if (taosFsyncFile(fp) < 0) { + code = TAOS_SYSTEM_ERROR(code); + goto _exit; + } + + taosCloseFile(&fp); + +_exit: + taosMemoryFree(data); + return code; +} + +static int32_t load_json(const char *fname, cJSON **json) { + int32_t code = 0; + char *data = NULL; + + TdFilePtr fp = taosOpenFile(fname, TD_FILE_READ); + if (fp == NULL) return TAOS_SYSTEM_ERROR(code); + + int64_t size; + if (taosFStatFile(fp, &size, NULL) < 0) { + code = TAOS_SYSTEM_ERROR(code); + goto _exit; + } + + data = taosMemoryMalloc(size + 1); + if (data == NULL) { + code = TSDB_CODE_OUT_OF_MEMORY; + goto _exit; + } + + if (taosReadFile(fp, data, size) < 0) { + code = TAOS_SYSTEM_ERROR(code); + goto _exit; + } + data[size] = '\0'; + + json[0] = cJSON_Parse(data); + if (json[0] == NULL) { + code = TSDB_CODE_FILE_CORRUPTED; + goto _exit; + } + +_exit: + taosCloseFile(&fp); + if (data) taosMemoryFree(data); + if (code) json[0] = NULL; + return code; +} + +int32_t save_fs(const TFileSetArray *arr, const char *fname) { + int32_t code = 0; + int32_t lino = 0; + + cJSON *json = cJSON_CreateObject(); + if (!json) return TSDB_CODE_OUT_OF_MEMORY; + + // fmtv + if (cJSON_AddNumberToObject(json, "fmtv", 1) == NULL) { + code = TSDB_CODE_OUT_OF_MEMORY; + TSDB_CHECK_CODE(code, lino, _exit); + } + + // fset + cJSON *ajson = cJSON_AddArrayToObject(json, "fset"); + if (!ajson) TSDB_CHECK_CODE(code = TSDB_CODE_OUT_OF_MEMORY, lino, _exit); + const STFileSet *fset; + TARRAY2_FOREACH(arr, fset) { + cJSON *item = cJSON_CreateObject(); + if (!item) TSDB_CHECK_CODE(code = TSDB_CODE_OUT_OF_MEMORY, lino, _exit); + cJSON_AddItemToArray(ajson, item); + + code = tsdbTFileSetToJson(fset, item); + TSDB_CHECK_CODE(code, lino, _exit); + } + + code = save_json(json, fname); + 
TSDB_CHECK_CODE(code, lino, _exit); + +_exit: + if (code) { + tsdbError("%s failed at line %d since %s", __func__, lino, tstrerror(code)); + } + cJSON_Delete(json); + return code; +} + +static int32_t load_fs(STsdb *pTsdb, const char *fname, TFileSetArray *arr) { + int32_t code = 0; + int32_t lino = 0; + + TARRAY2_CLEAR(arr, tsdbTFileSetClear); + + // load json + cJSON *json = NULL; + code = load_json(fname, &json); + TSDB_CHECK_CODE(code, lino, _exit); + + // parse json + const cJSON *item1; + + /* fmtv */ + item1 = cJSON_GetObjectItem(json, "fmtv"); + if (cJSON_IsNumber(item1)) { + ASSERT(item1->valuedouble == 1); + } else { + TSDB_CHECK_CODE(code = TSDB_CODE_FILE_CORRUPTED, lino, _exit); + } + + /* fset */ + item1 = cJSON_GetObjectItem(json, "fset"); + if (cJSON_IsArray(item1)) { + const cJSON *item2; + cJSON_ArrayForEach(item2, item1) { + STFileSet *fset; + code = tsdbJsonToTFileSet(pTsdb, item2, &fset); + TSDB_CHECK_CODE(code, lino, _exit); + + code = TARRAY2_APPEND(arr, fset); + TSDB_CHECK_CODE(code, lino, _exit); + } + } else { + TSDB_CHECK_CODE(code = TSDB_CODE_FILE_CORRUPTED, lino, _exit); + } + +_exit: + if (code) { + tsdbError("%s failed at line %d since %s, fname:%s", __func__, lino, tstrerror(code), fname); + } + if (json) cJSON_Delete(json); + return code; +} + +static bool is_same_file(const STFile *f1, const STFile f2) { + if (f1->type != f2.type) return false; + if (f1->did.level != f2.did.level) return false; + if (f1->did.id != f2.did.id) return false; + if (f1->cid != f2.cid) return false; + return true; +} + +static int32_t apply_commit(STFileSystem *fs) { + int32_t code = 0; + TFileSetArray *fsetArray1 = fs->fSetArr; + TFileSetArray *fsetArray2 = fs->fSetArrTmp; + int32_t i1 = 0, i2 = 0; + + while (i1 < TARRAY2_SIZE(fsetArray1) || i2 < TARRAY2_SIZE(fsetArray2)) { + STFileSet *fset1 = i1 < TARRAY2_SIZE(fsetArray1) ? TARRAY2_GET(fsetArray1, i1) : NULL; + STFileSet *fset2 = i2 < TARRAY2_SIZE(fsetArray2) ? 
TARRAY2_GET(fsetArray2, i2) : NULL; + + if (fset1 && fset2) { + if (fset1->fid < fset2->fid) { + // delete fset1 + TARRAY2_REMOVE(fsetArray1, i1, tsdbTFileSetRemove); + } else if (fset1->fid > fset2->fid) { + // create new file set with fid of fset2->fid + code = tsdbTFileSetInitDup(fs->tsdb, fset2, &fset1); + if (code) return code; + code = TARRAY2_SORT_INSERT(fsetArray1, fset1, tsdbTFileSetCmprFn); + if (code) return code; + i1++; + i2++; + } else { + // edit + code = tsdbTFileSetApplyEdit(fs->tsdb, fset2, fset1); + if (code) return code; + i1++; + i2++; + } + } else if (fset1) { + // delete fset1 + TARRAY2_REMOVE(fsetArray1, i1, tsdbTFileSetRemove); + } else { + // create new file set with fid of fset2->fid + code = tsdbTFileSetInitDup(fs->tsdb, fset2, &fset1); + if (code) return code; + code = TARRAY2_SORT_INSERT(fsetArray1, fset1, tsdbTFileSetCmprFn); + if (code) return code; + i1++; + i2++; + } + } + + return 0; +} + +static int32_t commit_edit(STFileSystem *fs) { + char current[TSDB_FILENAME_LEN]; + char current_t[TSDB_FILENAME_LEN]; + + current_fname(fs->tsdb, current, TSDB_FCURRENT); + if (fs->etype == TSDB_FEDIT_COMMIT) { + current_fname(fs->tsdb, current_t, TSDB_FCURRENT_C); + } else if (fs->etype == TSDB_FEDIT_MERGE) { + current_fname(fs->tsdb, current_t, TSDB_FCURRENT_M); + } else { + ASSERT(0); + } + + int32_t code; + int32_t lino; + if ((code = taosRenameFile(current_t, current))) { + TSDB_CHECK_CODE(code = TAOS_SYSTEM_ERROR(code), lino, _exit); + } + + code = apply_commit(fs); + TSDB_CHECK_CODE(code, lino, _exit); + +_exit: + if (code) { + tsdbError("vgId:%d %s failed at line %d since %s", TD_VID(fs->tsdb->pVnode), __func__, lino, tstrerror(code)); + } else { + tsdbInfo("vgId:%d %s success, etype:%d", TD_VID(fs->tsdb->pVnode), __func__, fs->etype); + } + return code; +} + +// static int32_t +static int32_t apply_abort(STFileSystem *fs) { + // TODO + return 0; +} + +static int32_t abort_edit(STFileSystem *fs) { + char fname[TSDB_FILENAME_LEN]; + + if 
(fs->etype == TSDB_FEDIT_COMMIT) { + current_fname(fs->tsdb, fname, TSDB_FCURRENT_C); + } else if (fs->etype == TSDB_FEDIT_MERGE) { + current_fname(fs->tsdb, fname, TSDB_FCURRENT_M); + } else { + ASSERT(0); + } + + int32_t code; + int32_t lino; + if ((code = taosRemoveFile(fname))) { + TSDB_CHECK_CODE(code = TAOS_SYSTEM_ERROR(code), lino, _exit); + } + + code = apply_abort(fs); + TSDB_CHECK_CODE(code, lino, _exit); + +_exit: + if (code) { + tsdbError("vgId:%d %s failed since %s", TD_VID(fs->tsdb->pVnode), __func__, tstrerror(code)); + } else { + tsdbInfo("vgId:%d %s success, etype:%d", TD_VID(fs->tsdb->pVnode), __func__, fs->etype); + } + return code; +} + +static int32_t tsdbFSScanAndFix(STFileSystem *fs) { + fs->neid = 0; + + // get max commit id + const STFileSet *fset; + TARRAY2_FOREACH(fs->fSetArr, fset) { fs->neid = TMAX(fs->neid, tsdbTFileSetMaxCid(fset)); } + + // TODO + return 0; +} + +static int32_t tsdbFSDupState(STFileSystem *fs) { + int32_t code; + + const TFileSetArray *src = fs->fSetArr; + TFileSetArray *dst = fs->fSetArrTmp; + + TARRAY2_CLEAR(dst, tsdbTFileSetClear); + + const STFileSet *fset1; + TARRAY2_FOREACH(src, fset1) { + STFileSet *fset2; + code = tsdbTFileSetInitDup(fs->tsdb, fset1, &fset2); + if (code) return code; + code = TARRAY2_APPEND(dst, fset2); + if (code) return code; + } + + return 0; +} + +static int32_t open_fs(STFileSystem *fs, int8_t rollback) { + int32_t code = 0; + int32_t lino = 0; + STsdb *pTsdb = fs->tsdb; + + char fCurrent[TSDB_FILENAME_LEN]; + char cCurrent[TSDB_FILENAME_LEN]; + char mCurrent[TSDB_FILENAME_LEN]; + + current_fname(pTsdb, fCurrent, TSDB_FCURRENT); + current_fname(pTsdb, cCurrent, TSDB_FCURRENT_C); + current_fname(pTsdb, mCurrent, TSDB_FCURRENT_M); + + if (taosCheckExistFile(fCurrent)) { // current.json exists + code = load_fs(pTsdb, fCurrent, fs->fSetArr); + TSDB_CHECK_CODE(code, lino, _exit); + + if (taosCheckExistFile(cCurrent)) { + // current.c.json exists + + fs->etype = TSDB_FEDIT_COMMIT; + if 
(rollback) { + code = abort_edit(fs); + TSDB_CHECK_CODE(code, lino, _exit); + } else { + code = load_fs(pTsdb, cCurrent, fs->fSetArrTmp); + TSDB_CHECK_CODE(code, lino, _exit); + + code = commit_edit(fs); + TSDB_CHECK_CODE(code, lino, _exit); + } + } else if (taosCheckExistFile(mCurrent)) { + // current.m.json exists + fs->etype = TSDB_FEDIT_MERGE; + code = abort_edit(fs); + TSDB_CHECK_CODE(code, lino, _exit); + } + + code = tsdbFSDupState(fs); + TSDB_CHECK_CODE(code, lino, _exit); + + code = tsdbFSScanAndFix(fs); + TSDB_CHECK_CODE(code, lino, _exit); + } else { + code = save_fs(fs->fSetArr, fCurrent); + TSDB_CHECK_CODE(code, lino, _exit); + } + +_exit: + if (code) { + tsdbError("vgId:%d %s failed at line %d since %s", TD_VID(pTsdb->pVnode), __func__, lino, tstrerror(code)); + } else { + tsdbInfo("vgId:%d %s success", TD_VID(pTsdb->pVnode), __func__); + } + return 0; +} + +static int32_t close_file_system(STFileSystem *fs) { + TARRAY2_CLEAR(fs->fSetArr, tsdbTFileSetClear); + TARRAY2_CLEAR(fs->fSetArrTmp, tsdbTFileSetClear); + // TODO + return 0; +} + +static int32_t apply_edit(STFileSystem *pFS) { + int32_t code = 0; + ASSERTS(0, "TODO: Not implemented yet"); + return code; +} + +static int32_t fset_cmpr_fn(const struct STFileSet *pSet1, const struct STFileSet *pSet2) { + if (pSet1->fid < pSet2->fid) { + return -1; + } else if (pSet1->fid > pSet2->fid) { + return 1; + } + return 0; +} + +static int32_t edit_fs(STFileSystem *fs, const TFileOpArray *opArray) { + int32_t code = 0; + int32_t lino = 0; + + code = tsdbFSDupState(fs); + if (code) return code; + + TFileSetArray *fsetArray = fs->fSetArrTmp; + STFileSet *fset = NULL; + const STFileOp *op; + TARRAY2_FOREACH_PTR(opArray, op) { + if (!fset || fset->fid != op->fid) { + STFileSet tfset = {.fid = op->fid}; + fset = &tfset; + STFileSet **fsetPtr = TARRAY2_SEARCH(fsetArray, &fset, tsdbTFileSetCmprFn, TD_EQ); + fset = (fsetPtr == NULL) ? 
NULL : *fsetPtr;
+
+      if (!fset) {
+        code = tsdbTFileSetInit(op->fid, &fset);
+        TSDB_CHECK_CODE(code, lino, _exit);
+
+        code = TARRAY2_SORT_INSERT(fsetArray, fset, tsdbTFileSetCmprFn);
+        TSDB_CHECK_CODE(code, lino, _exit);
+      }
+    }
+
+    code = tsdbTFileSetEdit(fs->tsdb, fset, op);
+    TSDB_CHECK_CODE(code, lino, _exit);
+  }
+
+  // remove empty file set
+  int32_t i = 0;
+  while (i < TARRAY2_SIZE(fsetArray)) {
+    fset = TARRAY2_GET(fsetArray, i);
+    if (tsdbTFileSetIsEmpty(fset)) {
+      // do not advance i: the next element now sits at the same index
+      TARRAY2_REMOVE(fsetArray, i, tsdbTFileSetClear);
+    } else {
+      i++;
+    }
+  }
+
+_exit:
+  if (code) {
+    TSDB_ERROR_LOG(TD_VID(fs->tsdb->pVnode), lino, code);
+  }
+  return code;
+}
+
+// Upgrade the on-disk layout if needed, then create and open the file system object.
+// On success fs[0] is the new object and 0 is returned.
+// On failure the object is destroyed again and the error code is returned.
+int32_t tsdbOpenFS(STsdb *pTsdb, STFileSystem **fs, int8_t rollback) {
+  int32_t code = 0;
+  int32_t lino = 0;
+
+  code = tsdbCheckAndUpgradeFileSystem(pTsdb, rollback);
+  TSDB_CHECK_CODE(code, lino, _exit);
+
+  code = create_fs(pTsdb, fs);
+  TSDB_CHECK_CODE(code, lino, _exit);
+
+  code = open_fs(fs[0], rollback);
+  TSDB_CHECK_CODE(code, lino, _exit);
+
+_exit:
+  if (code) {
+    tsdbError("vgId:%d %s failed at line %d since %s", TD_VID(pTsdb->pVnode), __func__, lino, tstrerror(code));
+    destroy_fs(fs);
+  } else {
+    tsdbInfo("vgId:%d %s success", TD_VID(pTsdb->pVnode), __func__);
+  }
+  // was `return 0`: the caller saw success even though the fs had just been destroyed
+  return code;
+}
+
+// Block on the task's `done` condition, using fs->mutex as the associated mutex
+// (callers in this file lock it first). The last waiter to wake up destroys the
+// condition and frees the task.
+static void tsdbDoWaitBgTask(STFileSystem *fs, STFSBgTask *task) {
+  task->numWait++;
+  taosThreadCondWait(task->done, fs->mutex);
+  task->numWait--;
+
+  if (task->numWait == 0) {
+    taosThreadCondDestroy(task->done);
+    taosMemoryFree(task);
+  }
+}
+
+// Mark a task as done: wake the registered waiters (who then free it), or free it
+// right away when nobody is waiting.
+static void tsdbDoDoneBgTask(STFileSystem *fs, STFSBgTask *task) {
+  if (task->numWait > 0) {
+    taosThreadCondBroadcast(task->done);
+  } else {
+    taosThreadCondDestroy(task->done);
+    taosMemoryFree(task);
+  }
+}
+
+// Set the stop flag, wait for the running background task (if any), then close and
+// destroy the file system object. A NULL fs[0] is a no-op.
+int32_t tsdbCloseFS(STFileSystem **fs) {
+  if (fs[0] == NULL) return 0;
+
+  taosThreadMutexLock(fs[0]->mutex);
+  fs[0]->stop = true;
+
+  if (fs[0]->bgTaskRunning) {
+    tsdbDoWaitBgTask(fs[0], fs[0]->bgTaskRunning);
+  }
+  taosThreadMutexUnlock(fs[0]->mutex);
+
+ 
close_file_system(fs[0]); + destroy_fs(fs); + return 0; +} + +int64_t tsdbFSAllocEid(STFileSystem *fs) { + taosThreadRwlockRdlock(&fs->tsdb->rwLock); + int64_t cid = ++fs->neid; + taosThreadRwlockUnlock(&fs->tsdb->rwLock); + return cid; +} + +int32_t tsdbFSEditBegin(STFileSystem *fs, const TFileOpArray *opArray, EFEditT etype) { + int32_t code = 0; + int32_t lino; + char current_t[TSDB_FILENAME_LEN]; + + switch (etype) { + case TSDB_FEDIT_COMMIT: + current_fname(fs->tsdb, current_t, TSDB_FCURRENT_C); + break; + case TSDB_FEDIT_MERGE: + current_fname(fs->tsdb, current_t, TSDB_FCURRENT_M); + break; + default: + ASSERT(0); + } + + tsem_wait(&fs->canEdit); + fs->etype = etype; + + // edit + code = edit_fs(fs, opArray); + TSDB_CHECK_CODE(code, lino, _exit); + + // save fs + code = save_fs(fs->fSetArrTmp, current_t); + TSDB_CHECK_CODE(code, lino, _exit); + +_exit: + if (code) { + tsdbError("vgId:%d %s failed at line %d since %s, etype:%d", TD_VID(fs->tsdb->pVnode), __func__, lino, + tstrerror(code), etype); + } else { + tsdbInfo("vgId:%d %s done, etype:%d", TD_VID(fs->tsdb->pVnode), __func__, etype); + } + return code; +} + +int32_t tsdbFSEditCommit(STFileSystem *fs) { + int32_t code = 0; + int32_t lino = 0; + + // commit + code = commit_edit(fs); + TSDB_CHECK_CODE(code, lino, _exit); + + // schedule merge + if (fs->tsdb->pVnode->config.sttTrigger != 1) { + STFileSet *fset; + TARRAY2_FOREACH_REVERSE(fs->fSetArr, fset) { + if (TARRAY2_SIZE(fset->lvlArr) == 0) continue; + + SSttLvl *lvl = TARRAY2_FIRST(fset->lvlArr); + if (lvl->level != 0 || TARRAY2_SIZE(lvl->fobjArr) < fs->tsdb->pVnode->config.sttTrigger) continue; + + code = tsdbFSScheduleBgTask(fs, TSDB_BG_TASK_MERGER, tsdbMerge, fs->tsdb, NULL); + TSDB_CHECK_CODE(code, lino, _exit); + + break; + } + } + +_exit: + if (code) { + TSDB_ERROR_LOG(TD_VID(fs->tsdb->pVnode), lino, code); + } else { + tsdbDebug("vgId:%d %s done, etype:%d", TD_VID(fs->tsdb->pVnode), __func__, fs->etype); + tsem_post(&fs->canEdit); + } + return 
code;
+}
+
+// Abort the in-progress edit and release the edit semaphore, whatever abort_edit returned.
+int32_t tsdbFSEditAbort(STFileSystem *fs) {
+  int32_t code = abort_edit(fs);
+  tsem_post(&fs->canEdit);
+  return code;
+}
+
+// Look up the file set with the given fid in fs->fSetArr.
+// fset[0] receives the match, or NULL when there is none. Always returns 0.
+int32_t tsdbFSGetFSet(STFileSystem *fs, int32_t fid, STFileSet **fset) {
+  STFileSet   tfset = {.fid = fid};  // search key only
+  STFileSet  *pset = &tfset;
+  STFileSet **fsetPtr = TARRAY2_SEARCH(fs->fSetArr, &pset, tsdbTFileSetCmprFn, TD_EQ);
+  fset[0] = (fsetPtr == NULL) ? NULL : fsetPtr[0];
+  return 0;
+}
+
+// Build a newly allocated array holding a duplicate (tsdbTFileSetInitDup) of every file set
+// in fs->fSetArr, taken under the tsdb read lock.
+// On failure everything built so far is released and fsetArr[0] is set to NULL.
+int32_t tsdbFSCreateCopySnapshot(STFileSystem *fs, TFileSetArray **fsetArr) {
+  int32_t    code = 0;
+  STFileSet *fset;
+  STFileSet *fset1;
+
+  fsetArr[0] = taosMemoryMalloc(sizeof(TFileSetArray));
+  // check the allocation result, not the out-parameter itself
+  if (fsetArr[0] == NULL) return TSDB_CODE_OUT_OF_MEMORY;
+
+  TARRAY2_INIT(fsetArr[0]);
+
+  taosThreadRwlockRdlock(&fs->tsdb->rwLock);
+  TARRAY2_FOREACH(fs->fSetArr, fset) {
+    code = tsdbTFileSetInitDup(fs->tsdb, fset, &fset1);
+    if (code) break;
+
+    code = TARRAY2_APPEND(fsetArr[0], fset1);
+    if (code) break;
+  }
+  taosThreadRwlockUnlock(&fs->tsdb->rwLock);
+
+  if (code) {
+    TARRAY2_DESTROY(fsetArr[0], tsdbTFileSetClear);
+    taosMemoryFree(fsetArr[0]);
+    fsetArr[0] = NULL;
+  }
+  return code;
+}
+
+// Release a snapshot created by tsdbFSCreateCopySnapshot. A NULL fsetArr[0] is a no-op.
+int32_t tsdbFSDestroyCopySnapshot(TFileSetArray **fsetArr) {
+  if (fsetArr[0]) {
+    TARRAY2_DESTROY(fsetArr[0], tsdbTFileSetClear);
+    taosMemoryFree(fsetArr[0]);
+    fsetArr[0] = NULL;
+  }
+  return 0;
+}
+
+// Same as the copy snapshot, but each element is produced by tsdbTFileSetInitRef.
+// On failure everything built so far is released and fsetArr[0] is set to NULL.
+int32_t tsdbFSCreateRefSnapshot(STFileSystem *fs, TFileSetArray **fsetArr) {
+  int32_t    code = 0;
+  STFileSet *fset, *fset1;
+
+  fsetArr[0] = taosMemoryCalloc(1, sizeof(*fsetArr[0]));
+  if (fsetArr[0] == NULL) return TSDB_CODE_OUT_OF_MEMORY;
+
+  taosThreadRwlockRdlock(&fs->tsdb->rwLock);
+  TARRAY2_FOREACH(fs->fSetArr, fset) {
+    code = tsdbTFileSetInitRef(fs->tsdb, fset, &fset1);
+    if (code) break;
+
+    code = TARRAY2_APPEND(fsetArr[0], fset1);
+    if (code) break;
+  }
+  taosThreadRwlockUnlock(&fs->tsdb->rwLock);
+
+  if (code) {
+    TARRAY2_DESTROY(fsetArr[0], tsdbTFileSetClear);
+    // free the container too; previously only the pointer was cleared and the allocation leaked
+    taosMemoryFree(fsetArr[0]);
+    fsetArr[0] = NULL;
+  }
+  return code;
+}
+
+int32_t tsdbFSDestroyRefSnapshot(TFileSetArray
**fsetArr) { + if (fsetArr[0]) { + TARRAY2_DESTROY(fsetArr[0], tsdbTFileSetClear); + taosMemoryFreeClear(fsetArr[0]); + fsetArr[0] = NULL; + } + return 0; +} + +const char *gFSBgTaskName[] = {NULL, "MERGE", "RETENTION", "COMPACT"}; + +static int32_t tsdbFSRunBgTask(void *arg) { + STFileSystem *fs = (STFileSystem *)arg; + + ASSERT(fs->bgTaskRunning != NULL); + + fs->bgTaskRunning->launchTime = taosGetTimestampMs(); + fs->bgTaskRunning->run(fs->bgTaskRunning->arg); + fs->bgTaskRunning->finishTime = taosGetTimestampMs(); + + tsdbDebug("vgId:%d bg task:%s task id:%" PRId64 " finished, schedule time:%" PRId64 " launch time:%" PRId64 + " finish time:%" PRId64, + TD_VID(fs->tsdb->pVnode), gFSBgTaskName[fs->bgTaskRunning->type], fs->bgTaskRunning->taskid, + fs->bgTaskRunning->scheduleTime, fs->bgTaskRunning->launchTime, fs->bgTaskRunning->finishTime); + + taosThreadMutexLock(fs->mutex); + + // free last + tsdbDoDoneBgTask(fs, fs->bgTaskRunning); + fs->bgTaskRunning = NULL; + + // schedule next + if (fs->bgTaskNum > 0) { + if (fs->stop) { + while (fs->bgTaskNum > 0) { + STFSBgTask *task = fs->bgTaskQueue->next; + task->prev->next = task->next; + task->next->prev = task->prev; + fs->bgTaskNum--; + tsdbDoDoneBgTask(fs, task); + } + } else { + // pop task from head + fs->bgTaskRunning = fs->bgTaskQueue->next; + fs->bgTaskRunning->prev->next = fs->bgTaskRunning->next; + fs->bgTaskRunning->next->prev = fs->bgTaskRunning->prev; + fs->bgTaskNum--; + vnodeScheduleTaskEx(1, tsdbFSRunBgTask, arg); + } + } + + taosThreadMutexUnlock(fs->mutex); + return 0; +} + +static int32_t tsdbFSScheduleBgTaskImpl(STFileSystem *fs, EFSBgTaskT type, int32_t (*run)(void *), void *arg, + int64_t *taskid) { + if (fs->stop) { + return 0; // TODO: use a better error code + } + + // check if same task is on + // if (fs->bgTaskRunning && fs->bgTaskRunning->type == type) { + // return 0; + // } + + for (STFSBgTask *task = fs->bgTaskQueue->next; task != fs->bgTaskQueue; task = task->next) { + if (task->type 
== type) { + return 0; + } + } + + // do schedule task + STFSBgTask *task = taosMemoryCalloc(1, sizeof(STFSBgTask)); + if (task == NULL) return TSDB_CODE_OUT_OF_MEMORY; + taosThreadCondInit(task->done, NULL); + + task->type = type; + task->run = run; + task->arg = arg; + task->scheduleTime = taosGetTimestampMs(); + task->taskid = ++fs->taskid; + + if (fs->bgTaskRunning == NULL && fs->bgTaskNum == 0) { + // launch task directly + fs->bgTaskRunning = task; + vnodeScheduleTaskEx(1, tsdbFSRunBgTask, fs); + } else { + // add to the queue tail + fs->bgTaskNum++; + task->next = fs->bgTaskQueue; + task->prev = fs->bgTaskQueue->prev; + task->prev->next = task; + task->next->prev = task; + } + + if (taskid) *taskid = task->taskid; + return 0; +} + +int32_t tsdbFSScheduleBgTask(STFileSystem *fs, EFSBgTaskT type, int32_t (*run)(void *), void *arg, int64_t *taskid) { + taosThreadMutexLock(fs->mutex); + int32_t code = tsdbFSScheduleBgTaskImpl(fs, type, run, arg, taskid); + taosThreadMutexUnlock(fs->mutex); + return code; +} + +int32_t tsdbFSWaitBgTask(STFileSystem *fs, int64_t taskid) { + STFSBgTask *task = NULL; + + taosThreadMutexLock(fs->mutex); + + if (fs->bgTaskRunning && fs->bgTaskRunning->taskid == taskid) { + task = fs->bgTaskRunning; + } else { + for (STFSBgTask *taskt = fs->bgTaskQueue->next; taskt != fs->bgTaskQueue; taskt = taskt->next) { + if (taskt->taskid == taskid) { + task = taskt; + break; + } + } + } + + if (task) { + tsdbDoWaitBgTask(fs, task); + } + + taosThreadMutexUnlock(fs->mutex); + return 0; +} + +int32_t tsdbFSWaitAllBgTask(STFileSystem *fs) { + taosThreadMutexLock(fs->mutex); + + while (fs->bgTaskRunning) { + taosThreadCondWait(fs->bgTaskRunning->done, fs->mutex); + } + + taosThreadMutexUnlock(fs->mutex); + return 0; +} + +static int32_t tsdbFSDoDisableBgTask(STFileSystem *fs) { + fs->stop = true; + + if (fs->bgTaskRunning) { + tsdbDoWaitBgTask(fs, fs->bgTaskRunning); + } + return 0; +} + +int32_t tsdbFSDisableBgTask(STFileSystem *fs) { + 
taosThreadMutexLock(fs->mutex); + int32_t code = tsdbFSDoDisableBgTask(fs); + taosThreadMutexUnlock(fs->mutex); + return code; +} + +int32_t tsdbFSEnableBgTask(STFileSystem *fs) { + taosThreadMutexLock(fs->mutex); + fs->stop = false; + taosThreadMutexUnlock(fs->mutex); + return 0; +} diff --git a/source/dnode/vnode/src/tsdb/tsdbFS2.h b/source/dnode/vnode/src/tsdb/tsdbFS2.h new file mode 100644 index 0000000000..8270581e58 --- /dev/null +++ b/source/dnode/vnode/src/tsdb/tsdbFS2.h @@ -0,0 +1,110 @@ +/* + * Copyright (c) 2019 TAOS Data, Inc. + * + * This program is free software: you can use, redistribute, and/or modify + * it under the terms of the GNU Affero General Public License, version 3 + * or later ("AGPL"), as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see . 
// A single background task tracked by STFileSystem.
struct STFSBgTask {
  EFSBgTaskT type;            // task kind; the scheduler refuses to queue a second task of a kind already queued
  int32_t (*run)(void *arg);  // task body, invoked by tsdbFSRunBgTask as run(arg)
  void *arg;                  // opaque argument passed to run()

  TdThreadCond done[1];  // condition variable initialised when the task is scheduled; waiters block on it
  int32_t numWait;       // NOTE(review): presumably the number of threads waiting on `done`; it is maintained
                         // outside the code visible next to this header - confirm

  int64_t taskid;        // id taken from ++STFileSystem.taskid when the task is scheduled
  int64_t scheduleTime;  // taosGetTimestampMs() value when the task was scheduled
  int64_t launchTime;    // taosGetTimestampMs() value just before run() is called
  int64_t finishTime;    // taosGetTimestampMs() value just after run() returns

  // links in the doubly-linked queue anchored at STFileSystem.bgTaskQueue
  struct STFSBgTask *prev;
  struct STFSBgTask *next;
};

/* Exposed Structs */
struct STFileSystem {
  STsdb *tsdb;  // STsdb instance; the background-task code uses it to obtain the vnode id for logging
  // NOTE(review): the next six fields belong to the edit/snapshot side (tsdbFSEdit*, tsdbFSAllocEid, snapshots);
  // their exact semantics are not established by the declarations here - confirm in tsdbFS2.c
  tsem_t canEdit;
  int32_t state;
  int64_t neid;
  EFEditT etype;
  TFileSetArray fSetArr[1];
  TFileSetArray fSetArrTmp[1];

  // background task queue
  TdThreadMutex mutex[1];      // held by the scheduling/wait/enable/disable entry points while touching the fields below
  bool stop;                   // while true, new tasks are refused and queued tasks are retired without running
  int64_t taskid;              // last task id handed out
  int32_t bgTaskNum;           // number of queued tasks (the running one is not counted)
  STFSBgTask bgTaskQueue[1];   // sentinel node of the task queue: ->next is the head, ->prev the tail
  STFSBgTask *bgTaskRunning;   // task currently submitted for execution, or NULL
};
+ */ + +#include "tsdbFSet2.h" + +int32_t tsdbSttLvlInit(int32_t level, SSttLvl **lvl) { + if (!(lvl[0] = taosMemoryMalloc(sizeof(SSttLvl)))) return TSDB_CODE_OUT_OF_MEMORY; + lvl[0]->level = level; + TARRAY2_INIT(lvl[0]->fobjArr); + return 0; +} + +static void tsdbSttLvlClearFObj(void *data) { tsdbTFileObjUnref(*(STFileObj **)data); } + +int32_t tsdbSttLvlClear(SSttLvl **lvl) { + if (lvl[0] != NULL) { + TARRAY2_DESTROY(lvl[0]->fobjArr, tsdbSttLvlClearFObj); + taosMemoryFree(lvl[0]); + lvl[0] = NULL; + } + return 0; +} + +static int32_t tsdbSttLvlInitEx(STsdb *pTsdb, const SSttLvl *lvl1, SSttLvl **lvl) { + int32_t code = tsdbSttLvlInit(lvl1->level, lvl); + if (code) return code; + + const STFileObj *fobj1; + TARRAY2_FOREACH(lvl1->fobjArr, fobj1) { + STFileObj *fobj; + code = tsdbTFileObjInit(pTsdb, fobj1->f, &fobj); + if (code) { + tsdbSttLvlClear(lvl); + return code; + } + + TARRAY2_APPEND(lvl[0]->fobjArr, fobj); + } + return 0; +} + +static int32_t tsdbSttLvlInitRef(STsdb *pTsdb, const SSttLvl *lvl1, SSttLvl **lvl) { + int32_t code = tsdbSttLvlInit(lvl1->level, lvl); + if (code) return code; + + STFileObj *fobj1; + TARRAY2_FOREACH(lvl1->fobjArr, fobj1) { + tsdbTFileObjRef(fobj1); + code = TARRAY2_APPEND(lvl[0]->fobjArr, fobj1); + if (code) return code; + } + return 0; +} + +static void tsdbSttLvlRemoveFObj(void *data) { tsdbTFileObjRemove(*(STFileObj **)data); } +static void tsdbSttLvlRemove(SSttLvl **lvl) { + TARRAY2_DESTROY(lvl[0]->fobjArr, tsdbSttLvlRemoveFObj); + taosMemoryFree(lvl[0]); + lvl[0] = NULL; +} + +static int32_t tsdbSttLvlApplyEdit(STsdb *pTsdb, const SSttLvl *lvl1, SSttLvl *lvl2) { + int32_t code = 0; + + ASSERT(lvl1->level == lvl2->level); + + int32_t i1 = 0, i2 = 0; + while (i1 < TARRAY2_SIZE(lvl1->fobjArr) || i2 < TARRAY2_SIZE(lvl2->fobjArr)) { + STFileObj *fobj1 = i1 < TARRAY2_SIZE(lvl1->fobjArr) ? TARRAY2_GET(lvl1->fobjArr, i1) : NULL; + STFileObj *fobj2 = i2 < TARRAY2_SIZE(lvl2->fobjArr) ? 
TARRAY2_GET(lvl2->fobjArr, i2) : NULL; + + if (fobj1 && fobj2) { + if (fobj1->f->cid < fobj2->f->cid) { + // create a file obj + code = tsdbTFileObjInit(pTsdb, fobj1->f, &fobj2); + if (code) return code; + code = TARRAY2_APPEND(lvl2->fobjArr, fobj2); + if (code) return code; + i1++; + i2++; + } else if (fobj1->f->cid > fobj2->f->cid) { + // remove a file obj + TARRAY2_REMOVE(lvl2->fobjArr, i2, tsdbSttLvlRemoveFObj); + } else { + if (tsdbIsSameTFile(fobj1->f, fobj2->f)) { + if (tsdbIsTFileChanged(fobj1->f, fobj2->f)) { + fobj2->f[0] = fobj1->f[0]; + } + } else { + TARRAY2_REMOVE(lvl2->fobjArr, i2, tsdbSttLvlRemoveFObj); + code = tsdbTFileObjInit(pTsdb, fobj1->f, &fobj2); + if (code) return code; + code = TARRAY2_SORT_INSERT(lvl2->fobjArr, fobj2, tsdbTFileObjCmpr); + if (code) return code; + } + i1++; + i2++; + } + } else if (fobj1) { + // create a file obj + code = tsdbTFileObjInit(pTsdb, fobj1->f, &fobj2); + if (code) return code; + code = TARRAY2_APPEND(lvl2->fobjArr, fobj2); + if (code) return code; + i1++; + i2++; + } else { + // remove a file obj + TARRAY2_REMOVE(lvl2->fobjArr, i2, tsdbSttLvlRemoveFObj); + } + } + return 0; +} + +static int32_t tsdbSttLvlCmprFn(const SSttLvl **lvl1, const SSttLvl **lvl2) { + if (lvl1[0]->level < lvl2[0]->level) return -1; + if (lvl1[0]->level > lvl2[0]->level) return 1; + return 0; +} + +static int32_t tsdbSttLvlToJson(const SSttLvl *lvl, cJSON *json) { + if (cJSON_AddNumberToObject(json, "level", lvl->level) == NULL) { + return TSDB_CODE_OUT_OF_MEMORY; + } + + cJSON *ajson = cJSON_AddArrayToObject(json, "files"); + if (ajson == NULL) return TSDB_CODE_OUT_OF_MEMORY; + const STFileObj *fobj; + TARRAY2_FOREACH(lvl->fobjArr, fobj) { + cJSON *item = cJSON_CreateObject(); + if (item == NULL) return TSDB_CODE_OUT_OF_MEMORY; + cJSON_AddItemToArray(ajson, item); + + int32_t code = tsdbTFileToJson(fobj->f, item); + if (code) return code; + } + + return 0; +} + +static int32_t tsdbJsonToSttLvl(STsdb *pTsdb, const cJSON *json, SSttLvl 
**lvl) { + const cJSON *item1, *item2; + int32_t level; + + item1 = cJSON_GetObjectItem(json, "level"); + if (cJSON_IsNumber(item1)) { + level = item1->valuedouble; + } else { + return TSDB_CODE_FILE_CORRUPTED; + } + + int32_t code = tsdbSttLvlInit(level, lvl); + if (code) return code; + + item1 = cJSON_GetObjectItem(json, "files"); + if (!cJSON_IsArray(item1)) { + tsdbSttLvlClear(lvl); + return TSDB_CODE_FILE_CORRUPTED; + } + + cJSON_ArrayForEach(item2, item1) { + STFile tf; + code = tsdbJsonToTFile(item2, TSDB_FTYPE_STT, &tf); + if (code) { + tsdbSttLvlClear(lvl); + return code; + } + + STFileObj *fobj; + code = tsdbTFileObjInit(pTsdb, &tf, &fobj); + if (code) { + tsdbSttLvlClear(lvl); + return code; + } + + TARRAY2_APPEND(lvl[0]->fobjArr, fobj); + } + return 0; +} + +int32_t tsdbTFileSetToJson(const STFileSet *fset, cJSON *json) { + int32_t code = 0; + cJSON *item1, *item2; + + // fid + if (cJSON_AddNumberToObject(json, "fid", fset->fid) == NULL) { + return TSDB_CODE_OUT_OF_MEMORY; + } + + for (int32_t ftype = TSDB_FTYPE_MIN; ftype < TSDB_FTYPE_MAX; ++ftype) { + if (fset->farr[ftype] == NULL) continue; + + code = tsdbTFileToJson(fset->farr[ftype]->f, json); + if (code) return code; + } + + // each level + item1 = cJSON_AddArrayToObject(json, "stt lvl"); + if (item1 == NULL) return TSDB_CODE_OUT_OF_MEMORY; + const SSttLvl *lvl; + TARRAY2_FOREACH(fset->lvlArr, lvl) { + item2 = cJSON_CreateObject(); + if (!item2) return TSDB_CODE_OUT_OF_MEMORY; + cJSON_AddItemToArray(item1, item2); + + code = tsdbSttLvlToJson(lvl, item2); + if (code) return code; + } + + return 0; +} + +int32_t tsdbJsonToTFileSet(STsdb *pTsdb, const cJSON *json, STFileSet **fset) { + int32_t code; + const cJSON *item1, *item2; + int32_t fid; + STFile tf; + + // fid + item1 = cJSON_GetObjectItem(json, "fid"); + if (cJSON_IsNumber(item1)) { + fid = item1->valuedouble; + } else { + return TSDB_CODE_FILE_CORRUPTED; + } + + code = tsdbTFileSetInit(fid, fset); + if (code) return code; + + for 
(tsdb_ftype_t ftype = TSDB_FTYPE_MIN; ftype < TSDB_FTYPE_MAX; ++ftype) { + code = tsdbJsonToTFile(json, ftype, &tf); + if (code == TSDB_CODE_NOT_FOUND) { + continue; + } else if (code) { + tsdbTFileSetClear(fset); + return code; + } else { + code = tsdbTFileObjInit(pTsdb, &tf, &(*fset)->farr[ftype]); + if (code) return code; + } + } + + // each level + item1 = cJSON_GetObjectItem(json, "stt lvl"); + if (cJSON_IsArray(item1)) { + cJSON_ArrayForEach(item2, item1) { + SSttLvl *lvl; + code = tsdbJsonToSttLvl(pTsdb, item2, &lvl); + if (code) { + tsdbTFileSetClear(fset); + return code; + } + + TARRAY2_APPEND((*fset)->lvlArr, lvl); + } + } else { + return TSDB_CODE_FILE_CORRUPTED; + } + + return 0; +} + +// NOTE: the api does not remove file, only do memory operation +int32_t tsdbTFileSetEdit(STsdb *pTsdb, STFileSet *fset, const STFileOp *op) { + int32_t code = 0; + + if (op->optype == TSDB_FOP_CREATE) { + // create a new file + STFileObj *fobj; + code = tsdbTFileObjInit(pTsdb, &op->nf, &fobj); + if (code) return code; + + if (fobj->f->type == TSDB_FTYPE_STT) { + SSttLvl *lvl = tsdbTFileSetGetSttLvl(fset, fobj->f->stt->level); + if (!lvl) { + code = tsdbSttLvlInit(fobj->f->stt->level, &lvl); + if (code) return code; + + code = TARRAY2_SORT_INSERT(fset->lvlArr, lvl, tsdbSttLvlCmprFn); + if (code) return code; + } + + code = TARRAY2_SORT_INSERT(lvl->fobjArr, fobj, tsdbTFileObjCmpr); + if (code) return code; + } else { + ASSERT(fset->farr[fobj->f->type] == NULL); + fset->farr[fobj->f->type] = fobj; + } + } else if (op->optype == TSDB_FOP_REMOVE) { + // delete a file + if (op->of.type == TSDB_FTYPE_STT) { + SSttLvl *lvl = tsdbTFileSetGetSttLvl(fset, op->of.stt->level); + ASSERT(lvl); + + STFileObj tfobj = {.f[0] = {.cid = op->of.cid}}; + STFileObj *tfobjp = &tfobj; + int32_t idx = TARRAY2_SEARCH_IDX(lvl->fobjArr, &tfobjp, tsdbTFileObjCmpr, TD_EQ); + ASSERT(idx >= 0); + TARRAY2_REMOVE(lvl->fobjArr, idx, tsdbSttLvlClearFObj); + + if (TARRAY2_SIZE(lvl->fobjArr) == 0) { + // 
TODO: remove the stt level if no file exists anymore + // TARRAY2_REMOVE(&fset->lvlArr, lvl - fset->lvlArr.data, tsdbSttLvlClear); + } + } else { + ASSERT(tsdbIsSameTFile(&op->of, fset->farr[op->of.type]->f)); + tsdbTFileObjUnref(fset->farr[op->of.type]); + fset->farr[op->of.type] = NULL; + } + } else { + if (op->nf.type == TSDB_FTYPE_STT) { + SSttLvl *lvl = tsdbTFileSetGetSttLvl(fset, op->of.stt->level); + ASSERT(lvl); + + STFileObj tfobj = {.f[0] = {.cid = op->of.cid}}, *tfobjp = &tfobj; + STFileObj **fobjPtr = TARRAY2_SEARCH(lvl->fobjArr, &tfobjp, tsdbTFileObjCmpr, TD_EQ); + tfobjp = (fobjPtr ? *fobjPtr : NULL); + + ASSERT(tfobjp); + + tfobjp->f[0] = op->nf; + } else { + fset->farr[op->nf.type]->f[0] = op->nf; + } + } + + return 0; +} + +int32_t tsdbTFileSetApplyEdit(STsdb *pTsdb, const STFileSet *fset1, STFileSet *fset2) { + int32_t code = 0; + + ASSERT(fset1->fid == fset2->fid); + + for (tsdb_ftype_t ftype = TSDB_FTYPE_MIN; ftype < TSDB_FTYPE_MAX; ++ftype) { + if (!fset1->farr[ftype] && !fset2->farr[ftype]) continue; + + STFileObj *fobj1 = fset1->farr[ftype]; + STFileObj *fobj2 = fset2->farr[ftype]; + + if (fobj1 && fobj2) { + if (tsdbIsSameTFile(fobj1->f, fobj2->f)) { + if (tsdbIsTFileChanged(fobj1->f, fobj2->f)) { + fobj2->f[0] = fobj1->f[0]; + } + } else { + tsdbTFileObjRemove(fobj2); + code = tsdbTFileObjInit(pTsdb, fobj1->f, &fset2->farr[ftype]); + if (code) return code; + } + } else if (fobj1) { + // create a new file + code = tsdbTFileObjInit(pTsdb, fobj1->f, &fset2->farr[ftype]); + if (code) return code; + } else { + // remove the file + tsdbTFileObjRemove(fobj2); + fset2->farr[ftype] = NULL; + } + } + + // stt part + int32_t i1 = 0, i2 = 0; + while (i1 < TARRAY2_SIZE(fset1->lvlArr) || i2 < TARRAY2_SIZE(fset2->lvlArr)) { + SSttLvl *lvl1 = i1 < TARRAY2_SIZE(fset1->lvlArr) ? TARRAY2_GET(fset1->lvlArr, i1) : NULL; + SSttLvl *lvl2 = i2 < TARRAY2_SIZE(fset2->lvlArr) ? 
TARRAY2_GET(fset2->lvlArr, i2) : NULL; + + if (lvl1 && lvl2) { + if (lvl1->level < lvl2->level) { + // add a new stt level + code = tsdbSttLvlInitEx(pTsdb, lvl1, &lvl2); + if (code) return code; + code = TARRAY2_SORT_INSERT(fset2->lvlArr, lvl2, tsdbSttLvlCmprFn); + if (code) return code; + i1++; + i2++; + } else if (lvl1->level > lvl2->level) { + // remove the stt level + TARRAY2_REMOVE(fset2->lvlArr, i2, tsdbSttLvlRemove); + } else { + // apply edit on stt level + code = tsdbSttLvlApplyEdit(pTsdb, lvl1, lvl2); + if (code) return code; + i1++; + i2++; + } + } else if (lvl1) { + // add a new stt level + code = tsdbSttLvlInitEx(pTsdb, lvl1, &lvl2); + if (code) return code; + code = TARRAY2_SORT_INSERT(fset2->lvlArr, lvl2, tsdbSttLvlCmprFn); + if (code) return code; + i1++; + i2++; + } else { + // remove the stt level + TARRAY2_REMOVE(fset2->lvlArr, i2, tsdbSttLvlRemove); + } + } + + return 0; +} + +int32_t tsdbTFileSetInit(int32_t fid, STFileSet **fset) { + fset[0] = taosMemoryCalloc(1, sizeof(STFileSet)); + if (fset[0] == NULL) return TSDB_CODE_OUT_OF_MEMORY; + + fset[0]->fid = fid; + TARRAY2_INIT(fset[0]->lvlArr); + return 0; +} + +int32_t tsdbTFileSetInitDup(STsdb *pTsdb, const STFileSet *fset1, STFileSet **fset) { + int32_t code = tsdbTFileSetInit(fset1->fid, fset); + if (code) return code; + + for (int32_t ftype = TSDB_FTYPE_MIN; ftype < TSDB_FTYPE_MAX; ++ftype) { + if (fset1->farr[ftype] == NULL) continue; + + code = tsdbTFileObjInit(pTsdb, fset1->farr[ftype]->f, &fset[0]->farr[ftype]); + if (code) { + tsdbTFileSetClear(fset); + return code; + } + } + + const SSttLvl *lvl1; + TARRAY2_FOREACH(fset1->lvlArr, lvl1) { + SSttLvl *lvl; + code = tsdbSttLvlInitEx(pTsdb, lvl1, &lvl); + if (code) { + tsdbTFileSetClear(fset); + return code; + } + + code = TARRAY2_APPEND(fset[0]->lvlArr, lvl); + if (code) return code; + } + + return 0; +} + +int32_t tsdbTFileSetInitRef(STsdb *pTsdb, const STFileSet *fset1, STFileSet **fset) { + int32_t code = tsdbTFileSetInit(fset1->fid, 
fset); + if (code) return code; + + for (int32_t ftype = TSDB_FTYPE_MIN; ftype < TSDB_FTYPE_MAX; ++ftype) { + if (fset1->farr[ftype] == NULL) continue; + + tsdbTFileObjRef(fset1->farr[ftype]); + fset[0]->farr[ftype] = fset1->farr[ftype]; + } + + const SSttLvl *lvl1; + TARRAY2_FOREACH(fset1->lvlArr, lvl1) { + SSttLvl *lvl; + code = tsdbSttLvlInitRef(pTsdb, lvl1, &lvl); + if (code) { + tsdbTFileSetClear(fset); + return code; + } + + code = TARRAY2_APPEND(fset[0]->lvlArr, lvl); + if (code) return code; + } + + return 0; +} + +int32_t tsdbTFileSetClear(STFileSet **fset) { + if (!fset[0]) return 0; + + for (tsdb_ftype_t ftype = TSDB_FTYPE_MIN; ftype < TSDB_FTYPE_MAX; ++ftype) { + if (fset[0]->farr[ftype] == NULL) continue; + tsdbTFileObjUnref(fset[0]->farr[ftype]); + } + + TARRAY2_DESTROY(fset[0]->lvlArr, tsdbSttLvlClear); + + taosMemoryFree(fset[0]); + fset[0] = NULL; + + return 0; +} + +int32_t tsdbTFileSetRemove(STFileSet **fset) { + for (tsdb_ftype_t ftype = TSDB_FTYPE_MIN; ftype < TSDB_FTYPE_MAX; ++ftype) { + if (fset[0]->farr[ftype] == NULL) continue; + tsdbTFileObjRemove(fset[0]->farr[ftype]); + } + + TARRAY2_DESTROY(fset[0]->lvlArr, tsdbSttLvlRemove); + taosMemoryFree(fset[0]); + fset[0] = NULL; + return 0; +} + +SSttLvl *tsdbTFileSetGetSttLvl(STFileSet *fset, int32_t level) { + SSttLvl sttLvl = {.level = level}; + SSttLvl *lvl = &sttLvl; + SSttLvl **lvlPtr = TARRAY2_SEARCH(fset->lvlArr, &lvl, tsdbSttLvlCmprFn, TD_EQ); + return lvlPtr ? 
lvlPtr[0] : NULL; +} + +int32_t tsdbTFileSetCmprFn(const STFileSet **fset1, const STFileSet **fset2) { + if (fset1[0]->fid < fset2[0]->fid) return -1; + if (fset1[0]->fid > fset2[0]->fid) return 1; + return 0; +} + +int64_t tsdbTFileSetMaxCid(const STFileSet *fset) { + int64_t maxCid = 0; + for (tsdb_ftype_t ftype = TSDB_FTYPE_MIN; ftype < TSDB_FTYPE_MAX; ++ftype) { + if (fset->farr[ftype] == NULL) continue; + maxCid = TMAX(maxCid, fset->farr[ftype]->f->cid); + } + const SSttLvl *lvl; + const STFileObj *fobj; + TARRAY2_FOREACH(fset->lvlArr, lvl) { + TARRAY2_FOREACH(lvl->fobjArr, fobj) { maxCid = TMAX(maxCid, fobj->f->cid); } + } + return maxCid; +} + +bool tsdbTFileSetIsEmpty(const STFileSet *fset) { + for (tsdb_ftype_t ftype = TSDB_FTYPE_MIN; ftype < TSDB_FTYPE_MAX; ++ftype) { + if (fset->farr[ftype] != NULL) return false; + } + return TARRAY2_SIZE(fset->lvlArr) == 0; +} \ No newline at end of file diff --git a/source/dnode/vnode/src/tsdb/tsdbFSet2.h b/source/dnode/vnode/src/tsdb/tsdbFSet2.h new file mode 100644 index 0000000000..d7b3c1fc8c --- /dev/null +++ b/source/dnode/vnode/src/tsdb/tsdbFSet2.h @@ -0,0 +1,85 @@ +/* + * Copyright (c) 2019 TAOS Data, Inc. + * + * This program is free software: you can use, redistribute, and/or modify + * it under the terms of the GNU Affero General Public License, version 3 + * or later ("AGPL"), as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see . 
// One edit operation on a file set, consumed by tsdbTFileSetEdit().
struct STFileOp {
  tsdb_fop_t optype;  // TSDB_FOP_CREATE / TSDB_FOP_REMOVE / TSDB_FOP_MODIFY
  int32_t fid;        // NOTE(review): presumably the id of the file set the operation targets - confirm with callers
  STFile of;  // old file state
  STFile nf;  // new file state
};

// One stt level of a file set.
struct SSttLvl {
  int32_t level;             // level number; tsdbSttLvlCmprFn orders levels by it
  TFileObjArray fobjArr[1];  // file objects of this level; tsdbTFileSetEdit inserts with tsdbTFileObjCmpr
};

// A file set: the per-type files plus the stt levels that share one fid.
struct STFileSet {
  int32_t fid;                      // file set id; tsdbTFileSetCmprFn orders file sets by it
  STFileObj *farr[TSDB_FTYPE_MAX];  // file array, indexed by file type; NULL when that file is absent
  TSttLvlArray lvlArr[1];           // level array
};
b/source/dnode/vnode/src/tsdb/tsdbFSetRW.c new file mode 100644 index 0000000000..83ae8c2429 --- /dev/null +++ b/source/dnode/vnode/src/tsdb/tsdbFSetRW.c @@ -0,0 +1,295 @@ +/* + * Copyright (c) 2019 TAOS Data, Inc. + * + * This program is free software: you can use, redistribute, and/or modify + * it under the terms of the GNU Affero General Public License, version 3 + * or later ("AGPL"), as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see . + */ + +#include "tsdbFSetRW.h" + +// SFSetWriter ================================================== +struct SFSetWriter { + SFSetWriterConfig config[1]; + + SSkmInfo skmTb[1]; + SSkmInfo skmRow[1]; + uint8_t *bufArr[10]; + + struct { + TABLEID tbid[1]; + } ctx[1]; + + // writer + SBlockData blockData[2]; + int32_t blockDataIdx; + SDataFileWriter *dataWriter; + SSttFileWriter *sttWriter; +}; + +static int32_t tsdbFSetWriteTableDataBegin(SFSetWriter *writer, const TABLEID *tbid) { + int32_t code = 0; + int32_t lino = 0; + + writer->ctx->tbid->suid = tbid->suid; + writer->ctx->tbid->uid = tbid->uid; + + code = tsdbUpdateSkmTb(writer->config->tsdb, writer->ctx->tbid, writer->skmTb); + TSDB_CHECK_CODE(code, lino, _exit); + + writer->blockDataIdx = 0; + for (int32_t i = 0; i < ARRAY_SIZE(writer->blockData); i++) { + code = tBlockDataInit(&writer->blockData[i], writer->ctx->tbid, writer->skmTb->pTSchema, NULL, 0); + TSDB_CHECK_CODE(code, lino, _exit); + } + +_exit: + if (code) { + TSDB_ERROR_LOG(TD_VID(writer->config->tsdb->pVnode), lino, code); + } + return code; +} + +static int32_t tsdbFSetWriteTableDataEnd(SFSetWriter *writer) { + if (writer->ctx->tbid->uid == 0) return 0; + + int32_t code = 0; + int32_t lino = 0; + + 
int32_t cidx = writer->blockDataIdx; + int32_t pidx = ((cidx + 1) & 1); + int32_t numRow = ((writer->blockData[pidx].nRow + writer->blockData[cidx].nRow) >> 1); + + if (writer->blockData[pidx].nRow > 0 && numRow >= writer->config->minRow) { + ASSERT(writer->blockData[pidx].nRow == writer->config->maxRow); + + SRowInfo row = { + .suid = writer->ctx->tbid->suid, + .uid = writer->ctx->tbid->uid, + .row = tsdbRowFromBlockData(writer->blockData + pidx, 0), + }; + + for (int32_t i = 0; i < numRow; i++) { + row.row.iRow = i; + + code = tsdbDataFileWriteRow(writer->dataWriter, &row); + TSDB_CHECK_CODE(code, lino, _exit); + } + + code = tsdbDataFileFlush(writer->dataWriter); + TSDB_CHECK_CODE(code, lino, _exit); + + for (int32_t i = numRow; i < writer->blockData[pidx].nRow; i++) { + row.row.iRow = i; + code = tsdbDataFileWriteRow(writer->dataWriter, &row); + TSDB_CHECK_CODE(code, lino, _exit); + } + + row.row = tsdbRowFromBlockData(writer->blockData + cidx, 0); + for (int32_t i = 0; i < writer->blockData[cidx].nRow; i++) { + row.row.iRow = i; + code = tsdbDataFileWriteRow(writer->dataWriter, &row); + TSDB_CHECK_CODE(code, lino, _exit); + } + } else { + // pidx + if (writer->blockData[pidx].nRow > 0) { + code = tsdbDataFileWriteBlockData(writer->dataWriter, &writer->blockData[pidx]); + TSDB_CHECK_CODE(code, lino, _exit); + } + + // cidx + if (writer->blockData[cidx].nRow < writer->config->minRow) { + code = tsdbSttFileWriteBlockData(writer->sttWriter, &writer->blockData[cidx]); + TSDB_CHECK_CODE(code, lino, _exit); + } else { + code = tsdbDataFileWriteBlockData(writer->dataWriter, &writer->blockData[cidx]); + TSDB_CHECK_CODE(code, lino, _exit); + } + } + + for (int32_t i = 0; i < ARRAY_SIZE(writer->blockData); i++) { + tBlockDataReset(&writer->blockData[i]); + } + +_exit: + if (code) { + TSDB_ERROR_LOG(TD_VID(writer->config->tsdb->pVnode), lino, code); + } + return code; +} + +int32_t tsdbFSetWriterOpen(SFSetWriterConfig *config, SFSetWriter **writer) { + int32_t code = 0; + 
int32_t lino = 0; + + writer[0] = taosMemoryCalloc(1, sizeof(*writer[0])); + if (writer[0] == NULL) return TSDB_CODE_OUT_OF_MEMORY; + + writer[0]->config[0] = config[0]; + + // data writer + if (!config->toSttOnly) { + SDataFileWriterConfig dataWriterConfig = { + .tsdb = config->tsdb, + .cmprAlg = config->cmprAlg, + .maxRow = config->maxRow, + .szPage = config->szPage, + .fid = config->fid, + .cid = config->cid, + .did = config->did, + .compactVersion = config->compactVersion, + .skmTb = writer[0]->skmTb, + .skmRow = writer[0]->skmRow, + .bufArr = writer[0]->bufArr, + }; + for (int32_t ftype = 0; ftype < TSDB_FTYPE_MAX; ++ftype) { + dataWriterConfig.files[ftype].exist = config->files[ftype].exist; + dataWriterConfig.files[ftype].file = config->files[ftype].file; + } + + code = tsdbDataFileWriterOpen(&dataWriterConfig, &writer[0]->dataWriter); + TSDB_CHECK_CODE(code, lino, _exit); + } + + // stt writer + SSttFileWriterConfig sttWriterConfig = { + .tsdb = config->tsdb, + .maxRow = config->maxRow, + .szPage = config->szPage, + .cmprAlg = config->cmprAlg, + .compactVersion = config->compactVersion, + .did = config->did, + .fid = config->fid, + .cid = config->cid, + .level = config->level, + .skmTb = writer[0]->skmTb, + .skmRow = writer[0]->skmRow, + .bufArr = writer[0]->bufArr, + }; + code = tsdbSttFileWriterOpen(&sttWriterConfig, &writer[0]->sttWriter); + TSDB_CHECK_CODE(code, lino, _exit); + +_exit: + if (code) { + TSDB_ERROR_LOG(TD_VID(config->tsdb->pVnode), lino, code); + } + return code; +} + +int32_t tsdbFSetWriterClose(SFSetWriter **writer, bool abort, TFileOpArray *fopArr) { + if (writer[0] == NULL) return 0; + + int32_t code = 0; + int32_t lino = 0; + + STsdb *tsdb = writer[0]->config->tsdb; + + // end + if (!writer[0]->config->toSttOnly) { + code = tsdbFSetWriteTableDataEnd(writer[0]); + TSDB_CHECK_CODE(code, lino, _exit); + + code = tsdbDataFileWriterClose(&writer[0]->dataWriter, abort, fopArr); + TSDB_CHECK_CODE(code, lino, _exit); + } + + code = 
/*
 * Write one row through the file-set writer.
 *
 * In stt-only mode the row goes straight to the stt writer. Otherwise rows are
 * collected per table in two alternating block buffers (writer->blockData[0..1],
 * the active one selected by writer->blockDataIdx) and handed to the data-file
 * writer a full block at a time.
 *
 * Returns 0 on success; on failure the error is logged with the vnode id and
 * the failing helper's code is returned.
 */
int32_t tsdbFSetWriteRow(SFSetWriter *writer, SRowInfo *row) {
  int32_t code = 0;
  int32_t lino = 0;

  if (writer->config->toSttOnly) {
    code = tsdbSttFileWriteRow(writer->sttWriter, row);
    TSDB_CHECK_CODE(code, lino, _exit);
  } else {
    // the row belongs to a different table than the one being buffered:
    // finish the previous table's data and start buffering for the new one
    if (writer->ctx->tbid->uid != row->uid) {
      code = tsdbFSetWriteTableDataEnd(writer);
      TSDB_CHECK_CODE(code, lino, _exit);

      // NOTE(review): the cast assumes SRowInfo begins with the same suid/uid pair as TABLEID - confirm
      code = tsdbFSetWriteTableDataBegin(writer, (TABLEID *)row);
      TSDB_CHECK_CODE(code, lino, _exit);
    }

    // rows in row format carry a schema version; refresh the row schema for it
    if (row->row.type == TSDBROW_ROW_FMT) {
      code = tsdbUpdateSkmRow(writer->config->tsdb, writer->ctx->tbid, TSDBROW_SVERSION(&row->row), writer->skmRow);
      TSDB_CHECK_CODE(code, lino, _exit);
    }

    TSDBKEY key = TSDBROW_KEY(&row->row);
    // same timestamp as the last buffered row and version not above compactVersion:
    // update that buffered row instead of appending a new one
    if (key.version <= writer->config->compactVersion  //
        && writer->blockData[writer->blockDataIdx].nRow > 0  //
        && key.ts == writer->blockData[writer->blockDataIdx].aTSKEY[writer->blockData[writer->blockDataIdx].nRow - 1]) {
      code = tBlockDataUpdateRow(&writer->blockData[writer->blockDataIdx], &row->row, writer->skmRow->pTSchema);
      TSDB_CHECK_CODE(code, lino, _exit);
    } else {
      // active buffer is full: switch to the other buffer, flushing it first if it is full as well
      if (writer->blockData[writer->blockDataIdx].nRow >= writer->config->maxRow) {
        int32_t idx = ((writer->blockDataIdx + 1) & 1);
        if (writer->blockData[idx].nRow >= writer->config->maxRow) {
          code = tsdbDataFileWriteBlockData(writer->dataWriter, &writer->blockData[idx]);
          TSDB_CHECK_CODE(code, lino, _exit);

          tBlockDataClear(&writer->blockData[idx]);
        }
        writer->blockDataIdx = idx;
      }

      code =
          tBlockDataAppendRow(&writer->blockData[writer->blockDataIdx], &row->row, writer->skmRow->pTSchema, row->uid);
      TSDB_CHECK_CODE(code, lino, _exit);
    }
  }

_exit:
  if (code) {
    TSDB_ERROR_LOG(TD_VID(writer->config->tsdb->pVnode), lino, code);
  }
  return code;
}
+ */ + +#include "tsdbDataFileRW.h" +#include "tsdbSttFileRW.h" + +#ifndef _TSDB_FSET_RW_H +#define _TSDB_FSET_RW_H + +#ifdef __cplusplus +extern "C" { +#endif + +// +typedef struct SFSetWriter SFSetWriter; +typedef struct { + STsdb *tsdb; + bool toSttOnly; + int64_t compactVersion; + int32_t minRow; + int32_t maxRow; + int32_t szPage; + int8_t cmprAlg; + int32_t fid; + int64_t cid; + SDiskID did; + int32_t level; + struct { + bool exist; + STFile file; + } files[TSDB_FTYPE_MAX]; +} SFSetWriterConfig; + +int32_t tsdbFSetWriterOpen(SFSetWriterConfig *config, SFSetWriter **writer); +int32_t tsdbFSetWriterClose(SFSetWriter **writer, bool abort, TFileOpArray *fopArr); +int32_t tsdbFSetWriteRow(SFSetWriter *writer, SRowInfo *row); +int32_t tsdbFSetWriteTombRecord(SFSetWriter *writer, const STombRecord *tombRecord); + +#ifdef __cplusplus +} +#endif + +#endif /*_TSDB_FSET_RW_H*/ \ No newline at end of file diff --git a/source/dnode/vnode/src/tsdb/tsdbFile.c b/source/dnode/vnode/src/tsdb/tsdbFile.c index 9ff4b28779..62b37cd0a6 100644 --- a/source/dnode/vnode/src/tsdb/tsdbFile.c +++ b/source/dnode/vnode/src/tsdb/tsdbFile.c @@ -112,7 +112,10 @@ static char* getFileNamePrefix(STsdb *pTsdb, SDiskID did, int32_t fid, uint64_t p += titoa(TD_VID(pTsdb->pVnode), 10, p); *(p++) = 'f'; - p += titoa(fid, 10, p); + if (fid < 0) { + *(p++) = '-'; + } + p += titoa((fid < 0) ? -fid : fid, 10, p); memcpy(p, "ver", 3); p += 3; diff --git a/source/dnode/vnode/src/tsdb/tsdbFile2.c b/source/dnode/vnode/src/tsdb/tsdbFile2.c new file mode 100644 index 0000000000..be021169cd --- /dev/null +++ b/source/dnode/vnode/src/tsdb/tsdbFile2.c @@ -0,0 +1,295 @@ +/* + * Copyright (c) 2019 TAOS Data, Inc. + * + * This program is free software: you can use, redistribute, and/or modify + * it under the terms of the GNU Affero General Public License, version 3 + * or later ("AGPL"), as published by the Free Software Foundation. 
+ * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see . + */ + +#include "tsdbFile2.h" + +// to_json +static int32_t head_to_json(const STFile *file, cJSON *json); +static int32_t data_to_json(const STFile *file, cJSON *json); +static int32_t sma_to_json(const STFile *file, cJSON *json); +static int32_t tomb_to_json(const STFile *file, cJSON *json); +static int32_t stt_to_json(const STFile *file, cJSON *json); + +// from_json +static int32_t head_from_json(const cJSON *json, STFile *file); +static int32_t data_from_json(const cJSON *json, STFile *file); +static int32_t sma_from_json(const cJSON *json, STFile *file); +static int32_t tomb_from_json(const cJSON *json, STFile *file); +static int32_t stt_from_json(const cJSON *json, STFile *file); + +static const struct { + const char *suffix; + int32_t (*to_json)(const STFile *file, cJSON *json); + int32_t (*from_json)(const cJSON *json, STFile *file); +} g_tfile_info[] = { + [TSDB_FTYPE_HEAD] = {"head", head_to_json, head_from_json}, + [TSDB_FTYPE_DATA] = {"data", data_to_json, data_from_json}, + [TSDB_FTYPE_SMA] = {"sma", sma_to_json, sma_from_json}, + [TSDB_FTYPE_TOMB] = {"tomb", tomb_to_json, tomb_from_json}, + [TSDB_FTYPE_STT] = {"stt", stt_to_json, stt_from_json}, +}; + +static void remove_file(const char *fname) { + taosRemoveFile(fname); + tsdbInfo("file:%s is removed", fname); +} + +static int32_t tfile_to_json(const STFile *file, cJSON *json) { + /* did.level */ + if (cJSON_AddNumberToObject(json, "did.level", file->did.level) == NULL) { + return TSDB_CODE_OUT_OF_MEMORY; + } + + /* did.id */ + if (cJSON_AddNumberToObject(json, "did.id", file->did.id) == NULL) { + return TSDB_CODE_OUT_OF_MEMORY; + } + + /* fid */ + if (cJSON_AddNumberToObject(json, 
"fid", file->fid) == NULL) { + return TSDB_CODE_OUT_OF_MEMORY; + } + + /* cid */ + if (cJSON_AddNumberToObject(json, "cid", file->cid) == NULL) { + return TSDB_CODE_OUT_OF_MEMORY; + } + + /* size */ + if (cJSON_AddNumberToObject(json, "size", file->size) == NULL) { + return TSDB_CODE_OUT_OF_MEMORY; + } + + return 0; +} + +static int32_t tfile_from_json(const cJSON *json, STFile *file) { + const cJSON *item; + + /* did.level */ + item = cJSON_GetObjectItem(json, "did.level"); + if (cJSON_IsNumber(item)) { + file->did.level = item->valuedouble; + } else { + return TSDB_CODE_FILE_CORRUPTED; + } + + /* did.id */ + item = cJSON_GetObjectItem(json, "did.id"); + if (cJSON_IsNumber(item)) { + file->did.id = item->valuedouble; + } else { + return TSDB_CODE_FILE_CORRUPTED; + } + + /* fid */ + item = cJSON_GetObjectItem(json, "fid"); + if (cJSON_IsNumber(item)) { + file->fid = item->valuedouble; + } else { + return TSDB_CODE_FILE_CORRUPTED; + } + + /* cid */ + item = cJSON_GetObjectItem(json, "cid"); + if (cJSON_IsNumber(item)) { + file->cid = item->valuedouble; + } else { + return TSDB_CODE_FILE_CORRUPTED; + } + + /* size */ + item = cJSON_GetObjectItem(json, "size"); + if (cJSON_IsNumber(item)) { + file->size = item->valuedouble; + } else { + return TSDB_CODE_FILE_CORRUPTED; + } + + return 0; +} + +static int32_t head_to_json(const STFile *file, cJSON *json) { return tfile_to_json(file, json); } +static int32_t data_to_json(const STFile *file, cJSON *json) { return tfile_to_json(file, json); } +static int32_t sma_to_json(const STFile *file, cJSON *json) { return tfile_to_json(file, json); } +static int32_t tomb_to_json(const STFile *file, cJSON *json) { return tfile_to_json(file, json); } +static int32_t stt_to_json(const STFile *file, cJSON *json) { + int32_t code = tfile_to_json(file, json); + if (code) return code; + + /* lvl */ + if (cJSON_AddNumberToObject(json, "level", file->stt->level) == NULL) { + return TSDB_CODE_OUT_OF_MEMORY; + } + + return 0; +} + +static 
int32_t head_from_json(const cJSON *json, STFile *file) { return tfile_from_json(json, file); } +static int32_t data_from_json(const cJSON *json, STFile *file) { return tfile_from_json(json, file); } +static int32_t sma_from_json(const cJSON *json, STFile *file) { return tfile_from_json(json, file); } +static int32_t tomb_from_json(const cJSON *json, STFile *file) { return tfile_from_json(json, file); } +static int32_t stt_from_json(const cJSON *json, STFile *file) { + int32_t code = tfile_from_json(json, file); + if (code) return code; + + const cJSON *item; + + /* lvl */ + item = cJSON_GetObjectItem(json, "level"); + if (cJSON_IsNumber(item)) { + file->stt->level = item->valuedouble; + } else { + return TSDB_CODE_FILE_CORRUPTED; + } + + return 0; +} + +int32_t tsdbTFileToJson(const STFile *file, cJSON *json) { + if (file->type == TSDB_FTYPE_STT) { + return g_tfile_info[file->type].to_json(file, json); + } else { + cJSON *item = cJSON_AddObjectToObject(json, g_tfile_info[file->type].suffix); + if (item == NULL) return TSDB_CODE_OUT_OF_MEMORY; + return g_tfile_info[file->type].to_json(file, item); + } +} + +int32_t tsdbJsonToTFile(const cJSON *json, tsdb_ftype_t ftype, STFile *f) { + f[0] = (STFile){.type = ftype}; + + if (ftype == TSDB_FTYPE_STT) { + int32_t code = g_tfile_info[ftype].from_json(json, f); + if (code) return code; + } else { + const cJSON *item = cJSON_GetObjectItem(json, g_tfile_info[ftype].suffix); + if (cJSON_IsObject(item)) { + int32_t code = g_tfile_info[ftype].from_json(item, f); + if (code) return code; + } else { + return TSDB_CODE_NOT_FOUND; + } + } + + return 0; +} + +int32_t tsdbTFileObjInit(STsdb *pTsdb, const STFile *f, STFileObj **fobj) { + fobj[0] = taosMemoryMalloc(sizeof(*fobj[0])); + if (!fobj[0]) return TSDB_CODE_OUT_OF_MEMORY; + + taosThreadMutexInit(&fobj[0]->mutex, NULL); + fobj[0]->f[0] = f[0]; + fobj[0]->state = TSDB_FSTATE_LIVE; + fobj[0]->ref = 1; + tsdbTFileName(pTsdb, f, fobj[0]->fname); + return 0; +} + +int32_t 
tsdbTFileObjRef(STFileObj *fobj) { + int32_t nRef; + taosThreadMutexLock(&fobj->mutex); + ASSERT(fobj->ref > 0 && fobj->state == TSDB_FSTATE_LIVE); + nRef = ++fobj->ref; + taosThreadMutexUnlock(&fobj->mutex); + tsdbTrace("ref file %s, fobj:%p ref %d", fobj->fname, fobj, nRef); + return 0; +} + +int32_t tsdbTFileObjUnref(STFileObj *fobj) { /* drop one reference; the caller that reaches 0 frees fobj and deletes the file if it is DEAD */ + taosThreadMutexLock(&fobj->mutex); + int32_t nRef = --fobj->ref; + ASSERT(nRef >= 0); + tsdbTrace("unref file %s, fobj:%p ref %d", fobj->fname, fobj, nRef); /* traced under the lock: once it is released with nRef > 0 another thread may free fobj */ + taosThreadMutexUnlock(&fobj->mutex); + if (nRef == 0) { + if (fobj->state == TSDB_FSTATE_DEAD) { + remove_file(fobj->fname); + } + taosMemoryFree(fobj); + } + + return 0; +} + +int32_t tsdbTFileObjRemove(STFileObj *fobj) { /* mark the file DEAD and drop one reference; the caller that reaches 0 deletes the file and frees fobj */ + taosThreadMutexLock(&fobj->mutex); + ASSERT(fobj->state == TSDB_FSTATE_LIVE && fobj->ref > 0); + fobj->state = TSDB_FSTATE_DEAD; + int32_t nRef = --fobj->ref; + tsdbTrace("remove unref file %s, fobj:%p ref %d", fobj->fname, fobj, nRef); /* traced under the lock: once it is released with nRef > 0 another thread may free fobj */ + taosThreadMutexUnlock(&fobj->mutex); + if (nRef == 0) { + remove_file(fobj->fname); + taosMemoryFree(fobj); + } + return 0; +} + +int32_t tsdbTFileName(STsdb *pTsdb, const STFile *f, char fname[]) { + SVnode *pVnode = pTsdb->pVnode; + STfs *pTfs = pVnode->pTfs; + + if (pTfs) { + snprintf(fname, // + TSDB_FILENAME_LEN, // + "%s%s%s%sv%df%dver%" PRId64 ".%s", // + tfsGetDiskPath(pTfs, f->did), // + TD_DIRSEP, // + pTsdb->path, // + TD_DIRSEP, // + TD_VID(pVnode), // + f->fid, // + f->cid, // + g_tfile_info[f->type].suffix); + } else { + snprintf(fname, // + TSDB_FILENAME_LEN, // + "%s%sv%df%dver%" PRId64 ".%s", // + pTsdb->path, // + TD_DIRSEP, // + TD_VID(pVnode), // + f->fid, // + f->cid, // + g_tfile_info[f->type].suffix); + } + return 0; +} + +bool tsdbIsSameTFile(const STFile *f1, const STFile *f2) { + if (f1->type != f2->type) return false; + if (f1->did.level != f2->did.level) return false; + if (f1->did.id != f2->did.id) return false; + if (f1->fid != f2->fid) return false; + if (f1->cid != f2->cid) return
false; + return true; +} + +bool tsdbIsTFileChanged(const STFile *f1, const STFile *f2) { + if (f1->size != f2->size) return true; + // if (f1->type == TSDB_FTYPE_STT && f1->stt->nseg != f2->stt->nseg) return true; + return false; +} + +int32_t tsdbTFileObjCmpr(const STFileObj **fobj1, const STFileObj **fobj2) { + if (fobj1[0]->f->cid < fobj2[0]->f->cid) { + return -1; + } else if (fobj1[0]->f->cid > fobj2[0]->f->cid) { + return 1; + } else { + return 0; + } +} \ No newline at end of file diff --git a/source/dnode/vnode/src/tsdb/tsdbFile2.h b/source/dnode/vnode/src/tsdb/tsdbFile2.h new file mode 100644 index 0000000000..11d08e45e6 --- /dev/null +++ b/source/dnode/vnode/src/tsdb/tsdbFile2.h @@ -0,0 +1,83 @@ +/* + * Copyright (c) 2019 TAOS Data, Inc. + * + * This program is free software: you can use, redistribute, and/or modify + * it under the terms of the GNU Affero General Public License, version 3 + * or later ("AGPL"), as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see . 
+ */ + +#include "tsdbDef.h" + +#ifndef _TSDB_FILE_H +#define _TSDB_FILE_H + +#ifdef __cplusplus +extern "C" { +#endif + +typedef struct STFile STFile; +typedef struct STFileObj STFileObj; + +typedef enum { + TSDB_FTYPE_HEAD = 0, // .head + TSDB_FTYPE_DATA, // .data + TSDB_FTYPE_SMA, // .sma + TSDB_FTYPE_TOMB, // .tomb + TSDB_FTYPE_STT = TSDB_FTYPE_TOMB + 2, // .stt +} tsdb_ftype_t; + +enum { + TSDB_FSTATE_LIVE = 1, + TSDB_FSTATE_DEAD, +}; + +#define TSDB_FTYPE_MIN TSDB_FTYPE_HEAD +#define TSDB_FTYPE_MAX (TSDB_FTYPE_TOMB + 1) + +// STFile +int32_t tsdbTFileToJson(const STFile *f, cJSON *json); +int32_t tsdbJsonToTFile(const cJSON *json, tsdb_ftype_t ftype, STFile *f); +int32_t tsdbTFileName(STsdb *pTsdb, const STFile *f, char fname[]); +bool tsdbIsSameTFile(const STFile *f1, const STFile *f2); +bool tsdbIsTFileChanged(const STFile *f1, const STFile *f2); + +// STFileObj +int32_t tsdbTFileObjInit(STsdb *pTsdb, const STFile *f, STFileObj **fobj); +int32_t tsdbTFileObjRef(STFileObj *fobj); +int32_t tsdbTFileObjUnref(STFileObj *fobj); +int32_t tsdbTFileObjRemove(STFileObj *fobj); +int32_t tsdbTFileObjCmpr(const STFileObj **fobj1, const STFileObj **fobj2); + +struct STFile { + tsdb_ftype_t type; + SDiskID did; // disk id + int32_t fid; // file id + int64_t cid; // commit id + int64_t size; + union { + struct { + int32_t level; + } stt[1]; + }; +}; + +struct STFileObj { + TdThreadMutex mutex; + STFile f[1]; + int32_t state; + int32_t ref; + char fname[TSDB_FILENAME_LEN]; +}; + +#ifdef __cplusplus +} +#endif + +#endif /*_TSDB_FILE_H*/ \ No newline at end of file diff --git a/source/dnode/vnode/src/tsdb/tsdbIter.c b/source/dnode/vnode/src/tsdb/tsdbIter.c new file mode 100644 index 0000000000..9780cc6be6 --- /dev/null +++ b/source/dnode/vnode/src/tsdb/tsdbIter.c @@ -0,0 +1,780 @@ +/* + * Copyright (c) 2019 TAOS Data, Inc. 
+ * + * This program is free software: you can use, redistribute, and/or modify + * it under the terms of the GNU Affero General Public License, version 3 + * or later ("AGPL"), as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see . + */ + +#include "tsdbIter.h" + +// STsdbIter ================ +struct STsdbIter { + EIterType type; + bool noMoreData; + bool filterByVersion; + int64_t range[2]; + union { + SRowInfo row[1]; + STombRecord record[1]; + }; + SRBTreeNode node[1]; + union { + struct { + SSttFileReader *reader; + const TSttBlkArray *sttBlkArray; + int32_t sttBlkArrayIdx; + SBlockData blockData[1]; + int32_t blockDataIdx; + } sttData[1]; + struct { + SDataFileReader *reader; + const TBrinBlkArray *brinBlkArray; + int32_t brinBlkArrayIdx; + SBrinBlock brinBlock[1]; + int32_t brinBlockIdx; + SBlockData blockData[1]; + int32_t blockDataIdx; + } dataData[1]; + struct { + SMemTable *memt; + TSDBKEY from[1]; + SRBTreeIter iter[1]; + STbData *tbData; + STbDataIter tbIter[1]; + } memtData[1]; + struct { + SSttFileReader *reader; + const TTombBlkArray *tombBlkArray; + int32_t tombBlkArrayIdx; + STombBlock tombBlock[1]; + int32_t tombBlockIdx; + } sttTomb[1]; + struct { + SDataFileReader *reader; + const TTombBlkArray *tombBlkArray; + int32_t tombBlkArrayIdx; + STombBlock tombBlock[1]; + int32_t tombBlockIdx; + } dataTomb[1]; + struct { + SMemTable *memt; + SRBTreeIter rbtIter[1]; + STbData *tbData; + SDelData *delData; + } memtTomb[1]; + }; +}; + +static int32_t tsdbSttIterNext(STsdbIter *iter, const TABLEID *tbid) { + while (!iter->noMoreData) { + for (; iter->sttData->blockDataIdx < iter->sttData->blockData->nRow; iter->sttData->blockDataIdx++) { + int64_t 
version = iter->sttData->blockData->aVersion[iter->sttData->blockDataIdx]; + + if (iter->filterByVersion && (version < iter->range[0] || version > iter->range[1])) { + continue; + } + + iter->row->suid = iter->sttData->blockData->suid; + iter->row->uid = iter->sttData->blockData->uid ? iter->sttData->blockData->uid + : iter->sttData->blockData->aUid[iter->sttData->blockDataIdx]; + + if (tbid && iter->row->suid == tbid->suid && iter->row->uid == tbid->uid) { + continue; + } + + iter->row->row = tsdbRowFromBlockData(iter->sttData->blockData, iter->sttData->blockDataIdx); + iter->sttData->blockDataIdx++; + goto _exit; + } + + if (iter->sttData->sttBlkArrayIdx >= TARRAY2_SIZE(iter->sttData->sttBlkArray)) { + iter->noMoreData = true; + break; + } + + for (; iter->sttData->sttBlkArrayIdx < TARRAY2_SIZE(iter->sttData->sttBlkArray); iter->sttData->sttBlkArrayIdx++) { + const SSttBlk *sttBlk = TARRAY2_GET_PTR(iter->sttData->sttBlkArray, iter->sttData->sttBlkArrayIdx); + + if (iter->filterByVersion && (sttBlk->maxVer < iter->range[0] || sttBlk->minVer > iter->range[1])) { + continue; + } + + if (tbid && tbid->suid == sttBlk->suid && tbid->uid == sttBlk->minUid && tbid->uid == sttBlk->maxUid) { + continue; + } + + int32_t code = tsdbSttFileReadBlockData(iter->sttData->reader, sttBlk, iter->sttData->blockData); + if (code) return code; + + iter->sttData->blockDataIdx = 0; + iter->sttData->sttBlkArrayIdx++; + break; + } + } + +_exit: + return 0; +} + +static int32_t tsdbDataIterNext(STsdbIter *iter, const TABLEID *tbid) { + int32_t code; + + while (!iter->noMoreData) { + for (;;) { + // SBlockData + for (; iter->dataData->blockDataIdx < iter->dataData->blockData->nRow; iter->dataData->blockDataIdx++) { + int64_t version = iter->dataData->blockData->aVersion[iter->dataData->blockDataIdx]; + if (iter->filterByVersion && (version < iter->range[0] || version > iter->range[1])) { + continue; + } + + if (tbid && tbid->suid == iter->dataData->blockData->suid && tbid->uid == 
iter->dataData->blockData->uid) { + iter->dataData->blockDataIdx = iter->dataData->blockData->nRow; + break; + } + + iter->row->row = tsdbRowFromBlockData(iter->dataData->blockData, iter->dataData->blockDataIdx); + iter->dataData->blockDataIdx++; + goto _exit; + } + + // SBrinBlock + if (iter->dataData->brinBlockIdx >= BRIN_BLOCK_SIZE(iter->dataData->brinBlock)) { + break; + } + + for (; iter->dataData->brinBlockIdx < BRIN_BLOCK_SIZE(iter->dataData->brinBlock); + iter->dataData->brinBlockIdx++) { + SBrinRecord record[1]; + tBrinBlockGet(iter->dataData->brinBlock, iter->dataData->brinBlockIdx, record); + + if (iter->filterByVersion && (record->maxVer < iter->range[0] || record->minVer > iter->range[1])) { + continue; + } + + if (tbid && tbid->suid == record->suid && tbid->uid == record->uid) { + continue; + } + + iter->row->suid = record->suid; + iter->row->uid = record->uid; + + code = tsdbDataFileReadBlockData(iter->dataData->reader, record, iter->dataData->blockData); + if (code) return code; + + iter->dataData->blockDataIdx = 0; + iter->dataData->brinBlockIdx++; + break; + } + } + + if (iter->dataData->brinBlkArrayIdx >= TARRAY2_SIZE(iter->dataData->brinBlkArray)) { + iter->noMoreData = true; + break; + } + + for (; iter->dataData->brinBlkArrayIdx < TARRAY2_SIZE(iter->dataData->brinBlkArray); + iter->dataData->brinBlkArrayIdx++) { + const SBrinBlk *brinBlk = TARRAY2_GET_PTR(iter->dataData->brinBlkArray, iter->dataData->brinBlkArrayIdx); + + if (iter->filterByVersion && (brinBlk->maxVer < iter->range[0] || brinBlk->minVer > iter->range[1])) { + continue; + } + + if (tbid && tbid->uid == brinBlk->minTbid.uid && tbid->uid == brinBlk->maxTbid.uid) { + continue; + } + + code = tsdbDataFileReadBrinBlock(iter->dataData->reader, brinBlk, iter->dataData->brinBlock); + if (code) return code; + + iter->dataData->brinBlockIdx = 0; + iter->dataData->brinBlkArrayIdx++; + break; + } + } + +_exit: + return 0; +} + +static int32_t tsdbMemTableIterNext(STsdbIter *iter, const 
TABLEID *tbid) { /* moves iter->row to the next memtable row, skipping every row of table tbid when tbid is non-NULL; sets noMoreData once the table tree is exhausted */ + SRBTreeNode *node; + + while (!iter->noMoreData) { + for (TSDBROW *row; iter->memtData->tbData && (row = tsdbTbDataIterGet(iter->memtData->tbIter)); tsdbTbDataIterNext(iter->memtData->tbIter)) { /* advance in the for-step so the version-filter continue below cannot spin on the same row */ + if (tbid && tbid->suid == iter->memtData->tbData->suid && tbid->uid == iter->memtData->tbData->uid) { + iter->memtData->tbData = NULL; + break; + } + + if (iter->filterByVersion) { + int64_t version = TSDBROW_VERSION(row); + if (version < iter->range[0] || version > iter->range[1]) { + continue; + } + } + + iter->row->row = row[0]; + + tsdbTbDataIterNext(iter->memtData->tbIter); /* goto bypasses the for-step, so step past the returned row here */ + goto _exit; + } + + for (;;) { + node = tRBTreeIterNext(iter->memtData->iter); + if (!node) { + iter->noMoreData = true; + goto _exit; + } + + iter->memtData->tbData = TCONTAINER_OF(node, STbData, rbtn); + if (tbid && tbid->suid == iter->memtData->tbData->suid && tbid->uid == iter->memtData->tbData->uid) { + continue; + } else { + iter->row->suid = iter->memtData->tbData->suid; + iter->row->uid = iter->memtData->tbData->uid; + tsdbTbDataIterOpen(iter->memtData->tbData, iter->memtData->from, 0, iter->memtData->tbIter); + break; + } + } + } + +_exit: + return 0; +} + +static int32_t tsdbDataTombIterNext(STsdbIter *iter, const TABLEID *tbid) { + while (!iter->noMoreData) { + for (; iter->dataTomb->tombBlockIdx < TOMB_BLOCK_SIZE(iter->dataTomb->tombBlock); iter->dataTomb->tombBlockIdx++) { + iter->record->suid = TARRAY2_GET(iter->dataTomb->tombBlock->suid, iter->dataTomb->tombBlockIdx); + iter->record->uid = TARRAY2_GET(iter->dataTomb->tombBlock->uid, iter->dataTomb->tombBlockIdx); + iter->record->version = TARRAY2_GET(iter->dataTomb->tombBlock->version, iter->dataTomb->tombBlockIdx); + + if (iter->filterByVersion && (iter->record->version < iter->range[0] || iter->record->version > iter->range[1])) { + continue; + } + + if (tbid && iter->record->suid == tbid->suid && iter->record->uid == tbid->uid) { + continue; + } + + iter->record->skey = TARRAY2_GET(iter->dataTomb->tombBlock->skey, iter->dataTomb->tombBlockIdx); +
iter->record->ekey = TARRAY2_GET(iter->dataTomb->tombBlock->ekey, iter->dataTomb->tombBlockIdx); + iter->dataTomb->tombBlockIdx++; + goto _exit; + } + + if (iter->dataTomb->tombBlkArrayIdx >= TARRAY2_SIZE(iter->dataTomb->tombBlkArray)) { + iter->noMoreData = true; + goto _exit; + } + + for (; iter->dataTomb->tombBlkArrayIdx < TARRAY2_SIZE(iter->dataTomb->tombBlkArray); + iter->dataTomb->tombBlkArrayIdx++) { + const STombBlk *tombBlk = TARRAY2_GET_PTR(iter->dataTomb->tombBlkArray, iter->dataTomb->tombBlkArrayIdx); + + if (tbid && tbid->suid == tombBlk->minTbid.suid && tbid->uid == tombBlk->minTbid.uid && + tbid->suid == tombBlk->maxTbid.suid && tbid->uid == tombBlk->maxTbid.uid) { + continue; + } + + int32_t code = tsdbDataFileReadTombBlock(iter->dataTomb->reader, tombBlk, iter->dataTomb->tombBlock); + if (code) return code; + + iter->dataTomb->tombBlockIdx = 0; + iter->dataTomb->tombBlkArrayIdx++; + break; + } + } + +_exit: + return 0; +} + +static int32_t tsdbMemTombIterNext(STsdbIter *iter, const TABLEID *tbid) { /* moves iter->record to the next in-memory delete record, skipping table tbid when tbid is non-NULL; sets noMoreData once the table tree is exhausted */ + while (!iter->noMoreData) { + for (; iter->memtTomb->delData; iter->memtTomb->delData = iter->memtTomb->delData->pNext) { /* advance in the for-step so the version-filter continue below cannot spin on the same record */ + if (tbid && tbid->uid == iter->memtTomb->tbData->uid) { + iter->memtTomb->delData = NULL; + break; + } + + if (iter->filterByVersion && + (iter->memtTomb->delData->version < iter->range[0] || iter->memtTomb->delData->version > iter->range[1])) { + continue; + } + + iter->record->suid = iter->memtTomb->tbData->suid; + iter->record->uid = iter->memtTomb->tbData->uid; + iter->record->version = iter->memtTomb->delData->version; + iter->record->skey = iter->memtTomb->delData->sKey; + iter->record->ekey = iter->memtTomb->delData->eKey; + + iter->memtTomb->delData = iter->memtTomb->delData->pNext; /* goto bypasses the for-step, so step past the returned record here */ + goto _exit; + } + + for (;;) { + SRBTreeNode *node = tRBTreeIterNext(iter->memtTomb->rbtIter); + if (node == NULL) { + iter->noMoreData = true; + goto _exit; + } + + iter->memtTomb->tbData = TCONTAINER_OF(node, STbData, rbtn); + if (tbid && tbid->uid == iter->memtTomb->tbData->uid) { + continue; +
} else { + iter->memtTomb->delData = iter->memtTomb->tbData->pHead; + break; + } + } + } + +_exit: + return 0; +} + +static int32_t tsdbSttIterOpen(STsdbIter *iter) { + int32_t code; + + code = tsdbSttFileReadSttBlk(iter->sttData->reader, &iter->sttData->sttBlkArray); + if (code) return code; + + if (TARRAY2_SIZE(iter->sttData->sttBlkArray) == 0) { + iter->noMoreData = true; + return 0; + } + + iter->sttData->sttBlkArrayIdx = 0; + tBlockDataCreate(iter->sttData->blockData); + iter->sttData->blockDataIdx = 0; + + return tsdbSttIterNext(iter, NULL); +} + +static int32_t tsdbDataIterOpen(STsdbIter *iter) { + int32_t code; + + // SBrinBlk + code = tsdbDataFileReadBrinBlk(iter->dataData->reader, &iter->dataData->brinBlkArray); + if (code) return code; + + if (TARRAY2_SIZE(iter->dataData->brinBlkArray) == 0) { + iter->noMoreData = true; + return 0; + } + + iter->dataData->brinBlkArrayIdx = 0; + + // SBrinBlock + tBrinBlockInit(iter->dataData->brinBlock); + iter->dataData->brinBlockIdx = 0; + + // SBlockData + tBlockDataCreate(iter->dataData->blockData); + iter->dataData->blockDataIdx = 0; + + return tsdbDataIterNext(iter, NULL); +} + +static int32_t tsdbMemTableIterOpen(STsdbIter *iter) { + if (iter->memtData->memt->nRow == 0) { + iter->noMoreData = true; + return 0; + } + + iter->memtData->iter[0] = tRBTreeIterCreate(iter->memtData->memt->tbDataTree, 1); + return tsdbMemTableIterNext(iter, NULL); +} + +static int32_t tsdbSttIterClose(STsdbIter *iter) { + tBlockDataDestroy(iter->sttData->blockData); + return 0; +} + +static int32_t tsdbDataTombIterOpen(STsdbIter *iter) { + int32_t code; + + code = tsdbDataFileReadTombBlk(iter->dataTomb->reader, &iter->dataTomb->tombBlkArray); + if (code) return code; + + if (TARRAY2_SIZE(iter->dataTomb->tombBlkArray) == 0) { + iter->noMoreData = true; + return 0; + } + iter->dataTomb->tombBlkArrayIdx = 0; + + tTombBlockInit(iter->dataTomb->tombBlock); + iter->dataTomb->tombBlockIdx = 0; + + return tsdbDataTombIterNext(iter, NULL); +} + 
+static int32_t tsdbMemTombIterOpen(STsdbIter *iter) { + int32_t code; + + if (iter->memtTomb->memt->nDel == 0) { + iter->noMoreData = true; + return 0; + } + + iter->memtTomb->rbtIter[0] = tRBTreeIterCreate(iter->memtTomb->memt->tbDataTree, 1); + return tsdbMemTombIterNext(iter, NULL); +} + +static int32_t tsdbDataIterClose(STsdbIter *iter) { + tBrinBlockDestroy(iter->dataData->brinBlock); + tBlockDataDestroy(iter->dataData->blockData); + return 0; +} + +static int32_t tsdbMemTableIterClose(STsdbIter *iter) { return 0; } + +static int32_t tsdbSttTombIterNext(STsdbIter *iter, const TABLEID *tbid) { + while (!iter->noMoreData) { + for (; iter->sttTomb->tombBlockIdx < TOMB_BLOCK_SIZE(iter->sttTomb->tombBlock); iter->sttTomb->tombBlockIdx++) { + iter->record->suid = TARRAY2_GET(iter->sttTomb->tombBlock->suid, iter->sttTomb->tombBlockIdx); + iter->record->uid = TARRAY2_GET(iter->sttTomb->tombBlock->uid, iter->sttTomb->tombBlockIdx); + iter->record->version = TARRAY2_GET(iter->sttTomb->tombBlock->version, iter->sttTomb->tombBlockIdx); + + if (iter->filterByVersion && (iter->record->version < iter->range[0] || iter->record->version > iter->range[1])) { + continue; + } + + if (tbid && iter->record->suid == tbid->suid && iter->record->uid == tbid->uid) { + continue; + } + + iter->record->skey = TARRAY2_GET(iter->sttTomb->tombBlock->skey, iter->sttTomb->tombBlockIdx); + iter->record->ekey = TARRAY2_GET(iter->sttTomb->tombBlock->ekey, iter->sttTomb->tombBlockIdx); + iter->sttTomb->tombBlockIdx++; + goto _exit; + } + + if (iter->sttTomb->tombBlkArrayIdx >= TARRAY2_SIZE(iter->sttTomb->tombBlkArray)) { + iter->noMoreData = true; + goto _exit; + } + + for (; iter->sttTomb->tombBlkArrayIdx < TARRAY2_SIZE(iter->sttTomb->tombBlkArray); + iter->sttTomb->tombBlkArrayIdx++) { + const STombBlk *tombBlk = TARRAY2_GET_PTR(iter->sttTomb->tombBlkArray, iter->sttTomb->tombBlkArrayIdx); + + if (iter->filterByVersion && (tombBlk->maxVer < iter->range[0] || tombBlk->minVer > iter->range[1])) { 
+ continue; + } + + if (tbid && tbid->suid == tombBlk->minTbid.suid && tbid->uid == tombBlk->minTbid.uid && + tbid->suid == tombBlk->maxTbid.suid && tbid->uid == tombBlk->maxTbid.uid) { + continue; + } + + int32_t code = tsdbSttFileReadTombBlock(iter->sttTomb->reader, tombBlk, iter->sttTomb->tombBlock); + if (code) return code; + + iter->sttTomb->tombBlockIdx = 0; + iter->sttTomb->tombBlkArrayIdx++; + break; + } + } + +_exit: + return 0; +} + +static int32_t tsdbSttTombIterOpen(STsdbIter *iter) { + int32_t code; + + code = tsdbSttFileReadTombBlk(iter->sttTomb->reader, &iter->sttTomb->tombBlkArray); + if (code) return code; + + if (TARRAY2_SIZE(iter->sttTomb->tombBlkArray) == 0) { + iter->noMoreData = true; + return 0; + } + + iter->sttTomb->tombBlkArrayIdx = 0; + tTombBlockInit(iter->sttTomb->tombBlock); + iter->sttTomb->tombBlockIdx = 0; + + return tsdbSttTombIterNext(iter, NULL); +} + +int32_t tsdbIterOpen(const STsdbIterConfig *config, STsdbIter **iter) { + int32_t code; + + iter[0] = taosMemoryCalloc(1, sizeof(*iter[0])); + if (iter[0] == NULL) { + return TSDB_CODE_OUT_OF_MEMORY; + } + + iter[0]->type = config->type; + iter[0]->noMoreData = false; + iter[0]->filterByVersion = config->filterByVersion; + if (iter[0]->filterByVersion) { + iter[0]->range[0] = config->verRange[0]; + iter[0]->range[1] = config->verRange[1]; + } + + switch (config->type) { + case TSDB_ITER_TYPE_STT: + iter[0]->sttData->reader = config->sttReader; + code = tsdbSttIterOpen(iter[0]); + break; + case TSDB_ITER_TYPE_DATA: + iter[0]->dataData->reader = config->dataReader; + code = tsdbDataIterOpen(iter[0]); + break; + case TSDB_ITER_TYPE_MEMT: + iter[0]->memtData->memt = config->memt; + iter[0]->memtData->from[0] = config->from[0]; + code = tsdbMemTableIterOpen(iter[0]); + break; + case TSDB_ITER_TYPE_STT_TOMB: + iter[0]->sttTomb->reader = config->sttReader; + code = tsdbSttTombIterOpen(iter[0]); + break; + case TSDB_ITER_TYPE_DATA_TOMB: + iter[0]->dataTomb->reader = config->dataReader; + 
code = tsdbDataTombIterOpen(iter[0]); + break; + case TSDB_ITER_TYPE_MEMT_TOMB: + iter[0]->memtTomb->memt = config->memt; + code = tsdbMemTombIterOpen(iter[0]); + break; + default: + code = TSDB_CODE_INVALID_PARA; + ASSERTS(false, "Not implemented"); + } + + if (code) { + taosMemoryFree(iter[0]); + iter[0] = NULL; + } + return code; +} + +static int32_t tsdbSttTombIterClose(STsdbIter *iter) { + tTombBlockDestroy(iter->sttTomb->tombBlock); + return 0; +} + +static int32_t tsdbDataTombIterClose(STsdbIter *iter) { + tTombBlockDestroy(iter->dataTomb->tombBlock); + return 0; +} + +int32_t tsdbIterClose(STsdbIter **iter) { + switch (iter[0]->type) { + case TSDB_ITER_TYPE_STT: + tsdbSttIterClose(iter[0]); + break; + case TSDB_ITER_TYPE_DATA: + tsdbDataIterClose(iter[0]); + break; + case TSDB_ITER_TYPE_MEMT: + tsdbMemTableIterClose(iter[0]); + break; + case TSDB_ITER_TYPE_STT_TOMB: + tsdbSttTombIterClose(iter[0]); + break; + case TSDB_ITER_TYPE_DATA_TOMB: + tsdbDataTombIterClose(iter[0]); + break; + case TSDB_ITER_TYPE_MEMT_TOMB: + break; + default: + ASSERT(false); + } + taosMemoryFree(iter[0]); + iter[0] = NULL; + return 0; +} + +int32_t tsdbIterNext(STsdbIter *iter) { + switch (iter->type) { + case TSDB_ITER_TYPE_STT: + return tsdbSttIterNext(iter, NULL); + case TSDB_ITER_TYPE_DATA: + return tsdbDataIterNext(iter, NULL); + case TSDB_ITER_TYPE_MEMT: + return tsdbMemTableIterNext(iter, NULL); + case TSDB_ITER_TYPE_STT_TOMB: + return tsdbSttTombIterNext(iter, NULL); + case TSDB_ITER_TYPE_DATA_TOMB: + return tsdbDataTombIterNext(iter, NULL); + case TSDB_ITER_TYPE_MEMT_TOMB: + return tsdbMemTombIterNext(iter, NULL); + default: + ASSERT(false); + } + return 0; +} + +static int32_t tsdbIterSkipTableData(STsdbIter *iter, const TABLEID *tbid) { + switch (iter->type) { + case TSDB_ITER_TYPE_STT: + return tsdbSttIterNext(iter, tbid); + case TSDB_ITER_TYPE_DATA: + return tsdbDataIterNext(iter, tbid); + case TSDB_ITER_TYPE_MEMT: + return tsdbMemTableIterNext(iter, tbid); + case 
TSDB_ITER_TYPE_STT_TOMB: + return tsdbSttTombIterNext(iter, tbid); + case TSDB_ITER_TYPE_DATA_TOMB: + return tsdbDataTombIterNext(iter, tbid); + case TSDB_ITER_TYPE_MEMT_TOMB: + return tsdbMemTombIterNext(iter, tbid); + default: + ASSERT(false); + } + return 0; +} + +static int32_t tsdbIterCmprFn(const SRBTreeNode *n1, const SRBTreeNode *n2) { + STsdbIter *iter1 = TCONTAINER_OF(n1, STsdbIter, node); + STsdbIter *iter2 = TCONTAINER_OF(n2, STsdbIter, node); + return tRowInfoCmprFn(&iter1->row, &iter2->row); +} + +static int32_t tsdbTombIterCmprFn(const SRBTreeNode *n1, const SRBTreeNode *n2) { + STsdbIter *iter1 = TCONTAINER_OF(n1, STsdbIter, node); + STsdbIter *iter2 = TCONTAINER_OF(n2, STsdbIter, node); + + if (iter1->record->suid < iter2->record->suid) { + return -1; + } else if (iter1->record->suid > iter2->record->suid) { + return 1; + } + + if (iter1->record->uid < iter2->record->uid) { + return -1; + } else if (iter1->record->uid > iter2->record->uid) { + return 1; + } + + if (iter1->record->version < iter2->record->version) { + return -1; + } else if (iter1->record->version > iter2->record->version) { + return 1; + } + + return 0; +} + +// SIterMerger ================ +struct SIterMerger { + bool isTomb; + STsdbIter *iter; + SRBTree iterTree[1]; +}; + +int32_t tsdbIterMergerOpen(const TTsdbIterArray *iterArray, SIterMerger **merger, bool isTomb) { + STsdbIter *iter; + SRBTreeNode *node; + + merger[0] = taosMemoryCalloc(1, sizeof(*merger[0])); + if (merger[0] == NULL) { + return TSDB_CODE_OUT_OF_MEMORY; + } + + merger[0]->isTomb = isTomb; + if (isTomb) { + tRBTreeCreate(merger[0]->iterTree, tsdbTombIterCmprFn); + } else { + tRBTreeCreate(merger[0]->iterTree, tsdbIterCmprFn); + } + TARRAY2_FOREACH(iterArray, iter) { + if (iter->noMoreData) continue; + node = tRBTreePut(merger[0]->iterTree, iter->node); + ASSERT(node); + } + + return tsdbIterMergerNext(merger[0]); +} + +int32_t tsdbIterMergerClose(SIterMerger **merger) { + if (merger[0]) { + 
taosMemoryFree(merger[0]); + merger[0] = NULL; + } + return 0; +} + +int32_t tsdbIterMergerNext(SIterMerger *merger) { + int32_t code; + int32_t c; + SRBTreeNode *node; + + if (merger->iter) { + code = tsdbIterNext(merger->iter); + if (code) return code; + + if (merger->iter->noMoreData) { + merger->iter = NULL; + } else if ((node = tRBTreeMin(merger->iterTree))) { + c = merger->iterTree->cmprFn(merger->iter->node, node); + ASSERT(c); + if (c > 0) { + node = tRBTreePut(merger->iterTree, merger->iter->node); + ASSERT(node); + merger->iter = NULL; + } + } + } + + if (merger->iter == NULL && (node = tRBTreeDropMin(merger->iterTree))) { + merger->iter = TCONTAINER_OF(node, STsdbIter, node); + } + + return 0; +} + +SRowInfo *tsdbIterMergerGetData(SIterMerger *merger) { + ASSERT(!merger->isTomb); + return merger->iter ? merger->iter->row : NULL; +} + +STombRecord *tsdbIterMergerGetTombRecord(SIterMerger *merger) { + ASSERT(merger->isTomb); + return merger->iter ? merger->iter->record : NULL; +} + +int32_t tsdbIterMergerSkipTableData(SIterMerger *merger, const TABLEID *tbid) { + int32_t code; + int32_t c; + SRBTreeNode *node; + + while (merger->iter && tbid->suid == merger->iter->row->suid && tbid->uid == merger->iter->row->uid) { + code = tsdbIterSkipTableData(merger->iter, tbid); // reuse the function-level code instead of shadowing it + if (code) return code; + + if (merger->iter->noMoreData) { + merger->iter = NULL; + } else if ((node = tRBTreeMin(merger->iterTree))) { + c = merger->iterTree->cmprFn(merger->iter->node, node); + ASSERT(c); + if (c > 0) { + node = tRBTreePut(merger->iterTree, merger->iter->node); + ASSERT(node); + merger->iter = NULL; + } + } + + if (!merger->iter && (node = tRBTreeDropMin(merger->iterTree))) { + merger->iter = TCONTAINER_OF(node, STsdbIter, node); + } + } + + return 0; +} \ No newline at end of file diff --git a/source/dnode/vnode/src/tsdb/tsdbIter.h b/source/dnode/vnode/src/tsdb/tsdbIter.h new file mode 100644 index 0000000000..367901bd84 --- /dev/null +++
b/source/dnode/vnode/src/tsdb/tsdbIter.h @@ -0,0 +1,73 @@ +/* + * Copyright (c) 2019 TAOS Data, Inc. + * + * This program is free software: you can use, redistribute, and/or modify + * it under the terms of the GNU Affero General Public License, version 3 + * or later ("AGPL"), as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see . + */ + +#include "trbtree.h" +#include "tsdbDataFileRW.h" +#include "tsdbDef.h" +#include "tsdbSttFileRW.h" + +#ifndef _TSDB_ITER_H_ +#define _TSDB_ITER_H_ + +#ifdef __cplusplus +extern "C" { +#endif + +typedef struct SIterMerger SIterMerger; +typedef struct STsdbIter STsdbIter; +typedef TARRAY2(STsdbIter *) TTsdbIterArray; + +typedef enum { + TSDB_ITER_TYPE_STT = 1, + TSDB_ITER_TYPE_DATA, + TSDB_ITER_TYPE_MEMT, + TSDB_ITER_TYPE_STT_TOMB, + TSDB_ITER_TYPE_DATA_TOMB, + TSDB_ITER_TYPE_MEMT_TOMB, +} EIterType; + +typedef struct { + EIterType type; + union { + SSttFileReader *sttReader; // TSDB_ITER_TYPE_STT || TSDB_ITER_TYPE_STT_TOMB + SDataFileReader *dataReader; // TSDB_ITER_TYPE_DATA || TSDB_ITER_TYPE_DATA_TOMB + struct { + SMemTable *memt; // TSDB_ITER_TYPE_MEMT_TOMB + TSDBKEY from[1]; + }; // TSDB_ITER_TYPE_MEMT + }; + bool filterByVersion; + int64_t verRange[2]; +} STsdbIterConfig; + +// STsdbIter =============== +int32_t tsdbIterOpen(const STsdbIterConfig *config, STsdbIter **iter); +int32_t tsdbIterClose(STsdbIter **iter); +int32_t tsdbIterNext(STsdbIter *iter); + +// SIterMerger =============== +int32_t tsdbIterMergerOpen(const TTsdbIterArray *iterArray, SIterMerger **merger, bool isTomb); +int32_t tsdbIterMergerClose(SIterMerger **merger); +int32_t tsdbIterMergerNext(SIterMerger *merger); +int32_t 
tsdbIterMergerSkipTableData(SIterMerger *merger, const TABLEID *tbid); + +SRowInfo *tsdbIterMergerGetData(SIterMerger *merger); +STombRecord *tsdbIterMergerGetTombRecord(SIterMerger *merger); + +#ifdef __cplusplus +} +#endif + +#endif /*_TSDB_ITER_H_*/ \ No newline at end of file diff --git a/source/dnode/vnode/src/tsdb/tsdbMemTable.c b/source/dnode/vnode/src/tsdb/tsdbMemTable.c index 6d223e00c5..ee3abf7559 100644 --- a/source/dnode/vnode/src/tsdb/tsdbMemTable.c +++ b/source/dnode/vnode/src/tsdb/tsdbMemTable.c @@ -38,6 +38,16 @@ static int32_t tsdbInsertRowDataToTable(SMemTable *pMemTable, STbData *pTbData, static int32_t tsdbInsertColDataToTable(SMemTable *pMemTable, STbData *pTbData, int64_t version, SSubmitTbData *pSubmitTbData, int32_t *affectedRows); +static int32_t tTbDataCmprFn(const SRBTreeNode *n1, const SRBTreeNode *n2) { + STbData *tbData1 = TCONTAINER_OF(n1, STbData, rbtn); + STbData *tbData2 = TCONTAINER_OF(n2, STbData, rbtn); + if (tbData1->suid < tbData2->suid) return -1; + if (tbData1->suid > tbData2->suid) return 1; + if (tbData1->uid < tbData2->uid) return -1; + if (tbData1->uid > tbData2->uid) return 1; + return 0; +} + int32_t tsdbMemTableCreate(STsdb *pTsdb, SMemTable **ppMemTable) { int32_t code = 0; SMemTable *pMemTable = NULL; @@ -66,6 +76,7 @@ int32_t tsdbMemTableCreate(STsdb *pTsdb, SMemTable **ppMemTable) { goto _err; } vnodeBufPoolRef(pMemTable->pPool); + tRBTreeCreate(pMemTable->tbDataTree, tTbDataCmprFn); *ppMemTable = pMemTable; return code; @@ -406,6 +417,8 @@ static int32_t tsdbGetOrCreateTbData(SMemTable *pMemTable, tb_uid_t suid, tb_uid pMemTable->aBucket[idx] = pTbData; pMemTable->nTbData++; + tRBTreePut(pMemTable->tbDataTree, pTbData->rbtn); + taosWUnLockLatch(&pMemTable->latch); _exit: diff --git a/source/dnode/vnode/src/tsdb/tsdbMerge.c b/source/dnode/vnode/src/tsdb/tsdbMerge.c new file mode 100644 index 0000000000..ec0ea3c60f --- /dev/null +++ b/source/dnode/vnode/src/tsdb/tsdbMerge.c @@ -0,0 +1,454 @@ +/* + * Copyright (c) 
2019 TAOS Data, Inc. + * + * This program is free software: you can use, redistribute, and/or modify + * it under the terms of the GNU Affero General Public License, version 3 + * or later ("AGPL"), as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see . + */ + +#include "tsdbMerge.h" + +typedef struct { + STsdb *tsdb; + TFileSetArray *fsetArr; + + int32_t sttTrigger; + int32_t maxRow; + int32_t minRow; + int32_t szPage; + int8_t cmprAlg; + int64_t compactVersion; + int64_t cid; + + // context + struct { + bool opened; + int64_t now; + STFileSet *fset; + bool toData; + int32_t level; + SSttLvl *lvl; + TABLEID tbid[1]; + } ctx[1]; + + TFileOpArray fopArr[1]; + + // reader + TSttFileReaderArray sttReaderArr[1]; + // iter + TTsdbIterArray dataIterArr[1]; + SIterMerger *dataIterMerger; + TTsdbIterArray tombIterArr[1]; + SIterMerger *tombIterMerger; + // writer + SFSetWriter *writer; +} SMerger; + +static int32_t tsdbMergerOpen(SMerger *merger) { + merger->ctx->now = taosGetTimestampSec(); + merger->maxRow = merger->tsdb->pVnode->config.tsdbCfg.maxRows; + merger->minRow = merger->tsdb->pVnode->config.tsdbCfg.minRows; + merger->szPage = merger->tsdb->pVnode->config.tsdbPageSize; + merger->cmprAlg = merger->tsdb->pVnode->config.tsdbCfg.compression; + merger->compactVersion = INT64_MAX; + merger->cid = tsdbFSAllocEid(merger->tsdb->pFS); + merger->ctx->opened = true; + return 0; +} + +static int32_t tsdbMergerClose(SMerger *merger) { + int32_t code = 0; + int32_t lino = 0; + SVnode *pVnode = merger->tsdb->pVnode; + + // edit file system + code = tsdbFSEditBegin(merger->tsdb->pFS, merger->fopArr, TSDB_FEDIT_MERGE); + TSDB_CHECK_CODE(code, lino, _exit); + + 
taosThreadRwlockWrlock(&merger->tsdb->rwLock); + code = tsdbFSEditCommit(merger->tsdb->pFS); + if (code) { + taosThreadRwlockUnlock(&merger->tsdb->rwLock); + TSDB_CHECK_CODE(code, lino, _exit); + } + taosThreadRwlockUnlock(&merger->tsdb->rwLock); + + ASSERT(merger->writer == NULL); + ASSERT(merger->dataIterMerger == NULL); + ASSERT(merger->tombIterMerger == NULL); + ASSERT(TARRAY2_SIZE(merger->dataIterArr) == 0); + ASSERT(TARRAY2_SIZE(merger->tombIterArr) == 0); + ASSERT(TARRAY2_SIZE(merger->sttReaderArr) == 0); + + // clear the merge + TARRAY2_DESTROY(merger->tombIterArr, NULL); + TARRAY2_DESTROY(merger->dataIterArr, NULL); + TARRAY2_DESTROY(merger->sttReaderArr, NULL); + TARRAY2_DESTROY(merger->fopArr, NULL); + +_exit: + if (code) { + TSDB_ERROR_LOG(TD_VID(pVnode), lino, code); + } + return code; +} + +static int32_t tsdbMergeFileSetBeginOpenReader(SMerger *merger) { + int32_t code = 0; + int32_t lino = 0; + + merger->ctx->toData = true; + merger->ctx->level = 0; + + // TODO: optimize merge strategy + for (int32_t i = 0;; ++i) { + if (i >= TARRAY2_SIZE(merger->ctx->fset->lvlArr)) { + merger->ctx->lvl = NULL; + break; + } + + merger->ctx->lvl = TARRAY2_GET(merger->ctx->fset->lvlArr, i); + if (merger->ctx->lvl->level != merger->ctx->level || + TARRAY2_SIZE(merger->ctx->lvl->fobjArr) + 1 < merger->sttTrigger) { + merger->ctx->toData = false; + merger->ctx->lvl = NULL; + break; + } + + merger->ctx->level++; + + STFileObj *fobj; + int32_t numFile = 0; + TARRAY2_FOREACH(merger->ctx->lvl->fobjArr, fobj) { + if (numFile == merger->sttTrigger) { + break; + } + + STFileOp op = { + .optype = TSDB_FOP_REMOVE, + .fid = merger->ctx->fset->fid, + .of = fobj->f[0], + }; + code = TARRAY2_APPEND(merger->fopArr, op); + TSDB_CHECK_CODE(code, lino, _exit); + + SSttFileReader *reader; + SSttFileReaderConfig config = { + .tsdb = merger->tsdb, + .szPage = merger->szPage, + .file[0] = fobj->f[0], + }; + + code = tsdbSttFileReaderOpen(fobj->fname, &config, &reader); + 
TSDB_CHECK_CODE(code, lino, _exit); + + code = TARRAY2_APPEND(merger->sttReaderArr, reader); + TSDB_CHECK_CODE(code, lino, _exit); + + numFile++; + } + } + +_exit: + if (code) { + TSDB_ERROR_LOG(TD_VID(merger->tsdb->pVnode), lino, code); + } + return code; +} + +static int32_t tsdbMergeFileSetBeginOpenIter(SMerger *merger) { + int32_t code = 0; + int32_t lino = 0; + int32_t vid = TD_VID(merger->tsdb->pVnode); + + SSttFileReader *sttReader; + TARRAY2_FOREACH(merger->sttReaderArr, sttReader) { + STsdbIter *iter; + STsdbIterConfig config = {0}; + + // data iter + config.type = TSDB_ITER_TYPE_STT; + config.sttReader = sttReader; + + code = tsdbIterOpen(&config, &iter); + TSDB_CHECK_CODE(code, lino, _exit); + + code = TARRAY2_APPEND(merger->dataIterArr, iter); + TSDB_CHECK_CODE(code, lino, _exit); + + // tomb iter + config.type = TSDB_ITER_TYPE_STT_TOMB; + config.sttReader = sttReader; + + code = tsdbIterOpen(&config, &iter); + TSDB_CHECK_CODE(code, lino, _exit); + + code = TARRAY2_APPEND(merger->tombIterArr, iter); + TSDB_CHECK_CODE(code, lino, _exit); + } + + code = tsdbIterMergerOpen(merger->dataIterArr, &merger->dataIterMerger, false); + TSDB_CHECK_CODE(code, lino, _exit); + + code = tsdbIterMergerOpen(merger->tombIterArr, &merger->tombIterMerger, true); + TSDB_CHECK_CODE(code, lino, _exit); + +_exit: + if (code) { + TSDB_ERROR_LOG(vid, lino, code); + } + return code; +} + +static int32_t tsdbMergeFileSetBeginOpenWriter(SMerger *merger) { + int32_t code = 0; + int32_t lino = 0; + int32_t vid = TD_VID(merger->tsdb->pVnode); + + SDiskID did; + int32_t level = tsdbFidLevel(merger->ctx->fset->fid, &merger->tsdb->keepCfg, merger->ctx->now); + if (tfsAllocDisk(merger->tsdb->pVnode->pTfs, level, &did) < 0) { + code = TSDB_CODE_FS_NO_VALID_DISK; + TSDB_CHECK_CODE(code, lino, _exit); + } + tfsMkdirRecurAt(merger->tsdb->pVnode->pTfs, merger->tsdb->path, did); + SFSetWriterConfig config = { + .tsdb = merger->tsdb, + .toSttOnly = true, + .compactVersion = 
merger->compactVersion, + .minRow = merger->minRow, + .maxRow = merger->maxRow, + .szPage = merger->szPage, + .cmprAlg = merger->cmprAlg, + .fid = merger->ctx->fset->fid, + .cid = merger->cid, + .did = did, + .level = merger->ctx->level, + }; + + if (merger->ctx->toData) { + config.toSttOnly = false; + + for (int32_t ftype = 0; ftype < TSDB_FTYPE_MAX; ++ftype) { + if (merger->ctx->fset->farr[ftype]) { + config.files[ftype].exist = true; + config.files[ftype].file = merger->ctx->fset->farr[ftype]->f[0]; + } else { + config.files[ftype].exist = false; + } + } + } + + code = tsdbFSetWriterOpen(&config, &merger->writer); + TSDB_CHECK_CODE(code, lino, _exit); + +_exit: + if (code) { + TSDB_ERROR_LOG(vid, lino, code); + } + return code; +} + +static int32_t tsdbMergeFileSetBegin(SMerger *merger) { + int32_t code = 0; + int32_t lino = 0; + + ASSERT(TARRAY2_SIZE(merger->sttReaderArr) == 0); + ASSERT(TARRAY2_SIZE(merger->dataIterArr) == 0); + ASSERT(merger->dataIterMerger == NULL); + ASSERT(merger->writer == NULL); + + merger->ctx->tbid->suid = 0; + merger->ctx->tbid->uid = 0; + + // open reader + code = tsdbMergeFileSetBeginOpenReader(merger); + TSDB_CHECK_CODE(code, lino, _exit); + + // open iterator + code = tsdbMergeFileSetBeginOpenIter(merger); + TSDB_CHECK_CODE(code, lino, _exit); + + // open writer + code = tsdbMergeFileSetBeginOpenWriter(merger); + TSDB_CHECK_CODE(code, lino, _exit); + +_exit: + if (code) { + TSDB_ERROR_LOG(TD_VID(merger->tsdb->pVnode), lino, code); + } + return code; +} + +static int32_t tsdbMergeFileSetEndCloseWriter(SMerger *merger) { + return tsdbFSetWriterClose(&merger->writer, 0, merger->fopArr); +} + +static int32_t tsdbMergeFileSetEndCloseIter(SMerger *merger) { + tsdbIterMergerClose(&merger->tombIterMerger); + TARRAY2_CLEAR(merger->tombIterArr, tsdbIterClose); + tsdbIterMergerClose(&merger->dataIterMerger); + TARRAY2_CLEAR(merger->dataIterArr, tsdbIterClose); + return 0; +} + +static int32_t tsdbMergeFileSetEndCloseReader(SMerger *merger) { 
+ TARRAY2_CLEAR(merger->sttReaderArr, tsdbSttFileReaderClose); + return 0; +} + +static int32_t tsdbMergeFileSetEnd(SMerger *merger) { + int32_t code = 0; + int32_t lino = 0; + + code = tsdbMergeFileSetEndCloseWriter(merger); + TSDB_CHECK_CODE(code, lino, _exit); + + code = tsdbMergeFileSetEndCloseIter(merger); + TSDB_CHECK_CODE(code, lino, _exit); + + code = tsdbMergeFileSetEndCloseReader(merger); + TSDB_CHECK_CODE(code, lino, _exit); + +_exit: + if (code) { + TSDB_ERROR_LOG(TD_VID(merger->tsdb->pVnode), lino, code); + } + return code; +} + +static int32_t tsdbMergeFileSet(SMerger *merger, STFileSet *fset) { + int32_t code = 0; + int32_t lino = 0; + + merger->ctx->fset = fset; + code = tsdbMergeFileSetBegin(merger); + TSDB_CHECK_CODE(code, lino, _exit); + + // data + SMetaInfo info; + SRowInfo *row; + merger->ctx->tbid->suid = 0; + merger->ctx->tbid->uid = 0; + while ((row = tsdbIterMergerGetData(merger->dataIterMerger)) != NULL) { + if (row->uid != merger->ctx->tbid->uid) { + merger->ctx->tbid->uid = row->uid; + merger->ctx->tbid->suid = row->suid; + + if (metaGetInfo(merger->tsdb->pVnode->pMeta, row->uid, &info, NULL) != 0) { + code = tsdbIterMergerSkipTableData(merger->dataIterMerger, merger->ctx->tbid); + TSDB_CHECK_CODE(code, lino, _exit); + continue; + } + } + + code = tsdbFSetWriteRow(merger->writer, row); + TSDB_CHECK_CODE(code, lino, _exit); + + code = tsdbIterMergerNext(merger->dataIterMerger); + TSDB_CHECK_CODE(code, lino, _exit); + } + + // tomb + merger->ctx->tbid->suid = 0; + merger->ctx->tbid->uid = 0; + for (STombRecord *record; (record = tsdbIterMergerGetTombRecord(merger->tombIterMerger)) != NULL;) { + if (record->uid != merger->ctx->tbid->uid) { + merger->ctx->tbid->uid = record->uid; + merger->ctx->tbid->suid = record->suid; + + if (metaGetInfo(merger->tsdb->pVnode->pMeta, record->uid, &info, NULL) != 0) { + code = tsdbIterMergerSkipTableData(merger->tombIterMerger, merger->ctx->tbid); + TSDB_CHECK_CODE(code, lino, _exit); + continue; + } + } + 
code = tsdbFSetWriteTombRecord(merger->writer, record); + TSDB_CHECK_CODE(code, lino, _exit); + + code = tsdbIterMergerNext(merger->tombIterMerger); + TSDB_CHECK_CODE(code, lino, _exit); + } + + code = tsdbMergeFileSetEnd(merger); + TSDB_CHECK_CODE(code, lino, _exit); + +_exit: + if (code) { + tsdbError("vgId:%d %s failed at line %d since %s", TD_VID(merger->tsdb->pVnode), __func__, lino, tstrerror(code)); + } else { + tsdbDebug("vgId:%d %s done, fid:%d", TD_VID(merger->tsdb->pVnode), __func__, fset->fid); + } + return code; +} + +static int32_t tsdbDoMerge(SMerger *merger) { + int32_t code = 0; + int32_t lino = 0; + + STFileSet *fset; + TARRAY2_FOREACH(merger->fsetArr, fset) { + if (TARRAY2_SIZE(fset->lvlArr) == 0) continue; + + SSttLvl *lvl = TARRAY2_FIRST(fset->lvlArr); + + if (lvl->level != 0 || TARRAY2_SIZE(lvl->fobjArr) < merger->sttTrigger) continue; + + if (!merger->ctx->opened) { + code = tsdbMergerOpen(merger); + TSDB_CHECK_CODE(code, lino, _exit); + } + + code = tsdbMergeFileSet(merger, fset); + TSDB_CHECK_CODE(code, lino, _exit); + } + + if (merger->ctx->opened) { + code = tsdbMergerClose(merger); + TSDB_CHECK_CODE(code, lino, _exit); + } + +_exit: + if (code) { + TSDB_ERROR_LOG(TD_VID(merger->tsdb->pVnode), lino, code); + } else { + tsdbDebug("vgId:%d %s done", TD_VID(merger->tsdb->pVnode), __func__); + } + return code; +} + +int32_t tsdbMerge(void *arg) { + int32_t code = 0; + int32_t lino = 0; + STsdb *tsdb = (STsdb *)arg; + + SMerger merger[1] = {{ + .tsdb = tsdb, + .sttTrigger = tsdb->pVnode->config.sttTrigger, + }}; + + ASSERT(merger->sttTrigger > 1); + + code = tsdbFSCreateCopySnapshot(tsdb->pFS, &merger->fsetArr); + TSDB_CHECK_CODE(code, lino, _exit); + + code = tsdbDoMerge(merger); + TSDB_CHECK_CODE(code, lino, _exit); + + tsdbFSDestroyCopySnapshot(&merger->fsetArr); + +_exit: + if (code) { + TSDB_ERROR_LOG(TD_VID(tsdb->pVnode), lino, code); + } else if (merger->ctx->opened) { + tsdbDebug("vgId:%d %s done", TD_VID(tsdb->pVnode), __func__); + } 
+ return code; +} diff --git a/source/dnode/vnode/src/tsdb/tsdbMerge.h b/source/dnode/vnode/src/tsdb/tsdbMerge.h new file mode 100644 index 0000000000..69d802fd27 --- /dev/null +++ b/source/dnode/vnode/src/tsdb/tsdbMerge.h @@ -0,0 +1,40 @@ +/* + * Copyright (c) 2019 TAOS Data, Inc. + * + * This program is free software: you can use, redistribute, and/or modify + * it under the terms of the GNU Affero General Public License, version 3 + * or later ("AGPL"), as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see . + */ + +#include "tsdbDataFileRW.h" +#include "tsdbFS2.h" +#include "tsdbFSetRW.h" +#include "tsdbIter.h" +#include "tsdbSttFileRW.h" +#include "tsdbUtil2.h" + +#ifndef _TD_TSDB_MERGE_H_ +#define _TD_TSDB_MERGE_H_ + +#ifdef __cplusplus +extern "C" { +#endif + +/* Exposed Handle */ + +/* Exposed APIs */ + +/* Exposed Structs */ + +#ifdef __cplusplus +} +#endif + +#endif /*_TD_TSDB_MERGE_H_*/ \ No newline at end of file diff --git a/source/dnode/vnode/src/tsdb/tsdbMergeTree.c b/source/dnode/vnode/src/tsdb/tsdbMergeTree.c index 79f4a17f65..ea5b574ced 100644 --- a/source/dnode/vnode/src/tsdb/tsdbMergeTree.c +++ b/source/dnode/vnode/src/tsdb/tsdbMergeTree.c @@ -14,6 +14,11 @@ */ #include "tsdb.h" +#include "tsdbFSet2.h" +#include "tsdbReadUtil.h" +#include "tsdbSttFileRW.h" + +static void tLDataIterClose2(SLDataIter *pIter); // SLDataIter ================================================= SSttBlockLoadInfo *tCreateLastBlockLoadInfo(STSchema *pSchema, int16_t *colList, int32_t numOfCols, @@ -24,8 +29,6 @@ SSttBlockLoadInfo *tCreateLastBlockLoadInfo(STSchema *pSchema, int16_t *colList, return NULL; } - pLoadInfo->numOfStt = numOfSttTrigger; - for (int32_t i = 0; 
i < numOfSttTrigger; ++i) { pLoadInfo[i].blockIndex[0] = -1; pLoadInfo[i].blockIndex[1] = -1; @@ -50,8 +53,37 @@ SSttBlockLoadInfo *tCreateLastBlockLoadInfo(STSchema *pSchema, int16_t *colList, return pLoadInfo; } +SSttBlockLoadInfo *tCreateOneLastBlockLoadInfo(STSchema *pSchema, int16_t *colList, int32_t numOfCols) { + SSttBlockLoadInfo *pLoadInfo = taosMemoryCalloc(1, sizeof(SSttBlockLoadInfo)); + if (pLoadInfo == NULL) { + terrno = TSDB_CODE_OUT_OF_MEMORY; + return NULL; + } + + pLoadInfo->blockIndex[0] = -1; + pLoadInfo->blockIndex[1] = -1; + pLoadInfo->currentLoadBlockIndex = 1; + + int32_t code = tBlockDataCreate(&pLoadInfo->blockData[0]); + if (code) { + terrno = code; + } + + code = tBlockDataCreate(&pLoadInfo->blockData[1]); + if (code) { + terrno = code; + } + + pLoadInfo->aSttBlk = taosArrayInit(4, sizeof(SSttBlk)); + pLoadInfo->pSchema = pSchema; + pLoadInfo->colIds = colList; + pLoadInfo->numOfCols = numOfCols; + + return pLoadInfo; +} + void resetLastBlockLoadInfo(SSttBlockLoadInfo *pLoadInfo) { - for (int32_t i = 0; i < pLoadInfo->numOfStt; ++i) { + for (int32_t i = 0; i < 1; ++i) { pLoadInfo[i].currentLoadBlockIndex = 1; pLoadInfo[i].blockIndex[0] = -1; pLoadInfo[i].blockIndex[1] = -1; @@ -65,18 +97,24 @@ void resetLastBlockLoadInfo(SSttBlockLoadInfo *pLoadInfo) { } void getLastBlockLoadInfo(SSttBlockLoadInfo *pLoadInfo, int64_t *blocks, double *el) { - for (int32_t i = 0; i < pLoadInfo->numOfStt; ++i) { + for (int32_t i = 0; i < 1; ++i) { *el += pLoadInfo[i].elapsedTime; *blocks += pLoadInfo[i].loadBlocks; } } +static void freeTombBlock(void *param) { + STombBlock **pTombBlock = (STombBlock **)param; + tTombBlockDestroy(*pTombBlock); + taosMemoryFree(*pTombBlock); +} + void *destroyLastBlockLoadInfo(SSttBlockLoadInfo *pLoadInfo) { if (pLoadInfo == NULL) { return NULL; } - for (int32_t i = 0; i < pLoadInfo->numOfStt; ++i) { + for (int32_t i = 0; i < 1; ++i) { pLoadInfo[i].currentLoadBlockIndex = 1; pLoadInfo[i].blockIndex[0] = -1; 
pLoadInfo[i].blockIndex[1] = -1; @@ -91,6 +129,33 @@ void *destroyLastBlockLoadInfo(SSttBlockLoadInfo *pLoadInfo) { return NULL; } +static void destroyLDataIter(SLDataIter *pIter) { + tLDataIterClose2(pIter); + destroyLastBlockLoadInfo(pIter->pBlockLoadInfo); + taosMemoryFree(pIter); +} + +void *destroySttBlockReader(SArray *pLDataIterArray, int64_t *blocks, double *el) { + if (pLDataIterArray == NULL) { + return NULL; + } + + int32_t numOfLevel = taosArrayGetSize(pLDataIterArray); + for (int32_t i = 0; i < numOfLevel; ++i) { + SArray *pList = taosArrayGetP(pLDataIterArray, i); + for (int32_t j = 0; j < taosArrayGetSize(pList); ++j) { + SLDataIter *pIter = taosArrayGetP(pList, j); + *el += pIter->pBlockLoadInfo->elapsedTime; + *blocks += pIter->pBlockLoadInfo->loadBlocks; + destroyLDataIter(pIter); + } + taosArrayDestroy(pList); + } + + taosArrayDestroy(pLDataIterArray); + return NULL; +} + static SBlockData *loadLastBlock(SLDataIter *pIter, const char *idStr) { int32_t code = 0; @@ -122,20 +187,8 @@ static SBlockData *loadLastBlock(SLDataIter *pIter, const char *idStr) { int64_t st = taosGetTimestampUs(); SBlockData *pBlock = &pInfo->blockData[pInfo->currentLoadBlockIndex]; - - TABLEID id = {0}; - if (pIter->pSttBlk->suid != 0) { - id.suid = pIter->pSttBlk->suid; - } else { - id.uid = pIter->uid; - } - - code = tBlockDataInit(pBlock, &id, pInfo->pSchema, pInfo->colIds, pInfo->numOfCols); - if (code != TSDB_CODE_SUCCESS) { - goto _exit; - } - - code = tsdbReadSttBlock(pIter->pReader, pIter->iStt, pIter->pSttBlk, pBlock); + code = tsdbSttFileReadBlockDataByColumn(pIter->pReader, pIter->pSttBlk, pBlock, pInfo->pSchema, &pInfo->colIds[1], + pInfo->numOfCols - 1); if (code != TSDB_CODE_SUCCESS) { goto _exit; } @@ -255,74 +308,153 @@ static int32_t binarySearchForStartRowIndex(uint64_t *uidList, int32_t num, uint int32_t tLDataIterOpen(struct SLDataIter *pIter, SDataFReader *pReader, int32_t iStt, int8_t backward, uint64_t suid, uint64_t uid, STimeWindow *pTimeWindow, 
SVersionRange *pRange, SSttBlockLoadInfo *pBlockLoadInfo, const char *idStr, bool strictTimeRange) { + return 0; +} + +static int32_t extractSttBlockInfo(SLDataIter *pIter, const TSttBlkArray *pArray, SSttBlockLoadInfo *pBlockLoadInfo, + uint64_t suid) { + if (TARRAY2_SIZE(pArray) <= 0) { + return TSDB_CODE_SUCCESS; + } + + SSttBlk *pStart = &pArray->data[0]; + SSttBlk *pEnd = &pArray->data[TARRAY2_SIZE(pArray) - 1]; + + // all identical + if (pStart->suid == pEnd->suid) { + if (pStart->suid != suid) { // no qualified stt block existed + taosArrayClear(pBlockLoadInfo->aSttBlk); + pIter->iSttBlk = -1; + return TSDB_CODE_SUCCESS; + } else { // all blocks are qualified + taosArrayClear(pBlockLoadInfo->aSttBlk); + taosArrayAddBatch(pBlockLoadInfo->aSttBlk, pArray->data, pArray->size); + } + } else { + SArray *pTmp = taosArrayInit(TARRAY2_SIZE(pArray), sizeof(SSttBlk)); + for (int32_t i = 0; i < TARRAY2_SIZE(pArray); ++i) { + SSttBlk *p = &pArray->data[i]; + if (p->suid < suid) { + continue; + } + + if (p->suid == suid) { + taosArrayPush(pTmp, p); + } else if (p->suid > suid) { + break; + } + } + + taosArrayDestroy(pBlockLoadInfo->aSttBlk); + pBlockLoadInfo->aSttBlk = pTmp; + } + + return TSDB_CODE_SUCCESS; +} + +static int32_t uidComparFn(const void *p1, const void *p2) { + const uint64_t *uid1 = p1; + const uint64_t *uid2 = p2; + return (*uid1 < *uid2) ? -1 : ((*uid1 > *uid2) ? 1 : 0); // explicit 3-way compare: a uint64 difference truncated to int32 can yield 0 or the wrong sign +} + +static bool existsFromSttBlkStatis(const TStatisBlkArray *pStatisBlkArray, uint64_t suid, uint64_t uid, + SSttFileReader *pReader) { + if (TARRAY2_SIZE(pStatisBlkArray) <= 0) { + return true; + } + + int32_t i = 0; + for (i = 0; i < TARRAY2_SIZE(pStatisBlkArray); ++i) { + SStatisBlk *p = &pStatisBlkArray->data[i]; + if (p->minTbid.suid <= suid && p->maxTbid.suid >= suid) { + break; + } + } + + // for (; i < TARRAY2_SIZE(pStatisBlkArray); ++i) { + // SStatisBlk *p = &pStatisBlkArray->data[i]; + // if (p->minTbid.uid <= uid && p->maxTbid.uid >= uid) { + // break; + // } + // + // if (p->maxTbid.uid < uid) { + //
break; + // } + // } + + if (i >= TARRAY2_SIZE(pStatisBlkArray)) { + return false; + } + + SStatisBlk *p = &pStatisBlkArray->data[i]; + STbStatisBlock block = {0}; + tsdbSttFileReadStatisBlock(pReader, p, &block); + + int32_t index = tarray2SearchIdx(block.uid, &uid, sizeof(int64_t), uidComparFn, TD_EQ); + tStatisBlockDestroy(&block); + + return (index != -1); +} + +int32_t tLDataIterOpen2(struct SLDataIter *pIter, SSttFileReader *pSttFileReader, int32_t iStt, int8_t backward, + uint64_t suid, uint64_t uid, STimeWindow *pTimeWindow, SVersionRange *pRange, + SSttBlockLoadInfo *pBlockLoadInfo, const char *idStr, bool strictTimeRange, + _load_tomb_fn loadTombFn, void *pReader1) { int32_t code = TSDB_CODE_SUCCESS; pIter->uid = uid; - pIter->pReader = pReader; pIter->iStt = iStt; pIter->backward = backward; pIter->verRange.minVer = pRange->minVer; pIter->verRange.maxVer = pRange->maxVer; pIter->timeWindow.skey = pTimeWindow->skey; pIter->timeWindow.ekey = pTimeWindow->ekey; - + pIter->pReader = pSttFileReader; pIter->pBlockLoadInfo = pBlockLoadInfo; if (!pBlockLoadInfo->sttBlockLoaded) { int64_t st = taosGetTimestampUs(); + + const TSttBlkArray *pSttBlkArray = NULL; pBlockLoadInfo->sttBlockLoaded = true; - code = tsdbReadSttBlk(pReader, iStt, pBlockLoadInfo->aSttBlk); - if (code) { + // load the stt block info for each stt-block + code = tsdbSttFileReadSttBlk(pIter->pReader, &pSttBlkArray); + if (code != TSDB_CODE_SUCCESS) { + tsdbError("load stt blk failed, code:%s, %s", tstrerror(code), idStr); return code; } - // only apply to the child tables, ordinary tables will not incur this filter procedure. 
- size_t size = taosArrayGetSize(pBlockLoadInfo->aSttBlk); - - if (size >= 1) { - SSttBlk *pStart = taosArrayGet(pBlockLoadInfo->aSttBlk, 0); - SSttBlk *pEnd = taosArrayGet(pBlockLoadInfo->aSttBlk, size - 1); - - // all identical - if (pStart->suid == pEnd->suid) { - if (pStart->suid != suid) { - // no qualified stt block existed - taosArrayClear(pBlockLoadInfo->aSttBlk); - - pIter->iSttBlk = -1; - double el = (taosGetTimestampUs() - st) / 1000.0; - tsdbDebug("load the last file info completed, elapsed time:%.2fms, %s", el, idStr); - return code; - } - } else { - SArray *pTmp = taosArrayInit(size, sizeof(SSttBlk)); - for (int32_t i = 0; i < size; ++i) { - SSttBlk *p = taosArrayGet(pBlockLoadInfo->aSttBlk, i); - uint64_t s = p->suid; - if (s < suid) { - continue; - } - - if (s == suid) { - taosArrayPush(pTmp, p); - } else if (s > suid) { - break; - } - } - - taosArrayDestroy(pBlockLoadInfo->aSttBlk); - pBlockLoadInfo->aSttBlk = pTmp; - } + code = extractSttBlockInfo(pIter, pSttBlkArray, pBlockLoadInfo, suid); + if (code != TSDB_CODE_SUCCESS) { + tsdbError("load stt block info failed, code:%s, %s", tstrerror(code), idStr); + return code; } + // load stt blocks statis for all stt-blocks, to decide if the data of queried table exists in current stt file + code = tsdbSttFileReadStatisBlk(pIter->pReader, (const TStatisBlkArray **)&pBlockLoadInfo->pSttStatisBlkArray); + if (code != TSDB_CODE_SUCCESS) { + tsdbError("failed to load stt block statistics, code:%s, %s", tstrerror(code), idStr); + return code; + } + + code = loadTombFn(pReader1, pIter->pReader, pIter->pBlockLoadInfo); + double el = (taosGetTimestampUs() - st) / 1000.0; - tsdbDebug("load the last file info completed, elapsed time:%.2fms, %s", el, idStr); + tsdbDebug("load the stt file info completed, elapsed time:%.2fms, %s", el, idStr); } - size_t size = taosArrayGetSize(pBlockLoadInfo->aSttBlk); + // bool exists = existsFromSttBlkStatis(pBlockLoadInfo->pSttStatisBlkArray, suid, uid, pIter->pReader); + // if 
(!exists) { + // pIter->iSttBlk = -1; + // pIter->pSttBlk = NULL; + // return TSDB_CODE_SUCCESS; + // } - // find the start block + // find the start block, actually we could load the position to avoid repeatly searching for the start position when + // the skey is updated. + size_t size = taosArrayGetSize(pBlockLoadInfo->aSttBlk); pIter->iSttBlk = binarySearchForStartBlock(pBlockLoadInfo->aSttBlk->pData, size, uid, backward); if (pIter->iSttBlk != -1) { pIter->pSttBlk = taosArrayGet(pBlockLoadInfo->aSttBlk, pIter->iSttBlk); @@ -343,7 +475,10 @@ int32_t tLDataIterOpen(struct SLDataIter *pIter, SDataFReader *pReader, int32_t return code; } -void tLDataIterClose(SLDataIter *pIter) { /*taosMemoryFree(pIter); */} +void tLDataIterClose2(SLDataIter *pIter) { + tsdbSttFileReaderClose(&pIter->pReader); + pIter->pReader = NULL; +} void tLDataIterNextBlock(SLDataIter *pIter, const char *idStr) { int32_t step = pIter->backward ? -1 : 1; @@ -395,25 +530,23 @@ void tLDataIterNextBlock(SLDataIter *pIter, const char *idStr) { if (index != -1) { pIter->iSttBlk = index; pIter->pSttBlk = (SSttBlk *)taosArrayGet(pIter->pBlockLoadInfo->aSttBlk, pIter->iSttBlk); - tsdbDebug("try next last file block:%d from %d, trigger by uid:%" PRIu64 ", file index:%d, %s", pIter->iSttBlk, - oldIndex, pIter->uid, pIter->iStt, idStr); + tsdbDebug("try next last file block:%d from stt fileIdx:%d, trigger by uid:%" PRIu64 ", file index:%d, %s", + pIter->iSttBlk, oldIndex, pIter->uid, pIter->iStt, idStr); } else { tsdbDebug("no more last block qualified, uid:%" PRIu64 ", file index:%d, %s", pIter->uid, oldIndex, idStr); } } static void findNextValidRow(SLDataIter *pIter, const char *idStr) { - int32_t step = pIter->backward ? -1 : 1; - bool hasVal = false; + int32_t step = pIter->backward ? 
-1 : 1; int32_t i = pIter->iRow; - SBlockData *pBlockData = loadLastBlock(pIter, idStr); + SBlockData *pData = loadLastBlock(pIter, idStr); // mostly we only need to find the start position for a given table - if ((((i == 0) && (!pIter->backward)) || (i == pBlockData->nRow - 1 && pIter->backward)) && - pBlockData->aUid != NULL) { - i = binarySearchForStartRowIndex((uint64_t *)pBlockData->aUid, pBlockData->nRow, pIter->uid, pIter->backward); + if ((((i == 0) && (!pIter->backward)) || (i == pData->nRow - 1 && pIter->backward)) && pData->aUid != NULL) { + i = binarySearchForStartRowIndex((uint64_t *)pData->aUid, pData->nRow, pIter->uid, pIter->backward); if (i == -1) { tsdbDebug("failed to find the data in pBlockData, uid:%" PRIu64 " , %s", pIter->uid, idStr); pIter->iRow = -1; @@ -421,20 +554,20 @@ static void findNextValidRow(SLDataIter *pIter, const char *idStr) { } } - for (; i < pBlockData->nRow && i >= 0; i += step) { - if (pBlockData->aUid != NULL) { + for (; i < pData->nRow && i >= 0; i += step) { + if (pData->aUid != NULL) { if (!pIter->backward) { - if (pBlockData->aUid[i] > pIter->uid) { + if (pData->aUid[i] > pIter->uid) { break; } } else { - if (pBlockData->aUid[i] < pIter->uid) { + if (pData->aUid[i] < pIter->uid) { break; } } } - int64_t ts = pBlockData->aTSKEY[i]; + int64_t ts = pData->aTSKEY[i]; if (!pIter->backward) { // asc if (ts > pIter->timeWindow.ekey) { // no more data break; @@ -449,7 +582,7 @@ static void findNextValidRow(SLDataIter *pIter, const char *idStr) { } } - int64_t ver = pBlockData->aVersion[i]; + int64_t ver = pData->aVersion[i]; if (ver < pIter->verRange.minVer) { continue; } @@ -485,7 +618,6 @@ bool tLDataIterNextRow(SLDataIter *pIter, const char *idStr) { while (1) { bool skipBlock = false; - findNextValidRow(pIter, idStr); if (pIter->pBlockLoadInfo->checkRemainingRow) { @@ -570,7 +702,7 @@ static FORCE_INLINE int32_t tLDataIterDescCmprFn(const SRBTreeNode *p1, const SR int32_t tMergeTreeOpen(SMergeTree *pMTree, int8_t backward, 
SDataFReader *pFReader, uint64_t suid, uint64_t uid, STimeWindow *pTimeWindow, SVersionRange *pVerRange, SSttBlockLoadInfo *pBlockLoadInfo, - bool destroyLoadInfo, const char *idStr, bool strictTimeRange, SLDataIter* pLDataIter) { + bool destroyLoadInfo, const char *idStr, bool strictTimeRange, SLDataIter *pLDataIter) { int32_t code = TSDB_CODE_SUCCESS; pMTree->backward = backward; @@ -612,6 +744,95 @@ _end: return code; } +int32_t tMergeTreeOpen2(SMergeTree *pMTree, SMergeTreeConf *pConf) { + int32_t code = TSDB_CODE_SUCCESS; + + pMTree->pIter = NULL; + pMTree->backward = pConf->backward; + pMTree->idStr = pConf->idstr; + + if (!pMTree->backward) { // asc + tRBTreeCreate(&pMTree->rbt, tLDataIterCmprFn); + } else { // desc + tRBTreeCreate(&pMTree->rbt, tLDataIterDescCmprFn); + } + + pMTree->ignoreEarlierTs = false; + + // todo handle other level of stt files, here only deal with the first level stt + int32_t size = ((STFileSet *)pConf->pCurrentFileset)->lvlArr->size; + if (size == 0) { + goto _end; + } + + // add the list/iter placeholder + while (taosArrayGetSize(pConf->pSttFileBlockIterArray) < size) { + SArray *pList = taosArrayInit(4, POINTER_BYTES); + taosArrayPush(pConf->pSttFileBlockIterArray, &pList); + } + + for (int32_t j = 0; j < size; ++j) { + SSttLvl *pSttLevel = ((STFileSet *)pConf->pCurrentFileset)->lvlArr->data[j]; + ASSERT(pSttLevel->level == j); + + SArray *pList = taosArrayGetP(pConf->pSttFileBlockIterArray, j); + int32_t numOfIter = taosArrayGetSize(pList); + + if (numOfIter < TARRAY2_SIZE(pSttLevel->fobjArr)) { + int32_t inc = TARRAY2_SIZE(pSttLevel->fobjArr) - numOfIter; + for (int32_t k = 0; k < inc; ++k) { + SLDataIter *pIter = taosMemoryCalloc(1, sizeof(SLDataIter)); + taosArrayPush(pList, &pIter); + } + } + + for (int32_t i = 0; i < TARRAY2_SIZE(pSttLevel->fobjArr); ++i) { // open all last file + SLDataIter *pIter = taosArrayGetP(pList, i); + + SSttFileReader *pSttFileReader = pIter->pReader; + SSttBlockLoadInfo *pLoadInfo = 
pIter->pBlockLoadInfo; + + // open stt file reader if not + if (pSttFileReader == NULL) { + SSttFileReaderConfig conf = {.tsdb = pConf->pTsdb, .szPage = pConf->pTsdb->pVnode->config.tsdbPageSize}; + conf.file[0] = *pSttLevel->fobjArr->data[i]->f; + + code = tsdbSttFileReaderOpen(pSttLevel->fobjArr->data[i]->fname, &conf, &pSttFileReader); + if (code != TSDB_CODE_SUCCESS) { + return code; + } + } + + if (pLoadInfo == NULL) { + pLoadInfo = tCreateOneLastBlockLoadInfo(pConf->pSchema, pConf->pCols, pConf->numOfCols); + } + + memset(pIter, 0, sizeof(SLDataIter)); + code = tLDataIterOpen2(pIter, pSttFileReader, i, pMTree->backward, pConf->suid, pConf->uid, &pConf->timewindow, + &pConf->verRange, pLoadInfo, pMTree->idStr, pConf->strictTimeRange, pConf->loadTombFn, + pConf->pReader); + if (code != TSDB_CODE_SUCCESS) { + goto _end; + } + + bool hasVal = tLDataIterNextRow(pIter, pMTree->idStr); + if (hasVal) { + tMergeTreeAddIter(pMTree, pIter); + } else { + if (!pMTree->ignoreEarlierTs) { + pMTree->ignoreEarlierTs = pIter->ignoreEarlierTs; + } + } + } + } + + return code; + +_end: + tMergeTreeClose(pMTree); + return code; +} + void tMergeTreeAddIter(SMergeTree *pMTree, SLDataIter *pIter) { tRBTreePut(&pMTree->rbt, (SRBTreeNode *)pIter); } bool tMergeTreeIgnoreEarlierTs(SMergeTree *pMTree) { return pMTree->ignoreEarlierTs; } diff --git a/source/dnode/vnode/src/tsdb/tsdbOpen.c b/source/dnode/vnode/src/tsdb/tsdbOpen.c index 8901f64459..c684ad5184 100644 --- a/source/dnode/vnode/src/tsdb/tsdbOpen.c +++ b/source/dnode/vnode/src/tsdb/tsdbOpen.c @@ -14,6 +14,7 @@ */ #include "tsdb.h" +#include "tsdbFS2.h" int32_t tsdbSetKeepCfg(STsdb *pTsdb, STsdbCfg *pCfg) { STsdbKeepCfg *pKeepCfg = &pTsdb->keepCfg; @@ -66,7 +67,7 @@ int tsdbOpen(SVnode *pVnode, STsdb **ppTsdb, const char *dir, STsdbKeepCfg *pKee } // open tsdb - if (tsdbFSOpen(pTsdb, rollback) < 0) { + if (tsdbOpenFS(pTsdb, &pTsdb->pFS, rollback) < 0) { goto _err; } @@ -94,7 +95,7 @@ int tsdbClose(STsdb **pTsdb) { 
taosThreadRwlockDestroy(&(*pTsdb)->rwLock); - tsdbFSClose(*pTsdb); + tsdbCloseFS(&(*pTsdb)->pFS); tsdbCloseCache(*pTsdb); taosMemoryFreeClear(*pTsdb); } diff --git a/source/dnode/vnode/src/tsdb/tsdbRead.c b/source/dnode/vnode/src/tsdb/tsdbRead.c index 5703204328..2aa21bd86f 100644 --- a/source/dnode/vnode/src/tsdb/tsdbRead.c +++ b/source/dnode/vnode/src/tsdb/tsdbRead.c @@ -17,7 +17,7 @@ #include "tsdb.h" #include "tsimplehash.h" -#define ASCENDING_TRAVERSE(o) (o == TSDB_ORDER_ASC) +#define ASCENDING_TRAVERSE(o) (o == TSDB_ORDER_ASC) #define getCurrentKeyInLastBlock(_r) ((_r)->currentKey) typedef enum { @@ -30,12 +30,12 @@ typedef enum { EXTERNAL_ROWS_MAIN = 0x2, EXTERNAL_ROWS_NEXT = 0x3, } EContentData; - +/* typedef enum { READ_MODE_COUNT_ONLY = 0x1, READ_MODE_ALL, } EReadMode; - +*/ typedef struct { STbDataIter* iter; int32_t index; @@ -166,7 +166,7 @@ typedef struct SReaderStatus { SDataBlockIter blockIter; SLDataIter* pLDataIter; SRowMerger merger; - SColumnInfoData* pPrimaryTsCol; // primary time stamp output col info data + SColumnInfoData* pPrimaryTsCol; // primary time stamp output col info data } SReaderStatus; typedef struct SBlockInfoBuf { @@ -292,7 +292,7 @@ static int32_t updateBlockSMAInfo(STSchema* pSchema, SBlockLoadSuppInfo* pSupInf if (j < pSupInfo->numOfCols && PRIMARYKEY_TIMESTAMP_COL_ID == pSupInfo->colId[j]) { j += 1; } - + while (i < pSchema->numOfCols && j < pSupInfo->numOfCols) { STColumn* pTCol = &pSchema->columns[i]; if (pTCol->colId == pSupInfo->colId[j]) { @@ -410,7 +410,7 @@ static int32_t uidComparFunc(const void* p1, const void* p2) { // NOTE: speedup the whole processing by preparing the buffer for STableBlockScanInfo in batch model static SSHashObj* createDataBlockScanInfo(STsdbReader* pTsdbReader, SBlockInfoBuf* pBuf, const STableKeyInfo* idList, - STableUidList* pUidList, int32_t numOfTables) { + STableUidList* pUidList, int32_t numOfTables) { // allocate buffer in order to load data blocks from file // todo use simple hash 
instead, optimize the memory consumption SSHashObj* pTableMap = tSimpleHashInit(numOfTables, taosGetDefaultHashFunction(TSDB_DATA_TYPE_BIGINT)); @@ -461,7 +461,7 @@ static SSHashObj* createDataBlockScanInfo(STsdbReader* pTsdbReader, SBlockInfoBu } static void resetAllDataBlockScanInfo(SSHashObj* pTableMap, int64_t ts, int32_t step) { - void *p = NULL; + void* p = NULL; int32_t iter = 0; while ((p = tSimpleHashIterate(pTableMap, p, &iter)) != NULL) { @@ -505,7 +505,7 @@ static void clearBlockScanInfo(STableBlockScanInfo* p) { } static void destroyAllBlockScanInfo(SSHashObj* pTableMap) { - void* p = NULL; + void* p = NULL; int32_t iter = 0; while ((p = tSimpleHashIterate(pTableMap, p, &iter)) != NULL) { @@ -743,7 +743,8 @@ void tsdbReleaseDataBlock(STsdbReader* pReader) { } } -static int32_t initResBlockInfo(SResultBlockInfo* pResBlockInfo, int64_t capacity, SSDataBlock* pResBlock, SQueryTableDataCond* pCond) { +static int32_t initResBlockInfo(SResultBlockInfo* pResBlockInfo, int64_t capacity, SSDataBlock* pResBlock, + SQueryTableDataCond* pCond) { pResBlockInfo->capacity = capacity; pResBlockInfo->pResBlock = pResBlock; terrno = 0; @@ -921,9 +922,9 @@ static void cleanupTableScanInfo(SReaderStatus* pStatus) { return; } - SSHashObj* pTableMap = pStatus->pTableMap; + SSHashObj* pTableMap = pStatus->pTableMap; STableBlockScanInfo** px = NULL; - int32_t iter = 0; + int32_t iter = 0; while (1) { px = tSimpleHashIterate(pTableMap, px, &iter); @@ -937,9 +938,10 @@ static void cleanupTableScanInfo(SReaderStatus* pStatus) { pStatus->mapDataCleaned = true; } -static int32_t doLoadFileBlock(STsdbReader* pReader, SArray* pIndexList, SBlockNumber* pBlockNum, SArray* pTableScanInfoList) { - size_t sizeInDisk = 0; - size_t numOfTables = taosArrayGetSize(pIndexList); +static int32_t doLoadFileBlock(STsdbReader* pReader, SArray* pIndexList, SBlockNumber* pBlockNum, + SArray* pTableScanInfoList) { + size_t sizeInDisk = 0; + size_t numOfTables = taosArrayGetSize(pIndexList); int64_t 
st = taosGetTimestampUs(); cleanupTableScanInfo(&pReader->status); @@ -1125,18 +1127,18 @@ static int32_t getEndPosInDataBlock(STsdbReader* pReader, SBlockData* pBlockData endPos = doBinarySearchKey(pBlockData->aTSKEY, pBlock->nRow, pos, key, pReader->order); } - if ((pReader->verRange.maxVer >= pBlock->minVer && pReader->verRange.maxVer < pBlock->maxVer)|| + if ((pReader->verRange.maxVer >= pBlock->minVer && pReader->verRange.maxVer < pBlock->maxVer) || (pReader->verRange.minVer <= pBlock->maxVer && pReader->verRange.minVer > pBlock->minVer)) { int32_t i = endPos; if (asc) { - for(; i >= 0; --i) { + for (; i >= 0; --i) { if (pBlockData->aVersion[i] <= pReader->verRange.maxVer) { break; } } } else { - for(; i < pBlock->nRow; ++i) { + for (; i < pBlock->nRow; ++i) { if (pBlockData->aVersion[i] >= pReader->verRange.minVer) { break; } @@ -1309,17 +1311,17 @@ static int32_t copyBlockDataToSDataBlock(STsdbReader* pReader) { ASSERT(pReader->verRange.minVer <= pBlock->maxVer && pReader->verRange.maxVer >= pBlock->minVer); // find the appropriate start position that satisfies the version requirement. 
- if ((pReader->verRange.maxVer >= pBlock->minVer && pReader->verRange.maxVer < pBlock->maxVer)|| + if ((pReader->verRange.maxVer >= pBlock->minVer && pReader->verRange.maxVer < pBlock->maxVer) || (pReader->verRange.minVer <= pBlock->maxVer && pReader->verRange.minVer > pBlock->minVer)) { int32_t i = pDumpInfo->rowIndex; if (asc) { - for(; i < pBlock->nRow; ++i) { + for (; i < pBlock->nRow; ++i) { if (pBlockData->aVersion[i] >= pReader->verRange.minVer) { break; } } } else { - for(; i >= 0; --i) { + for (; i >= 0; --i) { if (pBlockData->aVersion[i] <= pReader->verRange.maxVer) { break; } @@ -1562,7 +1564,8 @@ static int32_t doSetCurrentBlock(SDataBlockIter* pBlockIter, const char* idStr) return TSDB_CODE_SUCCESS; } -static int32_t initBlockIterator(STsdbReader* pReader, SDataBlockIter* pBlockIter, int32_t numOfBlocks, SArray* pTableList) { +static int32_t initBlockIterator(STsdbReader* pReader, SDataBlockIter* pBlockIter, int32_t numOfBlocks, + SArray* pTableList) { bool asc = ASCENDING_TRAVERSE(pReader->order); SBlockOrderSupporter sup = {0}; @@ -1967,13 +1970,14 @@ static bool nextRowFromLastBlocks(SLastBlockReader* pLastBlockReader, STableBloc } TSDBROW* pRow = tMergeTreeGetRow(&pLastBlockReader->mergeTree); - int64_t key = pRow->pBlockData->aTSKEY[pRow->iRow]; - int64_t ver = pRow->pBlockData->aVersion[pRow->iRow]; + int64_t key = pRow->pBlockData->aTSKEY[pRow->iRow]; + int64_t ver = pRow->pBlockData->aVersion[pRow->iRow]; pLastBlockReader->currentKey = key; pScanInfo->lastKeyInStt = key; - if (!hasBeenDropped(pScanInfo->delSkyline, &pScanInfo->lastBlockDelIndex, key, ver, pLastBlockReader->order, pVerRange)) { + if (!hasBeenDropped(pScanInfo->delSkyline, &pScanInfo->lastBlockDelIndex, key, ver, pLastBlockReader->order, + pVerRange)) { return true; } } @@ -2030,7 +2034,7 @@ static FORCE_INLINE STSchema* doGetSchemaForTSRow(int32_t sversion, STsdbReader* } STSchema* ptr = NULL; - int32_t code = metaGetTbTSchemaEx(pReader->pTsdb->pVnode->pMeta, pReader->suid, 
uid, sversion, &ptr); + int32_t code = metaGetTbTSchemaEx(pReader->pTsdb->pVnode->pMeta, pReader->suid, uid, sversion, &ptr); if (code != TSDB_CODE_SUCCESS) { terrno = code; return NULL; @@ -2153,7 +2157,7 @@ static int32_t doMergeBufAndFileRows(STsdbReader* pReader, STableBlockScanInfo* return terrno; } - int32_t code = tsdbRowMergerAdd(pMerger, pRow, pSchema); + int32_t code = tsdbRowMergerAdd(pMerger, pRow, pSchema); if (code != TSDB_CODE_SUCCESS) { return code; } @@ -2208,7 +2212,7 @@ static int32_t doMergeBufAndFileRows(STsdbReader* pReader, STableBlockScanInfo* static int32_t doMergeFileBlockAndLastBlock(SLastBlockReader* pLastBlockReader, STsdbReader* pReader, STableBlockScanInfo* pBlockScanInfo, SBlockData* pBlockData, bool mergeBlockData) { - SRowMerger* pMerger = &pReader->status.merger; + SRowMerger* pMerger = &pReader->status.merger; SFileBlockDumpInfo* pDumpInfo = &pReader->status.fBlockDumpInfo; int64_t tsLastBlock = getCurrentKeyInLastBlock(pLastBlockReader); @@ -2218,9 +2222,10 @@ static int32_t doMergeFileBlockAndLastBlock(SLastBlockReader* pLastBlockReader, TSDBROW* pRow = tMergeTreeGetRow(&pLastBlockReader->mergeTree); // create local variable to hold the row value - TSDBROW fRow = {.iRow = pRow->iRow, .type = TSDBROW_COL_FMT, .pBlockData = pRow->pBlockData}; + TSDBROW fRow = {.iRow = pRow->iRow, .type = TSDBROW_COL_FMT, .pBlockData = pRow->pBlockData}; - tsdbTrace("fRow ptr:%p, %d, uid:%" PRIu64 ", %s", pRow->pBlockData, pRow->iRow, pLastBlockReader->uid, pReader->idStr); + tsdbTrace("fRow ptr:%p, %d, uid:%" PRIu64 ", %s", pRow->pBlockData, pRow->iRow, pLastBlockReader->uid, + pReader->idStr); // only last block exists if ((!mergeBlockData) || (tsLastBlock != pBlockData->aTSKEY[pDumpInfo->rowIndex])) { @@ -2240,7 +2245,8 @@ static int32_t doMergeFileBlockAndLastBlock(SLastBlockReader* pLastBlockReader, TSDBROW* pRow1 = tMergeTreeGetRow(&pLastBlockReader->mergeTree); tsdbRowMergerAdd(pMerger, pRow1, NULL); - 
doMergeRowsInLastBlock(pLastBlockReader, pBlockScanInfo, tsLastBlock, pMerger, &pReader->verRange, pReader->idStr); + doMergeRowsInLastBlock(pLastBlockReader, pBlockScanInfo, tsLastBlock, pMerger, &pReader->verRange, + pReader->idStr); code = tsdbRowMergerGetRow(pMerger, &pTSRow); if (code != TSDB_CODE_SUCCESS) { @@ -2290,7 +2296,7 @@ static int32_t doMergeFileBlockAndLastBlock(SLastBlockReader* pLastBlockReader, static int32_t mergeFileBlockAndLastBlock(STsdbReader* pReader, SLastBlockReader* pLastBlockReader, int64_t key, STableBlockScanInfo* pBlockScanInfo, SBlockData* pBlockData) { SFileBlockDumpInfo* pDumpInfo = &pReader->status.fBlockDumpInfo; - SRowMerger* pMerger = &pReader->status.merger; + SRowMerger* pMerger = &pReader->status.merger; // merge is not initialized yet, due to the fact that the pReader->pSchema is not initialized if (pMerger->pArray == NULL) { @@ -2316,7 +2322,7 @@ static int32_t mergeFileBlockAndLastBlock(STsdbReader* pReader, SLastBlockReader if (key < ts) { // imem, mem are all empty, file blocks (data blocks and last block) exist return mergeRowsInFileBlocks(pBlockData, pBlockScanInfo, key, pReader); } else if (key == ts) { - SRow* pTSRow = NULL; + SRow* pTSRow = NULL; int32_t code = tsdbRowMergerAdd(pMerger, &fRow, pReader->pSchema); if (code != TSDB_CODE_SUCCESS) { return code; @@ -2723,7 +2729,7 @@ int32_t mergeRowsInFileBlocks(SBlockData* pBlockData, STableBlockScanInfo* pBloc } else { TSDBROW fRow = tsdbRowFromBlockData(pBlockData, pDumpInfo->rowIndex); - SRow* pTSRow = NULL; + SRow* pTSRow = NULL; code = tsdbRowMergerAdd(pMerger, &fRow, pReader->pSchema); if (code != TSDB_CODE_SUCCESS) { return code; @@ -2837,11 +2843,11 @@ static int32_t buildComposedDataBlock(STsdbReader* pReader) { SFileDataBlockInfo* pBlockInfo = getCurrentBlockInfo(&pReader->status.blockIter); SLastBlockReader* pLastBlockReader = pReader->status.fileIter.pLastBlockReader; - bool asc = ASCENDING_TRAVERSE(pReader->order); - int64_t st = taosGetTimestampUs(); - 
int32_t step = asc ? 1 : -1; - double el = 0; - SDataBlk* pBlock = getCurrentBlock(&pReader->status.blockIter); + bool asc = ASCENDING_TRAVERSE(pReader->order); + int64_t st = taosGetTimestampUs(); + int32_t step = asc ? 1 : -1; + double el = 0; + SDataBlk* pBlock = getCurrentBlock(&pReader->status.blockIter); SFileBlockDumpInfo* pDumpInfo = &pReader->status.fBlockDumpInfo; STableBlockScanInfo* pBlockScanInfo = NULL; @@ -2874,7 +2880,8 @@ static int32_t buildComposedDataBlock(STsdbReader* pReader) { } } else { // file blocks not exist pBlockScanInfo = *pReader->status.pTableIter; - if (pReader->pIgnoreTables && taosHashGet(*pReader->pIgnoreTables, &pBlockScanInfo->uid, sizeof(pBlockScanInfo->uid))) { + if (pReader->pIgnoreTables && + taosHashGet(*pReader->pIgnoreTables, &pBlockScanInfo->uid, sizeof(pBlockScanInfo->uid))) { setBlockAllDumped(pDumpInfo, pBlock->maxKey.ts, pReader->order); return code; } @@ -3248,7 +3255,7 @@ static int32_t doLoadLastBlockSequentially(STsdbReader* pReader) { } static int32_t doBuildDataBlock(STsdbReader* pReader) { - int32_t code = TSDB_CODE_SUCCESS; + int32_t code = TSDB_CODE_SUCCESS; SReaderStatus* pStatus = &pReader->status; SDataBlockIter* pBlockIter = &pStatus->blockIter; @@ -3271,7 +3278,6 @@ static int32_t doBuildDataBlock(STsdbReader* pReader) { return terrno; } - initLastBlockReader(pLastBlockReader, pScanInfo, pReader); TSDBKEY keyInBuf = getCurrentKeyInBuf(pScanInfo, pReader); @@ -3348,7 +3354,7 @@ static int32_t doBuildDataBlock(STsdbReader* pReader) { } } - return (pReader->code != TSDB_CODE_SUCCESS)? pReader->code:code; + return (pReader->code != TSDB_CODE_SUCCESS) ? 
pReader->code : code; } static int32_t doSumFileBlockRows(STsdbReader* pReader, SDataFReader* pFileReader) { @@ -3503,14 +3509,15 @@ static int32_t buildBlockFromBufferSequentially(STsdbReader* pReader) { } STableBlockScanInfo** pBlockScanInfo = pStatus->pTableIter; - if (pReader->pIgnoreTables && taosHashGet(*pReader->pIgnoreTables, &(*pBlockScanInfo)->uid, sizeof((*pBlockScanInfo)->uid))) { + if (pReader->pIgnoreTables && + taosHashGet(*pReader->pIgnoreTables, &(*pBlockScanInfo)->uid, sizeof((*pBlockScanInfo)->uid))) { bool hasNexTable = moveToNextTable(pUidList, pStatus); if (!hasNexTable) { return TSDB_CODE_SUCCESS; } pBlockScanInfo = pStatus->pTableIter; } - + initMemDataIterator(*pBlockScanInfo, pReader); int64_t endKey = (ASCENDING_TRAVERSE(pReader->order)) ? INT64_MAX : INT64_MIN; @@ -3554,7 +3561,7 @@ static void initBlockDumpInfo(STsdbReader* pReader, SDataBlockIter* pBlockIter) static int32_t initForFirstBlockInFile(STsdbReader* pReader, SDataBlockIter* pBlockIter) { SBlockNumber num = {0}; - SArray* pTableList = taosArrayInit(40, POINTER_BYTES); + SArray* pTableList = taosArrayInit(40, POINTER_BYTES); int32_t code = moveToNextFile(pReader, &num, pTableList); if (code != TSDB_CODE_SUCCESS) { @@ -3599,7 +3606,7 @@ static ERetrieveType doReadDataFromLastFiles(STsdbReader* pReader) { SSDataBlock* pResBlock = pReader->resBlockInfo.pResBlock; SDataBlockIter* pBlockIter = &pReader->status.blockIter; - while(1) { + while (1) { terrno = 0; code = doLoadLastBlockSequentially(pReader); @@ -3622,7 +3629,7 @@ static ERetrieveType doReadDataFromLastFiles(STsdbReader* pReader) { return TSDB_READ_RETURN; } - if (pBlockIter->numOfBlocks > 0) { // there are data blocks existed. + if (pBlockIter->numOfBlocks > 0) { // there are data blocks existed. 
return TSDB_READ_CONTINUE; } else { // all blocks in data file are checked, let's check the data in last files resetTableListIndex(&pReader->status); @@ -3635,7 +3642,7 @@ static int32_t buildBlockFromFiles(STsdbReader* pReader) { bool asc = ASCENDING_TRAVERSE(pReader->order); SDataBlockIter* pBlockIter = &pReader->status.blockIter; - SSDataBlock* pResBlock = pReader->resBlockInfo.pResBlock; + SSDataBlock* pResBlock = pReader->resBlockInfo.pResBlock; if (pBlockIter->numOfBlocks == 0) { // let's try to extract data from stt files. @@ -3747,13 +3754,14 @@ SVersionRange getQueryVerRange(SVnode* pVnode, SQueryTableDataCond* pCond, const endVer = (pCond->endVersion > pVnode->state.applied) ? pVnode->state.applied : pCond->endVersion; } - tsdbDebug("queried verRange:%"PRId64"-%"PRId64", revised query verRange:%"PRId64"-%"PRId64", %s", pCond->startVersion, - pCond->endVersion, startVer, endVer, id); + tsdbDebug("queried verRange:%" PRId64 "-%" PRId64 ", revised query verRange:%" PRId64 "-%" PRId64 ", %s", + pCond->startVersion, pCond->endVersion, startVer, endVer, id); return (SVersionRange){.minVer = startVer, .maxVer = endVer}; } -bool hasBeenDropped(const SArray* pDelList, int32_t* index, int64_t key, int64_t ver, int32_t order, SVersionRange* pVerRange) { +bool hasBeenDropped(const SArray* pDelList, int32_t* index, int64_t key, int64_t ver, int32_t order, + SVersionRange* pVerRange) { if (pDelList == NULL) { return false; } @@ -3771,8 +3779,7 @@ bool hasBeenDropped(const SArray* pDelList, int32_t* index, int64_t key, int64_t return false; } else if (key == last->ts) { TSDBKEY* prev = taosArrayGet(pDelList, num - 2); - return (prev->version >= ver && prev->version <= pVerRange->maxVer && - prev->version >= pVerRange->minVer); + return (prev->version >= ver && prev->version <= pVerRange->maxVer && prev->version >= pVerRange->minVer); } } else { TSDBKEY* pCurrent = taosArrayGet(pDelList, *index); @@ -3981,9 +3988,9 @@ int32_t doMergeRowsInFileBlocks(SBlockData* 
pBlockData, STableBlockScanInfo* pSc SFileBlockDumpInfo* pDumpInfo = &pReader->status.fBlockDumpInfo; SRowMerger* pMerger = &pReader->status.merger; - bool asc = ASCENDING_TRAVERSE(pReader->order); - int64_t key = pBlockData->aTSKEY[pDumpInfo->rowIndex]; - int32_t step = asc ? 1 : -1; + bool asc = ASCENDING_TRAVERSE(pReader->order); + int64_t key = pBlockData->aTSKEY[pDumpInfo->rowIndex]; + int32_t step = asc ? 1 : -1; pDumpInfo->rowIndex += step; if ((pDumpInfo->rowIndex <= pBlockData->nRow - 1 && asc) || (pDumpInfo->rowIndex >= 0 && !asc)) { @@ -4080,14 +4087,14 @@ int32_t doMergeMemTableMultiRows(TSDBROW* pRow, uint64_t uid, SIterInfo* pIter, return terrno; } - tsdbRowMergerAdd(&pReader->status.merger,pNextRow, pTSchema1); + tsdbRowMergerAdd(&pReader->status.merger, pNextRow, pTSchema1); } else { // let's merge rows in file block code = tsdbRowMergerAdd(&pReader->status.merger, ¤t, pReader->pSchema); if (code != TSDB_CODE_SUCCESS) { return code; } - tsdbRowMergerAdd(&pReader->status.merger,pNextRow, NULL); + tsdbRowMergerAdd(&pReader->status.merger, pNextRow, NULL); } code = doMergeRowsInBuf(pIter, uid, TSDBROW_TS(¤t), pDelList, pReader); @@ -4134,9 +4141,8 @@ int32_t doMergeMemIMemRows(TSDBROW* pRow, TSDBROW* piRow, STableBlockScanInfo* p return code; } - tsdbRowMergerAdd(&pReader->status.merger,pRow, pSchema); - code = - doMergeRowsInBuf(&pBlockScanInfo->iter, pBlockScanInfo->uid, k.ts, pBlockScanInfo->delSkyline, pReader); + tsdbRowMergerAdd(&pReader->status.merger, pRow, pSchema); + code = doMergeRowsInBuf(&pBlockScanInfo->iter, pBlockScanInfo->uid, k.ts, pBlockScanInfo->delSkyline, pReader); if (code != TSDB_CODE_SUCCESS) { return code; } @@ -4375,7 +4381,7 @@ int32_t tsdbSetTableList(STsdbReader* pReader, const void* pTableList, int32_t n int32_t size = tSimpleHashGetSize(pReader->status.pTableMap); STableBlockScanInfo** p = NULL; - int32_t iter = 0; + int32_t iter = 0; while ((p = tSimpleHashIterate(pReader->status.pTableMap, p, &iter)) != NULL) { 
clearBlockScanInfo(*p); @@ -4462,15 +4468,16 @@ static int32_t doOpenReaderImpl(STsdbReader* pReader) { } static void freeSchemaFunc(void* param) { - void **p = (void **)param; + void** p = (void**)param; taosMemoryFreeClear(*p); } // ====================================== EXPOSED APIs ====================================== int32_t tsdbReaderOpen(void* pVnode, SQueryTableDataCond* pCond, void* pTableList, int32_t numOfTables, - SSDataBlock* pResBlock, void** ppReader, const char* idstr, bool countOnly, SHashObj** pIgnoreTables) { + SSDataBlock* pResBlock, void** ppReader, const char* idstr, bool countOnly, + SHashObj** pIgnoreTables) { STimeWindow window = pCond->twindows; - SVnodeCfg* pConf = &(((SVnode*)pVnode)->config); + SVnodeCfg* pConf = &(((SVnode*)pVnode)->config); int32_t capacity = pConf->tsdbCfg.maxRows; if (pResBlock != NULL) { @@ -4739,7 +4746,7 @@ int32_t tsdbReaderSuspend(STsdbReader* pReader) { // resetDataBlockScanInfo excluding lastKey STableBlockScanInfo** p = NULL; - int32_t iter = 0; + int32_t iter = 0; while ((p = tSimpleHashIterate(pStatus->pTableMap, p, &iter)) != NULL) { STableBlockScanInfo* pInfo = *(STableBlockScanInfo**)p; @@ -4761,7 +4768,7 @@ int32_t tsdbReaderSuspend(STsdbReader* pReader) { } else { // resetDataBlockScanInfo excluding lastKey STableBlockScanInfo** p = NULL; - int32_t iter = 0; + int32_t iter = 0; while ((p = tSimpleHashIterate(pStatus->pTableMap, p, &iter)) != NULL) { STableBlockScanInfo* pInfo = *(STableBlockScanInfo**)p; @@ -4960,8 +4967,9 @@ int32_t tsdbNextDataBlock(STsdbReader* pReader, bool* hasNext) { *hasNext = false; - if (isEmptyQueryTimeWindow(&pReader->window) || pReader->step == EXTERNAL_ROWS_NEXT || pReader->code != TSDB_CODE_SUCCESS) { - return (pReader->code != TSDB_CODE_SUCCESS)? pReader->code:code; + if (isEmptyQueryTimeWindow(&pReader->window) || pReader->step == EXTERNAL_ROWS_NEXT || + pReader->code != TSDB_CODE_SUCCESS) { + return (pReader->code != TSDB_CODE_SUCCESS) ? 
pReader->code : code; } SReaderStatus* pStatus = &pReader->status; @@ -5097,7 +5105,7 @@ static bool doFillNullColSMA(SBlockLoadSuppInfo* pSup, int32_t numOfRows, int32_ return hasNullSMA; } -int32_t tsdbRetrieveDatablockSMA(STsdbReader* pReader, SSDataBlock* pDataBlock, bool* allHave, bool *hasNullSMA) { +int32_t tsdbRetrieveDatablockSMA(STsdbReader* pReader, SSDataBlock* pDataBlock, bool* allHave, bool* hasNullSMA) { SColumnDataAgg*** pBlockSMA = &pDataBlock->pBlockAgg; int32_t code = 0; @@ -5206,9 +5214,9 @@ STableBlockScanInfo* getTableBlockScanInfo(SSHashObj* pTableMap, uint64_t uid, c } static SSDataBlock* doRetrieveDataBlock(STsdbReader* pReader) { - SReaderStatus* pStatus = &pReader->status; - int32_t code = TSDB_CODE_SUCCESS; - SFileDataBlockInfo* pBlockInfo = getCurrentBlockInfo(&pStatus->blockIter); + SReaderStatus* pStatus = &pReader->status; + int32_t code = TSDB_CODE_SUCCESS; + SFileDataBlockInfo* pBlockInfo = getCurrentBlockInfo(&pStatus->blockIter); if (pReader->code != TSDB_CODE_SUCCESS) { return NULL; diff --git a/source/dnode/vnode/src/tsdb/tsdbRead2.c b/source/dnode/vnode/src/tsdb/tsdbRead2.c new file mode 100644 index 0000000000..e96406567a --- /dev/null +++ b/source/dnode/vnode/src/tsdb/tsdbRead2.c @@ -0,0 +1,4945 @@ +/* + * Copyright (c) 2019 TAOS Data, Inc. + * + * This program is free software: you can use, redistribute, and/or modify + * it under the terms of the GNU Affero General Public License, version 3 + * or later ("AGPL"), as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see . 
+ */ + +#include "osDef.h" +#include "tsdb.h" +#include "tsdbDataFileRW.h" +#include "tsdbFS2.h" +#include "tsdbMerge.h" +#include "tsdbReadUtil.h" +#include "tsdbUtil2.h" +#include "tsimplehash.h" + +#define ASCENDING_TRAVERSE(o) (o == TSDB_ORDER_ASC) +#define getCurrentKeyInLastBlock(_r) ((_r)->currentKey) + +static SFileDataBlockInfo* getCurrentBlockInfo(SDataBlockIter* pBlockIter); +static int32_t buildDataBlockFromBufImpl(STableBlockScanInfo* pBlockScanInfo, int64_t endKey, int32_t capacity, + STsdbReader* pReader); +static TSDBROW* getValidMemRow(SIterInfo* pIter, const SArray* pDelList, STsdbReader* pReader); +static int32_t doMergeRowsInFileBlocks(SBlockData* pBlockData, STableBlockScanInfo* pScanInfo, STsdbReader* pReader); +static int32_t doMergeRowsInLastBlock(SLastBlockReader* pLastBlockReader, STableBlockScanInfo* pScanInfo, int64_t ts, + SRowMerger* pMerger, SVersionRange* pVerRange, const char* id); +static int32_t doMergeRowsInBuf(SIterInfo* pIter, uint64_t uid, int64_t ts, SArray* pDelList, STsdbReader* pReader); +static int32_t doAppendRowFromTSRow(SSDataBlock* pBlock, STsdbReader* pReader, SRow* pTSRow, + STableBlockScanInfo* pScanInfo); +static int32_t doAppendRowFromFileBlock(SSDataBlock* pResBlock, STsdbReader* pReader, SBlockData* pBlockData, + int32_t rowIndex); +static void setComposedBlockFlag(STsdbReader* pReader, bool composed); +static bool hasBeenDropped(const SArray* pDelList, int32_t* index, int64_t key, int64_t ver, int32_t order, + SVersionRange* pVerRange); + +static int32_t doMergeMemTableMultiRows(TSDBROW* pRow, uint64_t uid, SIterInfo* pIter, SArray* pDelList, + TSDBROW* pResRow, STsdbReader* pReader, bool* freeTSRow); +static int32_t doMergeMemIMemRows(TSDBROW* pRow, TSDBROW* piRow, STableBlockScanInfo* pBlockScanInfo, + STsdbReader* pReader, SRow** pTSRow); +static int32_t mergeRowsInFileBlocks(SBlockData* pBlockData, STableBlockScanInfo* pBlockScanInfo, int64_t key, + STsdbReader* pReader); + +static int32_t 
initDelSkylineIterator(STableBlockScanInfo* pBlockScanInfo, int32_t order, SCostSummary* pCost); +static STsdb* getTsdbByRetentions(SVnode* pVnode, TSKEY winSKey, SRetention* retentions, const char* idstr, + int8_t* pLevel); +static SVersionRange getQueryVerRange(SVnode* pVnode, SQueryTableDataCond* pCond, int8_t level); +static bool hasDataInLastBlock(SLastBlockReader* pLastBlockReader); +static int32_t doBuildDataBlock(STsdbReader* pReader); +static TSDBKEY getCurrentKeyInBuf(STableBlockScanInfo* pScanInfo, STsdbReader* pReader); +static bool hasDataInFileBlock(const SBlockData* pBlockData, const SFileBlockDumpInfo* pDumpInfo); +static void initBlockDumpInfo(STsdbReader* pReader, SDataBlockIter* pBlockIter); +static int32_t getInitialDelIndex(const SArray* pDelSkyline, int32_t order); + +static bool outOfTimeWindow(int64_t ts, STimeWindow* pWindow) { return (ts > pWindow->ekey) || (ts < pWindow->skey); } + +static int32_t setColumnIdSlotList(SBlockLoadSuppInfo* pSupInfo, SColumnInfo* pCols, const int32_t* pSlotIdList, + int32_t numOfCols) { + pSupInfo->smaValid = true; + pSupInfo->numOfCols = numOfCols; + pSupInfo->colId = taosMemoryMalloc(numOfCols * (sizeof(int16_t) * 2 + POINTER_BYTES)); + if (pSupInfo->colId == NULL) { + taosMemoryFree(pSupInfo->colId); + return TSDB_CODE_OUT_OF_MEMORY; + } + + pSupInfo->slotId = (int16_t*)((char*)pSupInfo->colId + (sizeof(int16_t) * numOfCols)); + pSupInfo->buildBuf = (char**)((char*)pSupInfo->slotId + (sizeof(int16_t) * numOfCols)); + for (int32_t i = 0; i < numOfCols; ++i) { + pSupInfo->colId[i] = pCols[i].colId; + pSupInfo->slotId[i] = pSlotIdList[i]; + + if (IS_VAR_DATA_TYPE(pCols[i].type)) { + pSupInfo->buildBuf[i] = taosMemoryMalloc(pCols[i].bytes); + } else { + pSupInfo->buildBuf[i] = NULL; + } + } + + return TSDB_CODE_SUCCESS; +} + +static int32_t updateBlockSMAInfo(STSchema* pSchema, SBlockLoadSuppInfo* pSupInfo) { + int32_t i = 0, j = 0; + + while (i < pSchema->numOfCols && j < pSupInfo->numOfCols) { + STColumn* 
pTCol = &pSchema->columns[i]; + if (pTCol->colId == pSupInfo->colId[j]) { + if (!IS_BSMA_ON(pTCol)) { + pSupInfo->smaValid = false; + return TSDB_CODE_SUCCESS; + } + + i += 1; + j += 1; + } else if (pTCol->colId < pSupInfo->colId[j]) { // do nothing + i += 1; + } else { + return TSDB_CODE_INVALID_PARA; + } + } + + return TSDB_CODE_SUCCESS; +} + +static bool isEmptyQueryTimeWindow(STimeWindow* pWindow) { return pWindow->skey > pWindow->ekey; } + +// Update the query time window according to the data time to live(TTL) information, in order to avoid to return +// the expired data to client, even it is queried already. +static STimeWindow updateQueryTimeWindow(STsdb* pTsdb, STimeWindow* pWindow) { + STsdbKeepCfg* pCfg = &pTsdb->keepCfg; + + int64_t now = taosGetTimestamp(pCfg->precision); + int64_t earilyTs = now - (tsTickPerMin[pCfg->precision] * pCfg->keep2) + 1; // needs to add one tick + + STimeWindow win = *pWindow; + if (win.skey < earilyTs) { + win.skey = earilyTs; + } + + return win; +} + +// init file iterator +static int32_t initFilesetIterator(SFilesetIter* pIter, TFileSetArray* pFileSetArray, STsdbReader* pReader) { + size_t numOfFileset = TARRAY2_SIZE(pFileSetArray); + + pIter->index = ASCENDING_TRAVERSE(pReader->info.order) ? 
-1 : numOfFileset; + pIter->order = pReader->info.order; + pIter->pFilesetList = pFileSetArray; + pIter->numOfFiles = numOfFileset; + + if (pIter->pLastBlockReader == NULL) { + pIter->pLastBlockReader = taosMemoryCalloc(1, sizeof(struct SLastBlockReader)); + if (pIter->pLastBlockReader == NULL) { + int32_t code = TSDB_CODE_OUT_OF_MEMORY; + tsdbError("failed to prepare the last block iterator, since:%s %s", tstrerror(code), pReader->idStr); + return code; + } + } + + SLastBlockReader* pLReader = pIter->pLastBlockReader; + pLReader->order = pReader->info.order; + pLReader->window = pReader->info.window; + pLReader->verRange = pReader->info.verRange; + + pLReader->uid = 0; + tMergeTreeClose(&pLReader->mergeTree); + tsdbDebug("init fileset iterator, total files:%d %s", pIter->numOfFiles, pReader->idStr); + return TSDB_CODE_SUCCESS; +} + +static int32_t filesetIteratorNext(SFilesetIter* pIter, STsdbReader* pReader, bool* hasNext) { + bool asc = ASCENDING_TRAVERSE(pIter->order); + int32_t step = asc ? 
1 : -1; + int32_t code = 0; + + pIter->index += step; + if ((asc && pIter->index >= pIter->numOfFiles) || ((!asc) && pIter->index < 0)) { + *hasNext = false; + return TSDB_CODE_SUCCESS; + } + + SCostSummary* pSum = &pReader->cost; + + pIter->pLastBlockReader->uid = 0; + tMergeTreeClose(&pIter->pLastBlockReader->mergeTree); + + pReader->status.pLDataIterArray = + destroySttBlockReader(pReader->status.pLDataIterArray, &pSum->lastBlockLoad, &pSum->lastBlockLoadTime); + pReader->status.pLDataIterArray = taosArrayInit(4, POINTER_BYTES); + + // check file the time range of coverage + STimeWindow win = {0}; + + while (1) { + if (pReader->pFileReader != NULL) { + tsdbDataFileReaderClose(&pReader->pFileReader); + } + + pReader->status.pCurrentFileset = pIter->pFilesetList->data[pIter->index]; + + STFileObj** pFileObj = pReader->status.pCurrentFileset->farr; + if (pFileObj[0] != NULL || pFileObj[3] != NULL) { + SDataFileReaderConfig conf = {.tsdb = pReader->pTsdb, .szPage = pReader->pTsdb->pVnode->config.tsdbPageSize}; + + const char* filesName[4] = {0}; + + if (pFileObj[0] != NULL) { + conf.files[0].file = *pFileObj[0]->f; + conf.files[0].exist = true; + filesName[0] = pFileObj[0]->fname; + + conf.files[1].file = *pFileObj[1]->f; + conf.files[1].exist = true; + filesName[1] = pFileObj[1]->fname; + + conf.files[2].file = *pFileObj[2]->f; + conf.files[2].exist = true; + filesName[2] = pFileObj[2]->fname; + } + + if (pFileObj[3] != NULL) { + conf.files[3].exist = true; + conf.files[3].file = *pFileObj[3]->f; + filesName[3] = pFileObj[3]->fname; + } + + code = tsdbDataFileReaderOpen(filesName, &conf, &pReader->pFileReader); + if (code != TSDB_CODE_SUCCESS) { + goto _err; + } + + pReader->cost.headFileLoad += 1; + } + + int32_t fid = pReader->status.pCurrentFileset->fid; + tsdbFidKeyRange(fid, pReader->pTsdb->keepCfg.days, pReader->pTsdb->keepCfg.precision, &win.skey, &win.ekey); + + // current file are no longer overlapped with query time window, ignore remain files + if ((asc 
&& win.skey > pReader->info.window.ekey) || (!asc && win.ekey < pReader->info.window.skey)) { + tsdbDebug("%p remain files are not qualified for qrange:%" PRId64 "-%" PRId64 ", ignore, %s", pReader, + pReader->info.window.skey, pReader->info.window.ekey, pReader->idStr); + *hasNext = false; + return TSDB_CODE_SUCCESS; + } + + if ((asc && (win.ekey < pReader->info.window.skey)) || ((!asc) && (win.skey > pReader->info.window.ekey))) { + pIter->index += step; + if ((asc && pIter->index >= pIter->numOfFiles) || ((!asc) && pIter->index < 0)) { + *hasNext = false; + return TSDB_CODE_SUCCESS; + } + continue; + } + + tsdbDebug("%p file found fid:%d for qrange:%" PRId64 "-%" PRId64 ", %s", pReader, fid, pReader->info.window.skey, + pReader->info.window.ekey, pReader->idStr); + *hasNext = true; + return TSDB_CODE_SUCCESS; + } + +_err: + *hasNext = false; + return code; +} + +static void resetDataBlockIterator(SDataBlockIter* pIter, int32_t order) { + pIter->order = order; + pIter->index = -1; + pIter->numOfBlocks = 0; + if (pIter->blockList == NULL) { + pIter->blockList = taosArrayInit(4, sizeof(SFileDataBlockInfo)); + } else { + taosArrayClear(pIter->blockList); + } +} + +static void cleanupDataBlockIterator(SDataBlockIter* pIter) { taosArrayDestroy(pIter->blockList); } + +static void initReaderStatus(SReaderStatus* pStatus) { + pStatus->pTableIter = NULL; + pStatus->loadFromFile = true; +} + +static SSDataBlock* createResBlock(SQueryTableDataCond* pCond, int32_t capacity) { + SSDataBlock* pResBlock = createDataBlock(); + if (pResBlock == NULL) { + terrno = TSDB_CODE_OUT_OF_MEMORY; + return NULL; + } + + for (int32_t i = 0; i < pCond->numOfCols; ++i) { + SColumnInfoData colInfo = {0}; + colInfo.info = pCond->colList[i]; + blockDataAppendColInfo(pResBlock, &colInfo); + } + + int32_t code = blockDataEnsureCapacity(pResBlock, capacity); + if (code != TSDB_CODE_SUCCESS) { + terrno = code; + taosMemoryFree(pResBlock); + return NULL; + } + return pResBlock; +} + +static int32_t 
// Create the reader mutex. Returns whatever taosThreadMutexInit() reports.
static int32_t tsdbInitReaderLock(STsdbReader* pReader) {
  int32_t ret = -1;
  qTrace("tsdb/read: %p, pre-init read mutex: %p, code: %d", pReader, &pReader->readerMutex, ret);

  ret = taosThreadMutexInit(&pReader->readerMutex, NULL);
  qTrace("tsdb/read: %p, post-init read mutex: %p, code: %d", pReader, &pReader->readerMutex, ret);
  return ret;
}

// Destroy the reader mutex. Returns whatever taosThreadMutexDestroy() reports.
static int32_t tsdbUninitReaderLock(STsdbReader* pReader) {
  int32_t ret = -1;
  qTrace("tsdb/read: %p, pre-uninit read mutex: %p, code: %d", pReader, &pReader->readerMutex, ret);

  ret = taosThreadMutexDestroy(&pReader->readerMutex);
  qTrace("tsdb/read: %p, post-uninit read mutex: %p, code: %d", pReader, &pReader->readerMutex, ret);
  return ret;
}

// Lock the reader mutex via taosThreadMutexLock(). Returns its result.
static int32_t tsdbAcquireReader(STsdbReader* pReader) {
  int32_t ret = -1;
  qTrace("tsdb/read: %p, pre-take read mutex: %p, code: %d", pReader, &pReader->readerMutex, ret);

  ret = taosThreadMutexLock(&pReader->readerMutex);
  qTrace("tsdb/read: %p, post-take read mutex: %p, code: %d", pReader, &pReader->readerMutex, ret);
  return ret;
}

// Try to lock the reader mutex via taosThreadMutexTryLock(). Returns its result.
static int32_t tsdbTryAcquireReader(STsdbReader* pReader) {
  int32_t ret = -1;
  qTrace("tsdb/read: %p, pre-trytake read mutex: %p, code: %d", pReader, &pReader->readerMutex, ret);

  ret = taosThreadMutexTryLock(&pReader->readerMutex);
  qTrace("tsdb/read: %p, post-trytake read mutex: %p, code: %d", pReader, &pReader->readerMutex, ret);
  return ret;
}

// Unlock the reader mutex via taosThreadMutexUnlock(). Returns its result.
static int32_t tsdbReleaseReader(STsdbReader* pReader) {
  int32_t ret = -1;
  qTrace("tsdb/read: %p, pre-untake read mutex: %p, code: %d", pReader, &pReader->readerMutex, ret);

  ret = taosThreadMutexUnlock(&pReader->readerMutex);
  qTrace("tsdb/read: %p, post-untake read mutex: %p, code: %d", pReader, &pReader->readerMutex, ret);
  return ret;
}

// Release the reader mutex, unless the current result is a composed data block, in which case nothing is done.
void tsdbReleaseDataBlock2(STsdbReader* pReader) {
  if (pReader->status.composedDataBlock) {
    return;
  }

  tsdbReleaseReader(pReader);
}
int32_t initResBlockInfo(SResultBlockInfo* pResBlockInfo, int64_t capacity, SSDataBlock* pResBlock, + SQueryTableDataCond* pCond) { + pResBlockInfo->capacity = capacity; + pResBlockInfo->pResBlock = pResBlock; + terrno = 0; + + if (pResBlockInfo->pResBlock == NULL) { + pResBlockInfo->freeBlock = true; + pResBlockInfo->pResBlock = createResBlock(pCond, pResBlockInfo->capacity); + } else { + pResBlockInfo->freeBlock = false; + } + + return terrno; +} + +static int32_t tsdbReaderCreate(SVnode* pVnode, SQueryTableDataCond* pCond, void** ppReader, int32_t capacity, + SSDataBlock* pResBlock, const char* idstr) { + int32_t code = 0; + int8_t level = 0; + STsdbReader* pReader = (STsdbReader*)taosMemoryCalloc(1, sizeof(*pReader)); + if (pReader == NULL) { + code = TSDB_CODE_OUT_OF_MEMORY; + goto _end; + } + + if (VND_IS_TSMA(pVnode)) { + tsdbDebug("vgId:%d, tsma is selected to query, %s", TD_VID(pVnode), idstr); + } + + initReaderStatus(&pReader->status); + + pReader->pTsdb = getTsdbByRetentions(pVnode, pCond->twindows.skey, pVnode->config.tsdbCfg.retentions, idstr, &level); + pReader->info.suid = pCond->suid; + pReader->info.order = pCond->order; + + pReader->idStr = (idstr != NULL) ? 
taosStrdup(idstr) : NULL; + pReader->info.verRange = getQueryVerRange(pVnode, pCond, level); + pReader->type = pCond->type; + pReader->info.window = updateQueryTimeWindow(pReader->pTsdb, &pCond->twindows); + pReader->blockInfoBuf.numPerBucket = 1000; // 1000 tables per bucket + + code = initResBlockInfo(&pReader->resBlockInfo, capacity, pResBlock, pCond); + if (code != TSDB_CODE_SUCCESS) { + goto _end; + } + + if (pCond->numOfCols <= 0) { + tsdbError("vgId:%d, invalid column number %d in query cond, %s", TD_VID(pVnode), pCond->numOfCols, idstr); + code = TSDB_CODE_INVALID_PARA; + goto _end; + } + + // allocate buffer in order to load data blocks from file + SBlockLoadSuppInfo* pSup = &pReader->suppInfo; + pSup->tsColAgg.colId = PRIMARYKEY_TIMESTAMP_COL_ID; + setColumnIdSlotList(pSup, pCond->colList, pCond->pSlotList, pCond->numOfCols); + + code = tBlockDataCreate(&pReader->status.fileBlockData); + if (code != TSDB_CODE_SUCCESS) { + terrno = code; + goto _end; + } + + if (pReader->suppInfo.colId[0] != PRIMARYKEY_TIMESTAMP_COL_ID) { + tsdbError("the first column isn't primary timestamp, %d, %s", pReader->suppInfo.colId[0], pReader->idStr); + code = TSDB_CODE_INVALID_PARA; + goto _end; + } + + pReader->status.pPrimaryTsCol = taosArrayGet(pReader->resBlockInfo.pResBlock->pDataBlock, pSup->slotId[0]); + int32_t type = pReader->status.pPrimaryTsCol->info.type; + if (type != TSDB_DATA_TYPE_TIMESTAMP) { + tsdbError("the first column isn't primary timestamp in result block, actual: %s, %s", tDataTypes[type].name, + pReader->idStr); + code = TSDB_CODE_INVALID_PARA; + goto _end; + } + + tsdbInitReaderLock(pReader); + + *ppReader = pReader; + return code; + +_end: + tsdbReaderClose(pReader); + *ppReader = NULL; + return code; +} + +static int32_t doLoadBlockIndex(STsdbReader* pReader, SDataFileReader* pFileReader, SArray* pIndexList) { + int64_t st = taosGetTimestampUs(); + int32_t numOfTables = tSimpleHashGetSize(pReader->status.pTableMap); + if (pFileReader == NULL) { + 
return TSDB_CODE_SUCCESS; + } + + const TBrinBlkArray* pBlkArray = NULL; + + int32_t code = tsdbDataFileReadBrinBlk(pFileReader, &pBlkArray); + +#if 0 + LRUHandle* handle = NULL; + + int32_t code = tsdbCacheGetBlockIdx(pFileReader->pTsdb->biCache, pFileReader, &handle); + if (code != TSDB_CODE_SUCCESS || handle == NULL) { + goto _end; + } + + + SArray* aBlockIdx = (SArray*)taosLRUCacheValue(pFileReader->pTsdb->biCache, handle); + size_t num = taosArrayGetSize(aBlockIdx); + if (num == 0) { + tsdbBICacheRelease(pFileReader->pTsdb->biCache, handle); + return TSDB_CODE_SUCCESS; + } +#endif + + // todo binary search to the start position + int64_t et1 = taosGetTimestampUs(); + + SBrinBlk* pBrinBlk = NULL; + STableUidList* pList = &pReader->status.uidList; + + int32_t i = 0; + + while (i < TARRAY2_SIZE(pBlkArray)) { + pBrinBlk = &pBlkArray->data[i]; + if (pBrinBlk->maxTbid.suid < pReader->info.suid) { + i += 1; + continue; + } + + if (pBrinBlk->minTbid.suid > pReader->info.suid) { // not include the queried table/super table, quit the loop + break; + } + + ASSERT(pBrinBlk->minTbid.suid <= pReader->info.suid && pBrinBlk->maxTbid.suid >= pReader->info.suid); + if (pBrinBlk->maxTbid.suid == pReader->info.suid && pBrinBlk->maxTbid.uid < pList->tableUidList[0]) { + i += 1; + continue; + } + + if (pBrinBlk->minTbid.suid == pReader->info.suid && pBrinBlk->minTbid.uid > pList->tableUidList[numOfTables - 1]) { + break; + } + + taosArrayPush(pIndexList, pBrinBlk); + i += 1; + } + + int64_t et2 = taosGetTimestampUs(); + tsdbDebug("load block index for %d/%d tables completed, elapsed time:%.2f ms, set BrinBlk:%.2f ms, size:%.2f Kb %s", + numOfTables, (int32_t)pBlkArray->size, (et1 - st) / 1000.0, (et2 - et1) / 1000.0, + pBlkArray->size * sizeof(SBrinBlk) / 1024.0, pReader->idStr); + + pReader->cost.headFileLoadTime += (et1 - st) / 1000.0; + +_end: + // tsdbBICacheRelease(pFileReader->pTsdb->biCache, handle); + return code; +} + +static int32_t doLoadFileBlock(STsdbReader* pReader, 
SArray* pIndexList, SBlockNumber* pBlockNum, + SArray* pTableScanInfoList) { + size_t sizeInDisk = 0; + int64_t st = taosGetTimestampUs(); + + // clear info for the new file + cleanupInfoFoxNextFileset(pReader->status.pTableMap); + + int32_t k = 0; + int32_t numOfTables = tSimpleHashGetSize(pReader->status.pTableMap); + int32_t step = ASCENDING_TRAVERSE(pReader->info.order) ? 1 : -1; + STimeWindow w = pReader->info.window; + SBrinRecord* pRecord = NULL; + + SBrinRecordIter iter = {0}; + initBrinRecordIter(&iter, pReader->pFileReader, pIndexList); + + while (((pRecord = getNextBrinRecord(&iter)) != NULL)) { + if (pRecord->suid > pReader->info.suid) { + break; + } + + uint64_t uid = pReader->status.uidList.tableUidList[k]; + if (pRecord->suid < pReader->info.suid) { + continue; + } + + if (uid < pRecord->uid) { // forward the table uid index + while (k < numOfTables && pReader->status.uidList.tableUidList[k] < pRecord->uid) { + k += 1; + } + + if (k >= numOfTables) { + break; + } + + uid = pReader->status.uidList.tableUidList[k]; + } + + if (pRecord->uid < uid) { + continue; + } + + ASSERT(pRecord->suid == pReader->info.suid && uid == pRecord->uid); + + STableBlockScanInfo* pScanInfo = getTableBlockScanInfo(pReader->status.pTableMap, uid, pReader->idStr); + if (ASCENDING_TRAVERSE(pReader->info.order)) { + w.skey = pScanInfo->lastKey + step; + } else { + w.ekey = pScanInfo->lastKey + step; + } + + if (isEmptyQueryTimeWindow(&w)) { + k += 1; + continue; + } + + // 1. time range check + if (pRecord->firstKey > w.ekey || pRecord->lastKey < w.skey) { + continue; + } + + // 2. 
version range check + if (pRecord->minVer > pReader->info.verRange.maxVer || pRecord->maxVer < pReader->info.verRange.minVer) { + continue; + } + + if (pScanInfo->pBlockList == NULL) { + pScanInfo->pBlockList = taosArrayInit(4, sizeof(SBrinRecord)); + } + + void* p1 = taosArrayPush(pScanInfo->pBlockList, pRecord); + if (p1 == NULL) { + return TSDB_CODE_OUT_OF_MEMORY; + } + + pBlockNum->numOfBlocks += 1; + if (taosArrayGetSize(pTableScanInfoList) == 0) { + taosArrayPush(pTableScanInfoList, &pScanInfo); + } else { + STableBlockScanInfo** p = taosArrayGetLast(pTableScanInfoList); + if ((*p)->uid != uid) { + taosArrayPush(pTableScanInfoList, &pScanInfo); + } + } + } + + clearBrinBlockIter(&iter); + + pBlockNum->numOfLastFiles = pReader->status.pCurrentFileset->lvlArr->size; + int32_t total = pBlockNum->numOfLastFiles + pBlockNum->numOfBlocks; + + double el = (taosGetTimestampUs() - st) / 1000.0; + tsdbDebug( + "load block of %d tables completed, blocks:%d in %d tables, last-files:%d, block-info-size:%.2f Kb, elapsed " + "time:%.2f ms %s", + numOfTables, pBlockNum->numOfBlocks, (int32_t)taosArrayGetSize(pTableScanInfoList), pBlockNum->numOfLastFiles, + sizeInDisk / 1000.0, el, pReader->idStr); + + pReader->cost.numOfBlocks += total; + pReader->cost.headFileLoadTime += el; + + return TSDB_CODE_SUCCESS; +} + +static void setBlockAllDumped(SFileBlockDumpInfo* pDumpInfo, int64_t maxKey, int32_t order) { + int32_t step = ASCENDING_TRAVERSE(order) ? 
1 : -1; + pDumpInfo->allDumped = true; + pDumpInfo->lastKey = maxKey + step; +} + +static int32_t doCopyColVal(SColumnInfoData* pColInfoData, int32_t rowIndex, int32_t colIndex, SColVal* pColVal, + SBlockLoadSuppInfo* pSup) { + if (IS_VAR_DATA_TYPE(pColVal->type)) { + if (!COL_VAL_IS_VALUE(pColVal)) { + colDataSetNULL(pColInfoData, rowIndex); + } else { + varDataSetLen(pSup->buildBuf[colIndex], pColVal->value.nData); + if (pColVal->value.nData > pColInfoData->info.bytes) { + tsdbWarn("column cid:%d actual data len %d is bigger than schema len %d", pColVal->cid, pColVal->value.nData, + pColInfoData->info.bytes); + return TSDB_CODE_TDB_INVALID_TABLE_SCHEMA_VER; + } + if (pColVal->value.nData > 0) { // pData may be null, if nData is 0 + memcpy(varDataVal(pSup->buildBuf[colIndex]), pColVal->value.pData, pColVal->value.nData); + } + + colDataSetVal(pColInfoData, rowIndex, pSup->buildBuf[colIndex], false); + } + } else { + colDataSetVal(pColInfoData, rowIndex, (const char*)&pColVal->value, !COL_VAL_IS_VALUE(pColVal)); + } + + return TSDB_CODE_SUCCESS; +} + +static SFileDataBlockInfo* getCurrentBlockInfo(SDataBlockIter* pBlockIter) { + size_t num = TARRAY_SIZE(pBlockIter->blockList); + if (num == 0) { + ASSERT(pBlockIter->numOfBlocks == num); + return NULL; + } + + SFileDataBlockInfo* pBlockInfo = taosArrayGet(pBlockIter->blockList, pBlockIter->index); + return pBlockInfo; +} + +static int doBinarySearchKey(TSKEY* keyList, int num, int pos, TSKEY key, int order) { + // start end position + int s, e; + s = pos; + + // check + ASSERT(pos >= 0 && pos < num && num > 0); + if (order == TSDB_ORDER_ASC) { + // find the first position which is smaller than the key + e = num - 1; + if (key < keyList[pos]) return -1; + while (1) { + // check can return + if (key >= keyList[e]) return e; + if (key <= keyList[s]) return s; + if (e - s <= 1) return s; + + // change start or end position + int mid = s + (e - s + 1) / 2; + if (keyList[mid] > key) + e = mid; + else if (keyList[mid] < 
key) + s = mid; + else + return mid; + } + } else { // DESC + // find the first position which is bigger than the key + e = 0; + if (key > keyList[pos]) return -1; + while (1) { + // check can return + if (key <= keyList[e]) return e; + if (key >= keyList[s]) return s; + if (s - e <= 1) return s; + + // change start or end position + int mid = s - (s - e + 1) / 2; + if (keyList[mid] < key) + e = mid; + else if (keyList[mid] > key) + s = mid; + else + return mid; + } + } +} + +static int32_t getEndPosInDataBlock(STsdbReader* pReader, SBlockData* pBlockData, SBrinRecord* pRecord, int32_t pos) { + // NOTE: reverse the order to find the end position in data block + int32_t endPos = -1; + bool asc = ASCENDING_TRAVERSE(pReader->info.order); + + if (asc && pReader->info.window.ekey >= pRecord->lastKey) { + endPos = pRecord->numRow - 1; + } else if (!asc && pReader->info.window.skey <= pRecord->firstKey) { + endPos = 0; + } else { + int64_t key = asc ? pReader->info.window.ekey : pReader->info.window.skey; + endPos = doBinarySearchKey(pBlockData->aTSKEY, pRecord->numRow, pos, key, pReader->info.order); + } + + if ((pReader->info.verRange.maxVer >= pRecord->minVer && pReader->info.verRange.maxVer < pRecord->maxVer) || + (pReader->info.verRange.minVer <= pRecord->maxVer && pReader->info.verRange.minVer > pRecord->minVer)) { + int32_t i = endPos; + + if (asc) { + for (; i >= 0; --i) { + if (pBlockData->aVersion[i] <= pReader->info.verRange.maxVer) { + break; + } + } + } else { + for (; i < pRecord->numRow; ++i) { + if (pBlockData->aVersion[i] >= pReader->info.verRange.minVer) { + break; + } + } + } + + endPos = i; + } + + return endPos; +} + +static void copyPrimaryTsCol(const SBlockData* pBlockData, SFileBlockDumpInfo* pDumpInfo, SColumnInfoData* pColData, + int32_t dumpedRows, bool asc) { + if (asc) { + memcpy(pColData->pData, &pBlockData->aTSKEY[pDumpInfo->rowIndex], dumpedRows * sizeof(int64_t)); + } else { + int32_t startIndex = pDumpInfo->rowIndex - dumpedRows + 1; + 
memcpy(pColData->pData, &pBlockData->aTSKEY[startIndex], dumpedRows * sizeof(int64_t)); + + // todo: opt perf by extract the loop + // reverse the array list + int32_t mid = dumpedRows >> 1u; + int64_t* pts = (int64_t*)pColData->pData; + for (int32_t j = 0; j < mid; ++j) { + int64_t t = pts[j]; + pts[j] = pts[dumpedRows - j - 1]; + pts[dumpedRows - j - 1] = t; + } + } +} + +// a faster version of copy procedure. +static void copyNumericCols(const SColData* pData, SFileBlockDumpInfo* pDumpInfo, SColumnInfoData* pColData, + int32_t dumpedRows, bool asc) { + uint8_t* p = NULL; + if (asc) { + p = pData->pData + tDataTypes[pData->type].bytes * pDumpInfo->rowIndex; + } else { + int32_t startIndex = pDumpInfo->rowIndex - dumpedRows + 1; + p = pData->pData + tDataTypes[pData->type].bytes * startIndex; + } + + int32_t step = asc ? 1 : -1; + + // make sure it is aligned to 8bit, the allocated memory address is aligned to 256bit + // ASSERT((((uint64_t)pColData->pData) & (0x8 - 1)) == 0); + + // 1. copy data in a batch model + memcpy(pColData->pData, p, dumpedRows * tDataTypes[pData->type].bytes); + + // 2. 
reverse the array list in case of descending order scan data block + if (!asc) { + switch (pColData->info.type) { + case TSDB_DATA_TYPE_TIMESTAMP: + case TSDB_DATA_TYPE_DOUBLE: + case TSDB_DATA_TYPE_BIGINT: + case TSDB_DATA_TYPE_UBIGINT: { + int32_t mid = dumpedRows >> 1u; + int64_t* pts = (int64_t*)pColData->pData; + for (int32_t j = 0; j < mid; ++j) { + int64_t t = pts[j]; + pts[j] = pts[dumpedRows - j - 1]; + pts[dumpedRows - j - 1] = t; + } + break; + } + + case TSDB_DATA_TYPE_BOOL: + case TSDB_DATA_TYPE_TINYINT: + case TSDB_DATA_TYPE_UTINYINT: { + int32_t mid = dumpedRows >> 1u; + int8_t* pts = (int8_t*)pColData->pData; + for (int32_t j = 0; j < mid; ++j) { + int8_t t = pts[j]; + pts[j] = pts[dumpedRows - j - 1]; + pts[dumpedRows - j - 1] = t; + } + break; + } + + case TSDB_DATA_TYPE_SMALLINT: + case TSDB_DATA_TYPE_USMALLINT: { + int32_t mid = dumpedRows >> 1u; + int16_t* pts = (int16_t*)pColData->pData; + for (int32_t j = 0; j < mid; ++j) { + int64_t t = pts[j]; + pts[j] = pts[dumpedRows - j - 1]; + pts[dumpedRows - j - 1] = t; + } + break; + } + + case TSDB_DATA_TYPE_FLOAT: + case TSDB_DATA_TYPE_INT: + case TSDB_DATA_TYPE_UINT: { + int32_t mid = dumpedRows >> 1u; + int32_t* pts = (int32_t*)pColData->pData; + for (int32_t j = 0; j < mid; ++j) { + int32_t t = pts[j]; + pts[j] = pts[dumpedRows - j - 1]; + pts[dumpedRows - j - 1] = t; + } + break; + } + } + } + + // 3. 
if the null value exists, check items one-by-one + if (pData->flag != HAS_VALUE) { + int32_t rowIndex = 0; + + for (int32_t j = pDumpInfo->rowIndex; rowIndex < dumpedRows; j += step, rowIndex++) { + uint8_t v = tColDataGetBitValue(pData, j); + if (v == 0 || v == 1) { + colDataSetNull_f(pColData->nullbitmap, rowIndex); + pColData->hasNull = true; + } + } + } +} + +static int32_t copyBlockDataToSDataBlock(STsdbReader* pReader) { + SReaderStatus* pStatus = &pReader->status; + SDataBlockIter* pBlockIter = &pStatus->blockIter; + SBlockLoadSuppInfo* pSupInfo = &pReader->suppInfo; + SFileBlockDumpInfo* pDumpInfo = &pReader->status.fBlockDumpInfo; + + SBlockData* pBlockData = &pStatus->fileBlockData; + SFileDataBlockInfo* pBlockInfo = getCurrentBlockInfo(pBlockIter); + SSDataBlock* pResBlock = pReader->resBlockInfo.pResBlock; + int32_t numOfOutputCols = pSupInfo->numOfCols; + int32_t code = TSDB_CODE_SUCCESS; + + SColVal cv = {0}; + int64_t st = taosGetTimestampUs(); + bool asc = ASCENDING_TRAVERSE(pReader->info.order); + int32_t step = asc ? 1 : -1; + + SBrinRecord* pRecord = &pBlockInfo->record; + + // no data exists, return directly. + if (pBlockData->nRow == 0 || pBlockData->aTSKEY == 0) { + tsdbWarn("%p no need to copy since no data in blockData, table uid:%" PRIu64 " has been dropped, %s", pReader, + pBlockInfo->uid, pReader->idStr); + pResBlock->info.rows = 0; + return 0; + } + + // row index of dump info remain the initial position, let's find the appropriate start position. 
+ if ((pDumpInfo->rowIndex == 0 && asc) || (pDumpInfo->rowIndex == pRecord->numRow - 1 && (!asc))) { + if (asc && pReader->info.window.skey <= pRecord->firstKey && pReader->info.verRange.minVer <= pRecord->minVer) { + // pDumpInfo->rowIndex = 0; + } else if (!asc && pReader->info.window.ekey >= pRecord->lastKey && + pReader->info.verRange.maxVer >= pRecord->maxVer) { + // pDumpInfo->rowIndex = pRecord->numRow - 1; + } else { // find the appropriate the start position in current block, and set it to be the current rowIndex + int32_t pos = asc ? pRecord->numRow - 1 : 0; + int32_t order = asc ? TSDB_ORDER_DESC : TSDB_ORDER_ASC; + int64_t key = asc ? pReader->info.window.skey : pReader->info.window.ekey; + pDumpInfo->rowIndex = doBinarySearchKey(pBlockData->aTSKEY, pRecord->numRow, pos, key, order); + + if (pDumpInfo->rowIndex < 0) { + tsdbError( + "%p failed to locate the start position in current block, global index:%d, table index:%d, brange:%" PRId64 + "-%" PRId64 ", minVer:%" PRId64 ", maxVer:%" PRId64 " %s", + pReader, pBlockIter->index, pBlockInfo->tbBlockIdx, pRecord->firstKey, pRecord->lastKey, pRecord->minVer, + pRecord->maxVer, pReader->idStr); + return TSDB_CODE_INVALID_PARA; + } + + ASSERT(pReader->info.verRange.minVer <= pRecord->maxVer && pReader->info.verRange.maxVer >= pRecord->minVer); + + // find the appropriate start position that satisfies the version requirement. 
+ if ((pReader->info.verRange.maxVer >= pRecord->minVer && pReader->info.verRange.maxVer < pRecord->maxVer) || + (pReader->info.verRange.minVer <= pRecord->maxVer && pReader->info.verRange.minVer > pRecord->minVer)) { + int32_t i = pDumpInfo->rowIndex; + if (asc) { + for (; i < pRecord->numRow; ++i) { + if (pBlockData->aVersion[i] >= pReader->info.verRange.minVer) { + break; + } + } + } else { + for (; i >= 0; --i) { + if (pBlockData->aVersion[i] <= pReader->info.verRange.maxVer) { + break; + } + } + } + + pDumpInfo->rowIndex = i; + } + } + } + + // time window check + int32_t endIndex = getEndPosInDataBlock(pReader, pBlockData, pRecord, pDumpInfo->rowIndex); + if (endIndex == -1) { + setBlockAllDumped(pDumpInfo, pReader->info.window.ekey, pReader->info.order); + return TSDB_CODE_SUCCESS; + } + + endIndex += step; + int32_t dumpedRows = asc ? (endIndex - pDumpInfo->rowIndex) : (pDumpInfo->rowIndex - endIndex); + if (dumpedRows > pReader->resBlockInfo.capacity) { // output buffer check + dumpedRows = pReader->resBlockInfo.capacity; + } else if (dumpedRows <= 0) { // no qualified rows in current data block, abort directly. 
+ setBlockAllDumped(pDumpInfo, pReader->info.window.ekey, pReader->info.order); + return TSDB_CODE_SUCCESS; + } + + int32_t i = 0; + int32_t rowIndex = 0; + + SColumnInfoData* pColData = taosArrayGet(pResBlock->pDataBlock, pSupInfo->slotId[i]); + if (pSupInfo->colId[i] == PRIMARYKEY_TIMESTAMP_COL_ID) { + copyPrimaryTsCol(pBlockData, pDumpInfo, pColData, dumpedRows, asc); + i += 1; + } + + int32_t colIndex = 0; + int32_t num = pBlockData->nColData; + while (i < numOfOutputCols && colIndex < num) { + rowIndex = 0; + + SColData* pData = tBlockDataGetColDataByIdx(pBlockData, colIndex); + if (pData->cid < pSupInfo->colId[i]) { + colIndex += 1; + } else if (pData->cid == pSupInfo->colId[i]) { + pColData = taosArrayGet(pResBlock->pDataBlock, pSupInfo->slotId[i]); + + if (pData->flag == HAS_NONE || pData->flag == HAS_NULL || pData->flag == (HAS_NULL | HAS_NONE)) { + colDataSetNNULL(pColData, 0, dumpedRows); + } else { + if (IS_MATHABLE_TYPE(pColData->info.type)) { + copyNumericCols(pData, pDumpInfo, pColData, dumpedRows, asc); + } else { // varchar/nchar type + for (int32_t j = pDumpInfo->rowIndex; rowIndex < dumpedRows; j += step) { + tColDataGetValue(pData, j, &cv); + code = doCopyColVal(pColData, rowIndex++, i, &cv, pSupInfo); + if (code) { + return code; + } + } + } + } + + colIndex += 1; + i += 1; + } else { // the specified column does not exist in file block, fill with null data + pColData = taosArrayGet(pResBlock->pDataBlock, pSupInfo->slotId[i]); + colDataSetNNULL(pColData, 0, dumpedRows); + i += 1; + } + } + + // fill the mis-matched columns with null value + while (i < numOfOutputCols) { + pColData = taosArrayGet(pResBlock->pDataBlock, pSupInfo->slotId[i]); + colDataSetNNULL(pColData, 0, dumpedRows); + i += 1; + } + + pResBlock->info.dataLoad = 1; + pResBlock->info.rows = dumpedRows; + pDumpInfo->rowIndex += step * dumpedRows; + + // check if current block are all handled + if (pDumpInfo->rowIndex >= 0 && pDumpInfo->rowIndex < pRecord->numRow) { + int64_t ts = 
pBlockData->aTSKEY[pDumpInfo->rowIndex]; + if (outOfTimeWindow(ts, + &pReader->info.window)) { // the remain data has out of query time window, ignore current block + setBlockAllDumped(pDumpInfo, ts, pReader->info.order); + } + } else { + int64_t ts = asc ? pRecord->lastKey : pRecord->firstKey; + setBlockAllDumped(pDumpInfo, ts, pReader->info.order); + } + + double elapsedTime = (taosGetTimestampUs() - st) / 1000.0; + pReader->cost.blockLoadTime += elapsedTime; + + int32_t unDumpedRows = asc ? pRecord->numRow - pDumpInfo->rowIndex : pDumpInfo->rowIndex + 1; + tsdbDebug("%p copy file block to sdatablock, global index:%d, table index:%d, brange:%" PRId64 "-%" PRId64 + ", rows:%d, remain:%d, minVer:%" PRId64 ", maxVer:%" PRId64 ", uid:%" PRIu64 " elapsed time:%.2f ms, %s", + pReader, pBlockIter->index, pBlockInfo->tbBlockIdx, pRecord->firstKey, pRecord->lastKey, dumpedRows, + unDumpedRows, pRecord->minVer, pRecord->maxVer, pBlockInfo->uid, elapsedTime, pReader->idStr); + + return TSDB_CODE_SUCCESS; +} + +static FORCE_INLINE STSchema* getTableSchemaImpl(STsdbReader* pReader, uint64_t uid) { + ASSERT(pReader->info.pSchema == NULL); + + int32_t code = metaGetTbTSchemaEx(pReader->pTsdb->pVnode->pMeta, pReader->info.suid, uid, -1, &pReader->info.pSchema); + if (code != TSDB_CODE_SUCCESS || pReader->info.pSchema == NULL) { + terrno = code; + tsdbError("failed to get table schema, uid:%" PRIu64 ", it may have been dropped, ver:-1, %s", uid, pReader->idStr); + return NULL; + } + + code = tsdbRowMergerInit(&pReader->status.merger, pReader->info.pSchema); + if (code != TSDB_CODE_SUCCESS) { + terrno = code; + tsdbError("failed to init merger, code:%s, %s", tstrerror(code), pReader->idStr); + return NULL; + } + + return pReader->info.pSchema; +} + +static int32_t doLoadFileBlockData(STsdbReader* pReader, SDataBlockIter* pBlockIter, SBlockData* pBlockData, + uint64_t uid) { + int32_t code = 0; + STSchema* pSchema = pReader->info.pSchema; + int64_t st = taosGetTimestampUs(); + + 
tBlockDataReset(pBlockData); + + if (pReader->info.pSchema == NULL) { + pSchema = getTableSchemaImpl(pReader, uid); + if (pSchema == NULL) { + tsdbDebug("%p table uid:%" PRIu64 " has been dropped, no data existed, %s", pReader, uid, pReader->idStr); + return code; + } + } + + SBlockLoadSuppInfo* pSup = &pReader->suppInfo; + SFileDataBlockInfo* pBlockInfo = getCurrentBlockInfo(pBlockIter); + SFileBlockDumpInfo* pDumpInfo = &pReader->status.fBlockDumpInfo; + + SBrinRecord* pRecord = &pBlockInfo->record; + code = tsdbDataFileReadBlockDataByColumn(pReader->pFileReader, pRecord, pBlockData, pSchema, &pSup->colId[1], + pSup->numOfCols - 1); + if (code != TSDB_CODE_SUCCESS) { + tsdbError("%p error occurs in loading file block, global index:%d, table index:%d, brange:%" PRId64 "-%" PRId64 + ", rows:%d, code:%s %s", + pReader, pBlockIter->index, pBlockInfo->tbBlockIdx, pBlockInfo->record.firstKey, + pBlockInfo->record.lastKey, pBlockInfo->record.numRow, tstrerror(code), pReader->idStr); + return code; + } + + double elapsedTime = (taosGetTimestampUs() - st) / 1000.0; + + tsdbDebug("%p load file block into buffer, global index:%d, index in table block list:%d, brange:%" PRId64 "-%" PRId64 + ", rows:%d, minVer:%" PRId64 ", maxVer:%" PRId64 ", elapsed time:%.2f ms, %s", + pReader, pBlockIter->index, pBlockInfo->tbBlockIdx, pRecord->firstKey, pRecord->lastKey, pRecord->numRow, + pRecord->minVer, pRecord->maxVer, elapsedTime, pReader->idStr); + + pReader->cost.blockLoadTime += elapsedTime; + pDumpInfo->allDumped = false; + + return TSDB_CODE_SUCCESS; +} + +/** + * This is an two rectangles overlap cases. 
+ */ +static int32_t dataBlockPartiallyRequired(STimeWindow* pWindow, SVersionRange* pVerRange, SFileDataBlockInfo* pBlock) { + return (pWindow->ekey < pBlock->record.lastKey && pWindow->ekey >= pBlock->record.firstKey) || + (pWindow->skey > pBlock->record.firstKey && pWindow->skey <= pBlock->record.lastKey) || + (pVerRange->minVer > pBlock->record.minVer && pVerRange->minVer <= pBlock->record.maxVer) || + (pVerRange->maxVer < pBlock->record.maxVer && pVerRange->maxVer >= pBlock->record.minVer); +} + +static bool getNeighborBlockOfSameTable(SFileDataBlockInfo* pBlockInfo, STableBlockScanInfo* pTableBlockScanInfo, + int32_t* nextIndex, int32_t order, SBrinRecord* pRecord) { + bool asc = ASCENDING_TRAVERSE(order); + if (asc && pBlockInfo->tbBlockIdx >= taosArrayGetSize(pTableBlockScanInfo->pBlockList) - 1) { + return false; + } + + if (!asc && pBlockInfo->tbBlockIdx == 0) { + return false; + } + + int32_t step = asc ? 1 : -1; + // *nextIndex = pBlockInfo->tbBlockIdx + step; + // *pBlockIndex = *(SBlockIndex*)taosArrayGet(pTableBlockScanInfo->pBlockList, *nextIndex); + SBrinRecord* p = taosArrayGet(pTableBlockScanInfo->pBlockList, pBlockInfo->tbBlockIdx + step); + memcpy(pRecord, p, sizeof(SBrinRecord)); + + // tMapDataGetItemByIdx(&pTableBlockScanInfo->mapData, pIndex->ordinalIndex, pBlock, tGetDataBlk); + return true; +} + +static int32_t findFileBlockInfoIndex(SDataBlockIter* pBlockIter, SFileDataBlockInfo* pFBlockInfo) { + int32_t step = ASCENDING_TRAVERSE(pBlockIter->order) ? 
1 : -1; + int32_t index = pBlockIter->index; + + while (index < pBlockIter->numOfBlocks && index >= 0) { + SFileDataBlockInfo* pFBlock = taosArrayGet(pBlockIter->blockList, index); + if (pFBlock->uid == pFBlockInfo->uid && pFBlock->tbBlockIdx == pFBlockInfo->tbBlockIdx) { + return index; + } + + index += step; + } + + return -1; +} + +static int32_t setFileBlockActiveInBlockIter(SDataBlockIter* pBlockIter, int32_t index, int32_t step) { + if (index < 0 || index >= pBlockIter->numOfBlocks) { + return -1; + } + + SFileDataBlockInfo fblock = *(SFileDataBlockInfo*)taosArrayGet(pBlockIter->blockList, index); + pBlockIter->index += step; + + if (index != pBlockIter->index) { + taosArrayRemove(pBlockIter->blockList, index); + taosArrayInsert(pBlockIter->blockList, pBlockIter->index, &fblock); + + SFileDataBlockInfo* pBlockInfo = taosArrayGet(pBlockIter->blockList, pBlockIter->index); + ASSERT(pBlockInfo->uid == fblock.uid && pBlockInfo->tbBlockIdx == fblock.tbBlockIdx); + } + + return TSDB_CODE_SUCCESS; +} + +// todo: this attribute could be acquired during extractin the global ordered block list. +static bool overlapWithNeighborBlock2(SFileDataBlockInfo* pBlock, SBrinRecord* pRec, int32_t order) { + // it is the last block in current file, no chance to overlap with neighbor blocks. 
+ if (ASCENDING_TRAVERSE(order)) { + return pBlock->record.lastKey == pRec->firstKey; + } else { + return pBlock->record.firstKey == pRec->lastKey; + } +} + +static bool bufferDataInFileBlockGap(int32_t order, TSDBKEY key, SFileDataBlockInfo* pBlock) { + bool ascScan = ASCENDING_TRAVERSE(order); + + return (ascScan && (key.ts != TSKEY_INITIAL_VAL && key.ts <= pBlock->record.firstKey)) || + (!ascScan && (key.ts != TSKEY_INITIAL_VAL && key.ts >= pBlock->record.lastKey)); +} + +static bool keyOverlapFileBlock(TSDBKEY key, SFileDataBlockInfo* pBlock, SVersionRange* pVerRange) { + return (key.ts >= pBlock->record.firstKey && key.ts <= pBlock->record.lastKey) && + (pBlock->record.maxVer >= pVerRange->minVer) && (pBlock->record.minVer <= pVerRange->maxVer); +} + +static bool doCheckforDatablockOverlap(STableBlockScanInfo* pBlockScanInfo, const SBrinRecord* pRecord, + int32_t startIndex) { + size_t num = taosArrayGetSize(pBlockScanInfo->delSkyline); + + for (int32_t i = startIndex; i < num; i += 1) { + TSDBKEY* p = taosArrayGet(pBlockScanInfo->delSkyline, i); + if (p->ts >= pRecord->firstKey && p->ts <= pRecord->lastKey) { + if (p->version >= pRecord->minVer) { + return true; + } + } else if (p->ts < pRecord->firstKey) { // p->ts < pBlock->minKey.ts + if (p->version >= pRecord->minVer) { + if (i < num - 1) { + TSDBKEY* pnext = taosArrayGet(pBlockScanInfo->delSkyline, i + 1); + if (pnext->ts >= pRecord->firstKey) { + return true; + } + } else { // it must be the last point + ASSERT(p->version == 0); + } + } + } else { // (p->ts > pBlock->maxKey.ts) { + return false; + } + } + + return false; +} + +static bool overlapWithDelSkyline(STableBlockScanInfo* pBlockScanInfo, const SBrinRecord* pRecord, int32_t order) { + if (pBlockScanInfo->delSkyline == NULL || (taosArrayGetSize(pBlockScanInfo->delSkyline) == 0)) { + return false; + } + + // ts is not overlap + TSDBKEY* pFirst = taosArrayGet(pBlockScanInfo->delSkyline, 0); + TSDBKEY* pLast = 
taosArrayGetLast(pBlockScanInfo->delSkyline); + if (pRecord->firstKey > pLast->ts || pRecord->lastKey < pFirst->ts) { + return false; + } + + // version is not overlap + if (ASCENDING_TRAVERSE(order)) { + return doCheckforDatablockOverlap(pBlockScanInfo, pRecord, pBlockScanInfo->fileDelIndex); + } else { + int32_t index = pBlockScanInfo->fileDelIndex; + while (1) { + TSDBKEY* p = taosArrayGet(pBlockScanInfo->delSkyline, index); + if (p->ts > pRecord->firstKey && index > 0) { + index -= 1; + } else { // find the first point that is smaller than the minKey.ts of dataBlock. + if (p->ts == pRecord->firstKey && p->version < pRecord->maxVer && index > 0) { + index -= 1; + } + break; + } + } + + return doCheckforDatablockOverlap(pBlockScanInfo, pRecord, index); + } +} + +typedef struct { + bool overlapWithNeighborBlock; + bool hasDupTs; + bool overlapWithDelInfo; + bool overlapWithLastBlock; + bool overlapWithKeyInBuf; + bool partiallyRequired; + bool moreThanCapcity; +} SDataBlockToLoadInfo; + +static void getBlockToLoadInfo(SDataBlockToLoadInfo* pInfo, SFileDataBlockInfo* pBlockInfo, + STableBlockScanInfo* pScanInfo, TSDBKEY keyInBuf, SLastBlockReader* pLastBlockReader, + STsdbReader* pReader) { + int32_t neighborIndex = 0; + SBrinRecord rec = {0}; + + bool hasNeighbor = getNeighborBlockOfSameTable(pBlockInfo, pScanInfo, &neighborIndex, pReader->info.order, &rec); + + // overlap with neighbor + if (hasNeighbor) { + pInfo->overlapWithNeighborBlock = overlapWithNeighborBlock2(pBlockInfo, &rec, pReader->info.order); + } + + // has duplicated ts of different version in this block + pInfo->hasDupTs = (pBlockInfo->record.numRow > pBlockInfo->record.count); + pInfo->overlapWithDelInfo = overlapWithDelSkyline(pScanInfo, &pBlockInfo->record, pReader->info.order); + + if (hasDataInLastBlock(pLastBlockReader)) { + int64_t tsLast = getCurrentKeyInLastBlock(pLastBlockReader); + pInfo->overlapWithLastBlock = !(pBlockInfo->record.lastKey < tsLast || pBlockInfo->record.firstKey > 
tsLast); + } + + pInfo->moreThanCapcity = pBlockInfo->record.numRow > pReader->resBlockInfo.capacity; + pInfo->partiallyRequired = dataBlockPartiallyRequired(&pReader->info.window, &pReader->info.verRange, pBlockInfo); + pInfo->overlapWithKeyInBuf = keyOverlapFileBlock(keyInBuf, pBlockInfo, &pReader->info.verRange); +} + +// 1. the version of all rows should be less than the endVersion +// 2. current block should not overlap with next neighbor block +// 3. current timestamp should not be overlap with each other +// 4. output buffer should be large enough to hold all rows in current block +// 5. delete info should not overlap with current block data +// 6. current block should not contain the duplicated ts +static bool fileBlockShouldLoad(STsdbReader* pReader, SFileDataBlockInfo* pBlockInfo, STableBlockScanInfo* pScanInfo, + TSDBKEY keyInBuf, SLastBlockReader* pLastBlockReader) { + SDataBlockToLoadInfo info = {0}; + getBlockToLoadInfo(&info, pBlockInfo, pScanInfo, keyInBuf, pLastBlockReader, pReader); + + bool loadDataBlock = + (info.overlapWithNeighborBlock || info.hasDupTs || info.partiallyRequired || info.overlapWithKeyInBuf || + info.moreThanCapcity || info.overlapWithDelInfo || info.overlapWithLastBlock); + + // log the reason why load the datablock for profile + if (loadDataBlock) { + tsdbDebug("%p uid:%" PRIu64 + " need to load the datablock, overlapneighbor:%d, hasDup:%d, partiallyRequired:%d, " + "overlapWithKey:%d, greaterThanBuf:%d, overlapWithDel:%d, overlapWithlastBlock:%d, %s", + pReader, pBlockInfo->uid, info.overlapWithNeighborBlock, info.hasDupTs, info.partiallyRequired, + info.overlapWithKeyInBuf, info.moreThanCapcity, info.overlapWithDelInfo, info.overlapWithLastBlock, + pReader->idStr); + } + + return loadDataBlock; +} + +static bool isCleanFileDataBlock(STsdbReader* pReader, SFileDataBlockInfo* pBlockInfo, STableBlockScanInfo* pScanInfo, + TSDBKEY keyInBuf, SLastBlockReader* pLastBlockReader) { + SDataBlockToLoadInfo info = {0}; + 
getBlockToLoadInfo(&info, pBlockInfo, pScanInfo, keyInBuf, pLastBlockReader, pReader); + bool isCleanFileBlock = !(info.overlapWithNeighborBlock || info.hasDupTs || info.overlapWithKeyInBuf || + info.overlapWithDelInfo || info.overlapWithLastBlock); + return isCleanFileBlock; +} + +static int32_t buildDataBlockFromBuf(STsdbReader* pReader, STableBlockScanInfo* pBlockScanInfo, int64_t endKey) { + if (!(pBlockScanInfo->iiter.hasVal || pBlockScanInfo->iter.hasVal)) { + return TSDB_CODE_SUCCESS; + } + + SSDataBlock* pBlock = pReader->resBlockInfo.pResBlock; + + int64_t st = taosGetTimestampUs(); + int32_t code = buildDataBlockFromBufImpl(pBlockScanInfo, endKey, pReader->resBlockInfo.capacity, pReader); + + blockDataUpdateTsWindow(pBlock, pReader->suppInfo.slotId[0]); + pBlock->info.id.uid = pBlockScanInfo->uid; + + setComposedBlockFlag(pReader, true); + + double elapsedTime = (taosGetTimestampUs() - st) / 1000.0; + tsdbDebug("%p build data block from cache completed, elapsed time:%.2f ms, numOfRows:%" PRId64 ", brange:%" PRId64 + " - %" PRId64 ", uid:%" PRIu64 ", %s", + pReader, elapsedTime, pBlock->info.rows, pBlock->info.window.skey, pBlock->info.window.ekey, + pBlockScanInfo->uid, pReader->idStr); + + pReader->cost.buildmemBlock += elapsedTime; + return code; +} + +static bool tryCopyDistinctRowFromFileBlock(STsdbReader* pReader, SBlockData* pBlockData, int64_t key, + SFileBlockDumpInfo* pDumpInfo, bool* copied) { + // opt version + // 1. it is not a border point + // 2. the direct next point is not an duplicated timestamp + int32_t code = TSDB_CODE_SUCCESS; + + *copied = false; + bool asc = (pReader->info.order == TSDB_ORDER_ASC); + if ((pDumpInfo->rowIndex < pDumpInfo->totalRows - 1 && asc) || (pDumpInfo->rowIndex > 0 && (!asc))) { + int32_t step = pReader->info.order == TSDB_ORDER_ASC ? 
1 : -1; + + int64_t nextKey = pBlockData->aTSKEY[pDumpInfo->rowIndex + step]; + if (nextKey != key) { // merge is not needed + code = doAppendRowFromFileBlock(pReader->resBlockInfo.pResBlock, pReader, pBlockData, pDumpInfo->rowIndex); + if (code) { + return code; + } + pDumpInfo->rowIndex += step; + *copied = true; + } + } + + return code; +} + +static bool nextRowFromLastBlocks(SLastBlockReader* pLastBlockReader, STableBlockScanInfo* pScanInfo, + SVersionRange* pVerRange) { + int32_t step = ASCENDING_TRAVERSE(pLastBlockReader->order) ? 1 : -1; + + while (1) { + bool hasVal = tMergeTreeNext(&pLastBlockReader->mergeTree); + if (!hasVal) { // the next value will be the accessed key in stt + pScanInfo->lastKeyInStt += step; + return false; + } + + TSDBROW* pRow = tMergeTreeGetRow(&pLastBlockReader->mergeTree); + int64_t key = pRow->pBlockData->aTSKEY[pRow->iRow]; + int64_t ver = pRow->pBlockData->aVersion[pRow->iRow]; + + pLastBlockReader->currentKey = key; + pScanInfo->lastKeyInStt = key; + + if (!hasBeenDropped(pScanInfo->delSkyline, &pScanInfo->lastBlockDelIndex, key, ver, pLastBlockReader->order, + pVerRange)) { + return true; + } + } +} + +static bool tryCopyDistinctRowFromSttBlock(TSDBROW* fRow, SLastBlockReader* pLastBlockReader, + STableBlockScanInfo* pScanInfo, int64_t ts, STsdbReader* pReader, + bool* copied) { + int32_t code = TSDB_CODE_SUCCESS; + + *copied = false; + + bool hasVal = nextRowFromLastBlocks(pLastBlockReader, pScanInfo, &pReader->info.verRange); + if (hasVal) { + int64_t next1 = getCurrentKeyInLastBlock(pLastBlockReader); + if (next1 != ts) { + code = doAppendRowFromFileBlock(pReader->resBlockInfo.pResBlock, pReader, fRow->pBlockData, fRow->iRow); + if (code) { + return code; + } + + *copied = true; + return code; + } + } else { + code = doAppendRowFromFileBlock(pReader->resBlockInfo.pResBlock, pReader, fRow->pBlockData, fRow->iRow); + if (code) { + return code; + } + + *copied = true; + return code; + } + + return code; +} + +static 
FORCE_INLINE STSchema* doGetSchemaForTSRow(int32_t sversion, STsdbReader* pReader, uint64_t uid) { + // always set the newest schema version in pReader->info.pSchema + if (pReader->info.pSchema == NULL) { + STSchema* ps = getTableSchemaImpl(pReader, uid); + if (ps == NULL) { + return NULL; + } + } + + if (pReader->info.pSchema && sversion == pReader->info.pSchema->version) { + return pReader->info.pSchema; + } + + void** p = tSimpleHashGet(pReader->pSchemaMap, &sversion, sizeof(sversion)); + if (p != NULL) { + return *(STSchema**)p; + } + + STSchema* ptr = NULL; + int32_t code = metaGetTbTSchemaEx(pReader->pTsdb->pVnode->pMeta, pReader->info.suid, uid, sversion, &ptr); + if (code != TSDB_CODE_SUCCESS) { + terrno = code; + return NULL; + } else { + code = tSimpleHashPut(pReader->pSchemaMap, &sversion, sizeof(sversion), &ptr, POINTER_BYTES); + if (code != TSDB_CODE_SUCCESS) { + terrno = code; + return NULL; + } + return ptr; + } +} + +static int32_t doMergeBufAndFileRows(STsdbReader* pReader, STableBlockScanInfo* pBlockScanInfo, TSDBROW* pRow, + SIterInfo* pIter, int64_t key, SLastBlockReader* pLastBlockReader) { + SRowMerger* pMerger = &pReader->status.merger; + SRow* pTSRow = NULL; + SBlockData* pBlockData = &pReader->status.fileBlockData; + SFileBlockDumpInfo* pDumpInfo = &pReader->status.fBlockDumpInfo; + + int64_t tsLast = INT64_MIN; + if (hasDataInLastBlock(pLastBlockReader)) { + tsLast = getCurrentKeyInLastBlock(pLastBlockReader); + } + + TSDBKEY k = TSDBROW_KEY(pRow); + TSDBROW fRow = tsdbRowFromBlockData(pBlockData, pDumpInfo->rowIndex); + + // merge is not initialized yet, due to the fact that the pReader->info.pSchema is not initialized + if (pMerger->pArray == NULL) { + ASSERT(pReader->info.pSchema == NULL); + STSchema* ps = getTableSchemaImpl(pReader, pBlockScanInfo->uid); + if (ps == NULL) { + return terrno; + } + } + + int64_t minKey = 0; + if (pReader->info.order == TSDB_ORDER_ASC) { + minKey = INT64_MAX; // chosen the minimum value + if (minKey > 
tsLast && hasDataInLastBlock(pLastBlockReader)) { + minKey = tsLast; + } + + if (minKey > k.ts) { + minKey = k.ts; + } + + if (minKey > key && hasDataInFileBlock(pBlockData, pDumpInfo)) { + minKey = key; + } + } else { + minKey = INT64_MIN; + if (minKey < tsLast && hasDataInLastBlock(pLastBlockReader)) { + minKey = tsLast; + } + + if (minKey < k.ts) { + minKey = k.ts; + } + + if (minKey < key && hasDataInFileBlock(pBlockData, pDumpInfo)) { + minKey = key; + } + } + + // todo remove init + bool init = false; + + // ASC: file block ---> last block -----> imem -----> mem + // DESC: mem -----> imem -----> last block -----> file block + if (pReader->info.order == TSDB_ORDER_ASC) { + if (minKey == key) { + init = true; + int32_t code = tsdbRowMergerAdd(pMerger, &fRow, pReader->info.pSchema); + if (code != TSDB_CODE_SUCCESS) { + return code; + } + doMergeRowsInFileBlocks(pBlockData, pBlockScanInfo, pReader); + } + + if (minKey == tsLast) { + TSDBROW* fRow1 = tMergeTreeGetRow(&pLastBlockReader->mergeTree); + if (init) { + tsdbRowMergerAdd(pMerger, fRow1, NULL); + } else { + init = true; + int32_t code = tsdbRowMergerAdd(pMerger, fRow1, pReader->info.pSchema); + if (code != TSDB_CODE_SUCCESS) { + return code; + } + } + doMergeRowsInLastBlock(pLastBlockReader, pBlockScanInfo, tsLast, pMerger, &pReader->info.verRange, + pReader->idStr); + } + + if (minKey == k.ts) { + STSchema* pSchema = doGetSchemaForTSRow(TSDBROW_SVERSION(pRow), pReader, pBlockScanInfo->uid); + if (pSchema == NULL) { + return terrno; + } + if (init) { + tsdbRowMergerAdd(pMerger, pRow, pSchema); + } else { + init = true; + int32_t code = tsdbRowMergerAdd(pMerger, pRow, pSchema); + if (code != TSDB_CODE_SUCCESS) { + return code; + } + } + int32_t code = doMergeRowsInBuf(pIter, pBlockScanInfo->uid, k.ts, pBlockScanInfo->delSkyline, pReader); + if (code != TSDB_CODE_SUCCESS) { + return code; + } + } + } else { + if (minKey == k.ts) { + init = true; + STSchema* pSchema = 
doGetSchemaForTSRow(TSDBROW_SVERSION(pRow), pReader, pBlockScanInfo->uid); + if (pSchema == NULL) { + return terrno; + } + + int32_t code = tsdbRowMergerAdd(pMerger, pRow, pSchema); + if (code != TSDB_CODE_SUCCESS) { + return code; + } + + code = doMergeRowsInBuf(pIter, pBlockScanInfo->uid, k.ts, pBlockScanInfo->delSkyline, pReader); + if (code != TSDB_CODE_SUCCESS || pMerger->pTSchema == NULL) { + return code; + } + } + + if (minKey == tsLast) { + TSDBROW* fRow1 = tMergeTreeGetRow(&pLastBlockReader->mergeTree); + if (init) { + tsdbRowMergerAdd(pMerger, fRow1, NULL); + } else { + init = true; + int32_t code = tsdbRowMergerAdd(pMerger, fRow1, pReader->info.pSchema); + if (code != TSDB_CODE_SUCCESS) { + return code; + } + } + doMergeRowsInLastBlock(pLastBlockReader, pBlockScanInfo, tsLast, pMerger, &pReader->info.verRange, + pReader->idStr); + } + + if (minKey == key) { + if (init) { + tsdbRowMergerAdd(pMerger, &fRow, NULL); + } else { + init = true; + int32_t code = tsdbRowMergerAdd(pMerger, &fRow, pReader->info.pSchema); + if (code != TSDB_CODE_SUCCESS) { + return code; + } + } + doMergeRowsInFileBlocks(pBlockData, pBlockScanInfo, pReader); + } + } + + int32_t code = tsdbRowMergerGetRow(pMerger, &pTSRow); + if (code != TSDB_CODE_SUCCESS) { + return code; + } + + code = doAppendRowFromTSRow(pReader->resBlockInfo.pResBlock, pReader, pTSRow, pBlockScanInfo); + + taosMemoryFree(pTSRow); + tsdbRowMergerClear(pMerger); + + return code; +} + +static int32_t doMergeFileBlockAndLastBlock(SLastBlockReader* pLastBlockReader, STsdbReader* pReader, + STableBlockScanInfo* pBlockScanInfo, SBlockData* pBlockData, + bool mergeBlockData) { + SRowMerger* pMerger = &pReader->status.merger; + SFileBlockDumpInfo* pDumpInfo = &pReader->status.fBlockDumpInfo; + + int64_t tsLastBlock = getCurrentKeyInLastBlock(pLastBlockReader); + bool copied = false; + int32_t code = TSDB_CODE_SUCCESS; + SRow* pTSRow = NULL; + TSDBROW* pRow = tMergeTreeGetRow(&pLastBlockReader->mergeTree); + + // create 
local variable to hold the row value + TSDBROW fRow = {.iRow = pRow->iRow, .type = TSDBROW_COL_FMT, .pBlockData = pRow->pBlockData}; + + tsdbTrace("fRow ptr:%p, %d, uid:%" PRIu64 ", %s", pRow->pBlockData, pRow->iRow, pLastBlockReader->uid, + pReader->idStr); + + // only last block exists + if ((!mergeBlockData) || (tsLastBlock != pBlockData->aTSKEY[pDumpInfo->rowIndex])) { + code = tryCopyDistinctRowFromSttBlock(&fRow, pLastBlockReader, pBlockScanInfo, tsLastBlock, pReader, &copied); + if (code) { + return code; + } + + if (copied) { + pBlockScanInfo->lastKey = tsLastBlock; + return TSDB_CODE_SUCCESS; + } else { + code = tsdbRowMergerAdd(pMerger, &fRow, pReader->info.pSchema); + if (code != TSDB_CODE_SUCCESS) { + return code; + } + + TSDBROW* pRow1 = tMergeTreeGetRow(&pLastBlockReader->mergeTree); + tsdbRowMergerAdd(pMerger, pRow1, NULL); + doMergeRowsInLastBlock(pLastBlockReader, pBlockScanInfo, tsLastBlock, pMerger, &pReader->info.verRange, + pReader->idStr); + + code = tsdbRowMergerGetRow(pMerger, &pTSRow); + if (code != TSDB_CODE_SUCCESS) { + return code; + } + + code = doAppendRowFromTSRow(pReader->resBlockInfo.pResBlock, pReader, pTSRow, pBlockScanInfo); + + taosMemoryFree(pTSRow); + tsdbRowMergerClear(pMerger); + + if (code != TSDB_CODE_SUCCESS) { + return code; + } + } + } else { // not merge block data + code = tsdbRowMergerAdd(pMerger, &fRow, pReader->info.pSchema); + if (code != TSDB_CODE_SUCCESS) { + return code; + } + + doMergeRowsInLastBlock(pLastBlockReader, pBlockScanInfo, tsLastBlock, pMerger, &pReader->info.verRange, + pReader->idStr); + + // merge with block data if ts == key + if (tsLastBlock == pBlockData->aTSKEY[pDumpInfo->rowIndex]) { + doMergeRowsInFileBlocks(pBlockData, pBlockScanInfo, pReader); + } + + code = tsdbRowMergerGetRow(pMerger, &pTSRow); + if (code != TSDB_CODE_SUCCESS) { + return code; + } + + code = doAppendRowFromTSRow(pReader->resBlockInfo.pResBlock, pReader, pTSRow, pBlockScanInfo); + + taosMemoryFree(pTSRow); + 
tsdbRowMergerClear(pMerger); + + if (code != TSDB_CODE_SUCCESS) { + return code; + } + } + + return TSDB_CODE_SUCCESS; +} + +static int32_t mergeFileBlockAndLastBlock(STsdbReader* pReader, SLastBlockReader* pLastBlockReader, int64_t key, + STableBlockScanInfo* pBlockScanInfo, SBlockData* pBlockData) { + SFileBlockDumpInfo* pDumpInfo = &pReader->status.fBlockDumpInfo; + SRowMerger* pMerger = &pReader->status.merger; + + // merge is not initialized yet, due to the fact that the pReader->info.pSchema is not initialized + if (pMerger->pArray == NULL) { + ASSERT(pReader->info.pSchema == NULL); + STSchema* ps = getTableSchemaImpl(pReader, pBlockScanInfo->uid); + if (ps == NULL) { + return terrno; + } + } + + if (hasDataInFileBlock(pBlockData, pDumpInfo)) { + // no last block available, only data block exists + if (!hasDataInLastBlock(pLastBlockReader)) { + return mergeRowsInFileBlocks(pBlockData, pBlockScanInfo, key, pReader); + } + + // row in last file block + TSDBROW fRow = tsdbRowFromBlockData(pBlockData, pDumpInfo->rowIndex); + int64_t ts = getCurrentKeyInLastBlock(pLastBlockReader); + + if (ASCENDING_TRAVERSE(pReader->info.order)) { + if (key < ts) { // imem, mem are all empty, file blocks (data blocks and last block) exist + return mergeRowsInFileBlocks(pBlockData, pBlockScanInfo, key, pReader); + } else if (key == ts) { + SRow* pTSRow = NULL; + int32_t code = tsdbRowMergerAdd(pMerger, &fRow, pReader->info.pSchema); + if (code != TSDB_CODE_SUCCESS) { + return code; + } + + doMergeRowsInFileBlocks(pBlockData, pBlockScanInfo, pReader); + + TSDBROW* pRow1 = tMergeTreeGetRow(&pLastBlockReader->mergeTree); + tsdbRowMergerAdd(pMerger, pRow1, NULL); + + doMergeRowsInLastBlock(pLastBlockReader, pBlockScanInfo, ts, pMerger, &pReader->info.verRange, pReader->idStr); + + code = tsdbRowMergerGetRow(pMerger, &pTSRow); + if (code != TSDB_CODE_SUCCESS) { + return code; + } + + code = doAppendRowFromTSRow(pReader->resBlockInfo.pResBlock, pReader, pTSRow, pBlockScanInfo); + + 
taosMemoryFree(pTSRow); + tsdbRowMergerClear(pMerger); + return code; + } else { // key > ts + return doMergeFileBlockAndLastBlock(pLastBlockReader, pReader, pBlockScanInfo, NULL, false); + } + } else { // desc order + return doMergeFileBlockAndLastBlock(pLastBlockReader, pReader, pBlockScanInfo, pBlockData, true); + } + } else { // only last block exists + return doMergeFileBlockAndLastBlock(pLastBlockReader, pReader, pBlockScanInfo, NULL, false); + } +} + +static int32_t doMergeMultiLevelRows(STsdbReader* pReader, STableBlockScanInfo* pBlockScanInfo, SBlockData* pBlockData, + SLastBlockReader* pLastBlockReader) { + SRowMerger* pMerger = &pReader->status.merger; + SRow* pTSRow = NULL; + int32_t code = TSDB_CODE_SUCCESS; + SFileBlockDumpInfo* pDumpInfo = &pReader->status.fBlockDumpInfo; + SArray* pDelList = pBlockScanInfo->delSkyline; + + TSDBROW* pRow = getValidMemRow(&pBlockScanInfo->iter, pDelList, pReader); + TSDBROW* piRow = getValidMemRow(&pBlockScanInfo->iiter, pDelList, pReader); + + int64_t tsLast = INT64_MIN; + if (hasDataInLastBlock(pLastBlockReader)) { + tsLast = getCurrentKeyInLastBlock(pLastBlockReader); + } + + int64_t key = hasDataInFileBlock(pBlockData, pDumpInfo) ? 
pBlockData->aTSKEY[pDumpInfo->rowIndex] : INT64_MIN; + + TSDBKEY k = TSDBROW_KEY(pRow); + TSDBKEY ik = TSDBROW_KEY(piRow); + STSchema* pSchema = doGetSchemaForTSRow(TSDBROW_SVERSION(pRow), pReader, pBlockScanInfo->uid); + if (pSchema == NULL) { + return code; + } + + STSchema* piSchema = doGetSchemaForTSRow(TSDBROW_SVERSION(piRow), pReader, pBlockScanInfo->uid); + if (piSchema == NULL) { + return code; + } + + // merge is not initialized yet, due to the fact that the pReader->info.pSchema is not initialized + if (pMerger->pArray == NULL) { + ASSERT(pReader->info.pSchema == NULL); + STSchema* ps = getTableSchemaImpl(pReader, pBlockScanInfo->uid); + if (ps == NULL) { + return terrno; + } + } + + int64_t minKey = 0; + if (ASCENDING_TRAVERSE(pReader->info.order)) { + minKey = INT64_MAX; // let's find the minimum + if (minKey > k.ts) { + minKey = k.ts; + } + + if (minKey > ik.ts) { + minKey = ik.ts; + } + + if (minKey > key && hasDataInFileBlock(pBlockData, pDumpInfo)) { + minKey = key; + } + + if (minKey > tsLast && hasDataInLastBlock(pLastBlockReader)) { + minKey = tsLast; + } + } else { + minKey = INT64_MIN; // let find the maximum ts value + if (minKey < k.ts) { + minKey = k.ts; + } + + if (minKey < ik.ts) { + minKey = ik.ts; + } + + if (minKey < key && hasDataInFileBlock(pBlockData, pDumpInfo)) { + minKey = key; + } + + if (minKey < tsLast && hasDataInLastBlock(pLastBlockReader)) { + minKey = tsLast; + } + } + + bool init = false; + + // ASC: file block -----> last block -----> imem -----> mem + // DESC: mem -----> imem -----> last block -----> file block + if (ASCENDING_TRAVERSE(pReader->info.order)) { + if (minKey == key) { + init = true; + TSDBROW fRow = tsdbRowFromBlockData(pBlockData, pDumpInfo->rowIndex); + code = tsdbRowMergerAdd(pMerger, &fRow, pReader->info.pSchema); + if (code != TSDB_CODE_SUCCESS) { + return code; + } + + doMergeRowsInFileBlocks(pBlockData, pBlockScanInfo, pReader); + } + + if (minKey == tsLast) { + TSDBROW* pRow1 = 
tMergeTreeGetRow(&pLastBlockReader->mergeTree); + if (init) { + tsdbRowMergerAdd(pMerger, pRow1, NULL); + } else { + init = true; + code = tsdbRowMergerAdd(pMerger, pRow1, pReader->info.pSchema); + if (code != TSDB_CODE_SUCCESS) { + return code; + } + } + + doMergeRowsInLastBlock(pLastBlockReader, pBlockScanInfo, tsLast, pMerger, &pReader->info.verRange, + pReader->idStr); + } + + if (minKey == ik.ts) { + if (init) { + tsdbRowMergerAdd(pMerger, piRow, piSchema); + } else { + init = true; + code = tsdbRowMergerAdd(pMerger, piRow, piSchema); + if (code != TSDB_CODE_SUCCESS) { + return code; + } + } + + code = doMergeRowsInBuf(&pBlockScanInfo->iiter, pBlockScanInfo->uid, ik.ts, pBlockScanInfo->delSkyline, pReader); + if (code != TSDB_CODE_SUCCESS) { + return code; + } + } + + if (minKey == k.ts) { + if (init) { + tsdbRowMergerAdd(pMerger, pRow, pSchema); + } else { + // STSchema* pSchema = doGetSchemaForTSRow(TSDBROW_SVERSION(pRow), pReader, pBlockScanInfo->uid); + code = tsdbRowMergerAdd(pMerger, pRow, pSchema); + if (code != TSDB_CODE_SUCCESS) { + return code; + } + } + code = doMergeRowsInBuf(&pBlockScanInfo->iter, pBlockScanInfo->uid, k.ts, pBlockScanInfo->delSkyline, pReader); + if (code != TSDB_CODE_SUCCESS) { + return code; + } + } + } else { + if (minKey == k.ts) { + init = true; + code = tsdbRowMergerAdd(pMerger, pRow, pSchema); + if (code != TSDB_CODE_SUCCESS) { + return code; + } + + code = doMergeRowsInBuf(&pBlockScanInfo->iter, pBlockScanInfo->uid, k.ts, pBlockScanInfo->delSkyline, pReader); + if (code != TSDB_CODE_SUCCESS) { + return code; + } + } + + if (minKey == ik.ts) { + if (init) { + tsdbRowMergerAdd(pMerger, piRow, piSchema); + } else { + init = true; + code = tsdbRowMergerAdd(pMerger, piRow, piSchema); + if (code != TSDB_CODE_SUCCESS) { + return code; + } + } + code = doMergeRowsInBuf(&pBlockScanInfo->iiter, pBlockScanInfo->uid, ik.ts, pBlockScanInfo->delSkyline, pReader); + if (code != TSDB_CODE_SUCCESS) { + return code; + } + } + + if (minKey 
== tsLast) { + TSDBROW* pRow1 = tMergeTreeGetRow(&pLastBlockReader->mergeTree); + if (init) { + tsdbRowMergerAdd(pMerger, pRow1, NULL); + } else { + init = true; + code = tsdbRowMergerAdd(pMerger, pRow1, pReader->info.pSchema); + if (code != TSDB_CODE_SUCCESS) { + return code; + } + } + doMergeRowsInLastBlock(pLastBlockReader, pBlockScanInfo, tsLast, pMerger, &pReader->info.verRange, + pReader->idStr); + } + + if (minKey == key) { + TSDBROW fRow = tsdbRowFromBlockData(pBlockData, pDumpInfo->rowIndex); + if (!init) { + code = tsdbRowMergerAdd(pMerger, &fRow, pReader->info.pSchema); + if (code != TSDB_CODE_SUCCESS) { + return code; + } + } else { + tsdbRowMergerAdd(pMerger, &fRow, NULL); + } + doMergeRowsInFileBlocks(pBlockData, pBlockScanInfo, pReader); + } + } + + code = tsdbRowMergerGetRow(pMerger, &pTSRow); + if (code != TSDB_CODE_SUCCESS) { + return code; + } + + code = doAppendRowFromTSRow(pReader->resBlockInfo.pResBlock, pReader, pTSRow, pBlockScanInfo); + + taosMemoryFree(pTSRow); + tsdbRowMergerClear(pMerger); + return code; +} + +int32_t doInitMemDataIter(STsdbReader* pReader, STbData** pData, STableBlockScanInfo* pBlockScanInfo, TSDBKEY* pKey, + SMemTable* pMem, SIterInfo* pIter, const char* type) { + int32_t code = TSDB_CODE_SUCCESS; + int32_t backward = (!ASCENDING_TRAVERSE(pReader->info.order)); + pIter->hasVal = false; + + if (pMem != NULL) { + *pData = tsdbGetTbDataFromMemTable(pMem, pReader->info.suid, pBlockScanInfo->uid); + + if ((*pData) != NULL) { + code = tsdbTbDataIterCreate((*pData), pKey, backward, &pIter->iter); + if (code == TSDB_CODE_SUCCESS) { + pIter->hasVal = (tsdbTbDataIterGet(pIter->iter) != NULL); + + tsdbDebug("%p uid:%" PRIu64 ", check data in %s from skey:%" PRId64 ", order:%d, ts range in buf:%" PRId64 + "-%" PRId64 " %s", + pReader, pBlockScanInfo->uid, type, pKey->ts, pReader->info.order, (*pData)->minKey, (*pData)->maxKey, + pReader->idStr); + } else { + tsdbError("%p uid:%" PRIu64 ", failed to create iterator for %s, code:%s, 
%s", pReader, pBlockScanInfo->uid, + type, tstrerror(code), pReader->idStr); + return code; + } + } + } else { + tsdbDebug("%p uid:%" PRIu64 ", no data in %s, %s", pReader, pBlockScanInfo->uid, type, pReader->idStr); + } + + return code; +} + +static int32_t initMemDataIterator(STableBlockScanInfo* pBlockScanInfo, STsdbReader* pReader) { + if (pBlockScanInfo->iterInit) { + return TSDB_CODE_SUCCESS; + } + + STbData* d = NULL; + TSDBKEY startKey = {0}; + if (ASCENDING_TRAVERSE(pReader->info.order)) { + startKey = (TSDBKEY){.ts = pBlockScanInfo->lastKey + 1, .version = pReader->info.verRange.minVer}; + } else { + startKey = (TSDBKEY){.ts = pBlockScanInfo->lastKey - 1, .version = pReader->info.verRange.maxVer}; + } + + int32_t code = + doInitMemDataIter(pReader, &d, pBlockScanInfo, &startKey, pReader->pReadSnap->pMem, &pBlockScanInfo->iter, "mem"); + if (code != TSDB_CODE_SUCCESS) { + return code; + } + + STbData* di = NULL; + code = doInitMemDataIter(pReader, &di, pBlockScanInfo, &startKey, pReader->pReadSnap->pIMem, &pBlockScanInfo->iiter, + "imem"); + if (code != TSDB_CODE_SUCCESS) { + return code; + } + + loadMemTombData(&pBlockScanInfo->pMemDelData, d, di, pReader->info.verRange.maxVer); + + pBlockScanInfo->iterInit = true; + return TSDB_CODE_SUCCESS; +} + +static bool isValidFileBlockRow(SBlockData* pBlockData, SFileBlockDumpInfo* pDumpInfo, + STableBlockScanInfo* pBlockScanInfo, STsdbReader* pReader) { + // it is an multi-table data block + if (pBlockData->aUid != NULL) { + uint64_t uid = pBlockData->aUid[pDumpInfo->rowIndex]; + if (uid != pBlockScanInfo->uid) { // move to next row + return false; + } + } + + // check for version and time range + int64_t ver = pBlockData->aVersion[pDumpInfo->rowIndex]; + if (ver > pReader->info.verRange.maxVer || ver < pReader->info.verRange.minVer) { + return false; + } + + int64_t ts = pBlockData->aTSKEY[pDumpInfo->rowIndex]; + if (ts > pReader->info.window.ekey || ts < pReader->info.window.skey) { + return false; + } + + if 
(hasBeenDropped(pBlockScanInfo->delSkyline, &pBlockScanInfo->fileDelIndex, ts, ver, pReader->info.order, + &pReader->info.verRange)) { + return false; + } + + return true; +} + +static bool initLastBlockReader(SLastBlockReader* pLBlockReader, STableBlockScanInfo* pScanInfo, STsdbReader* pReader) { + // the last block reader has been initialized for this table. + if (pLBlockReader->uid == pScanInfo->uid) { + return hasDataInLastBlock(pLBlockReader); + } + + if (pLBlockReader->uid != 0) { + tMergeTreeClose(&pLBlockReader->mergeTree); + } + + pLBlockReader->uid = pScanInfo->uid; + + STimeWindow w = pLBlockReader->window; + if (ASCENDING_TRAVERSE(pLBlockReader->order)) { + w.skey = pScanInfo->lastKeyInStt; + } else { + w.ekey = pScanInfo->lastKeyInStt; + } + + int64_t st = taosGetTimestampUs(); + tsdbDebug("init last block reader, window:%" PRId64 "-%" PRId64 ", uid:%" PRIu64 ", %s", w.skey, w.ekey, + pScanInfo->uid, pReader->idStr); + + SMergeTreeConf conf = { + .uid = pScanInfo->uid, + .suid = pReader->info.suid, + .pTsdb = pReader->pTsdb, + .timewindow = w, + .verRange = pLBlockReader->verRange, + .strictTimeRange = false, + .pSchema = pReader->info.pSchema, + .pCurrentFileset = pReader->status.pCurrentFileset, + .backward = (pLBlockReader->order == TSDB_ORDER_DESC), + .pSttFileBlockIterArray = pReader->status.pLDataIterArray, + .pCols = pReader->suppInfo.colId, + .numOfCols = pReader->suppInfo.numOfCols, + .loadTombFn = loadSttTombDataForAll, + .pReader = pReader, + .idstr = pReader->idStr, + }; + + int32_t code = tMergeTreeOpen2(&pLBlockReader->mergeTree, &conf); + if (code != TSDB_CODE_SUCCESS) { + return false; + } + + initMemDataIterator(pScanInfo, pReader); + initDelSkylineIterator(pScanInfo, pReader->info.order, &pReader->cost); + + code = nextRowFromLastBlocks(pLBlockReader, pScanInfo, &pReader->info.verRange); + + int64_t el = taosGetTimestampUs() - st; + pReader->cost.initLastBlockReader += (el / 1000.0); + + tsdbDebug("init last block reader completed, 
elapsed time:%" PRId64 "us %s", el, pReader->idStr); + return code; +} + +static bool hasDataInLastBlock(SLastBlockReader* pLastBlockReader) { return pLastBlockReader->mergeTree.pIter != NULL; } + +bool hasDataInFileBlock(const SBlockData* pBlockData, const SFileBlockDumpInfo* pDumpInfo) { + if ((pBlockData->nRow > 0) && (pBlockData->nRow != pDumpInfo->totalRows)) { + return false; // this is an invalid result. + } + return pBlockData->nRow > 0 && (!pDumpInfo->allDumped); +} + +int32_t mergeRowsInFileBlocks(SBlockData* pBlockData, STableBlockScanInfo* pBlockScanInfo, int64_t key, + STsdbReader* pReader) { + SRowMerger* pMerger = &pReader->status.merger; + SFileBlockDumpInfo* pDumpInfo = &pReader->status.fBlockDumpInfo; + bool copied = false; + + int32_t code = tryCopyDistinctRowFromFileBlock(pReader, pBlockData, key, pDumpInfo, &copied); + if (code != TSDB_CODE_SUCCESS) { + return code; + } + + // merge is not initialized yet, due to the fact that the pReader->info.pSchema is not initialized + if (pMerger->pArray == NULL) { + ASSERT(pReader->info.pSchema == NULL); + STSchema* ps = getTableSchemaImpl(pReader, pBlockScanInfo->uid); + if (ps == NULL) { + return terrno; + } + } + + if (copied) { + pBlockScanInfo->lastKey = key; + return TSDB_CODE_SUCCESS; + } else { + TSDBROW fRow = tsdbRowFromBlockData(pBlockData, pDumpInfo->rowIndex); + + SRow* pTSRow = NULL; + code = tsdbRowMergerAdd(pMerger, &fRow, pReader->info.pSchema); + if (code != TSDB_CODE_SUCCESS) { + return code; + } + + doMergeRowsInFileBlocks(pBlockData, pBlockScanInfo, pReader); + code = tsdbRowMergerGetRow(pMerger, &pTSRow); + if (code != TSDB_CODE_SUCCESS) { + return code; + } + + code = doAppendRowFromTSRow(pReader->resBlockInfo.pResBlock, pReader, pTSRow, pBlockScanInfo); + + taosMemoryFree(pTSRow); + tsdbRowMergerClear(pMerger); + return code; + } +} + +static int32_t buildComposedDataBlockImpl(STsdbReader* pReader, STableBlockScanInfo* pBlockScanInfo, + SBlockData* pBlockData, SLastBlockReader* 
pLastBlockReader) { + SFileBlockDumpInfo* pDumpInfo = &pReader->status.fBlockDumpInfo; + + TSDBROW *pRow = NULL, *piRow = NULL; + int64_t key = (pBlockData->nRow > 0 && (!pDumpInfo->allDumped)) ? pBlockData->aTSKEY[pDumpInfo->rowIndex] : INT64_MIN; + if (pBlockScanInfo->iter.hasVal) { + pRow = getValidMemRow(&pBlockScanInfo->iter, pBlockScanInfo->delSkyline, pReader); + } + + if (pBlockScanInfo->iiter.hasVal) { + piRow = getValidMemRow(&pBlockScanInfo->iiter, pBlockScanInfo->delSkyline, pReader); + } + + // two levels of mem-table does contain the valid rows + if (pRow != NULL && piRow != NULL) { + return doMergeMultiLevelRows(pReader, pBlockScanInfo, pBlockData, pLastBlockReader); + } + + // imem + file + last block + if (pBlockScanInfo->iiter.hasVal) { + return doMergeBufAndFileRows(pReader, pBlockScanInfo, piRow, &pBlockScanInfo->iiter, key, pLastBlockReader); + } + + // mem + file + last block + if (pBlockScanInfo->iter.hasVal) { + return doMergeBufAndFileRows(pReader, pBlockScanInfo, pRow, &pBlockScanInfo->iter, key, pLastBlockReader); + } + + // files data blocks + last block + return mergeFileBlockAndLastBlock(pReader, pLastBlockReader, key, pBlockScanInfo, pBlockData); +} + +static int32_t loadNeighborIfOverlap(SFileDataBlockInfo* pBlockInfo, STableBlockScanInfo* pBlockScanInfo, + STsdbReader* pReader, bool* loadNeighbor) { + int32_t code = TSDB_CODE_SUCCESS; + int32_t step = ASCENDING_TRAVERSE(pReader->info.order) ? 1 : -1; + int32_t nextIndex = -1; + + *loadNeighbor = false; + + SBrinRecord rec = {0}; + bool hasNeighbor = getNeighborBlockOfSameTable(pBlockInfo, pBlockScanInfo, &nextIndex, pReader->info.order, &rec); + if (!hasNeighbor) { // do nothing + return code; + } + + if (overlapWithNeighborBlock2(pBlockInfo, &rec, pReader->info.order)) { // load next block + SReaderStatus* pStatus = &pReader->status; + SDataBlockIter* pBlockIter = &pStatus->blockIter; + + // 1. 
find the next neighbor block in the scan block list + SFileDataBlockInfo fb = {.uid = pBlockInfo->uid, .tbBlockIdx = nextIndex}; + int32_t neighborIndex = findFileBlockInfoIndex(pBlockIter, &fb); + + // 2. remove it from the scan block list + setFileBlockActiveInBlockIter(pBlockIter, neighborIndex, step); + + // 3. load the neighbor block, and set it to be the currently accessed file data block + code = doLoadFileBlockData(pReader, pBlockIter, &pStatus->fileBlockData, pBlockInfo->uid); + if (code != TSDB_CODE_SUCCESS) { + return code; + } + + // 4. check the data values + initBlockDumpInfo(pReader, pBlockIter); + *loadNeighbor = true; + } + + return code; +} + +static void updateComposedBlockInfo(STsdbReader* pReader, double el, STableBlockScanInfo* pBlockScanInfo) { + SSDataBlock* pResBlock = pReader->resBlockInfo.pResBlock; + + pResBlock->info.id.uid = (pBlockScanInfo != NULL) ? pBlockScanInfo->uid : 0; + pResBlock->info.dataLoad = 1; + blockDataUpdateTsWindow(pResBlock, pReader->suppInfo.slotId[0]); + + setComposedBlockFlag(pReader, true); + + pReader->cost.composedBlocks += 1; + pReader->cost.buildComposedBlockTime += el; +} + +static int32_t buildComposedDataBlock(STsdbReader* pReader) { + int32_t code = TSDB_CODE_SUCCESS; + + SSDataBlock* pResBlock = pReader->resBlockInfo.pResBlock; + + SFileDataBlockInfo* pBlockInfo = getCurrentBlockInfo(&pReader->status.blockIter); + SLastBlockReader* pLastBlockReader = pReader->status.fileIter.pLastBlockReader; + + bool asc = ASCENDING_TRAVERSE(pReader->info.order); + int64_t st = taosGetTimestampUs(); + int32_t step = asc ? 
1 : -1; + double el = 0; + SBrinRecord* pRecord = &pBlockInfo->record; + + SFileBlockDumpInfo* pDumpInfo = &pReader->status.fBlockDumpInfo; + + STableBlockScanInfo* pBlockScanInfo = NULL; + if (pBlockInfo != NULL) { + if (pReader->pIgnoreTables && taosHashGet(*pReader->pIgnoreTables, &pBlockInfo->uid, sizeof(pBlockInfo->uid))) { + setBlockAllDumped(pDumpInfo, pRecord->lastKey, pReader->info.order); + return code; + } + + pBlockScanInfo = getTableBlockScanInfo(pReader->status.pTableMap, pBlockInfo->uid, pReader->idStr); + if (pBlockScanInfo == NULL) { + goto _end; + } + + pRecord = &pBlockInfo->record; + TSDBKEY keyInBuf = getCurrentKeyInBuf(pBlockScanInfo, pReader); + + // it is a clean block, load it directly + if (isCleanFileDataBlock(pReader, pBlockInfo, pBlockScanInfo, keyInBuf, pLastBlockReader) && + (pRecord->numRow <= pReader->resBlockInfo.capacity)) { + if (asc || (!hasDataInLastBlock(pLastBlockReader))) { + code = copyBlockDataToSDataBlock(pReader); + if (code) { + goto _end; + } + + // record the last key value + pBlockScanInfo->lastKey = asc ? 
pRecord->lastKey : pRecord->firstKey; + goto _end; + } + } + } else { // file blocks not exist + ASSERT(0); + pBlockScanInfo = *pReader->status.pTableIter; + if (pReader->pIgnoreTables && + taosHashGet(*pReader->pIgnoreTables, &pBlockScanInfo->uid, sizeof(pBlockScanInfo->uid))) { + // setBlockAllDumped(pDumpInfo, pBlock->maxKey.ts, pReader->info.order); + return code; + } + } + + SBlockData* pBlockData = &pReader->status.fileBlockData; + + while (1) { + bool hasBlockData = false; + { + while (pBlockData->nRow > 0 && pBlockData->uid == pBlockScanInfo->uid) { + // find the first qualified row in data block + if (isValidFileBlockRow(pBlockData, pDumpInfo, pBlockScanInfo, pReader)) { + hasBlockData = true; + break; + } + + pDumpInfo->rowIndex += step; + + if (pDumpInfo->rowIndex >= pBlockData->nRow || pDumpInfo->rowIndex < 0) { + pBlockInfo = getCurrentBlockInfo(&pReader->status.blockIter); // NOTE: get the new block info + + // continue check for the next file block if the last ts in the current block + // is overlapped with the next neighbor block + bool loadNeighbor = false; + code = loadNeighborIfOverlap(pBlockInfo, pBlockScanInfo, pReader, &loadNeighbor); + if ((!loadNeighbor) || (code != 0)) { + setBlockAllDumped(pDumpInfo, pRecord->lastKey, pReader->info.order); + break; + } + } + } + } + + // no data in last block and block, no need to proceed. 
+ if (hasBlockData == false) { + break; + } + + code = buildComposedDataBlockImpl(pReader, pBlockScanInfo, pBlockData, pLastBlockReader); + if (code) { + goto _end; + } + + // currently loaded file data block is consumed + if ((pBlockData->nRow > 0) && (pDumpInfo->rowIndex >= pBlockData->nRow || pDumpInfo->rowIndex < 0)) { + setBlockAllDumped(pDumpInfo, pRecord->lastKey, pReader->info.order); + break; + } + + if (pResBlock->info.rows >= pReader->resBlockInfo.capacity) { + break; + } + } + +_end: + el = (taosGetTimestampUs() - st) / 1000.0; + updateComposedBlockInfo(pReader, el, pBlockScanInfo); + + if (pResBlock->info.rows > 0) { + tsdbDebug("%p uid:%" PRIu64 ", composed data block created, brange:%" PRIu64 "-%" PRIu64 " rows:%" PRId64 + ", elapsed time:%.2f ms %s", + pReader, pResBlock->info.id.uid, pResBlock->info.window.skey, pResBlock->info.window.ekey, + pResBlock->info.rows, el, pReader->idStr); + } + + return code; +} + +void setComposedBlockFlag(STsdbReader* pReader, bool composed) { pReader->status.composedDataBlock = composed; } + +int32_t getInitialDelIndex(const SArray* pDelSkyline, int32_t order) { + if (pDelSkyline == NULL) { + return 0; + } + + return ASCENDING_TRAVERSE(order) ? 
0 : taosArrayGetSize(pDelSkyline) - 1; +} + +int32_t initDelSkylineIterator(STableBlockScanInfo* pBlockScanInfo, int32_t order, SCostSummary* pCost) { + int32_t code = 0; + int32_t newDelDataInFile = taosArrayGetSize(pBlockScanInfo->pfileDelData); + if (newDelDataInFile == 0 && + ((pBlockScanInfo->delSkyline != NULL) || (TARRAY_SIZE(pBlockScanInfo->pMemDelData) == 0))) { + return code; + } + + int64_t st = taosGetTimestampUs(); + + if (pBlockScanInfo->delSkyline != NULL) { + taosArrayClear(pBlockScanInfo->delSkyline); + } else { + pBlockScanInfo->delSkyline = taosArrayInit(4, sizeof(TSDBKEY)); + } + + SArray* pSource = pBlockScanInfo->pfileDelData; + if (pSource == NULL) { + pSource = pBlockScanInfo->pMemDelData; + } else { + taosArrayAddAll(pSource, pBlockScanInfo->pMemDelData); + } + + code = tsdbBuildDeleteSkyline(pSource, 0, taosArrayGetSize(pSource) - 1, pBlockScanInfo->delSkyline); + + taosArrayClear(pBlockScanInfo->pfileDelData); + int32_t index = getInitialDelIndex(pBlockScanInfo->delSkyline, order); + + pBlockScanInfo->iter.index = index; + pBlockScanInfo->iiter.index = index; + pBlockScanInfo->fileDelIndex = index; + pBlockScanInfo->lastBlockDelIndex = index; + + double el = taosGetTimestampUs() - st; + pCost->createSkylineIterTime = el / 1000.0; + + return code; +} + +TSDBKEY getCurrentKeyInBuf(STableBlockScanInfo* pScanInfo, STsdbReader* pReader) { + bool asc = ASCENDING_TRAVERSE(pReader->info.order); + TSDBKEY key = {.ts = TSKEY_INITIAL_VAL}, ikey = {.ts = TSKEY_INITIAL_VAL}; + + bool hasKey = false, hasIKey = false; + TSDBROW* pRow = getValidMemRow(&pScanInfo->iter, pScanInfo->delSkyline, pReader); + if (pRow != NULL) { + hasKey = true; + key = TSDBROW_KEY(pRow); + } + + TSDBROW* pIRow = getValidMemRow(&pScanInfo->iiter, pScanInfo->delSkyline, pReader); + if (pIRow != NULL) { + hasIKey = true; + ikey = TSDBROW_KEY(pIRow); + } + + if (hasKey) { + if (hasIKey) { // has data in mem & imem + if (asc) { + return key.ts <= ikey.ts ? 
key : ikey;
+      } else {
+        return key.ts <= ikey.ts ? ikey : key;
+      }
+    } else {  // no data in imem
+      return key;
+    }
+  } else {
+    // no data in mem & imem, return the initial value
+    // only imem has data, return ikey
+    return ikey;
+  }
+}
+
+// Advance the fileset iterator to the next fileset that has something to read.
+//
+// Both counters in pBlockNum are reset to 0 on entry. For every fileset handed out by filesetIteratorNext() the
+// block index is loaded with doLoadBlockIndex(); when either the index list or the fileset's lvlArr is non-empty,
+// doLoadFileBlock() fills pBlockNum and pTableList. The loop stops at the first fileset for which
+// numOfBlocks + numOfLastFiles > 0, or when the iterator reports that no fileset is left.
+//
+// Returns pReader->code when the reader carries an error code, the failing code of any call above, and otherwise
+// the result of loadDataFileTombDataForAll(). The temporary index list is released on every exit path.
+static int32_t moveToNextFile(STsdbReader* pReader, SBlockNumber* pBlockNum, SArray* pTableList) {
+  SReaderStatus* pStatus = &pReader->status;
+  pBlockNum->numOfBlocks = 0;
+  pBlockNum->numOfLastFiles = 0;
+
+  size_t numOfTables = tSimpleHashGetSize(pReader->status.pTableMap);
+  SArray* pIndexList = taosArrayInit(numOfTables, sizeof(SBrinBlk));
+
+  while (1) {
+    // only check here, since the iterate data in memory is very fast.
+    if (pReader->code != TSDB_CODE_SUCCESS) {
+      // NOTE(review): strerror() interprets its argument as an errno value; confirm pReader->code is
+      // errno-compatible here, otherwise the project's own error-string helper should be used.
+      tsdbWarn("tsdb reader is stopped ASAP, code:%s, %s", strerror(pReader->code), pReader->idStr);
+
+      // this early exit used to skip the cleanup that all other exit paths perform, leaking pIndexList
+      taosArrayDestroy(pIndexList);
+      return pReader->code;
+    }
+
+    bool hasNext = false;
+    int32_t code = filesetIteratorNext(&pStatus->fileIter, pReader, &hasNext);
+    if (code != TSDB_CODE_SUCCESS) {
+      taosArrayDestroy(pIndexList);
+      return code;
+    }
+
+    if (!hasNext) {  // no data files on disk
+      break;
+    }
+
+    // the list is reused across filesets; drop the entries of the previous one
+    taosArrayClear(pIndexList);
+    code = doLoadBlockIndex(pReader, pReader->pFileReader, pIndexList);
+    if (code != TSDB_CODE_SUCCESS) {
+      taosArrayDestroy(pIndexList);
+      return code;
+    }
+
+    if (taosArrayGetSize(pIndexList) > 0 || pReader->status.pCurrentFileset->lvlArr->size > 0) {
+      code = doLoadFileBlock(pReader, pIndexList, pBlockNum, pTableList);
+      if (code != TSDB_CODE_SUCCESS) {
+        taosArrayDestroy(pIndexList);
+        return code;
+      }
+
+      if (pBlockNum->numOfBlocks + pBlockNum->numOfLastFiles > 0) {
+        break;
+      }
+    }
+
+    // no blocks in current file, try next files
+  }
+
+  taosArrayDestroy(pIndexList);
+  return loadDataFileTombDataForAll(pReader);
+}
+
+static void resetTableListIndex(SReaderStatus* pStatus) {
+  STableUidList* pList = &pStatus->uidList;
+
+  pList->currentIndex = 0;
+  uint64_t uid = pList->tableUidList[0];
+  pStatus->pTableIter = tSimpleHashGet(pStatus->pTableMap, &uid, 
sizeof(uid)); +} + +static bool moveToNextTable(STableUidList* pOrderedCheckInfo, SReaderStatus* pStatus) { + pOrderedCheckInfo->currentIndex += 1; + if (pOrderedCheckInfo->currentIndex >= tSimpleHashGetSize(pStatus->pTableMap)) { + pStatus->pTableIter = NULL; + return false; + } + + uint64_t uid = pOrderedCheckInfo->tableUidList[pOrderedCheckInfo->currentIndex]; + pStatus->pTableIter = tSimpleHashGet(pStatus->pTableMap, &uid, sizeof(uid)); + return (pStatus->pTableIter != NULL); +} + +static int32_t doLoadLastBlockSequentially(STsdbReader* pReader) { + SReaderStatus* pStatus = &pReader->status; + SLastBlockReader* pLastBlockReader = pStatus->fileIter.pLastBlockReader; + STableUidList* pUidList = &pStatus->uidList; + int32_t code = TSDB_CODE_SUCCESS; + + if (tSimpleHashGetSize(pStatus->pTableMap) == 0) { + return TSDB_CODE_SUCCESS; + } + + SSDataBlock* pResBlock = pReader->resBlockInfo.pResBlock; + + while (1) { + if (pReader->code != TSDB_CODE_SUCCESS) { + tsdbWarn("tsdb reader is stopped ASAP, code:%s, %s", strerror(pReader->code), pReader->idStr); + return pReader->code; + } + + // load the last data block of current table + STableBlockScanInfo* pScanInfo = *(STableBlockScanInfo**)pStatus->pTableIter; + if (pReader->pIgnoreTables && taosHashGet(*pReader->pIgnoreTables, &pScanInfo->uid, sizeof(pScanInfo->uid))) { + // reset the index in last block when handing a new file + // doCleanupTableScanInfo(pScanInfo); + bool hasNexTable = moveToNextTable(pUidList, pStatus); + if (!hasNexTable) { + return TSDB_CODE_SUCCESS; + } + + continue; + } + + // reset the index in last block when handing a new file + // doCleanupTableScanInfo(pScanInfo); + + bool hasDataInLastFile = initLastBlockReader(pLastBlockReader, pScanInfo, pReader); + if (!hasDataInLastFile) { + bool hasNexTable = moveToNextTable(pUidList, pStatus); + if (!hasNexTable) { + return TSDB_CODE_SUCCESS; + } + + continue; + } + + int64_t st = taosGetTimestampUs(); + while (1) { + bool hasBlockLData = 
hasDataInLastBlock(pLastBlockReader); + + // no data in last block and block, no need to proceed. + if (hasBlockLData == false) { + break; + } + + code = buildComposedDataBlockImpl(pReader, pScanInfo, &pReader->status.fileBlockData, pLastBlockReader); + if (code) { + return code; + } + + if (pResBlock->info.rows >= pReader->resBlockInfo.capacity) { + break; + } + } + + double el = (taosGetTimestampUs() - st) / 1000.0; + updateComposedBlockInfo(pReader, el, pScanInfo); + + if (pResBlock->info.rows > 0) { + tsdbDebug("%p uid:%" PRIu64 ", composed data block created, brange:%" PRIu64 "-%" PRIu64 " rows:%" PRId64 + ", elapsed time:%.2f ms %s", + pReader, pResBlock->info.id.uid, pResBlock->info.window.skey, pResBlock->info.window.ekey, + pResBlock->info.rows, el, pReader->idStr); + return TSDB_CODE_SUCCESS; + } + + // current table is exhausted, let's try next table + bool hasNexTable = moveToNextTable(pUidList, pStatus); + if (!hasNexTable) { + return TSDB_CODE_SUCCESS; + } + } +} + +static int32_t doBuildDataBlock(STsdbReader* pReader) { + int32_t code = TSDB_CODE_SUCCESS; + + SReaderStatus* pStatus = &pReader->status; + SDataBlockIter* pBlockIter = &pStatus->blockIter; + STableBlockScanInfo* pScanInfo = NULL; + SFileDataBlockInfo* pBlockInfo = getCurrentBlockInfo(pBlockIter); + SLastBlockReader* pLastBlockReader = pReader->status.fileIter.pLastBlockReader; + + if (pReader->pIgnoreTables && taosHashGet(*pReader->pIgnoreTables, &pBlockInfo->uid, sizeof(pBlockInfo->uid))) { + setBlockAllDumped(&pStatus->fBlockDumpInfo, pBlockInfo->record.lastKey, pReader->info.order); + return code; + } + + if (pReader->code != TSDB_CODE_SUCCESS) { + return pReader->code; + } + + pScanInfo = getTableBlockScanInfo(pReader->status.pTableMap, pBlockInfo->uid, pReader->idStr); + if (pScanInfo == NULL) { + return terrno; + } + + initLastBlockReader(pLastBlockReader, pScanInfo, pReader); + TSDBKEY keyInBuf = getCurrentKeyInBuf(pScanInfo, pReader); + + if (fileBlockShouldLoad(pReader, 
pBlockInfo, pScanInfo, keyInBuf, pLastBlockReader)) { + code = doLoadFileBlockData(pReader, pBlockIter, &pStatus->fileBlockData, pScanInfo->uid); + if (code != TSDB_CODE_SUCCESS) { + return code; + } + + // build composed data block + code = buildComposedDataBlock(pReader); + } else if (bufferDataInFileBlockGap(pReader->info.order, keyInBuf, pBlockInfo)) { + // data in memory that are earlier than current file block + // rows in buffer should be less than the file block in asc, greater than file block in desc + int64_t endKey = + (ASCENDING_TRAVERSE(pReader->info.order)) ? pBlockInfo->record.firstKey : pBlockInfo->record.lastKey; + code = buildDataBlockFromBuf(pReader, pScanInfo, endKey); + } else { + if (hasDataInLastBlock(pLastBlockReader) && !ASCENDING_TRAVERSE(pReader->info.order)) { + // only return the rows in last block + int64_t tsLast = getCurrentKeyInLastBlock(pLastBlockReader); + ASSERT(tsLast >= pBlockInfo->record.lastKey); + + SBlockData* pBData = &pReader->status.fileBlockData; + tBlockDataReset(pBData); + + SSDataBlock* pResBlock = pReader->resBlockInfo.pResBlock; + tsdbDebug("load data in last block firstly, due to desc scan data, %s", pReader->idStr); + + int64_t st = taosGetTimestampUs(); + + while (1) { + bool hasBlockLData = hasDataInLastBlock(pLastBlockReader); + + // no data in last block and block, no need to proceed. 
+ if (hasBlockLData == false) { + break; + } + + code = buildComposedDataBlockImpl(pReader, pScanInfo, &pReader->status.fileBlockData, pLastBlockReader); + if (code) { + return code; + } + + if (pResBlock->info.rows >= pReader->resBlockInfo.capacity) { + break; + } + } + + double el = (taosGetTimestampUs() - st) / 1000.0; + updateComposedBlockInfo(pReader, el, pScanInfo); + + if (pResBlock->info.rows > 0) { + tsdbDebug("%p uid:%" PRIu64 ", composed data block created, brange:%" PRIu64 "-%" PRIu64 " rows:%" PRId64 + ", elapsed time:%.2f ms %s", + pReader, pResBlock->info.id.uid, pResBlock->info.window.skey, pResBlock->info.window.ekey, + pResBlock->info.rows, el, pReader->idStr); + } + } else { // whole block is required, return it directly + SDataBlockInfo* pInfo = &pReader->resBlockInfo.pResBlock->info; + pInfo->rows = pBlockInfo->record.numRow; + pInfo->id.uid = pScanInfo->uid; + pInfo->dataLoad = 0; + pInfo->window = (STimeWindow){.skey = pBlockInfo->record.firstKey, .ekey = pBlockInfo->record.lastKey}; + setComposedBlockFlag(pReader, false); + setBlockAllDumped(&pStatus->fBlockDumpInfo, pBlockInfo->record.lastKey, pReader->info.order); + + // update the last key for the corresponding table + pScanInfo->lastKey = ASCENDING_TRAVERSE(pReader->info.order) ? pInfo->window.ekey : pInfo->window.skey; + tsdbDebug("%p uid:%" PRIu64 + " clean file block retrieved from file, global index:%d, " + "table index:%d, rows:%d, brange:%" PRId64 "-%" PRId64 ", %s", + pReader, pScanInfo->uid, pBlockIter->index, pBlockInfo->tbBlockIdx, pBlockInfo->record.numRow, + pBlockInfo->record.firstKey, pBlockInfo->record.lastKey, pReader->idStr); + } + } + + return (pReader->code != TSDB_CODE_SUCCESS) ? 
pReader->code : code; +} + +static int32_t doSumFileBlockRows(STsdbReader* pReader, SDataFReader* pFileReader) { + int64_t st = taosGetTimestampUs(); + LRUHandle* handle = NULL; + int32_t code = tsdbCacheGetBlockIdx(pFileReader->pTsdb->biCache, pFileReader, &handle); + if (code != TSDB_CODE_SUCCESS || handle == NULL) { + goto _end; + } + + int32_t numOfTables = tSimpleHashGetSize(pReader->status.pTableMap); + + SArray* aBlockIdx = (SArray*)taosLRUCacheValue(pFileReader->pTsdb->biCache, handle); + size_t num = taosArrayGetSize(aBlockIdx); + if (num == 0) { + tsdbBICacheRelease(pFileReader->pTsdb->biCache, handle); + return TSDB_CODE_SUCCESS; + } + + SBlockIdx* pBlockIdx = NULL; + for (int32_t i = 0; i < num; ++i) { + pBlockIdx = (SBlockIdx*)taosArrayGet(aBlockIdx, i); + if (pBlockIdx->suid != pReader->info.suid) { + continue; + } + + STableBlockScanInfo** p = tSimpleHashGet(pReader->status.pTableMap, &pBlockIdx->uid, sizeof(pBlockIdx->uid)); + if (p == NULL) { + continue; + } + + STableBlockScanInfo* pScanInfo = *p; + SDataBlk block = {0}; + // for (int32_t j = 0; j < pScanInfo->mapData.nItem; ++j) { + // tGetDataBlk(pScanInfo->mapData.pData + pScanInfo->mapData.aOffset[j], &block); + // pReader->rowsNum += block.nRow; + // } + } + +_end: + tsdbBICacheRelease(pFileReader->pTsdb->biCache, handle); + return code; +} + +static int32_t doSumSttBlockRows(STsdbReader* pReader) { + int32_t code = TSDB_CODE_SUCCESS; + SLastBlockReader* pLastBlockReader = pReader->status.fileIter.pLastBlockReader; + SSttBlockLoadInfo* pBlockLoadInfo = NULL; +#if 0 + for (int32_t i = 0; i < pReader->pFileReader->pSet->nSttF; ++i) { // open all last file + pBlockLoadInfo = &pLastBlockReader->pInfo[i]; + + code = tsdbReadSttBlk(pReader->pFileReader, i, pBlockLoadInfo->aSttBlk); + if (code) { + return code; + } + + size_t size = taosArrayGetSize(pBlockLoadInfo->aSttBlk); + if (size >= 1) { + SSttBlk* pStart = taosArrayGet(pBlockLoadInfo->aSttBlk, 0); + SSttBlk* pEnd = 
taosArrayGet(pBlockLoadInfo->aSttBlk, size - 1); + + // all identical + if (pStart->suid == pEnd->suid) { + if (pStart->suid != pReader->info.suid) { + // no qualified stt block existed + taosArrayClear(pBlockLoadInfo->aSttBlk); + continue; + } + for (int32_t j = 0; j < size; ++j) { + SSttBlk* p = taosArrayGet(pBlockLoadInfo->aSttBlk, j); + pReader->rowsNum += p->nRow; + } + } else { + for (int32_t j = 0; j < size; ++j) { + SSttBlk* p = taosArrayGet(pBlockLoadInfo->aSttBlk, j); + uint64_t s = p->suid; + if (s < pReader->info.suid) { + continue; + } + + if (s == pReader->info.suid) { + pReader->rowsNum += p->nRow; + } else if (s > pReader->info.suid) { + break; + } + } + } + } + } +#endif + + return code; +} + +static int32_t readRowsCountFromFiles(STsdbReader* pReader) { + int32_t code = TSDB_CODE_SUCCESS; + + while (1) { + bool hasNext = false; + code = filesetIteratorNext(&pReader->status.fileIter, pReader, &hasNext); + if (code) { + return code; + } + + if (!hasNext) { // no data files on disk + break; + } + + // code = doSumFileBlockRows(pReader, pReader->pFileReader); + if (code != TSDB_CODE_SUCCESS) { + return code; + } + + code = doSumSttBlockRows(pReader); + if (code != TSDB_CODE_SUCCESS) { + return code; + } + } + + pReader->status.loadFromFile = false; + + return code; +} + +static int32_t readRowsCountFromMem(STsdbReader* pReader) { + int32_t code = TSDB_CODE_SUCCESS; + int64_t memNum = 0, imemNum = 0; + if (pReader->pReadSnap->pMem != NULL) { + tsdbMemTableCountRows(pReader->pReadSnap->pMem, pReader->status.pTableMap, &memNum); + } + + if (pReader->pReadSnap->pIMem != NULL) { + tsdbMemTableCountRows(pReader->pReadSnap->pIMem, pReader->status.pTableMap, &imemNum); + } + + pReader->rowsNum += memNum + imemNum; + + return code; +} + +static int32_t buildBlockFromBufferSequentially(STsdbReader* pReader) { + SReaderStatus* pStatus = &pReader->status; + STableUidList* pUidList = &pStatus->uidList; + + while (1) { + if (pReader->code != TSDB_CODE_SUCCESS) { + 
tsdbWarn("tsdb reader is stopped ASAP, code:%s, %s", strerror(pReader->code), pReader->idStr);
+      return pReader->code;
+    }
+
+    STableBlockScanInfo** pBlockScanInfo = pStatus->pTableIter;
+    if (pReader->pIgnoreTables &&
+        taosHashGet(*pReader->pIgnoreTables, &(*pBlockScanInfo)->uid, sizeof((*pBlockScanInfo)->uid))) {
+      // The current table is in the ignore set: step to the next one and restart the iteration, so that the
+      // next table is itself checked against the ignore set (and the reader's error code) before it is read.
+      // Previously the next table was processed unconditionally, even when it was ignored as well.
+      // doLoadLastBlockSequentially() handles the same situation with `continue`.
+      bool hasNexTable = moveToNextTable(pUidList, pStatus);
+      if (!hasNexTable) {
+        return TSDB_CODE_SUCCESS;
+      }
+
+      continue;
+    }
+
+    initMemDataIterator(*pBlockScanInfo, pReader);
+    initDelSkylineIterator(*pBlockScanInfo, pReader->info.order, &pReader->cost);
+
+    // no file block bounds the scan here, so the end key is the extreme value in the traverse direction
+    int64_t endKey = (ASCENDING_TRAVERSE(pReader->info.order)) ? INT64_MAX : INT64_MIN;
+    int32_t code = buildDataBlockFromBuf(pReader, *pBlockScanInfo, endKey);
+    if (code != TSDB_CODE_SUCCESS) {
+      return code;
+    }
+
+    // rows were produced for this table, hand the result block back to the caller
+    if (pReader->resBlockInfo.pResBlock->info.rows > 0) {
+      return TSDB_CODE_SUCCESS;
+    }
+
+    // current table is exhausted, let's try next table
+    bool hasNexTable = moveToNextTable(pUidList, pStatus);
+    if (!hasNexTable) {
+      return TSDB_CODE_SUCCESS;
+    }
+  }
+}
+
+// set the correct start position in case of the first/last file block, according to the query time window
+static void initBlockDumpInfo(STsdbReader* pReader, SDataBlockIter* pBlockIter) {
+  int64_t lastKey = ASCENDING_TRAVERSE(pReader->info.order) ? INT64_MIN : INT64_MAX;
+  SFileDataBlockInfo* pBlockInfo = getCurrentBlockInfo(pBlockIter);
+  SReaderStatus* pStatus = &pReader->status;
+  SFileBlockDumpInfo* pDumpInfo = &pStatus->fBlockDumpInfo;
+
+  if (pBlockInfo) {
+    // NOTE(review): other lookups in this file treat the table-map value as STableBlockScanInfo** (the value of
+    // pStatus->pTableIter is dereferenced before use). If pBlockIter->pTableMap holds the same kind of value, the
+    // result below needs one more dereference before reading ->lastKey -- confirm against the map's owner.
+    STableBlockScanInfo* pScanInfo = tSimpleHashGet(pBlockIter->pTableMap, &pBlockInfo->uid, sizeof(pBlockInfo->uid));
+    if (pScanInfo) {
+      lastKey = pScanInfo->lastKey;
+    }
+
+    pDumpInfo->totalRows = pBlockInfo->record.numRow;
+    pDumpInfo->rowIndex = ASCENDING_TRAVERSE(pReader->info.order) ? 
0 : pBlockInfo->record.numRow - 1; + } else { + pDumpInfo->totalRows = 0; + pDumpInfo->rowIndex = 0; + } + + pDumpInfo->allDumped = false; + pDumpInfo->lastKey = lastKey; +} + +static int32_t initForFirstBlockInFile(STsdbReader* pReader, SDataBlockIter* pBlockIter) { + SBlockNumber num = {0}; + SArray* pTableList = taosArrayInit(40, POINTER_BYTES); + + int32_t code = moveToNextFile(pReader, &num, pTableList); + if (code != TSDB_CODE_SUCCESS) { + taosArrayDestroy(pTableList); + return code; + } + + // all data files are consumed, try data in buffer + if (num.numOfBlocks + num.numOfLastFiles == 0) { + pReader->status.loadFromFile = false; + taosArrayDestroy(pTableList); + return code; + } + + // initialize the block iterator for a new fileset + if (num.numOfBlocks > 0) { + code = initBlockIterator(pReader, pBlockIter, num.numOfBlocks, pTableList); + } else { // no block data, only last block exists + tBlockDataReset(&pReader->status.fileBlockData); + resetDataBlockIterator(pBlockIter, pReader->info.order); + resetTableListIndex(&pReader->status); + } + + // set the correct start position according to the query time window + initBlockDumpInfo(pReader, pBlockIter); + taosArrayDestroy(pTableList); + return code; +} + +static bool fileBlockPartiallyRead(SFileBlockDumpInfo* pDumpInfo, bool asc) { + return (!pDumpInfo->allDumped) && + ((pDumpInfo->rowIndex > 0 && asc) || (pDumpInfo->rowIndex < (pDumpInfo->totalRows - 1) && (!asc))); +} + +typedef enum { + TSDB_READ_RETURN = 0x1, + TSDB_READ_CONTINUE = 0x2, +} ERetrieveType; + +static ERetrieveType doReadDataFromLastFiles(STsdbReader* pReader) { + int32_t code = TSDB_CODE_SUCCESS; + SSDataBlock* pResBlock = pReader->resBlockInfo.pResBlock; + SDataBlockIter* pBlockIter = &pReader->status.blockIter; + + while (1) { + terrno = 0; + + code = doLoadLastBlockSequentially(pReader); + if (code != TSDB_CODE_SUCCESS) { + terrno = code; + return TSDB_READ_RETURN; + } + + if (pResBlock->info.rows > 0) { + return TSDB_READ_RETURN; + } + 
+ // all data blocks are checked in this last block file, now let's try the next file + ASSERT(pReader->status.pTableIter == NULL); + code = initForFirstBlockInFile(pReader, pBlockIter); + + // error happens or all the data files are completely checked + if ((code != TSDB_CODE_SUCCESS) || (pReader->status.loadFromFile == false)) { + terrno = code; + return TSDB_READ_RETURN; + } + + if (pBlockIter->numOfBlocks > 0) { // there are data blocks existed. + return TSDB_READ_CONTINUE; + } else { // all blocks in data file are checked, let's check the data in last files + resetTableListIndex(&pReader->status); + } + } +} + +static int32_t buildBlockFromFiles(STsdbReader* pReader) { + int32_t code = TSDB_CODE_SUCCESS; + bool asc = ASCENDING_TRAVERSE(pReader->info.order); + + SDataBlockIter* pBlockIter = &pReader->status.blockIter; + SSDataBlock* pResBlock = pReader->resBlockInfo.pResBlock; + + if (pBlockIter->numOfBlocks == 0) { + // let's try to extract data from stt files. + ERetrieveType type = doReadDataFromLastFiles(pReader); + if (type == TSDB_READ_RETURN) { + return terrno; + } + + code = doBuildDataBlock(pReader); + if (code != TSDB_CODE_SUCCESS || pResBlock->info.rows > 0) { + return code; + } + } + + while (1) { + SFileBlockDumpInfo* pDumpInfo = &pReader->status.fBlockDumpInfo; + + if (fileBlockPartiallyRead(pDumpInfo, asc)) { // file data block is partially loaded + code = buildComposedDataBlock(pReader); + } else { + // current block are exhausted, try the next file block + if (pDumpInfo->allDumped) { + // try next data block in current file + bool hasNext = blockIteratorNext(&pReader->status.blockIter, pReader->idStr); + if (hasNext) { // check for the next block in the block accessed order list + initBlockDumpInfo(pReader, pBlockIter); + } else { + // all data blocks in files are checked, let's check the data in last files. 
+ // data blocks in current file are exhausted, let's try the next file now + SBlockData* pBlockData = &pReader->status.fileBlockData; + if (pBlockData->uid != 0) { + tBlockDataClear(pBlockData); + } + + tBlockDataReset(pBlockData); + resetDataBlockIterator(pBlockIter, pReader->info.order); + resetTableListIndex(&pReader->status); + + ERetrieveType type = doReadDataFromLastFiles(pReader); + if (type == TSDB_READ_RETURN) { + return terrno; + } + } + } + + code = doBuildDataBlock(pReader); + } + + if (code != TSDB_CODE_SUCCESS || pResBlock->info.rows > 0) { + return code; + } + } +} + +static STsdb* getTsdbByRetentions(SVnode* pVnode, TSKEY winSKey, SRetention* retentions, const char* idStr, + int8_t* pLevel) { + if (VND_IS_RSMA(pVnode)) { + int8_t level = 0; + int8_t precision = pVnode->config.tsdbCfg.precision; + int64_t now = taosGetTimestamp(precision); + int64_t offset = tsQueryRsmaTolerance * ((precision == TSDB_TIME_PRECISION_MILLI) ? 1L + : (precision == TSDB_TIME_PRECISION_MICRO) ? 1000L + : 1000000L); + + for (int8_t i = 0; i < TSDB_RETENTION_MAX; ++i) { + SRetention* pRetention = retentions + level; + if (pRetention->keep <= 0) { + if (level > 0) { + --level; + } + break; + } + if ((now - pRetention->keep) <= (winSKey + offset)) { + break; + } + ++level; + } + + const char* str = (idStr != NULL) ? 
idStr : ""; + + if (level == TSDB_RETENTION_L0) { + *pLevel = TSDB_RETENTION_L0; + tsdbDebug("vgId:%d, rsma level %d is selected to query %s", TD_VID(pVnode), TSDB_RETENTION_L0, str); + return VND_RSMA0(pVnode); + } else if (level == TSDB_RETENTION_L1) { + *pLevel = TSDB_RETENTION_L1; + tsdbDebug("vgId:%d, rsma level %d is selected to query %s", TD_VID(pVnode), TSDB_RETENTION_L1, str); + return VND_RSMA1(pVnode); + } else { + *pLevel = TSDB_RETENTION_L2; + tsdbDebug("vgId:%d, rsma level %d is selected to query %s", TD_VID(pVnode), TSDB_RETENTION_L2, str); + return VND_RSMA2(pVnode); + } + } + + return VND_TSDB(pVnode); +} + +SVersionRange getQueryVerRange(SVnode* pVnode, SQueryTableDataCond* pCond, int8_t level) { + int64_t startVer = (pCond->startVersion == -1) ? 0 : pCond->startVersion; + + int64_t endVer = 0; + if (pCond->endVersion == -1) { + // user not specified end version, set current maximum version of vnode as the endVersion + endVer = pVnode->state.applied; + } else { + endVer = (pCond->endVersion > pVnode->state.applied) ? pVnode->state.applied : pCond->endVersion; + } + + return (SVersionRange){.minVer = startVer, .maxVer = endVer}; +} + +bool hasBeenDropped(const SArray* pDelList, int32_t* index, int64_t key, int64_t ver, int32_t order, + SVersionRange* pVerRange) { + if (pDelList == NULL || (taosArrayGetSize(pDelList) == 0)) { + return false; + } + + size_t num = taosArrayGetSize(pDelList); + bool asc = ASCENDING_TRAVERSE(order); + int32_t step = asc ? 
1 : -1; + + if (asc) { + if (*index >= num - 1) { + TSDBKEY* last = taosArrayGetLast(pDelList); + ASSERT(key >= last->ts); + + if (key > last->ts) { + return false; + } else if (key == last->ts) { + TSDBKEY* prev = taosArrayGet(pDelList, num - 2); + return (prev->version >= ver && prev->version <= pVerRange->maxVer && prev->version >= pVerRange->minVer); + } + } else { + TSDBKEY* pCurrent = taosArrayGet(pDelList, *index); + TSDBKEY* pNext = taosArrayGet(pDelList, (*index) + 1); + + if (key < pCurrent->ts) { + return false; + } + + if (pCurrent->ts <= key && pNext->ts >= key && pCurrent->version >= ver && + pVerRange->maxVer >= pCurrent->version) { + return true; + } + + while (pNext->ts <= key && (*index) < num - 1) { + (*index) += 1; + + if ((*index) < num - 1) { + pCurrent = taosArrayGet(pDelList, *index); + pNext = taosArrayGet(pDelList, (*index) + 1); + + // it is not a consecutive deletion range, ignore it + if (pCurrent->version == 0 && pNext->version > 0) { + continue; + } + + if (pCurrent->ts <= key && pNext->ts >= key && pCurrent->version >= ver && + pVerRange->maxVer >= pCurrent->version) { + return true; + } + } + } + + return false; + } + } else { + if (*index <= 0) { + TSDBKEY* pFirst = taosArrayGet(pDelList, 0); + + if (key < pFirst->ts) { + return false; + } else if (key == pFirst->ts) { + return pFirst->version >= ver; + } else { + ASSERT(0); + } + } else { + TSDBKEY* pCurrent = taosArrayGet(pDelList, *index); + TSDBKEY* pPrev = taosArrayGet(pDelList, (*index) - 1); + + if (key > pCurrent->ts) { + return false; + } + + if (pPrev->ts <= key && pCurrent->ts >= key && pPrev->version >= ver) { + return true; + } + + while (pPrev->ts >= key && (*index) > 1) { + (*index) += step; + + if ((*index) >= 1) { + pCurrent = taosArrayGet(pDelList, *index); + pPrev = taosArrayGet(pDelList, (*index) - 1); + + // it is not a consecutive deletion range, ignore it + if (pCurrent->version > 0 && pPrev->version == 0) { + continue; + } + + if (pPrev->ts <= key && 
pCurrent->ts >= key && pPrev->version >= ver) { + return true; + } + } + } + + return false; + } + } + + return false; +} + +TSDBROW* getValidMemRow(SIterInfo* pIter, const SArray* pDelList, STsdbReader* pReader) { + if (!pIter->hasVal) { + return NULL; + } + + TSDBROW* pRow = tsdbTbDataIterGet(pIter->iter); + TSDBKEY key = TSDBROW_KEY(pRow); + + if (outOfTimeWindow(key.ts, &pReader->info.window)) { + pIter->hasVal = false; + return NULL; + } + + // it is a valid data version + if ((key.version <= pReader->info.verRange.maxVer && key.version >= pReader->info.verRange.minVer) && + (!hasBeenDropped(pDelList, &pIter->index, key.ts, key.version, pReader->info.order, &pReader->info.verRange))) { + return pRow; + } + + while (1) { + pIter->hasVal = tsdbTbDataIterNext(pIter->iter); + if (!pIter->hasVal) { + return NULL; + } + + pRow = tsdbTbDataIterGet(pIter->iter); + + key = TSDBROW_KEY(pRow); + if (outOfTimeWindow(key.ts, &pReader->info.window)) { + pIter->hasVal = false; + return NULL; + } + + if (key.version <= pReader->info.verRange.maxVer && key.version >= pReader->info.verRange.minVer && + (!hasBeenDropped(pDelList, &pIter->index, key.ts, key.version, pReader->info.order, &pReader->info.verRange))) { + return pRow; + } + } +} + +int32_t doMergeRowsInBuf(SIterInfo* pIter, uint64_t uid, int64_t ts, SArray* pDelList, STsdbReader* pReader) { + SRowMerger* pMerger = &pReader->status.merger; + + while (1) { + pIter->hasVal = tsdbTbDataIterNext(pIter->iter); + if (!pIter->hasVal) { + break; + } + + // data exists but not valid + TSDBROW* pRow = getValidMemRow(pIter, pDelList, pReader); + if (pRow == NULL) { + break; + } + + // ts is not identical, quit + TSDBKEY k = TSDBROW_KEY(pRow); + if (k.ts != ts) { + break; + } + + if (pRow->type == TSDBROW_ROW_FMT) { + STSchema* pTSchema = doGetSchemaForTSRow(TSDBROW_SVERSION(pRow), pReader, uid); + if (pTSchema == NULL) { + return terrno; + } + + tsdbRowMergerAdd(pMerger, pRow, pTSchema); + } else { // column format + 
tsdbRowMergerAdd(pMerger, pRow, NULL); + } + } + + return TSDB_CODE_SUCCESS; +} + +static int32_t doMergeRowsInFileBlockImpl(SBlockData* pBlockData, int32_t rowIndex, int64_t key, SRowMerger* pMerger, + SVersionRange* pVerRange, int32_t step) { + while (rowIndex < pBlockData->nRow && rowIndex >= 0 && pBlockData->aTSKEY[rowIndex] == key) { + if (pBlockData->aVersion[rowIndex] > pVerRange->maxVer || pBlockData->aVersion[rowIndex] < pVerRange->minVer) { + rowIndex += step; + continue; + } + + TSDBROW fRow = tsdbRowFromBlockData(pBlockData, rowIndex); + tsdbRowMergerAdd(pMerger, &fRow, NULL); + rowIndex += step; + } + + return rowIndex; +} + +typedef enum { + CHECK_FILEBLOCK_CONT = 0x1, + CHECK_FILEBLOCK_QUIT = 0x2, +} CHECK_FILEBLOCK_STATE; + +static int32_t checkForNeighborFileBlock(STsdbReader* pReader, STableBlockScanInfo* pScanInfo, + SFileDataBlockInfo* pFBlock, SRowMerger* pMerger, int64_t key, + CHECK_FILEBLOCK_STATE* state) { + SFileBlockDumpInfo* pDumpInfo = &pReader->status.fBlockDumpInfo; + SBlockData* pBlockData = &pReader->status.fileBlockData; + bool asc = ASCENDING_TRAVERSE(pReader->info.order); + + *state = CHECK_FILEBLOCK_QUIT; + int32_t step = ASCENDING_TRAVERSE(pReader->info.order) ? 
1 : -1; + + bool loadNeighbor = true; + int32_t code = loadNeighborIfOverlap(pFBlock, pScanInfo, pReader, &loadNeighbor); + + if (loadNeighbor && (code == TSDB_CODE_SUCCESS)) { + pDumpInfo->rowIndex = + doMergeRowsInFileBlockImpl(pBlockData, pDumpInfo->rowIndex, key, pMerger, &pReader->info.verRange, step); + if ((pDumpInfo->rowIndex >= pDumpInfo->totalRows && asc) || (pDumpInfo->rowIndex < 0 && !asc)) { + *state = CHECK_FILEBLOCK_CONT; + } + } + + return code; +} + +int32_t doMergeRowsInFileBlocks(SBlockData* pBlockData, STableBlockScanInfo* pScanInfo, STsdbReader* pReader) { + SFileBlockDumpInfo* pDumpInfo = &pReader->status.fBlockDumpInfo; + + SRowMerger* pMerger = &pReader->status.merger; + bool asc = ASCENDING_TRAVERSE(pReader->info.order); + int64_t key = pBlockData->aTSKEY[pDumpInfo->rowIndex]; + int32_t step = asc ? 1 : -1; + + pDumpInfo->rowIndex += step; + if ((pDumpInfo->rowIndex <= pBlockData->nRow - 1 && asc) || (pDumpInfo->rowIndex >= 0 && !asc)) { + pDumpInfo->rowIndex = + doMergeRowsInFileBlockImpl(pBlockData, pDumpInfo->rowIndex, key, pMerger, &pReader->info.verRange, step); + } + + // all rows are consumed, let's try next file block + if ((pDumpInfo->rowIndex >= pBlockData->nRow && asc) || (pDumpInfo->rowIndex < 0 && !asc)) { + while (1) { + CHECK_FILEBLOCK_STATE st; + + SFileDataBlockInfo* pFileBlockInfo = getCurrentBlockInfo(&pReader->status.blockIter); + if (pFileBlockInfo == NULL) { + st = CHECK_FILEBLOCK_QUIT; + break; + } + + checkForNeighborFileBlock(pReader, pScanInfo, pFileBlockInfo, pMerger, key, &st); + if (st == CHECK_FILEBLOCK_QUIT) { + break; + } + } + } + + return TSDB_CODE_SUCCESS; +} + +int32_t doMergeRowsInLastBlock(SLastBlockReader* pLastBlockReader, STableBlockScanInfo* pScanInfo, int64_t ts, + SRowMerger* pMerger, SVersionRange* pVerRange, const char* idStr) { + while (nextRowFromLastBlocks(pLastBlockReader, pScanInfo, pVerRange)) { + int64_t next1 = getCurrentKeyInLastBlock(pLastBlockReader); + if (next1 == ts) { + TSDBROW* 
pRow1 = tMergeTreeGetRow(&pLastBlockReader->mergeTree); + tsdbRowMergerAdd(pMerger, pRow1, NULL); + } else { + tsdbTrace("uid:%" PRIu64 " last del index:%d, del range:%d, lastKeyInStt:%" PRId64 ", %s", pScanInfo->uid, + pScanInfo->lastBlockDelIndex, (int32_t)taosArrayGetSize(pScanInfo->delSkyline), pScanInfo->lastKeyInStt, + idStr); + break; + } + } + + return TSDB_CODE_SUCCESS; +} + +int32_t doMergeMemTableMultiRows(TSDBROW* pRow, uint64_t uid, SIterInfo* pIter, SArray* pDelList, TSDBROW* pResRow, + STsdbReader* pReader, bool* freeTSRow) { + TSDBROW* pNextRow = NULL; + TSDBROW current = *pRow; + + { // if the timestamp of the next valid row has a different ts, return current row directly + pIter->hasVal = tsdbTbDataIterNext(pIter->iter); + + if (!pIter->hasVal) { + *pResRow = *pRow; + *freeTSRow = false; + return TSDB_CODE_SUCCESS; + } else { // has next point in mem/imem + pNextRow = getValidMemRow(pIter, pDelList, pReader); + if (pNextRow == NULL) { + *pResRow = current; + *freeTSRow = false; + return TSDB_CODE_SUCCESS; + } + + if (TSDBROW_TS(¤t) != TSDBROW_TS(pNextRow)) { + *pResRow = current; + *freeTSRow = false; + return TSDB_CODE_SUCCESS; + } + } + } + + terrno = 0; + int32_t code = 0; + + // start to merge duplicated rows + if (current.type == TSDBROW_ROW_FMT) { + // get the correct schema for data in memory + STSchema* pTSchema = doGetSchemaForTSRow(TSDBROW_SVERSION(¤t), pReader, uid); + if (pTSchema == NULL) { + return terrno; + } + + code = tsdbRowMergerAdd(&pReader->status.merger, ¤t, pTSchema); + if (code != TSDB_CODE_SUCCESS) { + return code; + } + + STSchema* pTSchema1 = doGetSchemaForTSRow(TSDBROW_SVERSION(pNextRow), pReader, uid); + if (pTSchema1 == NULL) { + return terrno; + } + + tsdbRowMergerAdd(&pReader->status.merger, pNextRow, pTSchema1); + } else { // let's merge rows in file block + code = tsdbRowMergerAdd(&pReader->status.merger, ¤t, pReader->info.pSchema); + if (code != TSDB_CODE_SUCCESS) { + return code; + } + + 
tsdbRowMergerAdd(&pReader->status.merger, pNextRow, NULL); + } + + code = doMergeRowsInBuf(pIter, uid, TSDBROW_TS(¤t), pDelList, pReader); + if (code != TSDB_CODE_SUCCESS) { + return code; + } + + code = tsdbRowMergerGetRow(&pReader->status.merger, &pResRow->pTSRow); + if (code != TSDB_CODE_SUCCESS) { + return code; + } + + pResRow->type = TSDBROW_ROW_FMT; + tsdbRowMergerClear(&pReader->status.merger); + *freeTSRow = true; + + return TSDB_CODE_SUCCESS; +} + +int32_t doMergeMemIMemRows(TSDBROW* pRow, TSDBROW* piRow, STableBlockScanInfo* pBlockScanInfo, STsdbReader* pReader, + SRow** pTSRow) { + SRowMerger* pMerger = &pReader->status.merger; + + TSDBKEY k = TSDBROW_KEY(pRow); + TSDBKEY ik = TSDBROW_KEY(piRow); + STSchema* pSchema = doGetSchemaForTSRow(TSDBROW_SVERSION(pRow), pReader, pBlockScanInfo->uid); + if (pSchema == NULL) { + return terrno; + } + + STSchema* piSchema = doGetSchemaForTSRow(TSDBROW_SVERSION(piRow), pReader, pBlockScanInfo->uid); + if (piSchema == NULL) { + return terrno; + } + + if (ASCENDING_TRAVERSE(pReader->info.order)) { // ascending order imem --> mem + int32_t code = tsdbRowMergerAdd(&pReader->status.merger, piRow, piSchema); + if (code != TSDB_CODE_SUCCESS) { + return code; + } + + code = doMergeRowsInBuf(&pBlockScanInfo->iiter, pBlockScanInfo->uid, ik.ts, pBlockScanInfo->delSkyline, pReader); + if (code != TSDB_CODE_SUCCESS) { + return code; + } + + tsdbRowMergerAdd(&pReader->status.merger, pRow, pSchema); + code = doMergeRowsInBuf(&pBlockScanInfo->iter, pBlockScanInfo->uid, k.ts, pBlockScanInfo->delSkyline, pReader); + if (code != TSDB_CODE_SUCCESS) { + return code; + } + + } else { + int32_t code = tsdbRowMergerAdd(&pReader->status.merger, pRow, pSchema); + if (code != TSDB_CODE_SUCCESS || pMerger->pTSchema == NULL) { + return code; + } + + code = doMergeRowsInBuf(&pBlockScanInfo->iter, pBlockScanInfo->uid, k.ts, pBlockScanInfo->delSkyline, pReader); + if (code != TSDB_CODE_SUCCESS) { + return code; + } + + 
tsdbRowMergerAdd(&pReader->status.merger, piRow, piSchema); + code = doMergeRowsInBuf(&pBlockScanInfo->iiter, pBlockScanInfo->uid, ik.ts, pBlockScanInfo->delSkyline, pReader); + if (code != TSDB_CODE_SUCCESS) { + return code; + } + } + + int32_t code = tsdbRowMergerGetRow(pMerger, pTSRow); + tsdbRowMergerClear(pMerger); + return code; +} + +static int32_t tsdbGetNextRowInMem(STableBlockScanInfo* pBlockScanInfo, STsdbReader* pReader, TSDBROW* pResRow, + int64_t endKey, bool* freeTSRow) { + TSDBROW* pRow = getValidMemRow(&pBlockScanInfo->iter, pBlockScanInfo->delSkyline, pReader); + TSDBROW* piRow = getValidMemRow(&pBlockScanInfo->iiter, pBlockScanInfo->delSkyline, pReader); + SArray* pDelList = pBlockScanInfo->delSkyline; + uint64_t uid = pBlockScanInfo->uid; + + // todo refactor + bool asc = ASCENDING_TRAVERSE(pReader->info.order); + if (pBlockScanInfo->iter.hasVal) { + TSDBKEY k = TSDBROW_KEY(pRow); + if ((k.ts >= endKey && asc) || (k.ts <= endKey && !asc)) { + pRow = NULL; + } + } + + if (pBlockScanInfo->iiter.hasVal) { + TSDBKEY k = TSDBROW_KEY(piRow); + if ((k.ts >= endKey && asc) || (k.ts <= endKey && !asc)) { + piRow = NULL; + } + } + + if (pBlockScanInfo->iter.hasVal && pBlockScanInfo->iiter.hasVal && pRow != NULL && piRow != NULL) { + TSDBKEY k = TSDBROW_KEY(pRow); + TSDBKEY ik = TSDBROW_KEY(piRow); + + int32_t code = TSDB_CODE_SUCCESS; + if (ik.ts != k.ts) { + if (((ik.ts < k.ts) && asc) || ((ik.ts > k.ts) && (!asc))) { // ik.ts < k.ts + code = doMergeMemTableMultiRows(piRow, uid, &pBlockScanInfo->iiter, pDelList, pResRow, pReader, freeTSRow); + } else if (((k.ts < ik.ts) && asc) || ((k.ts > ik.ts) && (!asc))) { + code = doMergeMemTableMultiRows(pRow, uid, &pBlockScanInfo->iter, pDelList, pResRow, pReader, freeTSRow); + } + } else { // ik.ts == k.ts + *freeTSRow = true; + pResRow->type = TSDBROW_ROW_FMT; + code = doMergeMemIMemRows(pRow, piRow, pBlockScanInfo, pReader, &pResRow->pTSRow); + if (code != TSDB_CODE_SUCCESS) { + return code; + } + } + + return 
code; + } + + if (pBlockScanInfo->iter.hasVal && pRow != NULL) { + return doMergeMemTableMultiRows(pRow, pBlockScanInfo->uid, &pBlockScanInfo->iter, pDelList, pResRow, pReader, + freeTSRow); + } + + if (pBlockScanInfo->iiter.hasVal && piRow != NULL) { + return doMergeMemTableMultiRows(piRow, uid, &pBlockScanInfo->iiter, pDelList, pResRow, pReader, freeTSRow); + } + + return TSDB_CODE_SUCCESS; +} + +int32_t doAppendRowFromTSRow(SSDataBlock* pBlock, STsdbReader* pReader, SRow* pTSRow, STableBlockScanInfo* pScanInfo) { + int32_t outputRowIndex = pBlock->info.rows; + int64_t uid = pScanInfo->uid; + int32_t code = TSDB_CODE_SUCCESS; + + SBlockLoadSuppInfo* pSupInfo = &pReader->suppInfo; + STSchema* pSchema = doGetSchemaForTSRow(pTSRow->sver, pReader, uid); + if (pSchema == NULL) { + return terrno; + } + + SColVal colVal = {0}; + int32_t i = 0, j = 0; + + if (pSupInfo->colId[i] == PRIMARYKEY_TIMESTAMP_COL_ID) { + SColumnInfoData* pColData = taosArrayGet(pBlock->pDataBlock, pSupInfo->slotId[i]); + ((int64_t*)pColData->pData)[outputRowIndex] = pTSRow->ts; + i += 1; + } + + while (i < pSupInfo->numOfCols && j < pSchema->numOfCols) { + col_id_t colId = pSupInfo->colId[i]; + + if (colId == pSchema->columns[j].colId) { + SColumnInfoData* pColInfoData = taosArrayGet(pBlock->pDataBlock, pSupInfo->slotId[i]); + + tRowGet(pTSRow, pSchema, j, &colVal); + code = doCopyColVal(pColInfoData, outputRowIndex, i, &colVal, pSupInfo); + if (code) { + return code; + } + i += 1; + j += 1; + } else if (colId < pSchema->columns[j].colId) { + SColumnInfoData* pColInfoData = taosArrayGet(pBlock->pDataBlock, pSupInfo->slotId[i]); + + colDataSetNULL(pColInfoData, outputRowIndex); + i += 1; + } else if (colId > pSchema->columns[j].colId) { + j += 1; + } + } + + // set null value since current column does not exist in the "pSchema" + while (i < pSupInfo->numOfCols) { + SColumnInfoData* pColInfoData = taosArrayGet(pBlock->pDataBlock, pSupInfo->slotId[i]); + colDataSetNULL(pColInfoData, 
outputRowIndex); + i += 1; + } + + pBlock->info.dataLoad = 1; + pBlock->info.rows += 1; + pScanInfo->lastKey = pTSRow->ts; + return TSDB_CODE_SUCCESS; +} + +int32_t doAppendRowFromFileBlock(SSDataBlock* pResBlock, STsdbReader* pReader, SBlockData* pBlockData, + int32_t rowIndex) { + int32_t i = 0, j = 0; + int32_t outputRowIndex = pResBlock->info.rows; + int32_t code = TSDB_CODE_SUCCESS; + + SBlockLoadSuppInfo* pSupInfo = &pReader->suppInfo; + ((int64_t*)pReader->status.pPrimaryTsCol->pData)[outputRowIndex] = pBlockData->aTSKEY[rowIndex]; + i += 1; + + SColVal cv = {0}; + int32_t numOfInputCols = pBlockData->nColData; + int32_t numOfOutputCols = pSupInfo->numOfCols; + + while (i < numOfOutputCols && j < numOfInputCols) { + SColData* pData = tBlockDataGetColDataByIdx(pBlockData, j); + if (pData->cid < pSupInfo->colId[i]) { + j += 1; + continue; + } + + SColumnInfoData* pCol = TARRAY_GET_ELEM(pResBlock->pDataBlock, pSupInfo->slotId[i]); + if (pData->cid == pSupInfo->colId[i]) { + tColDataGetValue(pData, rowIndex, &cv); + code = doCopyColVal(pCol, outputRowIndex, i, &cv, pSupInfo); + if (code) { + return code; + } + j += 1; + } else if (pData->cid > pCol->info.colId) { + // the specified column does not exist in file block, fill with null data + colDataSetNULL(pCol, outputRowIndex); + } + + i += 1; + } + + while (i < numOfOutputCols) { + SColumnInfoData* pCol = taosArrayGet(pResBlock->pDataBlock, pSupInfo->slotId[i]); + colDataSetNULL(pCol, outputRowIndex); + i += 1; + } + + pResBlock->info.dataLoad = 1; + pResBlock->info.rows += 1; + return TSDB_CODE_SUCCESS; +} + +int32_t buildDataBlockFromBufImpl(STableBlockScanInfo* pBlockScanInfo, int64_t endKey, int32_t capacity, + STsdbReader* pReader) { + SSDataBlock* pBlock = pReader->resBlockInfo.pResBlock; + int32_t code = TSDB_CODE_SUCCESS; + + do { + // SRow* pTSRow = NULL; + TSDBROW row = {.type = -1}; + bool freeTSRow = false; + tsdbGetNextRowInMem(pBlockScanInfo, pReader, &row, endKey, &freeTSRow); + if (row.type == 
-1) { + break; + } + + if (row.type == TSDBROW_ROW_FMT) { + code = doAppendRowFromTSRow(pBlock, pReader, row.pTSRow, pBlockScanInfo); + + if (freeTSRow) { + taosMemoryFree(row.pTSRow); + } + + if (code) { + return code; + } + } else { + code = doAppendRowFromFileBlock(pBlock, pReader, row.pBlockData, row.iRow); + if (code) { + break; + } + } + + // no data in buffer, return immediately + if (!(pBlockScanInfo->iter.hasVal || pBlockScanInfo->iiter.hasVal)) { + break; + } + + if (pBlock->info.rows >= capacity) { + break; + } + } while (1); + + return code; +} + +// TODO refactor: with createDataBlockScanInfo +int32_t tsdbSetTableList2(STsdbReader* pReader, const void* pTableList, int32_t num) { + int32_t size = tSimpleHashGetSize(pReader->status.pTableMap); + + STableBlockScanInfo** p = NULL; + int32_t iter = 0; + + while ((p = tSimpleHashIterate(pReader->status.pTableMap, p, &iter)) != NULL) { + clearBlockScanInfo(*p); + } + + if (size < num) { + int32_t code = ensureBlockScanInfoBuf(&pReader->blockInfoBuf, num); + if (code) { + return code; + } + + char* p1 = taosMemoryRealloc(pReader->status.uidList.tableUidList, sizeof(uint64_t) * num); + if (p1 == NULL) { + return TSDB_CODE_OUT_OF_MEMORY; + } + + pReader->status.uidList.tableUidList = (uint64_t*)p1; + } + + tSimpleHashClear(pReader->status.pTableMap); + STableUidList* pUidList = &pReader->status.uidList; + pUidList->currentIndex = 0; + + STableKeyInfo* pList = (STableKeyInfo*)pTableList; + for (int32_t i = 0; i < num; ++i) { + STableBlockScanInfo* pInfo = getPosInBlockInfoBuf(&pReader->blockInfoBuf, i); + pInfo->uid = pList[i].uid; + pUidList->tableUidList[i] = pList[i].uid; + + // todo extract method + if (ASCENDING_TRAVERSE(pReader->info.order)) { + int64_t skey = pReader->info.window.skey; + pInfo->lastKey = (skey > INT64_MIN) ? (skey - 1) : skey; + pInfo->lastKeyInStt = skey; + } else { + int64_t ekey = pReader->info.window.ekey; + pInfo->lastKey = (ekey < INT64_MAX) ? 
(ekey + 1) : ekey; + pInfo->lastKeyInStt = ekey; + } + + tSimpleHashPut(pReader->status.pTableMap, &pInfo->uid, sizeof(uint64_t), &pInfo, POINTER_BYTES); + } + + return TDB_CODE_SUCCESS; +} + +void* tsdbGetIdx2(SMeta* pMeta) { + if (pMeta == NULL) { + return NULL; + } + return metaGetIdx(pMeta); +} + +void* tsdbGetIvtIdx2(SMeta* pMeta) { + if (pMeta == NULL) { + return NULL; + } + return metaGetIvtIdx(pMeta); +} + +uint64_t tsdbGetReaderMaxVersion2(STsdbReader* pReader) { return pReader->info.verRange.maxVer; } + +static int32_t doOpenReaderImpl(STsdbReader* pReader) { + SReaderStatus* pStatus = &pReader->status; + SDataBlockIter* pBlockIter = &pStatus->blockIter; + + initFilesetIterator(&pStatus->fileIter, pReader->pReadSnap->pfSetArray, pReader); + resetDataBlockIterator(&pStatus->blockIter, pReader->info.order); + + int32_t code = TSDB_CODE_SUCCESS; + if (pStatus->fileIter.numOfFiles == 0) { + pStatus->loadFromFile = false; + } else if (READ_MODE_COUNT_ONLY == pReader->info.readMode) { + // DO NOTHING + } else { + code = initForFirstBlockInFile(pReader, pBlockIter); + } + + if (!pStatus->loadFromFile) { + resetTableListIndex(pStatus); + } + + return code; +} + +static void freeSchemaFunc(void* param) { + void** p = (void**)param; + taosMemoryFreeClear(*p); +} + +static void clearSharedPtr(STsdbReader* p) { + p->status.pTableMap = NULL; + p->status.uidList.tableUidList = NULL; + p->info.pSchema = NULL; + p->pReadSnap = NULL; + p->pSchemaMap = NULL; +} + +static void setSharedPtr(STsdbReader* pDst, const STsdbReader* pSrc) { + pDst->status.pTableMap = pSrc->status.pTableMap; + pDst->status.uidList = pSrc->status.uidList; + pDst->info.pSchema = pSrc->info.pSchema; + pDst->pSchemaMap = pSrc->pSchemaMap; + pDst->pReadSnap = pSrc->pReadSnap; + pDst->pReadSnap->pfSetArray = pSrc->pReadSnap->pfSetArray; + + if (pDst->info.pSchema) { + tsdbRowMergerInit(&pDst->status.merger, pDst->info.pSchema); + } +} + +// ====================================== EXPOSED APIs 
====================================== +int32_t tsdbReaderOpen2(void* pVnode, SQueryTableDataCond* pCond, void* pTableList, int32_t numOfTables, + SSDataBlock* pResBlock, void** ppReader, const char* idstr, bool countOnly, + SHashObj** pIgnoreTables) { + STimeWindow window = pCond->twindows; + SVnodeCfg* pConf = &(((SVnode*)pVnode)->config); + + int32_t capacity = pConf->tsdbCfg.maxRows; + if (pResBlock != NULL) { + blockDataEnsureCapacity(pResBlock, capacity); + } + + int32_t code = tsdbReaderCreate(pVnode, pCond, ppReader, capacity, pResBlock, idstr); + if (code != TSDB_CODE_SUCCESS) { + goto _err; + } + + // check for query time window + STsdbReader* pReader = *ppReader; + if (isEmptyQueryTimeWindow(&pReader->info.window) && pCond->type == TIMEWINDOW_RANGE_CONTAINED) { + tsdbDebug("%p query window not overlaps with the data set, no result returned, %s", pReader, pReader->idStr); + return TSDB_CODE_SUCCESS; + } + + if (pCond->type == TIMEWINDOW_RANGE_EXTERNAL) { + // update the SQueryTableDataCond to create inner reader + int32_t order = pCond->order; + if (order == TSDB_ORDER_ASC) { + pCond->twindows.ekey = window.skey - 1; + pCond->twindows.skey = INT64_MIN; + pCond->order = TSDB_ORDER_DESC; + } else { + pCond->twindows.skey = window.ekey + 1; + pCond->twindows.ekey = INT64_MAX; + pCond->order = TSDB_ORDER_ASC; + } + + // here we only need one more row, so the capacity is set to be ONE. 
+ code = tsdbReaderCreate(pVnode, pCond, (void**)&((STsdbReader*)pReader)->innerReader[0], 1, pResBlock, idstr); + if (code != TSDB_CODE_SUCCESS) { + goto _err; + } + + if (order == TSDB_ORDER_ASC) { + pCond->twindows.skey = window.ekey + 1; + pCond->twindows.ekey = INT64_MAX; + } else { + pCond->twindows.skey = INT64_MIN; + pCond->twindows.ekey = window.ekey - 1; + } + pCond->order = order; + + code = tsdbReaderCreate(pVnode, pCond, (void**)&((STsdbReader*)pReader)->innerReader[1], 1, pResBlock, idstr); + if (code != TSDB_CODE_SUCCESS) { + goto _err; + } + } + + // NOTE: the endVersion in pCond is the data version not schema version, so pCond->endVersion is not correct here. + // no valid error code set in metaGetTbTSchema, so let's set the error code here. + // we should proceed in case of tmq processing. + if (pCond->suid != 0) { + pReader->info.pSchema = metaGetTbTSchema(pReader->pTsdb->pVnode->pMeta, pReader->info.suid, -1, 1); + if (pReader->info.pSchema == NULL) { + tsdbError("failed to get table schema, suid:%" PRIu64 ", ver:-1, %s", pReader->info.suid, pReader->idStr); + } + } else if (numOfTables > 0) { + STableKeyInfo* pKey = pTableList; + pReader->info.pSchema = metaGetTbTSchema(pReader->pTsdb->pVnode->pMeta, pKey->uid, -1, 1); + if (pReader->info.pSchema == NULL) { + tsdbError("failed to get table schema, uid:%" PRIu64 ", ver:-1, %s", pKey->uid, pReader->idStr); + } + } + + if (pReader->info.pSchema != NULL) { + tsdbRowMergerInit(&pReader->status.merger, pReader->info.pSchema); + } + + pReader->pSchemaMap = tSimpleHashInit(8, taosFastHash); + if (pReader->pSchemaMap == NULL) { + tsdbError("failed init schema hash for reader %s", pReader->idStr); + code = TSDB_CODE_OUT_OF_MEMORY; + goto _err; + } + + tSimpleHashSetFreeFp(pReader->pSchemaMap, freeSchemaFunc); + if (pReader->info.pSchema != NULL) { + code = updateBlockSMAInfo(pReader->info.pSchema, &pReader->suppInfo); + if (code != TSDB_CODE_SUCCESS) { + goto _err; + } + } + + STsdbReader* p = 
(pReader->innerReader[0] != NULL) ? pReader->innerReader[0] : pReader; + pReader->status.pTableMap = + createDataBlockScanInfo(p, &pReader->blockInfoBuf, pTableList, &pReader->status.uidList, numOfTables); + if (pReader->status.pTableMap == NULL) { + *ppReader = NULL; + code = TSDB_CODE_OUT_OF_MEMORY; + goto _err; + } + + pReader->status.pLDataIterArray = taosArrayInit(4, POINTER_BYTES); + if (pReader->status.pLDataIterArray == NULL) { + terrno = TSDB_CODE_OUT_OF_MEMORY; + goto _err; + } + + pReader->flag = READER_STATUS_SUSPEND; + + if (countOnly) { + pReader->info.readMode = READ_MODE_COUNT_ONLY; + } + + pReader->pIgnoreTables = pIgnoreTables; + + tsdbDebug("%p total numOfTable:%d, window:%" PRId64 " - %" PRId64 ", verRange:%" PRId64 " - %" PRId64 + " in this query %s", + pReader, numOfTables, pReader->info.window.skey, pReader->info.window.ekey, pReader->info.verRange.minVer, + pReader->info.verRange.maxVer, pReader->idStr); + + return code; + +_err: + tsdbError("failed to create data reader, code:%s %s", tstrerror(code), idstr); + tsdbReaderClose2(*ppReader); + *ppReader = NULL; // reset the pointer value. 
+ return code; +} + +void tsdbReaderClose2(STsdbReader* pReader) { + if (pReader == NULL) { + return; + } + + tsdbAcquireReader(pReader); + + { + if (pReader->innerReader[0] != NULL || pReader->innerReader[1] != NULL) { + STsdbReader* p = pReader->innerReader[0]; + clearSharedPtr(p); + + p = pReader->innerReader[1]; + clearSharedPtr(p); + + tsdbReaderClose2(pReader->innerReader[0]); + tsdbReaderClose2(pReader->innerReader[1]); + } + } + + SBlockLoadSuppInfo* pSupInfo = &pReader->suppInfo; + TARRAY2_DESTROY(&pSupInfo->colAggArray, NULL); + for (int32_t i = 0; i < pSupInfo->numOfCols; ++i) { + if (pSupInfo->buildBuf[i] != NULL) { + taosMemoryFreeClear(pSupInfo->buildBuf[i]); + } + } + + if (pReader->resBlockInfo.freeBlock) { + pReader->resBlockInfo.pResBlock = blockDataDestroy(pReader->resBlockInfo.pResBlock); + } + + taosMemoryFree(pSupInfo->colId); + tBlockDataDestroy(&pReader->status.fileBlockData); + cleanupDataBlockIterator(&pReader->status.blockIter); + + size_t numOfTables = tSimpleHashGetSize(pReader->status.pTableMap); + if (pReader->status.pTableMap != NULL) { + destroyAllBlockScanInfo(pReader->status.pTableMap); + clearBlockScanInfoBuf(&pReader->blockInfoBuf); + pReader->status.pTableMap = NULL; + } + + if (pReader->pFileReader != NULL) { + tsdbDataFileReaderClose(&pReader->pFileReader); + } + + qTrace("tsdb/reader-close: %p, untake snapshot", pReader); + tsdbUntakeReadSnap2(pReader, pReader->pReadSnap, true); + pReader->pReadSnap = NULL; + + tsdbReleaseReader(pReader); + tsdbUninitReaderLock(pReader); + + SCostSummary* pCost = &pReader->cost; + SFilesetIter* pFilesetIter = &pReader->status.fileIter; + if (pFilesetIter->pLastBlockReader != NULL) { + SLastBlockReader* pLReader = pFilesetIter->pLastBlockReader; + tMergeTreeClose(&pLReader->mergeTree); + taosMemoryFree(pLReader); + } + + destroySttBlockReader(pReader->status.pLDataIterArray, &pCost->lastBlockLoad, &pCost->lastBlockLoadTime); + taosMemoryFreeClear(pReader->status.uidList.tableUidList); + + 
tsdbDebug( + "%p :io-cost summary: head-file:%" PRIu64 ", head-file time:%.2f ms, SMA:%" PRId64 + " SMA-time:%.2f ms, fileBlocks:%" PRId64 + ", fileBlocks-load-time:%.2f ms, " + "build in-memory-block-time:%.2f ms, lastBlocks:%" PRId64 ", lastBlocks-time:%.2f ms, composed-blocks:%" PRId64 + ", composed-blocks-time:%.2fms, STableBlockScanInfo size:%.2f Kb, createTime:%.2f ms,createSkylineIterTime:%.2f " + "ms, initLastBlockReader:%.2fms, %s", + pReader, pCost->headFileLoad, pCost->headFileLoadTime, pCost->smaDataLoad, pCost->smaLoadTime, pCost->numOfBlocks, + pCost->blockLoadTime, pCost->buildmemBlock, pCost->lastBlockLoad, pCost->lastBlockLoadTime, pCost->composedBlocks, + pCost->buildComposedBlockTime, numOfTables * sizeof(STableBlockScanInfo) / 1000.0, pCost->createScanInfoList, + pCost->createSkylineIterTime, pCost->initLastBlockReader, pReader->idStr); + + taosMemoryFree(pReader->idStr); + + tsdbRowMergerCleanup(&pReader->status.merger); + taosMemoryFree(pReader->info.pSchema); + + tSimpleHashCleanup(pReader->pSchemaMap); + taosMemoryFreeClear(pReader); +} + +int32_t tsdbReaderSuspend2(STsdbReader* pReader) { + int32_t code = 0; + + // save reader's base state & reset top state to be reconstructed from base state + SReaderStatus* pStatus = &pReader->status; + STableBlockScanInfo* pBlockScanInfo = NULL; + + if (pStatus->loadFromFile) { + SFileDataBlockInfo* pBlockInfo = getCurrentBlockInfo(&pReader->status.blockIter); + if (pBlockInfo != NULL) { + pBlockScanInfo = getTableBlockScanInfo(pStatus->pTableMap, pBlockInfo->uid, pReader->idStr); + if (pBlockScanInfo == NULL) { + goto _err; + } + } else { + pBlockScanInfo = *pStatus->pTableIter; + } + + tsdbDataFileReaderClose(&pReader->pFileReader); + + // resetDataBlockScanInfo excluding lastKey + STableBlockScanInfo** p = NULL; + int32_t iter = 0; + + while ((p = tSimpleHashIterate(pStatus->pTableMap, p, &iter)) != NULL) { + STableBlockScanInfo* pInfo = *(STableBlockScanInfo**)p; + + pInfo->iterInit = false; + 
pInfo->iter.hasVal = false; + pInfo->iiter.hasVal = false; + + if (pInfo->iter.iter != NULL) { + pInfo->iter.iter = tsdbTbDataIterDestroy(pInfo->iter.iter); + } + + if (pInfo->iiter.iter != NULL) { + pInfo->iiter.iter = tsdbTbDataIterDestroy(pInfo->iiter.iter); + } + + pInfo->delSkyline = taosArrayDestroy(pInfo->delSkyline); + pInfo->pfileDelData = taosArrayDestroy(pInfo->pfileDelData); + } + } else { + // resetDataBlockScanInfo excluding lastKey + STableBlockScanInfo** p = NULL; + int32_t iter = 0; + + while ((p = tSimpleHashIterate(pStatus->pTableMap, p, &iter)) != NULL) { + STableBlockScanInfo* pInfo = *(STableBlockScanInfo**)p; + + pInfo->iterInit = false; + pInfo->iter.hasVal = false; + pInfo->iiter.hasVal = false; + + if (pInfo->iter.iter != NULL) { + pInfo->iter.iter = tsdbTbDataIterDestroy(pInfo->iter.iter); + } + + if (pInfo->iiter.iter != NULL) { + pInfo->iiter.iter = tsdbTbDataIterDestroy(pInfo->iiter.iter); + } + + pInfo->delSkyline = taosArrayDestroy(pInfo->delSkyline); + } + + pBlockScanInfo = pStatus->pTableIter == NULL ? NULL : *pStatus->pTableIter; + if (pBlockScanInfo) { + // save lastKey to restore memory iterator + STimeWindow w = pReader->resBlockInfo.pResBlock->info.window; + pBlockScanInfo->lastKey = ASCENDING_TRAVERSE(pReader->info.order) ? 
w.ekey : w.skey; + + // reset current current table's data block scan info, + pBlockScanInfo->iterInit = false; + + pBlockScanInfo->iter.hasVal = false; + pBlockScanInfo->iiter.hasVal = false; + if (pBlockScanInfo->iter.iter != NULL) { + pBlockScanInfo->iter.iter = tsdbTbDataIterDestroy(pBlockScanInfo->iter.iter); + } + + if (pBlockScanInfo->iiter.iter != NULL) { + pBlockScanInfo->iiter.iter = tsdbTbDataIterDestroy(pBlockScanInfo->iiter.iter); + } + + pBlockScanInfo->pBlockList = taosArrayDestroy(pBlockScanInfo->pBlockList); + // TODO: keep skyline for reuse + pBlockScanInfo->delSkyline = taosArrayDestroy(pBlockScanInfo->delSkyline); + } + } + + tsdbUntakeReadSnap(pReader, pReader->pReadSnap, false); + pReader->pReadSnap = NULL; + pReader->flag = READER_STATUS_SUSPEND; + + tsdbDebug("reader: %p suspended uid %" PRIu64 " in this query %s", pReader, pBlockScanInfo ? pBlockScanInfo->uid : 0, + pReader->idStr); + return code; + +_err: + tsdbError("failed to suspend data reader, code:%s %s", tstrerror(code), pReader->idStr); + return code; +} + +static int32_t tsdbSetQueryReseek(void* pQHandle) { + int32_t code = 0; + STsdbReader* pReader = pQHandle; + + code = tsdbTryAcquireReader(pReader); + if (code == 0) { + if (pReader->flag == READER_STATUS_SUSPEND) { + tsdbReleaseReader(pReader); + return code; + } + + tsdbReaderSuspend2(pReader); + + tsdbReleaseReader(pReader); + + return code; + } else if (code == EBUSY) { + return TSDB_CODE_VND_QUERY_BUSY; + } else { + terrno = TAOS_SYSTEM_ERROR(code); + return TSDB_CODE_FAILED; + } +} + +int32_t tsdbReaderResume2(STsdbReader* pReader) { + int32_t code = 0; + + STableBlockScanInfo** pBlockScanInfo = pReader->status.pTableIter; + + // restore reader's state + // task snapshot + int32_t numOfTables = tSimpleHashGetSize(pReader->status.pTableMap); + if (numOfTables > 0) { + qTrace("tsdb/reader: %p, take snapshot", pReader); + code = tsdbTakeReadSnap2(pReader, tsdbSetQueryReseek, &pReader->pReadSnap); + if (code != 
TSDB_CODE_SUCCESS) { + goto _err; + } + + if (pReader->type == TIMEWINDOW_RANGE_CONTAINED) { + code = doOpenReaderImpl(pReader); + if (code != TSDB_CODE_SUCCESS) { + return code; + } + } else { + STsdbReader* pPrevReader = pReader->innerReader[0]; + STsdbReader* pNextReader = pReader->innerReader[1]; + + // we need only one row + pPrevReader->resBlockInfo.capacity = 1; + setSharedPtr(pPrevReader, pReader); + + pNextReader->resBlockInfo.capacity = 1; + setSharedPtr(pNextReader, pReader); + + code = doOpenReaderImpl(pPrevReader); + if (code != TSDB_CODE_SUCCESS) { + return code; + } + } + } + + pReader->flag = READER_STATUS_NORMAL; + tsdbDebug("reader: %p resumed uid %" PRIu64 ", numOfTable:%" PRId32 ", in this query %s", pReader, + pBlockScanInfo ? (*pBlockScanInfo)->uid : 0, numOfTables, pReader->idStr); + return code; + +_err: + tsdbError("failed to resume data reader, code:%s %s", tstrerror(code), pReader->idStr); + return code; +} + +static bool tsdbReadRowsCountOnly(STsdbReader* pReader) { + int32_t code = TSDB_CODE_SUCCESS; + SSDataBlock* pBlock = pReader->resBlockInfo.pResBlock; + + if (pReader->status.loadFromFile == false) { + return false; + } + + code = readRowsCountFromFiles(pReader); + if (code != TSDB_CODE_SUCCESS) { + return false; + } + + code = readRowsCountFromMem(pReader); + if (code != TSDB_CODE_SUCCESS) { + return false; + } + + pBlock->info.rows = pReader->rowsNum; + pBlock->info.id.uid = 0; + pBlock->info.dataLoad = 0; + + pReader->rowsNum = 0; + + return pBlock->info.rows > 0; +} + +static int32_t doTsdbNextDataBlock2(STsdbReader* pReader, bool* hasNext) { + int32_t code = TSDB_CODE_SUCCESS; + + // cleanup the data that belongs to the previous data block + SSDataBlock* pBlock = pReader->resBlockInfo.pResBlock; + blockDataCleanup(pBlock); + + *hasNext = false; + + SReaderStatus* pStatus = &pReader->status; + if (tSimpleHashGetSize(pStatus->pTableMap) == 0) { + return code; + } + + if (READ_MODE_COUNT_ONLY == pReader->info.readMode) { + return 
tsdbReadRowsCountOnly(pReader); + } + + if (pStatus->loadFromFile) { + code = buildBlockFromFiles(pReader); + if (code != TSDB_CODE_SUCCESS) { + return code; + } + + if (pBlock->info.rows <= 0) { + resetTableListIndex(&pReader->status); + code = buildBlockFromBufferSequentially(pReader); + } + } else { // no data in files, let's try the buffer + code = buildBlockFromBufferSequentially(pReader); + } + + *hasNext = pBlock->info.rows > 0; + + return code; +} + +int32_t tsdbNextDataBlock2(STsdbReader* pReader, bool* hasNext) { + int32_t code = TSDB_CODE_SUCCESS; + + *hasNext = false; + + if (isEmptyQueryTimeWindow(&pReader->info.window) || pReader->step == EXTERNAL_ROWS_NEXT || + pReader->code != TSDB_CODE_SUCCESS) { + return (pReader->code != TSDB_CODE_SUCCESS) ? pReader->code : code; + } + + SReaderStatus* pStatus = &pReader->status; + + code = tsdbAcquireReader(pReader); + qTrace("tsdb/read: %p, take read mutex, code: %d", pReader, code); + + if (pReader->flag == READER_STATUS_SUSPEND) { + code = tsdbReaderResume2(pReader); + if (code != TSDB_CODE_SUCCESS) { + tsdbReleaseReader(pReader); + return code; + } + } + + if (pReader->innerReader[0] != NULL && pReader->step == 0) { + code = doTsdbNextDataBlock2(pReader->innerReader[0], hasNext); + if (code) { + tsdbReleaseReader(pReader); + return code; + } + + pReader->step = EXTERNAL_ROWS_PREV; + if (*hasNext) { + pStatus = &pReader->innerReader[0]->status; + if (pStatus->composedDataBlock) { + qTrace("tsdb/read: %p, unlock read mutex", pReader); + tsdbReleaseReader(pReader); + } + + return code; + } + } + + if (pReader->step == EXTERNAL_ROWS_PREV) { + // prepare for the main scan + code = doOpenReaderImpl(pReader); + int32_t step = 1; + resetAllDataBlockScanInfo(pReader->status.pTableMap, pReader->innerReader[0]->info.window.ekey, step); + + if (code != TSDB_CODE_SUCCESS) { + return code; + } + + pReader->step = EXTERNAL_ROWS_MAIN; + } + + code = doTsdbNextDataBlock2(pReader, hasNext); + if (code != TSDB_CODE_SUCCESS) { 
+ tsdbReleaseReader(pReader); + return code; + } + + if (*hasNext) { + if (pStatus->composedDataBlock) { + qTrace("tsdb/read: %p, unlock read mutex", pReader); + tsdbReleaseReader(pReader); + } + + return code; + } + + if (pReader->step == EXTERNAL_ROWS_MAIN && pReader->innerReader[1] != NULL) { + // prepare for the next row scan + int32_t step = -1; + code = doOpenReaderImpl(pReader->innerReader[1]); + resetAllDataBlockScanInfo(pReader->innerReader[1]->status.pTableMap, pReader->info.window.ekey, step); + if (code != TSDB_CODE_SUCCESS) { + return code; + } + + code = doTsdbNextDataBlock2(pReader->innerReader[1], hasNext); + if (code != TSDB_CODE_SUCCESS) { + tsdbReleaseReader(pReader); + return code; + } + + pReader->step = EXTERNAL_ROWS_NEXT; + if (*hasNext) { + pStatus = &pReader->innerReader[1]->status; + if (pStatus->composedDataBlock) { + qTrace("tsdb/read: %p, unlock read mutex", pReader); + tsdbReleaseReader(pReader); + } + + return code; + } + } + + qTrace("tsdb/read: %p, unlock read mutex", pReader); + tsdbReleaseReader(pReader); + + return code; +} + +static void doFillNullColSMA(SBlockLoadSuppInfo* pSup, int32_t numOfRows, int32_t numOfCols, SColumnDataAgg* pTsAgg) { + // do fill all null column value SMA info + int32_t i = 0, j = 0; + int32_t size = (int32_t)TARRAY2_SIZE(&pSup->colAggArray); + TARRAY2_INSERT_PTR(&pSup->colAggArray, 0, pTsAgg); + size++; + + while (j < numOfCols && i < size) { + SColumnDataAgg* pAgg = &pSup->colAggArray.data[i]; + if (pAgg->colId == pSup->colId[j]) { + i += 1; + j += 1; + } else if (pAgg->colId < pSup->colId[j]) { + i += 1; + } else if (pSup->colId[j] < pAgg->colId) { + if (pSup->colId[j] != PRIMARYKEY_TIMESTAMP_COL_ID) { + SColumnDataAgg nullColAgg = {.colId = pSup->colId[j], .numOfNull = numOfRows}; + TARRAY2_INSERT_PTR(&pSup->colAggArray, i, &nullColAgg); + i += 1; + size++; + } + j += 1; + } + } + + while (j < numOfCols) { + if (pSup->colId[j] != PRIMARYKEY_TIMESTAMP_COL_ID) { + SColumnDataAgg nullColAgg = {.colId = 
pSup->colId[j], .numOfNull = numOfRows}; + TARRAY2_INSERT_PTR(&pSup->colAggArray, i, &nullColAgg); + i += 1; + } + j++; + } +} + +int32_t tsdbRetrieveDatablockSMA2(STsdbReader* pReader, SSDataBlock* pDataBlock, bool* allHave, bool* hasNullSMA) { + SColumnDataAgg*** pBlockSMA = &pDataBlock->pBlockAgg; + + int32_t code = 0; + *allHave = false; + *pBlockSMA = NULL; + + if (pReader->type == TIMEWINDOW_RANGE_EXTERNAL) { + return TSDB_CODE_SUCCESS; + } + + // there is no statistics data for composed block + if (pReader->status.composedDataBlock || (!pReader->suppInfo.smaValid)) { + return TSDB_CODE_SUCCESS; + } + + SFileDataBlockInfo* pFBlock = getCurrentBlockInfo(&pReader->status.blockIter); + SBlockLoadSuppInfo* pSup = &pReader->suppInfo; + + SSDataBlock* pResBlock = pReader->resBlockInfo.pResBlock; + if (pResBlock->info.id.uid != pFBlock->uid) { + return TSDB_CODE_SUCCESS; + } + + // int64_t st = taosGetTimestampUs(); + TARRAY2_CLEAR(&pSup->colAggArray, 0); + + code = tsdbDataFileReadBlockSma(pReader->pFileReader, &pFBlock->record, &pSup->colAggArray); + if (code != TSDB_CODE_SUCCESS) { + tsdbDebug("vgId:%d, failed to load block SMA for uid %" PRIu64 ", code:%s, %s", 0, pFBlock->uid, tstrerror(code), + pReader->idStr); + return code; + } + + if (pSup->colAggArray.size > 0) { + *allHave = true; + } else { + *pBlockSMA = NULL; + return TSDB_CODE_SUCCESS; + } + + // always load the first primary timestamp column data + SColumnDataAgg* pTsAgg = &pSup->tsColAgg; + + pTsAgg->numOfNull = 0; + pTsAgg->colId = PRIMARYKEY_TIMESTAMP_COL_ID; + pTsAgg->min = pResBlock->info.window.skey; + pTsAgg->max = pResBlock->info.window.ekey; + + // update the number of NULL data rows + size_t numOfCols = pSup->numOfCols; + + if (pResBlock->pBlockAgg == NULL) { + size_t num = taosArrayGetSize(pResBlock->pDataBlock); + pResBlock->pBlockAgg = taosMemoryCalloc(num, POINTER_BYTES); + } + + // do fill all null column value SMA info + doFillNullColSMA(pSup, pFBlock->record.numRow, numOfCols, 
pTsAgg); + + size_t size = pSup->colAggArray.size; + + int32_t i = 0, j = 0; + while (j < numOfCols && i < size) { + SColumnDataAgg* pAgg = &pSup->colAggArray.data[i]; + if (pAgg->colId == pSup->colId[j]) { + pResBlock->pBlockAgg[pSup->slotId[j]] = pAgg; + i += 1; + j += 1; + } else if (pAgg->colId < pSup->colId[j]) { + i += 1; + } else if (pSup->colId[j] < pAgg->colId) { + pResBlock->pBlockAgg[pSup->slotId[j]] = NULL; + *allHave = false; + j += 1; + } + } + + *pBlockSMA = pResBlock->pBlockAgg; + pReader->cost.smaDataLoad += 1; + + // double elapsedTime = (taosGetTimestampUs() - st) / 1000.0; + pReader->cost.smaLoadTime += 0; // elapsedTime; + + tsdbDebug("vgId:%d, succeed to load block SMA for uid %" PRIu64 ", %s", 0, pFBlock->uid, pReader->idStr); + return code; +} + +static SSDataBlock* doRetrieveDataBlock(STsdbReader* pReader) { + SReaderStatus* pStatus = &pReader->status; + int32_t code = TSDB_CODE_SUCCESS; + SFileDataBlockInfo* pBlockInfo = getCurrentBlockInfo(&pStatus->blockIter); + + if (pReader->code != TSDB_CODE_SUCCESS) { + return NULL; + } + + STableBlockScanInfo* pBlockScanInfo = getTableBlockScanInfo(pStatus->pTableMap, pBlockInfo->uid, pReader->idStr); + if (pBlockScanInfo == NULL) { + return NULL; + } + + code = doLoadFileBlockData(pReader, &pStatus->blockIter, &pStatus->fileBlockData, pBlockScanInfo->uid); + if (code != TSDB_CODE_SUCCESS) { + tBlockDataDestroy(&pStatus->fileBlockData); + terrno = code; + return NULL; + } + + code = copyBlockDataToSDataBlock(pReader); + if (code != TSDB_CODE_SUCCESS) { + tBlockDataDestroy(&pStatus->fileBlockData); + terrno = code; + return NULL; + } + + return pReader->resBlockInfo.pResBlock; +} + +SSDataBlock* tsdbRetrieveDataBlock2(STsdbReader* pReader, SArray* pIdList) { + STsdbReader* pTReader = pReader; + if (pReader->type == TIMEWINDOW_RANGE_EXTERNAL) { + if (pReader->step == EXTERNAL_ROWS_PREV) { + pTReader = pReader->innerReader[0]; + } else if (pReader->step == EXTERNAL_ROWS_NEXT) { + pTReader = 
pReader->innerReader[1]; + } + } + + SReaderStatus* pStatus = &pTReader->status; + if (pStatus->composedDataBlock) { + return pTReader->resBlockInfo.pResBlock; + } + + SSDataBlock* ret = doRetrieveDataBlock(pTReader); + + qTrace("tsdb/read-retrieve: %p, unlock read mutex", pReader); + tsdbReleaseReader(pReader); + + return ret; +} + +int32_t tsdbReaderReset2(STsdbReader* pReader, SQueryTableDataCond* pCond) { + int32_t code = TSDB_CODE_SUCCESS; + + qTrace("tsdb/reader-reset: %p, take read mutex", pReader); + tsdbAcquireReader(pReader); + + if (pReader->flag == READER_STATUS_SUSPEND) { + code = tsdbReaderResume2(pReader); + if (code != TSDB_CODE_SUCCESS) { + tsdbReleaseReader(pReader); + return code; + } + } + + if (isEmptyQueryTimeWindow(&pReader->info.window) || pReader->pReadSnap == NULL) { + tsdbDebug("tsdb reader reset return %p, %s", pReader->pReadSnap, pReader->idStr); + tsdbReleaseReader(pReader); + return TSDB_CODE_SUCCESS; + } + + SReaderStatus* pStatus = &pReader->status; + SDataBlockIter* pBlockIter = &pStatus->blockIter; + + pReader->info.order = pCond->order; + pReader->type = TIMEWINDOW_RANGE_CONTAINED; + pStatus->loadFromFile = true; + pStatus->pTableIter = NULL; + pReader->info.window = updateQueryTimeWindow(pReader->pTsdb, &pCond->twindows); + + // allocate buffer in order to load data blocks from file + memset(&pReader->suppInfo.tsColAgg, 0, sizeof(SColumnDataAgg)); + + pReader->suppInfo.tsColAgg.colId = PRIMARYKEY_TIMESTAMP_COL_ID; + tsdbDataFileReaderClose(&pReader->pFileReader); + + int32_t numOfTables = tSimpleHashGetSize(pStatus->pTableMap); + + initFilesetIterator(&pStatus->fileIter, pReader->pReadSnap->pfSetArray, pReader); + resetDataBlockIterator(pBlockIter, pReader->info.order); + resetTableListIndex(&pReader->status); + + bool asc = ASCENDING_TRAVERSE(pReader->info.order); + int32_t step = asc ? 1 : -1; + int64_t ts = asc ? 
/**
 * Map a block's row count to a histogram bucket.
 *
 * Bucket i covers row counts in [startRow + i * bucketRange, startRow + (i + 1) * bucketRange).
 * The result is always a valid index in [0, numOfBucket - 1]:
 *  - row counts below startRow fall into bucket 0;
 *  - row counts at or beyond the upper end fall into the last bucket;
 *  - a degenerate histogram (bucketRange <= 0 or numOfBucket <= 0) yields 0 instead of
 *    dividing by zero.
 *
 * @param startRow     row count at which bucket 0 starts
 * @param bucketRange  width of one bucket, in rows
 * @param numOfRows    row count of the block being classified
 * @param numOfBucket  number of buckets in the histogram
 * @return bucket index for numOfRows
 */
static int32_t getBucketIndex(int32_t startRow, int32_t bucketRange, int32_t numOfRows, int32_t numOfBucket) {
  if (numOfRows < startRow || bucketRange <= 0 || numOfBucket <= 0) {
    return 0;
  }

  int32_t bucketIndex = (numOfRows - startRow) / bucketRange;

  // clamp everything past the last bucket, not only the exact upper bound
  if (bucketIndex >= numOfBucket) {
    bucketIndex = numOfBucket - 1;
  }

  return bucketIndex;
}
pTableBlockInfo->defMaxRows = pc->maxRows; + + int32_t bucketRange = ceil(((double)(pc->maxRows - pc->minRows)) / numOfBuckets); + + pTableBlockInfo->numOfFiles += 1; + + int32_t numOfTables = (int32_t)tSimpleHashGetSize(pStatus->pTableMap); + + SDataBlockIter* pBlockIter = &pStatus->blockIter; + pTableBlockInfo->numOfFiles += pStatus->fileIter.numOfFiles; + + if (pBlockIter->numOfBlocks > 0) { + pTableBlockInfo->numOfBlocks += pBlockIter->numOfBlocks; + } + + pTableBlockInfo->numOfTables = numOfTables; + bool hasNext = (pBlockIter->numOfBlocks > 0); + + while (true) { + if (hasNext) { + SFileDataBlockInfo* pBlockInfo = getCurrentBlockInfo(pBlockIter); + int32_t numOfRows = pBlockInfo->record.numRow; + + pTableBlockInfo->totalRows += numOfRows; + + if (numOfRows > pTableBlockInfo->maxRows) { + pTableBlockInfo->maxRows = numOfRows; + } + + if (numOfRows < pTableBlockInfo->minRows) { + pTableBlockInfo->minRows = numOfRows; + } + + if (numOfRows < defaultRows) { + pTableBlockInfo->numOfSmallBlocks += 1; + } + + pTableBlockInfo->totalSize += pBlockInfo->record.blockSize; + + int32_t bucketIndex = getBucketIndex(pTableBlockInfo->defMinRows, bucketRange, numOfRows, numOfBuckets); + pTableBlockInfo->blockRowsHisto[bucketIndex]++; + + hasNext = blockIteratorNext(&pStatus->blockIter, pReader->idStr); + } else { + code = initForFirstBlockInFile(pReader, pBlockIter); + if ((code != TSDB_CODE_SUCCESS) || (pStatus->loadFromFile == false)) { + break; + } + + pTableBlockInfo->numOfBlocks += pBlockIter->numOfBlocks; + hasNext = (pBlockIter->numOfBlocks > 0); + } + + // tsdbDebug("%p %d blocks found in file for %d table(s), fid:%d, %s", pReader, numOfBlocks, numOfTables, + // pReader->pFileGroup->fid, pReader->idStr); + } + tsdbReleaseReader(pReader); + return code; +} + +int64_t tsdbGetNumOfRowsInMemTable2(STsdbReader* pReader) { + int32_t code = TSDB_CODE_SUCCESS; + int64_t rows = 0; + + SReaderStatus* pStatus = &pReader->status; + tsdbAcquireReader(pReader); + if (pReader->flag 
== READER_STATUS_SUSPEND) { + code = tsdbReaderResume2(pReader); + if (code != TSDB_CODE_SUCCESS) { + tsdbReleaseReader(pReader); + return code; + } + } + + int32_t iter = 0; + pStatus->pTableIter = tSimpleHashIterate(pStatus->pTableMap, NULL, &iter); + + while (pStatus->pTableIter != NULL) { + STableBlockScanInfo* pBlockScanInfo = *(STableBlockScanInfo**)pStatus->pTableIter; + + STbData* d = NULL; + if (pReader->pReadSnap->pMem != NULL) { + d = tsdbGetTbDataFromMemTable(pReader->pReadSnap->pMem, pReader->info.suid, pBlockScanInfo->uid); + if (d != NULL) { + rows += tsdbGetNRowsInTbData(d); + } + } + + STbData* di = NULL; + if (pReader->pReadSnap->pIMem != NULL) { + di = tsdbGetTbDataFromMemTable(pReader->pReadSnap->pIMem, pReader->info.suid, pBlockScanInfo->uid); + if (di != NULL) { + rows += tsdbGetNRowsInTbData(di); + } + } + + // current table is exhausted, let's try the next table + pStatus->pTableIter = tSimpleHashIterate(pStatus->pTableMap, pStatus->pTableIter, &iter); + } + + tsdbReleaseReader(pReader); + + return rows; +} + +int32_t tsdbGetTableSchema2(void* pVnode, int64_t uid, STSchema** pSchema, int64_t* suid) { + SMetaReader mr = {0}; + metaReaderDoInit(&mr, ((SVnode*)pVnode)->pMeta, 0); + int32_t code = metaReaderGetTableEntryByUidCache(&mr, uid); + if (code != TSDB_CODE_SUCCESS) { + terrno = TSDB_CODE_TDB_INVALID_TABLE_ID; + metaReaderClear(&mr); + return terrno; + } + + *suid = 0; + + // only child table and ordinary table is allowed, super table is not allowed. 
+ if (mr.me.type == TSDB_CHILD_TABLE) { + tDecoderClear(&mr.coder); + *suid = mr.me.ctbEntry.suid; + code = metaReaderGetTableEntryByUidCache(&mr, *suid); + if (code != TSDB_CODE_SUCCESS) { + terrno = TSDB_CODE_TDB_INVALID_TABLE_ID; + metaReaderClear(&mr); + return terrno; + } + } else if (mr.me.type == TSDB_NORMAL_TABLE) { // do nothing + } else { + terrno = TSDB_CODE_INVALID_PARA; + metaReaderClear(&mr); + return terrno; + } + + metaReaderClear(&mr); + + // get the newest table schema version + code = metaGetTbTSchemaEx(((SVnode*)pVnode)->pMeta, *suid, uid, -1, pSchema); + return code; +} + +int32_t tsdbTakeReadSnap2(STsdbReader* pReader, _query_reseek_func_t reseek, STsdbReadSnap** ppSnap) { + int32_t code = 0; + STsdb* pTsdb = pReader->pTsdb; + SVersionRange* pRange = &pReader->info.verRange; + + // lock + taosThreadRwlockRdlock(&pTsdb->rwLock); + + // alloc + STsdbReadSnap* pSnap = (STsdbReadSnap*)taosMemoryCalloc(1, sizeof(STsdbReadSnap)); + if (pSnap == NULL) { + code = TSDB_CODE_OUT_OF_MEMORY; + goto _exit; + } + + // take snapshot + if (pTsdb->mem && (pRange->minVer <= pTsdb->mem->maxVer && pRange->maxVer >= pTsdb->mem->minVer)) { + pSnap->pMem = pTsdb->mem; + pSnap->pNode = taosMemoryMalloc(sizeof(*pSnap->pNode)); + if (pSnap->pNode == NULL) { + taosThreadRwlockUnlock(&pTsdb->rwLock); + code = TSDB_CODE_OUT_OF_MEMORY; + goto _exit; + } + pSnap->pNode->pQHandle = pReader; + pSnap->pNode->reseek = reseek; + + tsdbRefMemTable(pTsdb->mem, pSnap->pNode); + } + + if (pTsdb->imem && (pRange->minVer <= pTsdb->imem->maxVer && pRange->maxVer >= pTsdb->imem->minVer)) { + pSnap->pIMem = pTsdb->imem; + pSnap->pINode = taosMemoryMalloc(sizeof(*pSnap->pINode)); + if (pSnap->pINode == NULL) { + taosThreadRwlockUnlock(&pTsdb->rwLock); + code = TSDB_CODE_OUT_OF_MEMORY; + goto _exit; + } + pSnap->pINode->pQHandle = pReader; + pSnap->pINode->reseek = reseek; + + tsdbRefMemTable(pTsdb->imem, pSnap->pINode); + } + + // fs + code = tsdbFSCreateRefSnapshot(pTsdb->pFS, 
&pSnap->pfSetArray); + if (code) { + taosThreadRwlockUnlock(&pTsdb->rwLock); + goto _exit; + } + + // unlock + taosThreadRwlockUnlock(&pTsdb->rwLock); + + tsdbTrace("vgId:%d, take read snapshot", TD_VID(pTsdb->pVnode)); + +_exit: + if (code) { + *ppSnap = NULL; + if (pSnap) { + if (pSnap->pNode) taosMemoryFree(pSnap->pNode); + if (pSnap->pINode) taosMemoryFree(pSnap->pINode); + taosMemoryFree(pSnap); + } + } else { + *ppSnap = pSnap; + } + + return code; +} + +void tsdbUntakeReadSnap2(STsdbReader* pReader, STsdbReadSnap* pSnap, bool proactive) { + STsdb* pTsdb = pReader->pTsdb; + + if (pSnap) { + if (pSnap->pMem) { + tsdbUnrefMemTable(pSnap->pMem, pSnap->pNode, proactive); + } + + if (pSnap->pIMem) { + tsdbUnrefMemTable(pSnap->pIMem, pSnap->pINode, proactive); + } + + tsdbFSUnref(pTsdb, &pSnap->fs); + if (pSnap->pNode) taosMemoryFree(pSnap->pNode); + if (pSnap->pINode) taosMemoryFree(pSnap->pINode); + + tsdbFSDestroyRefSnapshot(&pSnap->pfSetArray); + + taosMemoryFree(pSnap); + } + tsdbTrace("vgId:%d, untake read snapshot", TD_VID(pTsdb->pVnode)); +} + +// if failed, do nothing +void tsdbReaderSetId2(STsdbReader* pReader, const char* idstr) { + taosMemoryFreeClear(pReader->idStr); + pReader->idStr = taosStrdup(idstr); + pReader->status.fileIter.pLastBlockReader->mergeTree.idStr = pReader->idStr; +} + +void tsdbReaderSetCloseFlag2(STsdbReader* pReader) { pReader->code = TSDB_CODE_TSC_QUERY_CANCELLED; } diff --git a/source/dnode/vnode/src/tsdb/tsdbReadUtil.c b/source/dnode/vnode/src/tsdb/tsdbReadUtil.c new file mode 100644 index 0000000000..635a74d8dd --- /dev/null +++ b/source/dnode/vnode/src/tsdb/tsdbReadUtil.c @@ -0,0 +1,630 @@ +/* + * Copyright (c) 2019 TAOS Data, Inc. + * + * This program is free software: you can use, redistribute, and/or modify + * it under the terms of the GNU Affero General Public License, version 3 + * or later ("AGPL"), as published by the Free Software Foundation. 
+ * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see . + */ + +#include "tsdbReadUtil.h" +#include "osDef.h" +#include "tsdb.h" +#include "tsdbDataFileRW.h" +#include "tsdbFS2.h" +#include "tsdbMerge.h" +#include "tsdbUtil2.h" +#include "tsimplehash.h" + +static int32_t uidComparFunc(const void* p1, const void* p2) { + uint64_t pu1 = *(uint64_t*)p1; + uint64_t pu2 = *(uint64_t*)p2; + if (pu1 == pu2) { + return 0; + } else { + return (pu1 < pu2) ? -1 : 1; + } +} + +static int32_t initBlockScanInfoBuf(SBlockInfoBuf* pBuf, int32_t numOfTables) { + int32_t num = numOfTables / pBuf->numPerBucket; + int32_t remainder = numOfTables % pBuf->numPerBucket; + if (pBuf->pData == NULL) { + pBuf->pData = taosArrayInit(num + 1, POINTER_BYTES); + } + + for (int32_t i = 0; i < num; ++i) { + char* p = taosMemoryCalloc(pBuf->numPerBucket, sizeof(STableBlockScanInfo)); + if (p == NULL) { + return TSDB_CODE_OUT_OF_MEMORY; + } + + taosArrayPush(pBuf->pData, &p); + } + + if (remainder > 0) { + char* p = taosMemoryCalloc(remainder, sizeof(STableBlockScanInfo)); + if (p == NULL) { + return TSDB_CODE_OUT_OF_MEMORY; + } + taosArrayPush(pBuf->pData, &p); + } + + pBuf->numOfTables = numOfTables; + + return TSDB_CODE_SUCCESS; +} + +int32_t ensureBlockScanInfoBuf(SBlockInfoBuf* pBuf, int32_t numOfTables) { + if (numOfTables <= pBuf->numOfTables) { + return TSDB_CODE_SUCCESS; + } + + if (pBuf->numOfTables > 0) { + STableBlockScanInfo** p = (STableBlockScanInfo**)taosArrayPop(pBuf->pData); + taosMemoryFree(*p); + pBuf->numOfTables /= pBuf->numPerBucket; + } + + int32_t num = (numOfTables - pBuf->numOfTables) / pBuf->numPerBucket; + int32_t remainder = (numOfTables - pBuf->numOfTables) % pBuf->numPerBucket; + if (pBuf->pData == NULL) { + 
pBuf->pData = taosArrayInit(num + 1, POINTER_BYTES); + } + + for (int32_t i = 0; i < num; ++i) { + char* p = taosMemoryCalloc(pBuf->numPerBucket, sizeof(STableBlockScanInfo)); + if (p == NULL) { + return TSDB_CODE_OUT_OF_MEMORY; + } + + taosArrayPush(pBuf->pData, &p); + } + + if (remainder > 0) { + char* p = taosMemoryCalloc(remainder, sizeof(STableBlockScanInfo)); + if (p == NULL) { + return TSDB_CODE_OUT_OF_MEMORY; + } + taosArrayPush(pBuf->pData, &p); + } + + pBuf->numOfTables = numOfTables; + + return TSDB_CODE_SUCCESS; +} + +void clearBlockScanInfoBuf(SBlockInfoBuf* pBuf) { + size_t num = taosArrayGetSize(pBuf->pData); + for (int32_t i = 0; i < num; ++i) { + char** p = taosArrayGet(pBuf->pData, i); + taosMemoryFree(*p); + } + + taosArrayDestroy(pBuf->pData); +} + +void* getPosInBlockInfoBuf(SBlockInfoBuf* pBuf, int32_t index) { + int32_t bucketIndex = index / pBuf->numPerBucket; + char** pBucket = taosArrayGet(pBuf->pData, bucketIndex); + return (*pBucket) + (index % pBuf->numPerBucket) * sizeof(STableBlockScanInfo); +} + +STableBlockScanInfo* getTableBlockScanInfo(SSHashObj* pTableMap, uint64_t uid, const char* id) { + STableBlockScanInfo** p = tSimpleHashGet(pTableMap, &uid, sizeof(uid)); + if (p == NULL || *p == NULL) { + terrno = TSDB_CODE_INVALID_PARA; + int32_t size = tSimpleHashGetSize(pTableMap); + tsdbError("failed to locate the uid:%" PRIu64 " in query table uid list, total tables:%d, %s", uid, size, id); + return NULL; + } + + return *p; +} + +// NOTE: speedup the whole processing by preparing the buffer for STableBlockScanInfo in batch model +SSHashObj* createDataBlockScanInfo(STsdbReader* pTsdbReader, SBlockInfoBuf* pBuf, const STableKeyInfo* idList, + STableUidList* pUidList, int32_t numOfTables) { + // allocate buffer in order to load data blocks from file + // todo use simple hash instead, optimize the memory consumption + SSHashObj* pTableMap = tSimpleHashInit(numOfTables, taosGetDefaultHashFunction(TSDB_DATA_TYPE_BIGINT)); + if (pTableMap == 
NULL) { + return NULL; + } + + int64_t st = taosGetTimestampUs(); + initBlockScanInfoBuf(pBuf, numOfTables); + + pUidList->tableUidList = taosMemoryMalloc(numOfTables * sizeof(uint64_t)); + if (pUidList->tableUidList == NULL) { + tSimpleHashCleanup(pTableMap); + return NULL; + } + + pUidList->currentIndex = 0; + + for (int32_t j = 0; j < numOfTables; ++j) { + STableBlockScanInfo* pScanInfo = getPosInBlockInfoBuf(pBuf, j); + + pScanInfo->uid = idList[j].uid; + pUidList->tableUidList[j] = idList[j].uid; + + if (ASCENDING_TRAVERSE(pTsdbReader->info.order)) { + int64_t skey = pTsdbReader->info.window.skey; + pScanInfo->lastKey = (skey > INT64_MIN) ? (skey - 1) : skey; + pScanInfo->lastKeyInStt = skey; + } else { + int64_t ekey = pTsdbReader->info.window.ekey; + pScanInfo->lastKey = (ekey < INT64_MAX) ? (ekey + 1) : ekey; + pScanInfo->lastKeyInStt = ekey; + } + + tSimpleHashPut(pTableMap, &pScanInfo->uid, sizeof(uint64_t), &pScanInfo, POINTER_BYTES); + tsdbTrace("%p check table uid:%" PRId64 " from lastKey:%" PRId64 " %s", pTsdbReader, pScanInfo->uid, + pScanInfo->lastKey, pTsdbReader->idStr); + } + + taosSort(pUidList->tableUidList, numOfTables, sizeof(uint64_t), uidComparFunc); + + pTsdbReader->cost.createScanInfoList = (taosGetTimestampUs() - st) / 1000.0; + tsdbDebug("%p create %d tables scan-info, size:%.2f Kb, elapsed time:%.2f ms, %s", pTsdbReader, numOfTables, + (sizeof(STableBlockScanInfo) * numOfTables) / 1024.0, pTsdbReader->cost.createScanInfoList, + pTsdbReader->idStr); + + return pTableMap; +} + +void resetAllDataBlockScanInfo(SSHashObj* pTableMap, int64_t ts, int32_t step) { + void* p = NULL; + int32_t iter = 0; + + while ((p = tSimpleHashIterate(pTableMap, p, &iter)) != NULL) { + STableBlockScanInfo* pInfo = *(STableBlockScanInfo**)p; + + pInfo->iterInit = false; + pInfo->iter.hasVal = false; + pInfo->iiter.hasVal = false; + + if (pInfo->iter.iter != NULL) { + pInfo->iter.iter = tsdbTbDataIterDestroy(pInfo->iter.iter); + } + + if (pInfo->iiter.iter != 
NULL) { + pInfo->iiter.iter = tsdbTbDataIterDestroy(pInfo->iiter.iter); + } + + pInfo->delSkyline = taosArrayDestroy(pInfo->delSkyline); + pInfo->lastKey = ts; + pInfo->lastKeyInStt = ts + step; + } +} + +void clearBlockScanInfo(STableBlockScanInfo* p) { + p->iterInit = false; + p->iter.hasVal = false; + p->iiter.hasVal = false; + + if (p->iter.iter != NULL) { + p->iter.iter = tsdbTbDataIterDestroy(p->iter.iter); + } + + if (p->iiter.iter != NULL) { + p->iiter.iter = tsdbTbDataIterDestroy(p->iiter.iter); + } + + p->delSkyline = taosArrayDestroy(p->delSkyline); + p->pBlockList = taosArrayDestroy(p->pBlockList); + p->pMemDelData = taosArrayDestroy(p->pMemDelData); + p->pfileDelData = taosArrayDestroy(p->pfileDelData); +} + +void destroyAllBlockScanInfo(SSHashObj* pTableMap) { + void* p = NULL; + int32_t iter = 0; + + while ((p = tSimpleHashIterate(pTableMap, p, &iter)) != NULL) { + clearBlockScanInfo(*(STableBlockScanInfo**)p); + } + + tSimpleHashCleanup(pTableMap); +} + +static void doCleanupInfoForNextFileset(STableBlockScanInfo* pScanInfo) { + // reset the index in last block when handing a new file + taosArrayClear(pScanInfo->pBlockList); + taosArrayClear(pScanInfo->pfileDelData); // del data from each file set +} + +void cleanupInfoFoxNextFileset(SSHashObj* pTableMap) { + STableBlockScanInfo** p = NULL; + + int32_t iter = 0; + while ((p = tSimpleHashIterate(pTableMap, p, &iter)) != NULL) { + doCleanupInfoForNextFileset(*p); + } +} + +// brin records iterator +void initBrinRecordIter(SBrinRecordIter* pIter, SDataFileReader* pReader, SArray* pList) { + memset(&pIter->block, 0, sizeof(SBrinBlock)); + memset(&pIter->record, 0, sizeof(SBrinRecord)); + pIter->blockIndex = -1; + pIter->recordIndex = -1; + + pIter->pReader = pReader; + pIter->pBrinBlockList = pList; +} + +SBrinRecord* getNextBrinRecord(SBrinRecordIter* pIter) { + if (pIter->blockIndex == -1 || (pIter->recordIndex + 1) >= TARRAY2_SIZE(pIter->block.numRow)) { + pIter->blockIndex += 1; + if 
(pIter->blockIndex >= taosArrayGetSize(pIter->pBrinBlockList)) { + return NULL; + } + + pIter->pCurrentBlk = taosArrayGet(pIter->pBrinBlockList, pIter->blockIndex); + + tBrinBlockClear(&pIter->block); + tsdbDataFileReadBrinBlock(pIter->pReader, pIter->pCurrentBlk, &pIter->block); + pIter->recordIndex = -1; + } + + pIter->recordIndex += 1; + tBrinBlockGet(&pIter->block, pIter->recordIndex, &pIter->record); + return &pIter->record; +} + +void clearBrinBlockIter(SBrinRecordIter* pIter) { tBrinBlockDestroy(&pIter->block); } + +// initialize the file block access order +// sort the file blocks according to the offset of each data block in the files +static void cleanupBlockOrderSupporter(SBlockOrderSupporter* pSup) { + taosMemoryFreeClear(pSup->numOfBlocksPerTable); + taosMemoryFreeClear(pSup->indexPerTable); + + for (int32_t i = 0; i < pSup->numOfTables; ++i) { + SBlockOrderWrapper* pBlockInfo = pSup->pDataBlockInfo[i]; + taosMemoryFreeClear(pBlockInfo); + } + + taosMemoryFreeClear(pSup->pDataBlockInfo); +} + +static int32_t initBlockOrderSupporter(SBlockOrderSupporter* pSup, int32_t numOfTables) { + pSup->numOfBlocksPerTable = taosMemoryCalloc(1, sizeof(int32_t) * numOfTables); + pSup->indexPerTable = taosMemoryCalloc(1, sizeof(int32_t) * numOfTables); + pSup->pDataBlockInfo = taosMemoryCalloc(1, POINTER_BYTES * numOfTables); + + if (pSup->numOfBlocksPerTable == NULL || pSup->indexPerTable == NULL || pSup->pDataBlockInfo == NULL) { + cleanupBlockOrderSupporter(pSup); + return TSDB_CODE_OUT_OF_MEMORY; + } + + return TSDB_CODE_SUCCESS; +} + +static int32_t fileDataBlockOrderCompar(const void* pLeft, const void* pRight, void* param) { + int32_t leftIndex = *(int32_t*)pLeft; + int32_t rightIndex = *(int32_t*)pRight; + + SBlockOrderSupporter* pSupporter = (SBlockOrderSupporter*)param; + + int32_t leftTableBlockIndex = pSupporter->indexPerTable[leftIndex]; + int32_t rightTableBlockIndex = pSupporter->indexPerTable[rightIndex]; + + if (leftTableBlockIndex > 
pSupporter->numOfBlocksPerTable[leftIndex]) { + /* left block is empty */ + return 1; + } else if (rightTableBlockIndex > pSupporter->numOfBlocksPerTable[rightIndex]) { + /* right block is empty */ + return -1; + } + + SBlockOrderWrapper* pLeftBlock = &pSupporter->pDataBlockInfo[leftIndex][leftTableBlockIndex]; + SBlockOrderWrapper* pRightBlock = &pSupporter->pDataBlockInfo[rightIndex][rightTableBlockIndex]; + + return pLeftBlock->offset > pRightBlock->offset ? 1 : -1; +} + +int32_t initBlockIterator(STsdbReader* pReader, SDataBlockIter* pBlockIter, int32_t numOfBlocks, SArray* pTableList) { + bool asc = ASCENDING_TRAVERSE(pReader->info.order); + + SBlockOrderSupporter sup = {0}; + pBlockIter->numOfBlocks = numOfBlocks; + taosArrayClear(pBlockIter->blockList); + + pBlockIter->pTableMap = pReader->status.pTableMap; + + // access data blocks according to the offset of each block in asc/desc order. + int32_t numOfTables = taosArrayGetSize(pTableList); + + int64_t st = taosGetTimestampUs(); + int32_t code = initBlockOrderSupporter(&sup, numOfTables); + if (code != TSDB_CODE_SUCCESS) { + return code; + } + + int32_t cnt = 0; + + for (int32_t i = 0; i < numOfTables; ++i) { + STableBlockScanInfo* pTableScanInfo = taosArrayGetP(pTableList, i); + // ASSERT(pTableScanInfo->pBlockList != NULL && taosArrayGetSize(pTableScanInfo->pBlockList) > 0); + + size_t num = taosArrayGetSize(pTableScanInfo->pBlockList); + sup.numOfBlocksPerTable[sup.numOfTables] = num; + + char* buf = taosMemoryMalloc(sizeof(SBlockOrderWrapper) * num); + if (buf == NULL) { + cleanupBlockOrderSupporter(&sup); + return TSDB_CODE_OUT_OF_MEMORY; + } + + sup.pDataBlockInfo[sup.numOfTables] = (SBlockOrderWrapper*)buf; + + for (int32_t k = 0; k < num; ++k) { + SBrinRecord* pRecord = taosArrayGet(pTableScanInfo->pBlockList, k); + sup.pDataBlockInfo[sup.numOfTables][k] = + (SBlockOrderWrapper){.uid = pTableScanInfo->uid, .offset = pRecord->blockOffset, .pInfo = pTableScanInfo}; + cnt++; + } + + sup.numOfTables += 
1; + } + + if (numOfBlocks != cnt && sup.numOfTables != numOfTables) { + cleanupBlockOrderSupporter(&sup); + return TSDB_CODE_INVALID_PARA; + } + + // since there is only one table qualified, blocks are not sorted + if (sup.numOfTables == 1) { + for (int32_t i = 0; i < numOfBlocks; ++i) { + SFileDataBlockInfo blockInfo = {.uid = sup.pDataBlockInfo[0][i].uid, .tbBlockIdx = i}; + blockInfo.record = *(SBrinRecord*)taosArrayGet(sup.pDataBlockInfo[0][i].pInfo->pBlockList, i); + + taosArrayPush(pBlockIter->blockList, &blockInfo); + } + + int64_t et = taosGetTimestampUs(); + tsdbDebug("%p create blocks info struct completed for one table, %d blocks not sorted, elapsed time:%.2f ms %s", + pReader, numOfBlocks, (et - st) / 1000.0, pReader->idStr); + + pBlockIter->index = asc ? 0 : (numOfBlocks - 1); + cleanupBlockOrderSupporter(&sup); + return TSDB_CODE_SUCCESS; + } + + tsdbDebug("%p create data blocks info struct completed, %d blocks in %d tables %s", pReader, cnt, sup.numOfTables, + pReader->idStr); + + SMultiwayMergeTreeInfo* pTree = NULL; + + uint8_t ret = tMergeTreeCreate(&pTree, sup.numOfTables, &sup, fileDataBlockOrderCompar); + if (ret != TSDB_CODE_SUCCESS) { + cleanupBlockOrderSupporter(&sup); + return TSDB_CODE_OUT_OF_MEMORY; + } + + int32_t numOfTotal = 0; + while (numOfTotal < cnt) { + int32_t pos = tMergeTreeGetChosenIndex(pTree); + int32_t index = sup.indexPerTable[pos]++; + + SFileDataBlockInfo blockInfo = {.uid = sup.pDataBlockInfo[pos][index].uid, .tbBlockIdx = index}; + blockInfo.record = *(SBrinRecord*)taosArrayGet(sup.pDataBlockInfo[pos][index].pInfo->pBlockList, index); + + taosArrayPush(pBlockIter->blockList, &blockInfo); + + // set data block index overflow, in order to disable the offset comparator + if (sup.indexPerTable[pos] >= sup.numOfBlocksPerTable[pos]) { + sup.indexPerTable[pos] = sup.numOfBlocksPerTable[pos] + 1; + } + + numOfTotal += 1; + tMergeTreeAdjust(pTree, tMergeTreeGetAdjustIndex(pTree)); + } + + int64_t et = taosGetTimestampUs(); + 
tsdbDebug("%p %d data blocks access order completed, elapsed time:%.2f ms %s", pReader, numOfBlocks, + (et - st) / 1000.0, pReader->idStr); + cleanupBlockOrderSupporter(&sup); + taosMemoryFree(pTree); + + pBlockIter->index = asc ? 0 : (numOfBlocks - 1); + return TSDB_CODE_SUCCESS; +} + +bool blockIteratorNext(SDataBlockIter* pBlockIter, const char* idStr) { + bool asc = ASCENDING_TRAVERSE(pBlockIter->order); + + int32_t step = asc ? 1 : -1; + if ((pBlockIter->index >= pBlockIter->numOfBlocks - 1 && asc) || (pBlockIter->index <= 0 && (!asc))) { + return false; + } + + pBlockIter->index += step; + return true; +} + +typedef enum { + BLK_CHECK_CONTINUE = 0x1, + BLK_CHECK_QUIT = 0x2, +} ETombBlkCheckEnum; + +static int32_t doCheckTombBlock(STombBlock* pBlock, STsdbReader* pReader, int32_t numOfTables, int32_t* j, + STableBlockScanInfo** pScanInfo, ETombBlkCheckEnum* pRet) { + int32_t code = 0; + STombRecord record = {0}; + uint64_t uid = pReader->status.uidList.tableUidList[*j]; + + for (int32_t k = 0; k < TARRAY2_SIZE(pBlock->suid); ++k) { + code = tTombBlockGet(pBlock, k, &record); + if (code != TSDB_CODE_SUCCESS) { + *pRet = BLK_CHECK_QUIT; + return code; + } + + if (record.suid < pReader->info.suid) { + continue; + } + + if (record.suid > pReader->info.suid) { + *pRet = BLK_CHECK_QUIT; + return TSDB_CODE_SUCCESS; + } + + bool newTable = false; + if (uid < record.uid) { + while ((*j) < numOfTables && pReader->status.uidList.tableUidList[*j] < record.uid) { + (*j) += 1; + newTable = true; + } + + if ((*j) >= numOfTables) { + *pRet = BLK_CHECK_QUIT; + return TSDB_CODE_SUCCESS; + } + + uid = pReader->status.uidList.tableUidList[*j]; + } + + if (record.uid < uid) { + continue; + } + + ASSERT(record.suid == pReader->info.suid && uid == record.uid); + + if (newTable) { + (*pScanInfo) = getTableBlockScanInfo(pReader->status.pTableMap, uid, pReader->idStr); + if ((*pScanInfo)->pfileDelData == NULL) { + (*pScanInfo)->pfileDelData = taosArrayInit(4, sizeof(SDelData)); + } + } 
+ + if (record.version <= pReader->info.verRange.maxVer) { + SDelData delData = {.version = record.version, .sKey = record.skey, .eKey = record.ekey}; + taosArrayPush((*pScanInfo)->pfileDelData, &delData); + } + } + + *pRet = BLK_CHECK_CONTINUE; + return TSDB_CODE_SUCCESS; +} + +// load tomb data API +static int32_t doLoadTombDataFromTombBlk(const TTombBlkArray* pTombBlkArray, STsdbReader* pReader, void* pFileReader, + bool isFile) { + int32_t code = 0; + STableUidList* pList = &pReader->status.uidList; + int32_t numOfTables = tSimpleHashGetSize(pReader->status.pTableMap); + + int32_t i = 0, j = 0; + while (i < pTombBlkArray->size && j < numOfTables) { + STombBlk* pTombBlk = &pTombBlkArray->data[i]; + if (pTombBlk->maxTbid.suid < pReader->info.suid) { + i += 1; + continue; + } + + if (pTombBlk->minTbid.suid > pReader->info.suid) { + break; + } + + ASSERT(pTombBlk->minTbid.suid <= pReader->info.suid && pTombBlk->maxTbid.suid >= pReader->info.suid); + if (pTombBlk->maxTbid.suid == pReader->info.suid && pTombBlk->maxTbid.uid < pList->tableUidList[0]) { + i += 1; + continue; + } + + if (pTombBlk->minTbid.suid == pReader->info.suid && pTombBlk->minTbid.uid > pList->tableUidList[numOfTables - 1]) { + break; + } + + STombBlock block = {0}; + code = isFile ? 
tsdbDataFileReadTombBlock(pFileReader, &pTombBlkArray->data[i], &block) + : tsdbSttFileReadTombBlock(pFileReader, &pTombBlkArray->data[i], &block); + if (code != TSDB_CODE_SUCCESS) { + return code; + } + + uint64_t uid = pReader->status.uidList.tableUidList[j]; + + STableBlockScanInfo* pScanInfo = getTableBlockScanInfo(pReader->status.pTableMap, uid, pReader->idStr); + if (pScanInfo->pfileDelData == NULL) { + pScanInfo->pfileDelData = taosArrayInit(4, sizeof(SDelData)); + } + + ETombBlkCheckEnum ret = 0; + code = doCheckTombBlock(&block, pReader, numOfTables, &j, &pScanInfo, &ret); + + tTombBlockDestroy(&block); + if (code != TSDB_CODE_SUCCESS || ret == BLK_CHECK_QUIT) { + return code; + } + + i += 1; + } + + return TSDB_CODE_SUCCESS; +} + +int32_t loadDataFileTombDataForAll(STsdbReader* pReader) { + if (pReader->status.pCurrentFileset == NULL || pReader->status.pCurrentFileset->farr[3] == NULL) { + return TSDB_CODE_SUCCESS; + } + + const TTombBlkArray* pBlkArray = NULL; + + int32_t code = tsdbDataFileReadTombBlk(pReader->pFileReader, &pBlkArray); + if (code != TSDB_CODE_SUCCESS) { + return code; + } + + return doLoadTombDataFromTombBlk(pBlkArray, pReader, pReader->pFileReader, true); +} + +int32_t loadSttTombDataForAll(STsdbReader* pReader, SSttFileReader* pSttFileReader, SSttBlockLoadInfo* pLoadInfo) { + const TTombBlkArray* pBlkArray = NULL; + int32_t code = tsdbSttFileReadTombBlk(pSttFileReader, &pBlkArray); + if (code != TSDB_CODE_SUCCESS) { + return code; + } + + return doLoadTombDataFromTombBlk(pBlkArray, pReader, pSttFileReader, false); +} + +void loadMemTombData(SArray** ppMemDelData, STbData* pMemTbData, STbData* piMemTbData, int64_t ver) { + if (*ppMemDelData == NULL) { + *ppMemDelData = taosArrayInit(4, sizeof(SDelData)); + } + + SArray* pMemDelData = *ppMemDelData; + + SDelData* p = NULL; + if (pMemTbData != NULL) { + p = pMemTbData->pHead; + while (p) { + if (p->version <= ver) { + taosArrayPush(pMemDelData, p); + } + + p = p->pNext; + } + } + + if 
(piMemTbData != NULL) { + p = piMemTbData->pHead; + while (p) { + if (p->version <= ver) { + taosArrayPush(pMemDelData, p); + } + p = p->pNext; + } + } +} diff --git a/source/dnode/vnode/src/tsdb/tsdbReadUtil.h b/source/dnode/vnode/src/tsdb/tsdbReadUtil.h new file mode 100644 index 0000000000..5c4737440d --- /dev/null +++ b/source/dnode/vnode/src/tsdb/tsdbReadUtil.h @@ -0,0 +1,248 @@ +/* + * Copyright (c) 2019 TAOS Data, Inc. + * + * This program is free software: you can use, redistribute, and/or modify + * it under the terms of the GNU Affero General Public License, version 3 + * or later ("AGPL"), as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see . + */ + +#ifndef TDENGINE_TSDBREADUTIL_H +#define TDENGINE_TSDBREADUTIL_H + +#ifdef __cplusplus +extern "C" { +#endif + +#include "tsdbDataFileRW.h" +#include "tsdbUtil2.h" + +#define ASCENDING_TRAVERSE(o) (o == TSDB_ORDER_ASC) + +typedef enum { + READER_STATUS_SUSPEND = 0x1, + READER_STATUS_NORMAL = 0x2, +} EReaderStatus; + +typedef enum { + EXTERNAL_ROWS_PREV = 0x1, + EXTERNAL_ROWS_MAIN = 0x2, + EXTERNAL_ROWS_NEXT = 0x3, +} EContentData; + +typedef struct SBlockInfoBuf { + int32_t currentIndex; + SArray* pData; + int32_t numPerBucket; + int32_t numOfTables; +} SBlockInfoBuf; + +typedef struct { + STbDataIter* iter; + int32_t index; + bool hasVal; +} SIterInfo; + +typedef struct STableBlockScanInfo { + uint64_t uid; + TSKEY lastKey; + TSKEY lastKeyInStt; // last accessed key in stt + SArray* pBlockList; // block data index list, SArray + SArray* pMemDelData; // SArray + SArray* pfileDelData; // SArray from each file set + SIterInfo iter; // mem buffer skip list iterator + SIterInfo iiter; // imem buffer skip 
list iterator + SArray* delSkyline; // delete info for this table + int32_t fileDelIndex; // file block delete index + int32_t lastBlockDelIndex; // delete index for last block + bool iterInit; // whether to initialize the in-memory skip list iterator or not +} STableBlockScanInfo; + +typedef struct SResultBlockInfo { + SSDataBlock* pResBlock; + bool freeBlock; + int64_t capacity; +} SResultBlockInfo; + +typedef struct SCostSummary { + int64_t numOfBlocks; + double blockLoadTime; + double buildmemBlock; + int64_t headFileLoad; + double headFileLoadTime; + int64_t smaDataLoad; + double smaLoadTime; + int64_t lastBlockLoad; + double lastBlockLoadTime; + int64_t composedBlocks; + double buildComposedBlockTime; + double createScanInfoList; + double createSkylineIterTime; + double initLastBlockReader; +} SCostSummary; + +typedef struct STableUidList { + uint64_t* tableUidList; // access table uid list in uid ascending order list + int32_t currentIndex; // index in table uid list +} STableUidList; + +typedef struct { + int32_t numOfBlocks; + int32_t numOfLastFiles; +} SBlockNumber; + +typedef struct SBlockIndex { + int32_t ordinalIndex; + int64_t inFileOffset; + STimeWindow window; // todo replace it with overlap flag. +} SBlockIndex; + +typedef struct SBlockOrderWrapper { + int64_t uid; + int64_t offset; + STableBlockScanInfo* pInfo; +} SBlockOrderWrapper; + +typedef struct SBlockOrderSupporter { + SBlockOrderWrapper** pDataBlockInfo; + int32_t* indexPerTable; + int32_t* numOfBlocksPerTable; + int32_t numOfTables; +} SBlockOrderSupporter; + +typedef struct SBlockLoadSuppInfo { + TColumnDataAggArray colAggArray; + SColumnDataAgg tsColAgg; + int16_t* colId; + int16_t* slotId; + int32_t numOfCols; + char** buildBuf; // build string tmp buffer, todo remove it later after all string format being updated. 
+ bool smaValid; // the sma on all queried columns are activated +} SBlockLoadSuppInfo; + +typedef struct SLastBlockReader { + STimeWindow window; + SVersionRange verRange; + int32_t order; + uint64_t uid; + SMergeTree mergeTree; + SSttBlockLoadInfo* pInfo; + int64_t currentKey; +} SLastBlockReader; + +typedef struct SFilesetIter { + int32_t numOfFiles; // number of total files + int32_t index; // current accessed index in the list + TFileSetArray* pFilesetList; // data file set list + int32_t order; + SLastBlockReader* pLastBlockReader; // last file block reader +} SFilesetIter; + +typedef struct SFileDataBlockInfo { + // index position in STableBlockScanInfo in order to check whether neighbor block overlaps with it + uint64_t uid; + int32_t tbBlockIdx; + SBrinRecord record; +} SFileDataBlockInfo; + +typedef struct SDataBlockIter { + int32_t numOfBlocks; + int32_t index; + SArray* blockList; // SArray + int32_t order; + SDataBlk block; // current SDataBlk data + SSHashObj* pTableMap; +} SDataBlockIter; + +typedef struct SFileBlockDumpInfo { + int32_t totalRows; + int32_t rowIndex; + int64_t lastKey; + bool allDumped; +} SFileBlockDumpInfo; + +typedef struct SReaderStatus { + bool loadFromFile; // check file stage + bool composedDataBlock; // the returned data block is a composed block or not + SSHashObj* pTableMap; // SHash + STableBlockScanInfo** pTableIter; // table iterator used in building in-memory buffer data blocks. + STableUidList uidList; // check tables in uid order, to avoid the repeatly load of blocks in STT. 
+ SFileBlockDumpInfo fBlockDumpInfo; + STFileSet* pCurrentFileset; // current opened file set + SBlockData fileBlockData; + SFilesetIter fileIter; + SDataBlockIter blockIter; + SArray* pLDataIterArray; + SRowMerger merger; + SColumnInfoData* pPrimaryTsCol; // primary time stamp output col info data +} SReaderStatus; + +struct STsdbReader { + STsdb* pTsdb; + STsdbReaderInfo info; + TdThreadMutex readerMutex; + EReaderStatus flag; + int32_t code; + uint64_t rowsNum; + SResultBlockInfo resBlockInfo; + SReaderStatus status; + char* idStr; // query info handle, for debug purpose + int32_t type; // query type: 1. retrieve all data blocks, 2. retrieve direct prev|next rows + SBlockLoadSuppInfo suppInfo; + STsdbReadSnap* pReadSnap; + SCostSummary cost; + SHashObj** pIgnoreTables; + SSHashObj* pSchemaMap; // keep the retrieved schema info, to avoid the overhead by repeatly load schema + SDataFileReader* pFileReader; // the file reader + SBlockInfoBuf blockInfoBuf; + EContentData step; + STsdbReader* innerReader[2]; +}; + +typedef struct SBrinRecordIter { + SArray* pBrinBlockList; + SBrinBlk* pCurrentBlk; + int32_t blockIndex; + int32_t recordIndex; + SDataFileReader* pReader; + SBrinBlock block; + SBrinRecord record; +} SBrinRecordIter; + +STableBlockScanInfo* getTableBlockScanInfo(SSHashObj* pTableMap, uint64_t uid, const char* id); + +SSHashObj* createDataBlockScanInfo(STsdbReader* pTsdbReader, SBlockInfoBuf* pBuf, const STableKeyInfo* idList, + STableUidList* pUidList, int32_t numOfTables); +void clearBlockScanInfo(STableBlockScanInfo* p); +void destroyAllBlockScanInfo(SSHashObj* pTableMap); +void resetAllDataBlockScanInfo(SSHashObj* pTableMap, int64_t ts, int32_t step); +void cleanupInfoFoxNextFileset(SSHashObj* pTableMap); +int32_t ensureBlockScanInfoBuf(SBlockInfoBuf* pBuf, int32_t numOfTables); +void clearBlockScanInfoBuf(SBlockInfoBuf* pBuf); +void* getPosInBlockInfoBuf(SBlockInfoBuf* pBuf, int32_t index); + +// brin records iterator +void 
initBrinRecordIter(SBrinRecordIter* pIter, SDataFileReader* pReader, SArray* pList); +SBrinRecord* getNextBrinRecord(SBrinRecordIter* pIter); +void clearBrinBlockIter(SBrinRecordIter* pIter); + +// initialize block iterator API +int32_t initBlockIterator(STsdbReader* pReader, SDataBlockIter* pBlockIter, int32_t numOfBlocks, SArray* pTableList); +bool blockIteratorNext(SDataBlockIter* pBlockIter, const char* idStr); + +// load tomb data API (stt/mem only for one table each, tomb data from data files are load for all tables at one time) +void loadMemTombData(SArray** ppMemDelData, STbData* pMemTbData, STbData* piMemTbData, int64_t ver); +int32_t loadDataFileTombDataForAll(STsdbReader* pReader); +int32_t loadSttTombDataForAll(STsdbReader* pReader, SSttFileReader* pSttFileReader, SSttBlockLoadInfo* pLoadInfo); + +#ifdef __cplusplus +} +#endif + +#endif // TDENGINE_TSDBREADUTIL_H diff --git a/source/dnode/vnode/src/tsdb/tsdbReaderWriter.c b/source/dnode/vnode/src/tsdb/tsdbReaderWriter.c index 4b677533e7..89b7d019ae 100644 --- a/source/dnode/vnode/src/tsdb/tsdbReaderWriter.c +++ b/source/dnode/vnode/src/tsdb/tsdbReaderWriter.c @@ -16,7 +16,7 @@ #include "tsdb.h" // =============== PAGE-WISE FILE =============== -static int32_t tsdbOpenFile(const char *path, int32_t szPage, int32_t flag, STsdbFD **ppFD) { +int32_t tsdbOpenFile(const char *path, int32_t szPage, int32_t flag, STsdbFD **ppFD) { int32_t code = 0; STsdbFD *pFD = NULL; @@ -68,7 +68,7 @@ _exit: return code; } -static void tsdbCloseFile(STsdbFD **ppFD) { +void tsdbCloseFile(STsdbFD **ppFD) { STsdbFD *pFD = *ppFD; if (pFD) { taosMemoryFree(pFD->pBuf); @@ -141,7 +141,7 @@ _exit: return code; } -static int32_t tsdbWriteFile(STsdbFD *pFD, int64_t offset, const uint8_t *pBuf, int64_t size) { +int32_t tsdbWriteFile(STsdbFD *pFD, int64_t offset, const uint8_t *pBuf, int64_t size) { int32_t code = 0; int64_t fOffset = LOGIC_TO_FILE_OFFSET(offset, pFD->szPage); int64_t pgno = OFFSET_PGNO(fOffset, pFD->szPage); @@ -173,7 
+173,7 @@ _exit: return code; } -static int32_t tsdbReadFile(STsdbFD *pFD, int64_t offset, uint8_t *pBuf, int64_t size) { +int32_t tsdbReadFile(STsdbFD *pFD, int64_t offset, uint8_t *pBuf, int64_t size) { int32_t code = 0; int64_t n = 0; int64_t fOffset = LOGIC_TO_FILE_OFFSET(offset, pFD->szPage); @@ -202,7 +202,7 @@ _exit: return code; } -static int32_t tsdbFsyncFile(STsdbFD *pFD) { +int32_t tsdbFsyncFile(STsdbFD *pFD) { int32_t code = 0; code = tsdbWriteFilePage(pFD); @@ -749,7 +749,7 @@ int32_t tsdbDFileSetCopy(STsdb *pTsdb, SDFileSet *pSetFrom, SDFileSet *pSetTo) { int64_t size; TdFilePtr pOutFD = NULL; TdFilePtr PInFD = NULL; - int32_t szPage = pTsdb->pVnode->config.szPage; + int32_t szPage = pTsdb->pVnode->config.tsdbPageSize; char fNameFrom[TSDB_FILENAME_LEN]; char fNameTo[TSDB_FILENAME_LEN]; @@ -1489,7 +1489,7 @@ int32_t tsdbDelFReaderClose(SDelFReader **ppReader) { } int32_t tsdbReadDelData(SDelFReader *pReader, SDelIdx *pDelIdx, SArray *aDelData) { - return tsdbReadDelDatav1(pReader, pDelIdx, aDelData, INT64_MAX); + return tsdbReadDelDatav1(pReader, pDelIdx, aDelData, INT64_MAX); } int32_t tsdbReadDelDatav1(SDelFReader *pReader, SDelIdx *pDelIdx, SArray *aDelData, int64_t maxVer) { @@ -1517,10 +1517,10 @@ int32_t tsdbReadDelDatav1(SDelFReader *pReader, SDelIdx *pDelIdx, SArray *aDelDa if (delData.version > maxVer) { continue; } - if (taosArrayPush(aDelData, &delData) == NULL) { - code = TSDB_CODE_OUT_OF_MEMORY; - goto _err; - } + if (taosArrayPush(aDelData, &delData) == NULL) { + code = TSDB_CODE_OUT_OF_MEMORY; + goto _err; + } } ASSERT(n == size); diff --git a/source/dnode/vnode/src/tsdb/tsdbRetention.c b/source/dnode/vnode/src/tsdb/tsdbRetention.c index 7c7e1bd0f7..ae5ac4ae36 100644 --- a/source/dnode/vnode/src/tsdb/tsdbRetention.c +++ b/source/dnode/vnode/src/tsdb/tsdbRetention.c @@ -14,101 +14,265 @@ */ #include "tsdb.h" +#include "tsdbFS2.h" -static bool tsdbShouldDoRetentionImpl(STsdb *pTsdb, int64_t now) { - for (int32_t iSet = 0; iSet < 
taosArrayGetSize(pTsdb->fs.aDFileSet); iSet++) { - SDFileSet *pSet = (SDFileSet *)taosArrayGet(pTsdb->fs.aDFileSet, iSet); - int32_t expLevel = tsdbFidLevel(pSet->fid, &pTsdb->keepCfg, now); - SDiskID did; +typedef struct { + STsdb *tsdb; + int32_t szPage; + int64_t now; + int64_t cid; - if (expLevel == pSet->diskId.level) continue; + TFileSetArray *fsetArr; + TFileOpArray fopArr[1]; - if (expLevel < 0) { - return true; - } else { - if (tfsAllocDisk(pTsdb->pVnode->pTfs, expLevel, &did) < 0) { - return false; - } + struct { + int32_t fsetArrIdx; + STFileSet *fset; + } ctx[1]; +} SRTNer; - if (did.level == pSet->diskId.level) continue; +static int32_t tsdbDoRemoveFileObject(SRTNer *rtner, const STFileObj *fobj) { + STFileOp op = { + .optype = TSDB_FOP_REMOVE, + .fid = fobj->f->fid, + .of = fobj->f[0], + }; - return true; - } - } - - return false; -} -bool tsdbShouldDoRetention(STsdb *pTsdb, int64_t now) { - bool should; - taosThreadRwlockRdlock(&pTsdb->rwLock); - should = tsdbShouldDoRetentionImpl(pTsdb, now); - taosThreadRwlockUnlock(&pTsdb->rwLock); - return should; + return TARRAY2_APPEND(rtner->fopArr, op); } -int32_t tsdbDoRetention(STsdb *pTsdb, int64_t now) { +static int32_t tsdbDoCopyFile(SRTNer *rtner, const STFileObj *from, const STFile *to) { int32_t code = 0; int32_t lino = 0; - STsdbFS fs = {0}; - code = tsdbFSCopy(pTsdb, &fs); + char fname[TSDB_FILENAME_LEN]; + TdFilePtr fdFrom = NULL; + TdFilePtr fdTo = NULL; + + tsdbTFileName(rtner->tsdb, to, fname); + + fdFrom = taosOpenFile(from->fname, TD_FILE_READ); + if (fdFrom == NULL) code = terrno; TSDB_CHECK_CODE(code, lino, _exit); - for (int32_t iSet = 0; iSet < taosArrayGetSize(fs.aDFileSet); iSet++) { - SDFileSet *pSet = (SDFileSet *)taosArrayGet(fs.aDFileSet, iSet); - int32_t expLevel = tsdbFidLevel(pSet->fid, &pTsdb->keepCfg, now); - SDiskID did; + fdTo = taosOpenFile(fname, TD_FILE_WRITE | TD_FILE_CREATE | TD_FILE_TRUNC); + if (fdTo == NULL) code = terrno; + TSDB_CHECK_CODE(code, lino, _exit); - if 
(expLevel < 0) { - taosMemoryFree(pSet->pHeadF); - taosMemoryFree(pSet->pDataF); - taosMemoryFree(pSet->pSmaF); - for (int32_t iStt = 0; iStt < pSet->nSttF; iStt++) { - taosMemoryFree(pSet->aSttF[iStt]); - } - taosArrayRemove(fs.aDFileSet, iSet); - iSet--; - } else { - if (expLevel == 0) continue; - if (tfsAllocDisk(pTsdb->pVnode->pTfs, expLevel, &did) < 0) { - code = terrno; - goto _exit; - } - - if (did.level == pSet->diskId.level) continue; - - // copy file to new disk (todo) - SDFileSet fSet = *pSet; - fSet.diskId = did; - - code = tsdbDFileSetCopy(pTsdb, pSet, &fSet); - TSDB_CHECK_CODE(code, lino, _exit); - - code = tsdbFSUpsertFSet(&fs, &fSet); - TSDB_CHECK_CODE(code, lino, _exit); - } + int64_t n = taosFSendFile(fdTo, fdFrom, 0, tsdbLogicToFileSize(from->f->size, rtner->szPage)); + if (n < 0) { + code = TAOS_SYSTEM_ERROR(errno); + TSDB_CHECK_CODE(code, lino, _exit); } + taosCloseFile(&fdFrom); + taosCloseFile(&fdTo); - // do change fs - code = tsdbFSPrepareCommit(pTsdb, &fs); +_exit: + if (code) { + TSDB_ERROR_LOG(TD_VID(rtner->tsdb->pVnode), lino, code); + taosCloseFile(&fdFrom); + taosCloseFile(&fdTo); + } + return code; +} + +static int32_t tsdbDoMigrateFileObj(SRTNer *rtner, const STFileObj *fobj, const SDiskID *did) { + int32_t code = 0; + int32_t lino = 0; + STFileOp op = {0}; + + // remove old + op = (STFileOp){ + .optype = TSDB_FOP_REMOVE, + .fid = fobj->f->fid, + .of = fobj->f[0], + }; + + code = TARRAY2_APPEND(rtner->fopArr, op); + TSDB_CHECK_CODE(code, lino, _exit); + + // create new + op = (STFileOp){ + .optype = TSDB_FOP_CREATE, + .fid = fobj->f->fid, + .nf = + { + .type = fobj->f->type, + .did = did[0], + .fid = fobj->f->fid, + .cid = rtner->cid, + .size = fobj->f->size, + .stt[0] = + { + .level = fobj->f->stt[0].level, + }, + }, + }; + + code = TARRAY2_APPEND(rtner->fopArr, op); + TSDB_CHECK_CODE(code, lino, _exit); + + // do copy the file + code = tsdbDoCopyFile(rtner, fobj, &op.nf); TSDB_CHECK_CODE(code, lino, _exit); _exit: if (code) { - 
tsdbError("vgId:%d %s failed at line %d since %s", TD_VID(pTsdb->pVnode), __func__, lino, tstrerror(code)); - } else { - tsdbInfo("vgId:%d %s done", TD_VID(pTsdb->pVnode), __func__); + TSDB_ERROR_LOG(TD_VID(rtner->tsdb->pVnode), lino, code); } - tsdbFSDestroy(&fs); return code; } -static int32_t tsdbCommitRetentionImpl(STsdb *pTsdb) { return tsdbFSCommit(pTsdb); } +typedef struct { + STsdb *tsdb; + int64_t now; +} SRtnArg; -int32_t tsdbCommitRetention(STsdb *pTsdb) { - taosThreadRwlockWrlock(&pTsdb->rwLock); - tsdbCommitRetentionImpl(pTsdb); - taosThreadRwlockUnlock(&pTsdb->rwLock); - tsdbInfo("vgId:%d %s done", TD_VID(pTsdb->pVnode), __func__); - return 0; +static int32_t tsdbDoRetentionBegin(SRtnArg *arg, SRTNer *rtner) { + int32_t code = 0; + int32_t lino = 0; + + STsdb *tsdb = arg->tsdb; + + rtner->tsdb = tsdb; + rtner->szPage = tsdb->pVnode->config.tsdbPageSize; + rtner->now = arg->now; + rtner->cid = tsdbFSAllocEid(tsdb->pFS); + + code = tsdbFSCreateCopySnapshot(tsdb->pFS, &rtner->fsetArr); + TSDB_CHECK_CODE(code, lino, _exit); + +_exit: + if (code) { + TSDB_ERROR_LOG(TD_VID(rtner->tsdb->pVnode), lino, code); + } else { + tsdbInfo("vid:%d, cid:%" PRId64 ", %s done", TD_VID(rtner->tsdb->pVnode), rtner->cid, __func__); + } + return code; +} + +static int32_t tsdbDoRetentionEnd(SRTNer *rtner) { + int32_t code = 0; + int32_t lino = 0; + + if (TARRAY2_SIZE(rtner->fopArr) == 0) goto _exit; + + code = tsdbFSEditBegin(rtner->tsdb->pFS, rtner->fopArr, TSDB_FEDIT_MERGE); + TSDB_CHECK_CODE(code, lino, _exit); + + taosThreadRwlockWrlock(&rtner->tsdb->rwLock); + + code = tsdbFSEditCommit(rtner->tsdb->pFS); + if (code) { + taosThreadRwlockUnlock(&rtner->tsdb->rwLock); + TSDB_CHECK_CODE(code, lino, _exit); + } + + taosThreadRwlockUnlock(&rtner->tsdb->rwLock); + + TARRAY2_DESTROY(rtner->fopArr, NULL); + +_exit: + if (code) { + TSDB_ERROR_LOG(TD_VID(rtner->tsdb->pVnode), lino, code); + } else { + tsdbInfo("vid:%d, cid:%" PRId64 ", %s done", TD_VID(rtner->tsdb->pVnode), 
rtner->cid, __func__); + } + tsdbFSDestroyCopySnapshot(&rtner->fsetArr); + return code; +} + +static int32_t tsdbDoRetention2(void *arg) { + int32_t code = 0; + int32_t lino = 0; + SRTNer rtner[1] = {0}; + + code = tsdbDoRetentionBegin(arg, rtner); + TSDB_CHECK_CODE(code, lino, _exit); + + for (rtner->ctx->fsetArrIdx = 0; rtner->ctx->fsetArrIdx < TARRAY2_SIZE(rtner->fsetArr); rtner->ctx->fsetArrIdx++) { + rtner->ctx->fset = TARRAY2_GET(rtner->fsetArr, rtner->ctx->fsetArrIdx); + + STFileObj *fobj; + int32_t expLevel = tsdbFidLevel(rtner->ctx->fset->fid, &rtner->tsdb->keepCfg, rtner->now); + + if (expLevel < 0) { // remove the file set + for (int32_t ftype = 0; (ftype < TSDB_FTYPE_MAX) && (fobj = rtner->ctx->fset->farr[ftype], 1); ++ftype) { + if (fobj == NULL) continue; + + code = tsdbDoRemoveFileObject(rtner, fobj); + TSDB_CHECK_CODE(code, lino, _exit); + } + + SSttLvl *lvl; + TARRAY2_FOREACH(rtner->ctx->fset->lvlArr, lvl) { + TARRAY2_FOREACH(lvl->fobjArr, fobj) { + code = tsdbDoRemoveFileObject(rtner, fobj); + TSDB_CHECK_CODE(code, lino, _exit); + } + } + } else if (expLevel == 0) { + continue; + } else { + SDiskID did; + + if (tfsAllocDisk(rtner->tsdb->pVnode->pTfs, expLevel, &did) < 0) { + code = terrno; + TSDB_CHECK_CODE(code, lino, _exit); + } + tfsMkdirRecurAt(rtner->tsdb->pVnode->pTfs, rtner->tsdb->path, did); + + // data + for (int32_t ftype = 0; ftype < TSDB_FTYPE_MAX && (fobj = rtner->ctx->fset->farr[ftype], 1); ++ftype) { + if (fobj == NULL) continue; + + if (fobj->f->did.level == did.level) continue; + code = tsdbDoMigrateFileObj(rtner, fobj, &did); + TSDB_CHECK_CODE(code, lino, _exit); + } + + // stt + SSttLvl *lvl; + TARRAY2_FOREACH(rtner->ctx->fset->lvlArr, lvl) { + TARRAY2_FOREACH(lvl->fobjArr, fobj) { + if (fobj->f->did.level == did.level) continue; + + code = tsdbDoMigrateFileObj(rtner, fobj, &did); + TSDB_CHECK_CODE(code, lino, _exit); + } + } + } + } + + code = tsdbDoRetentionEnd(rtner); + TSDB_CHECK_CODE(code, lino, _exit); + +_exit: + if 
(code) { + TSDB_ERROR_LOG(TD_VID(rtner->tsdb->pVnode), lino, code); + } + taosMemoryFree(arg); + return code; +} + +int32_t tsdbAsyncRetention(STsdb *tsdb, int64_t now, int64_t *taskid) { + SRtnArg *arg = taosMemoryMalloc(sizeof(*arg)); + if (arg == NULL) return TSDB_CODE_OUT_OF_MEMORY; + + arg->tsdb = tsdb; + arg->now = now; + + int32_t code = tsdbFSScheduleBgTask(tsdb->pFS, TSDB_BG_TASK_RETENTION, tsdbDoRetention2, arg, taskid); + if (code) taosMemoryFree(arg); + + return code; +} + +int32_t tsdbSyncRetention(STsdb *tsdb, int64_t now) { + int64_t taskid; + + int32_t code = tsdbAsyncRetention(tsdb, now, &taskid); + if (code) return code; + + return tsdbFSWaitBgTask(tsdb->pFS, taskid); } \ No newline at end of file diff --git a/source/dnode/vnode/src/tsdb/tsdbSnapshot.c b/source/dnode/vnode/src/tsdb/tsdbSnapshot.c index df2aebe45b..011b9bd5a4 100644 --- a/source/dnode/vnode/src/tsdb/tsdbSnapshot.c +++ b/source/dnode/vnode/src/tsdb/tsdbSnapshot.c @@ -14,553 +14,519 @@ */ #include "tsdb.h" +#include "tsdbDataFileRW.h" +#include "tsdbFS2.h" +#include "tsdbFSetRW.h" +#include "tsdbIter.h" +#include "tsdbSttFileRW.h" extern int32_t tsdbUpdateTableSchema(SMeta* pMeta, int64_t suid, int64_t uid, SSkmInfo* pSkmInfo); -extern int32_t tsdbWriteDataBlock(SDataFWriter* pWriter, SBlockData* pBlockData, SMapData* mDataBlk, int8_t cmprAlg); -extern int32_t tsdbWriteSttBlock(SDataFWriter* pWriter, SBlockData* pBlockData, SArray* aSttBlk, int8_t cmprAlg); // STsdbSnapReader ======================================== struct STsdbSnapReader { - STsdb* pTsdb; - int64_t sver; - int64_t ever; - int8_t type; + STsdb* tsdb; + int64_t sver; + int64_t ever; + int8_t type; + uint8_t* aBuf[5]; + SSkmInfo skmTb[1]; - STsdbFS fs; - TABLEID tbid; - SSkmInfo skmTable; + TFileSetArray* fsetArr; - // timeseries data - int8_t dataDone; - int32_t fid; + // context + struct { + int32_t fsetArrIdx; + STFileSet* fset; + bool isDataDone; + bool isTombDone; + } ctx[1]; - SDataFReader* pDataFReader; - 
STsdbDataIter2* iterList; - STsdbDataIter2* pIter; - SRBTree rbt; - SBlockData bData; + // reader + SDataFileReader* dataReader; + TSttFileReaderArray sttReaderArr[1]; - // tombstone data - int8_t delDone; - SDelFReader* pDelFReader; - STsdbDataIter2* pTIter; - SArray* aDelData; + // iter + TTsdbIterArray dataIterArr[1]; + SIterMerger* dataIterMerger; + TTsdbIterArray tombIterArr[1]; + SIterMerger* tombIterMerger; + + // data + SBlockData blockData[1]; + STombBlock tombBlock[1]; }; -static int32_t tsdbSnapReadFileDataStart(STsdbSnapReader* pReader) { +static int32_t tsdbSnapReadFileSetOpenReader(STsdbSnapReader* reader) { int32_t code = 0; int32_t lino = 0; - SDFileSet* pSet = taosArraySearch(pReader->fs.aDFileSet, &(SDFileSet){.fid = pReader->fid}, tDFileSetCmprFn, TD_GT); - if (pSet == NULL) { - pReader->fid = INT32_MAX; - goto _exit; - } + ASSERT(reader->dataReader == NULL); + ASSERT(TARRAY2_SIZE(reader->sttReaderArr) == 0); - pReader->fid = pSet->fid; - - tRBTreeCreate(&pReader->rbt, tsdbDataIterCmprFn); - - code = tsdbDataFReaderOpen(&pReader->pDataFReader, pReader->pTsdb, pSet); - TSDB_CHECK_CODE(code, lino, _exit); - - code = tsdbOpenDataFileDataIter(pReader->pDataFReader, &pReader->pIter); - TSDB_CHECK_CODE(code, lino, _exit); - - if (pReader->pIter) { - // iter to next with filter info (sver, ever) - code = tsdbDataIterNext2( - pReader->pIter, - &(STsdbFilterInfo){.flag = TSDB_FILTER_FLAG_BY_VERSION | TSDB_FILTER_FLAG_IGNORE_DROPPED_TABLE, // flag - .sver = pReader->sver, - .ever = pReader->ever}); - TSDB_CHECK_CODE(code, lino, _exit); - - if (pReader->pIter->rowInfo.suid || pReader->pIter->rowInfo.uid) { - // add to rbtree - tRBTreePut(&pReader->rbt, &pReader->pIter->rbtn); - - // add to iterList - pReader->pIter->next = pReader->iterList; - pReader->iterList = pReader->pIter; - } else { - tsdbCloseDataIter2(pReader->pIter); + // data + SDataFileReaderConfig config = { + .tsdb = reader->tsdb, + .szPage = reader->tsdb->pVnode->config.tsdbPageSize, + 
.bufArr = reader->aBuf, + }; + bool hasDataFile = false; + for (int32_t ftype = 0; ftype < TSDB_FTYPE_MAX; ftype++) { + if (reader->ctx->fset->farr[ftype] != NULL) { + hasDataFile = true; + config.files[ftype].exist = true; + config.files[ftype].file = reader->ctx->fset->farr[ftype]->f[0]; } } - for (int32_t iStt = 0; iStt < pSet->nSttF; ++iStt) { - code = tsdbOpenSttFileDataIter(pReader->pDataFReader, iStt, &pReader->pIter); + if (hasDataFile) { + code = tsdbDataFileReaderOpen(NULL, &config, &reader->dataReader); TSDB_CHECK_CODE(code, lino, _exit); + } - if (pReader->pIter) { - // iter to valid row - code = tsdbDataIterNext2( - pReader->pIter, - &(STsdbFilterInfo){.flag = TSDB_FILTER_FLAG_BY_VERSION | TSDB_FILTER_FLAG_IGNORE_DROPPED_TABLE, // flag - .sver = pReader->sver, - .ever = pReader->ever}); + // stt + SSttLvl* lvl; + TARRAY2_FOREACH(reader->ctx->fset->lvlArr, lvl) { + STFileObj* fobj; + TARRAY2_FOREACH(lvl->fobjArr, fobj) { + SSttFileReader* sttReader; + SSttFileReaderConfig config = { + .tsdb = reader->tsdb, + .szPage = reader->tsdb->pVnode->config.tsdbPageSize, + .file = fobj->f[0], + .bufArr = reader->aBuf, + }; + + code = tsdbSttFileReaderOpen(fobj->fname, &config, &sttReader); TSDB_CHECK_CODE(code, lino, _exit); - if (pReader->pIter->rowInfo.suid || pReader->pIter->rowInfo.uid) { - // add to rbtree - tRBTreePut(&pReader->rbt, &pReader->pIter->rbtn); - - // add to iterList - pReader->pIter->next = pReader->iterList; - pReader->iterList = pReader->pIter; - } else { - tsdbCloseDataIter2(pReader->pIter); - } + code = TARRAY2_APPEND(reader->sttReaderArr, sttReader); + TSDB_CHECK_CODE(code, lino, _exit); } } - pReader->pIter = NULL; - _exit: if (code) { - tsdbError("vgId:%d %s failed at line %d since %s", TD_VID(pReader->pTsdb->pVnode), __func__, lino, tstrerror(code)); - } else { - tsdbInfo("vgId:%d %s done, fid:%d", TD_VID(pReader->pTsdb->pVnode), __func__, pReader->fid); + TSDB_ERROR_LOG(TD_VID(reader->tsdb->pVnode), code, lino); } return code; } -static 
void tsdbSnapReadFileDataEnd(STsdbSnapReader* pReader) { - while (pReader->iterList) { - STsdbDataIter2* pIter = pReader->iterList; - pReader->iterList = pIter->next; - tsdbCloseDataIter2(pIter); - } - - tsdbDataFReaderClose(&pReader->pDataFReader); -} - -static int32_t tsdbSnapReadNextRow(STsdbSnapReader* pReader, SRowInfo** ppRowInfo) { +static int32_t tsdbSnapReadFileSetCloseReader(STsdbSnapReader* reader) { int32_t code = 0; int32_t lino = 0; - if (pReader->pIter) { - code = tsdbDataIterNext2(pReader->pIter, &(STsdbFilterInfo){.flag = TSDB_FILTER_FLAG_BY_VERSION | - TSDB_FILTER_FLAG_IGNORE_DROPPED_TABLE, // flag - .sver = pReader->sver, - .ever = pReader->ever}); - TSDB_CHECK_CODE(code, lino, _exit); - - if (pReader->pIter->rowInfo.suid == 0 && pReader->pIter->rowInfo.uid == 0) { - pReader->pIter = NULL; - } else { - SRBTreeNode* pNode = tRBTreeMin(&pReader->rbt); - if (pNode) { - int32_t c = tsdbDataIterCmprFn(&pReader->pIter->rbtn, pNode); - if (c > 0) { - tRBTreePut(&pReader->rbt, &pReader->pIter->rbtn); - pReader->pIter = NULL; - } else if (c == 0) { - ASSERT(0); - } - } - } - } - - if (pReader->pIter == NULL) { - SRBTreeNode* pNode = tRBTreeMin(&pReader->rbt); - if (pNode) { - tRBTreeDrop(&pReader->rbt, pNode); - pReader->pIter = TSDB_RBTN_TO_DATA_ITER(pNode); - } - } - - if (ppRowInfo) { - if (pReader->pIter) { - *ppRowInfo = &pReader->pIter->rowInfo; - } else { - *ppRowInfo = NULL; - } - } + TARRAY2_CLEAR(reader->sttReaderArr, tsdbSttFileReaderClose); + tsdbDataFileReaderClose(&reader->dataReader); _exit: if (code) { - tsdbError("vgId:%d %s failed at line %d since %s", TD_VID(pReader->pTsdb->pVnode), __func__, lino, tstrerror(code)); + TSDB_ERROR_LOG(TD_VID(reader->tsdb->pVnode), code, lino); } return code; } -static int32_t tsdbSnapReadGetRow(STsdbSnapReader* pReader, SRowInfo** ppRowInfo) { - if (pReader->pIter) { - *ppRowInfo = &pReader->pIter->rowInfo; - return 0; +static int32_t tsdbSnapReadFileSetOpenIter(STsdbSnapReader* reader) { + int32_t code = 
0; + int32_t lino = 0; + + ASSERT(reader->dataIterMerger == NULL); + ASSERT(reader->tombIterMerger == NULL); + ASSERT(TARRAY2_SIZE(reader->dataIterArr) == 0); + ASSERT(TARRAY2_SIZE(reader->tombIterArr) == 0); + + STsdbIter* iter; + STsdbIterConfig config = { + .filterByVersion = true, + .verRange[0] = reader->sver, + .verRange[1] = reader->ever, + }; + + // data file + if (reader->dataReader) { + // data + config.type = TSDB_ITER_TYPE_DATA; + config.dataReader = reader->dataReader; + + code = tsdbIterOpen(&config, &iter); + TSDB_CHECK_CODE(code, lino, _exit); + + code = TARRAY2_APPEND(reader->dataIterArr, iter); + TSDB_CHECK_CODE(code, lino, _exit); + + // tomb + config.type = TSDB_ITER_TYPE_DATA_TOMB; + config.dataReader = reader->dataReader; + + code = tsdbIterOpen(&config, &iter); + TSDB_CHECK_CODE(code, lino, _exit); + + code = TARRAY2_APPEND(reader->tombIterArr, iter); + TSDB_CHECK_CODE(code, lino, _exit); } - return tsdbSnapReadNextRow(pReader, ppRowInfo); + // stt file + SSttFileReader* sttReader; + TARRAY2_FOREACH(reader->sttReaderArr, sttReader) { + // data + config.type = TSDB_ITER_TYPE_STT; + config.sttReader = sttReader; + + code = tsdbIterOpen(&config, &iter); + TSDB_CHECK_CODE(code, lino, _exit); + + code = TARRAY2_APPEND(reader->dataIterArr, iter); + TSDB_CHECK_CODE(code, lino, _exit); + + // tomb + config.type = TSDB_ITER_TYPE_STT_TOMB; + config.sttReader = sttReader; + + code = tsdbIterOpen(&config, &iter); + TSDB_CHECK_CODE(code, lino, _exit); + + code = TARRAY2_APPEND(reader->tombIterArr, iter); + TSDB_CHECK_CODE(code, lino, _exit); + } + + // merger + code = tsdbIterMergerOpen(reader->dataIterArr, &reader->dataIterMerger, false); + TSDB_CHECK_CODE(code, lino, _exit); + + code = tsdbIterMergerOpen(reader->tombIterArr, &reader->tombIterMerger, true); + TSDB_CHECK_CODE(code, lino, _exit); + +_exit: + if (code) { + TSDB_ERROR_LOG(TD_VID(reader->tsdb->pVnode), code, lino); + } + return code; } -static int32_t tsdbSnapCmprData(STsdbSnapReader* 
pReader, uint8_t** ppData) { - int32_t code = 0; +static int32_t tsdbSnapReadFileSetCloseIter(STsdbSnapReader* reader) { + tsdbIterMergerClose(&reader->dataIterMerger); + tsdbIterMergerClose(&reader->tombIterMerger); + TARRAY2_CLEAR(reader->dataIterArr, tsdbIterClose); + TARRAY2_CLEAR(reader->tombIterArr, tsdbIterClose); + return 0; +} - ASSERT(pReader->bData.nRow); +static int32_t tsdbSnapReadFileSetBegin(STsdbSnapReader* reader) { + int32_t code = 0; + int32_t lino = 0; + + ASSERT(reader->ctx->fset == NULL); + + if (reader->ctx->fsetArrIdx < TARRAY2_SIZE(reader->fsetArr)) { + reader->ctx->fset = TARRAY2_GET(reader->fsetArr, reader->ctx->fsetArrIdx++); + reader->ctx->isDataDone = false; + reader->ctx->isTombDone = false; + + code = tsdbSnapReadFileSetOpenReader(reader); + TSDB_CHECK_CODE(code, lino, _exit); + + code = tsdbSnapReadFileSetOpenIter(reader); + TSDB_CHECK_CODE(code, lino, _exit); + } + +_exit: + if (code) { + TSDB_ERROR_LOG(TD_VID(reader->tsdb->pVnode), code, lino); + } + return code; +} + +static int32_t tsdbSnapReadFileSetEnd(STsdbSnapReader* reader) { + tsdbSnapReadFileSetCloseIter(reader); + tsdbSnapReadFileSetCloseReader(reader); + reader->ctx->fset = NULL; + return 0; +} + +static int32_t tsdbSnapCmprData(STsdbSnapReader* reader, uint8_t** data) { + int32_t code = 0; + int32_t lino = 0; int32_t aBufN[5] = {0}; - code = tCmprBlockData(&pReader->bData, NO_COMPRESSION, NULL, NULL, pReader->aBuf, aBufN); - if (code) goto _exit; + code = tCmprBlockData(reader->blockData, NO_COMPRESSION, NULL, NULL, reader->aBuf, aBufN); + TSDB_CHECK_CODE(code, lino, _exit); int32_t size = aBufN[0] + aBufN[1] + aBufN[2] + aBufN[3]; - *ppData = taosMemoryMalloc(sizeof(SSnapDataHdr) + size); - if (*ppData == NULL) { + *data = taosMemoryMalloc(sizeof(SSnapDataHdr) + size); + if (*data == NULL) { code = TSDB_CODE_OUT_OF_MEMORY; - goto _exit; + TSDB_CHECK_CODE(code, lino, _exit); } - SSnapDataHdr* pHdr = (SSnapDataHdr*)*ppData; - pHdr->type = pReader->type; + SSnapDataHdr* 
pHdr = (SSnapDataHdr*)*data; + pHdr->type = reader->type; pHdr->size = size; - memcpy(pHdr->data, pReader->aBuf[3], aBufN[3]); - memcpy(pHdr->data + aBufN[3], pReader->aBuf[2], aBufN[2]); + memcpy(pHdr->data, reader->aBuf[3], aBufN[3]); + memcpy(pHdr->data + aBufN[3], reader->aBuf[2], aBufN[2]); if (aBufN[1]) { - memcpy(pHdr->data + aBufN[3] + aBufN[2], pReader->aBuf[1], aBufN[1]); + memcpy(pHdr->data + aBufN[3] + aBufN[2], reader->aBuf[1], aBufN[1]); } if (aBufN[0]) { - memcpy(pHdr->data + aBufN[3] + aBufN[2] + aBufN[1], pReader->aBuf[0], aBufN[0]); + memcpy(pHdr->data + aBufN[3] + aBufN[2] + aBufN[1], reader->aBuf[0], aBufN[0]); } _exit: + if (code) { + TSDB_ERROR_LOG(TD_VID(reader->tsdb->pVnode), lino, code); + } return code; } -static int32_t tsdbSnapReadTimeSeriesData(STsdbSnapReader* pReader, uint8_t** ppData) { - int32_t code = 0; - int32_t lino = 0; +static int32_t tsdbSnapReadTimeSeriesData(STsdbSnapReader* reader, uint8_t** data) { + int32_t code = 0; + int32_t lino = 0; + SMetaInfo info; - STsdb* pTsdb = pReader->pTsdb; + tBlockDataReset(reader->blockData); - tBlockDataReset(&pReader->bData); - - for (;;) { - // start a new file read if need - if (pReader->pDataFReader == NULL) { - code = tsdbSnapReadFileDataStart(pReader); - TSDB_CHECK_CODE(code, lino, _exit); - } - - if (pReader->pDataFReader == NULL) break; - - SRowInfo* pRowInfo; - code = tsdbSnapReadGetRow(pReader, &pRowInfo); - TSDB_CHECK_CODE(code, lino, _exit); - - if (pRowInfo == NULL) { - tsdbSnapReadFileDataEnd(pReader); - continue; - } - - code = tsdbUpdateTableSchema(pTsdb->pVnode->pMeta, pRowInfo->suid, pRowInfo->uid, &pReader->skmTable); - TSDB_CHECK_CODE(code, lino, _exit); - - code = tBlockDataInit(&pReader->bData, (TABLEID*)pRowInfo, pReader->skmTable.pTSchema, NULL, 0); - TSDB_CHECK_CODE(code, lino, _exit); - - do { - if (!TABLE_SAME_SCHEMA(pReader->bData.suid, pReader->bData.uid, pRowInfo->suid, pRowInfo->uid)) break; - - if (pReader->bData.uid && pReader->bData.uid != pRowInfo->uid) 
{ - code = tRealloc((uint8_t**)&pReader->bData.aUid, sizeof(int64_t) * (pReader->bData.nRow + 1)); + TABLEID tbid[1] = {0}; + for (SRowInfo* row; (row = tsdbIterMergerGetData(reader->dataIterMerger));) { + // skip dropped table + if (row->uid != tbid->uid) { + tbid->suid = row->suid; + tbid->uid = row->uid; + if (metaGetInfo(reader->tsdb->pVnode->pMeta, tbid->uid, &info, NULL) != 0) { + code = tsdbIterMergerSkipTableData(reader->dataIterMerger, tbid); TSDB_CHECK_CODE(code, lino, _exit); - - for (int32_t iRow = 0; iRow < pReader->bData.nRow; ++iRow) { - pReader->bData.aUid[iRow] = pReader->bData.uid; - } - pReader->bData.uid = 0; + continue; } - - code = tBlockDataAppendRow(&pReader->bData, &pRowInfo->row, NULL, pRowInfo->uid); - TSDB_CHECK_CODE(code, lino, _exit); - - code = tsdbSnapReadNextRow(pReader, &pRowInfo); - TSDB_CHECK_CODE(code, lino, _exit); - - if (pReader->bData.nRow >= 81920) break; - } while (pRowInfo); - - ASSERT(pReader->bData.nRow > 0); - - break; - } - - if (pReader->bData.nRow > 0) { - ASSERT(pReader->bData.suid || pReader->bData.uid); - - code = tsdbSnapCmprData(pReader, ppData); - TSDB_CHECK_CODE(code, lino, _exit); - } - -_exit: - if (code) { - tsdbError("vgId:%d %s failed at line %d since %s", TD_VID(pTsdb->pVnode), __func__, lino, tstrerror(code)); - } - return code; -} - -static int32_t tsdbSnapCmprTombData(STsdbSnapReader* pReader, uint8_t** ppData) { - int32_t code = 0; - int32_t lino = 0; - - int64_t size = sizeof(TABLEID); - for (int32_t iDelData = 0; iDelData < taosArrayGetSize(pReader->aDelData); ++iDelData) { - size += tPutDelData(NULL, taosArrayGet(pReader->aDelData, iDelData)); - } - - uint8_t* pData = (uint8_t*)taosMemoryMalloc(sizeof(SSnapDataHdr) + size); - if (pData == NULL) { - code = TSDB_CODE_OUT_OF_MEMORY; - TSDB_CHECK_CODE(code, lino, _exit); - } - - SSnapDataHdr* pHdr = (SSnapDataHdr*)pData; - pHdr->type = SNAP_DATA_DEL; - pHdr->size = size; - - TABLEID* pId = (TABLEID*)(pData + sizeof(SSnapDataHdr)); - *pId = 
pReader->tbid; - - size = sizeof(SSnapDataHdr) + sizeof(TABLEID); - for (int32_t iDelData = 0; iDelData < taosArrayGetSize(pReader->aDelData); ++iDelData) { - size += tPutDelData(pData + size, taosArrayGet(pReader->aDelData, iDelData)); - } - -_exit: - if (code) { - tsdbError("vgId:%d %s failed at line %d since %s", TD_VID(pReader->pTsdb->pVnode), __func__, lino, tstrerror(code)); - } - *ppData = pData; - return code; -} - -static void tsdbSnapReadGetTombData(STsdbSnapReader* pReader, SDelInfo** ppDelInfo) { - if (pReader->pTIter == NULL || (pReader->pTIter->delInfo.suid == 0 && pReader->pTIter->delInfo.uid == 0)) { - *ppDelInfo = NULL; - } else { - *ppDelInfo = &pReader->pTIter->delInfo; - } -} - -static int32_t tsdbSnapReadNextTombData(STsdbSnapReader* pReader, SDelInfo** ppDelInfo) { - int32_t code = 0; - int32_t lino = 0; - - code = tsdbDataIterNext2( - pReader->pTIter, &(STsdbFilterInfo){.flag = TSDB_FILTER_FLAG_BY_VERSION | TSDB_FILTER_FLAG_IGNORE_DROPPED_TABLE, - .sver = pReader->sver, - .ever = pReader->ever}); - TSDB_CHECK_CODE(code, lino, _exit); - - if (ppDelInfo) { - tsdbSnapReadGetTombData(pReader, ppDelInfo); - } - -_exit: - if (code) { - tsdbError("vgId:%d %s failed at line %d since %s", TD_VID(pReader->pTsdb->pVnode), __func__, lino, tstrerror(code)); - } - return code; -} - -static int32_t tsdbSnapReadTombData(STsdbSnapReader* pReader, uint8_t** ppData) { - int32_t code = 0; - int32_t lino = 0; - - STsdb* pTsdb = pReader->pTsdb; - - // open tombstone data iter if need - if (pReader->pDelFReader == NULL) { - if (pReader->fs.pDelFile == NULL) goto _exit; - - // open - code = tsdbDelFReaderOpen(&pReader->pDelFReader, pReader->fs.pDelFile, pTsdb); - TSDB_CHECK_CODE(code, lino, _exit); - - code = tsdbOpenTombFileDataIter(pReader->pDelFReader, &pReader->pTIter); - TSDB_CHECK_CODE(code, lino, _exit); - - if (pReader->pTIter) { - code = tsdbSnapReadNextTombData(pReader, NULL); - TSDB_CHECK_CODE(code, lino, _exit); } - } - // loop to get tombstone data - 
SDelInfo* pDelInfo; - tsdbSnapReadGetTombData(pReader, &pDelInfo); + if (reader->blockData->suid == 0 && reader->blockData->uid == 0) { + code = tsdbUpdateSkmTb(reader->tsdb, (TABLEID*)row, reader->skmTb); + TSDB_CHECK_CODE(code, lino, _exit); - if (pDelInfo == NULL) goto _exit; - - pReader->tbid = *(TABLEID*)pDelInfo; - - if (pReader->aDelData) { - taosArrayClear(pReader->aDelData); - } else if ((pReader->aDelData = taosArrayInit(16, sizeof(SDelData))) == NULL) { - code = TSDB_CODE_OUT_OF_MEMORY; - TSDB_CHECK_CODE(code, lino, _exit); - } - - while (pDelInfo && pDelInfo->suid == pReader->tbid.suid && pDelInfo->uid == pReader->tbid.uid) { - if (taosArrayPush(pReader->aDelData, &pDelInfo->delData) == NULL) { - code = TSDB_CODE_OUT_OF_MEMORY; + TABLEID tbid1 = { + .suid = row->suid, + .uid = row->suid ? 0 : row->uid, + }; + code = tBlockDataInit(reader->blockData, &tbid1, reader->skmTb->pTSchema, NULL, 0); TSDB_CHECK_CODE(code, lino, _exit); } - code = tsdbSnapReadNextTombData(pReader, &pDelInfo); + if (!TABLE_SAME_SCHEMA(reader->blockData->suid, reader->blockData->uid, row->suid, row->uid)) { + break; + } + + code = tBlockDataAppendRow(reader->blockData, &row->row, NULL, row->uid); TSDB_CHECK_CODE(code, lino, _exit); + + code = tsdbIterMergerNext(reader->dataIterMerger); + TSDB_CHECK_CODE(code, lino, _exit); + + if (reader->blockData->nRow >= 81920) { + break; + } } - // encode tombstone data - if (taosArrayGetSize(pReader->aDelData) > 0) { - code = tsdbSnapCmprTombData(pReader, ppData); + if (reader->blockData->nRow > 0) { + ASSERT(reader->blockData->suid || reader->blockData->uid); + code = tsdbSnapCmprData(reader, data); TSDB_CHECK_CODE(code, lino, _exit); } _exit: if (code) { - tsdbError("vgId:%d %s failed at line %d since %s", TD_VID(pTsdb->pVnode), __func__, lino, tstrerror(code)); - } else { - tsdbDebug("vgId:%d %s done", TD_VID(pTsdb->pVnode), __func__); + TSDB_ERROR_LOG(TD_VID(reader->tsdb->pVnode), code, lino); } return code; } -int32_t 
tsdbSnapReaderOpen(STsdb* pTsdb, int64_t sver, int64_t ever, int8_t type, STsdbSnapReader** ppReader) { +static int32_t tsdbSnapCmprTombData(STsdbSnapReader* reader, uint8_t** data) { int32_t code = 0; int32_t lino = 0; - // alloc - STsdbSnapReader* pReader = (STsdbSnapReader*)taosMemoryCalloc(1, sizeof(*pReader)); - if (pReader == NULL) { + int64_t size = sizeof(SSnapDataHdr); + for (int32_t i = 0; i < ARRAY_SIZE(reader->tombBlock->dataArr); i++) { + size += TARRAY2_DATA_LEN(reader->tombBlock->dataArr + i); + } + + data[0] = taosMemoryMalloc(size); + if (data[0] == NULL) { code = TSDB_CODE_OUT_OF_MEMORY; TSDB_CHECK_CODE(code, lino, _exit); } - pReader->pTsdb = pTsdb; - pReader->sver = sver; - pReader->ever = ever; - pReader->type = type; - taosThreadRwlockRdlock(&pTsdb->rwLock); - code = tsdbFSRef(pTsdb, &pReader->fs); + SSnapDataHdr* hdr = (SSnapDataHdr*)data[0]; + hdr->type = SNAP_DATA_DEL; + hdr->size = size; + + uint8_t* tdata = hdr->data; + for (int32_t i = 0; i < ARRAY_SIZE(reader->tombBlock->dataArr); i++) { + memcpy(tdata, TARRAY2_DATA(reader->tombBlock->dataArr + i), TARRAY2_DATA_LEN(reader->tombBlock->dataArr + i)); + tdata += TARRAY2_DATA_LEN(reader->tombBlock->dataArr + i); + } + +_exit: if (code) { - taosThreadRwlockUnlock(&pTsdb->rwLock); + TSDB_ERROR_LOG(TD_VID(reader->tsdb->pVnode), code, lino); + } + return code; +} + +static int32_t tsdbSnapReadTombData(STsdbSnapReader* reader, uint8_t** data) { + int32_t code = 0; + int32_t lino = 0; + SMetaInfo info; + + tTombBlockClear(reader->tombBlock); + + TABLEID tbid[1] = {0}; + for (STombRecord* record; (record = tsdbIterMergerGetTombRecord(reader->tombIterMerger)) != NULL;) { + if (record->uid != tbid->uid) { + tbid->suid = record->suid; + tbid->uid = record->uid; + if (metaGetInfo(reader->tsdb->pVnode->pMeta, tbid->uid, &info, NULL) != 0) { + code = tsdbIterMergerSkipTableData(reader->tombIterMerger, tbid); + TSDB_CHECK_CODE(code, lino, _exit); + continue; + } + } + + code = 
tTombBlockPut(reader->tombBlock, record); + TSDB_CHECK_CODE(code, lino, _exit); + + if (TOMB_BLOCK_SIZE(reader->tombBlock) >= 81920) { + break; + } + } + + if (TOMB_BLOCK_SIZE(reader->tombBlock) > 0) { + code = tsdbSnapCmprTombData(reader, data); TSDB_CHECK_CODE(code, lino, _exit); } - taosThreadRwlockUnlock(&pTsdb->rwLock); - // init - pReader->fid = INT32_MIN; +_exit: + if (code) { + TSDB_ERROR_LOG(TD_VID(reader->tsdb->pVnode), code, lino); + } + return code; +} - code = tBlockDataCreate(&pReader->bData); +int32_t tsdbSnapReaderOpen(STsdb* tsdb, int64_t sver, int64_t ever, int8_t type, STsdbSnapReader** reader) { + int32_t code = 0; + int32_t lino = 0; + + reader[0] = (STsdbSnapReader*)taosMemoryCalloc(1, sizeof(*reader[0])); + if (reader[0] == NULL) return TSDB_CODE_OUT_OF_MEMORY; + + reader[0]->tsdb = tsdb; + reader[0]->sver = sver; + reader[0]->ever = ever; + reader[0]->type = type; + + code = tsdbFSCreateRefSnapshot(tsdb->pFS, &reader[0]->fsetArr); TSDB_CHECK_CODE(code, lino, _exit); _exit: if (code) { - tsdbError("vgId:%d %s failed at line %d since %s, sver:%" PRId64 " ever:%" PRId64 " type:%d", TD_VID(pTsdb->pVnode), + tsdbError("vgId:%d %s failed at line %d since %s, sver:%" PRId64 " ever:%" PRId64 " type:%d", TD_VID(tsdb->pVnode), __func__, lino, tstrerror(code), sver, ever, type); - if (pReader) { - tBlockDataDestroy(&pReader->bData); - tsdbFSUnref(pTsdb, &pReader->fs); - taosMemoryFree(pReader); - pReader = NULL; - } + tsdbFSDestroyRefSnapshot(&reader[0]->fsetArr); + taosMemoryFree(reader[0]); + reader[0] = NULL; } else { - tsdbInfo("vgId:%d %s done, sver:%" PRId64 " ever:%" PRId64 " type:%d", TD_VID(pTsdb->pVnode), __func__, sver, ever, + tsdbInfo("vgId:%d %s done, sver:%" PRId64 " ever:%" PRId64 " type:%d", TD_VID(tsdb->pVnode), __func__, sver, ever, type); } - *ppReader = pReader; return code; } -int32_t tsdbSnapReaderClose(STsdbSnapReader** ppReader) { +int32_t tsdbSnapReaderClose(STsdbSnapReader** reader) { + if (reader[0] == NULL) return 0; + 
int32_t code = 0; int32_t lino = 0; - STsdbSnapReader* pReader = *ppReader; - STsdb* pTsdb = pReader->pTsdb; + STsdb* tsdb = reader[0]->tsdb; - // tombstone - if (pReader->pTIter) { - tsdbCloseDataIter2(pReader->pTIter); - pReader->pTIter = NULL; - } - if (pReader->pDelFReader) { - tsdbDelFReaderClose(&pReader->pDelFReader); - } - taosArrayDestroy(pReader->aDelData); + tTombBlockDestroy(reader[0]->tombBlock); + tBlockDataDestroy(reader[0]->blockData); - // timeseries - while (pReader->iterList) { - STsdbDataIter2* pIter = pReader->iterList; - pReader->iterList = pIter->next; - tsdbCloseDataIter2(pIter); - } - if (pReader->pDataFReader) { - tsdbDataFReaderClose(&pReader->pDataFReader); - } - tBlockDataDestroy(&pReader->bData); + tsdbIterMergerClose(&reader[0]->dataIterMerger); + tsdbIterMergerClose(&reader[0]->tombIterMerger); + TARRAY2_DESTROY(reader[0]->dataIterArr, tsdbIterClose); + TARRAY2_DESTROY(reader[0]->tombIterArr, tsdbIterClose); + TARRAY2_DESTROY(reader[0]->sttReaderArr, tsdbSttFileReaderClose); + tsdbDataFileReaderClose(&reader[0]->dataReader); - // other - tDestroyTSchema(pReader->skmTable.pTSchema); - tsdbFSUnref(pReader->pTsdb, &pReader->fs); - for (int32_t iBuf = 0; iBuf < sizeof(pReader->aBuf) / sizeof(pReader->aBuf[0]); iBuf++) { - tFree(pReader->aBuf[iBuf]); + tsdbFSDestroyRefSnapshot(&reader[0]->fsetArr); + tDestroyTSchema(reader[0]->skmTb->pTSchema); + + for (int32_t i = 0; i < ARRAY_SIZE(reader[0]->aBuf); ++i) { + tFree(reader[0]->aBuf[i]); } - taosMemoryFree(pReader); + + taosMemoryFree(reader[0]); + reader[0] = NULL; _exit: if (code) { - tsdbError("vgId:%d %s failed at line %d since %s", TD_VID(pTsdb->pVnode), __func__, lino, tstrerror(code)); + TSDB_ERROR_LOG(TD_VID(tsdb->pVnode), lino, code); } else { - tsdbDebug("vgId:%d %s done", TD_VID(pTsdb->pVnode), __func__); + tsdbDebug("vgId:%d %s done", TD_VID(tsdb->pVnode), __func__); } - *ppReader = NULL; return code; } -int32_t tsdbSnapRead(STsdbSnapReader* pReader, uint8_t** ppData) { +int32_t 
tsdbSnapRead(STsdbSnapReader* reader, uint8_t** data) { int32_t code = 0; int32_t lino = 0; - *ppData = NULL; + data[0] = NULL; - // read data file - if (!pReader->dataDone) { - code = tsdbSnapReadTimeSeriesData(pReader, ppData); - TSDB_CHECK_CODE(code, lino, _exit); - if (*ppData) { - goto _exit; - } else { - pReader->dataDone = 1; - } - } + for (;;) { + if (reader->ctx->fset == NULL) { + code = tsdbSnapReadFileSetBegin(reader); + TSDB_CHECK_CODE(code, lino, _exit); - // read del file - if (!pReader->delDone) { - code = tsdbSnapReadTombData(pReader, ppData); - TSDB_CHECK_CODE(code, lino, _exit); - if (*ppData) { - goto _exit; - } else { - pReader->delDone = 1; + if (reader->ctx->fset == NULL) { + break; + } } + + if (!reader->ctx->isDataDone) { + code = tsdbSnapReadTimeSeriesData(reader, data); + TSDB_CHECK_CODE(code, lino, _exit); + if (data[0]) { + goto _exit; + } else { + reader->ctx->isDataDone = true; + } + } + + if (!reader->ctx->isTombDone) { + code = tsdbSnapReadTombData(reader, data); + TSDB_CHECK_CODE(code, lino, _exit); + if (data[0]) { + goto _exit; + } else { + reader->ctx->isTombDone = true; + } + } + + code = tsdbSnapReadFileSetEnd(reader); + TSDB_CHECK_CODE(code, lino, _exit); } _exit: if (code) { - tsdbError("vgId:%d %s failed at line %d since %s", TD_VID(pReader->pTsdb->pVnode), __func__, lino, tstrerror(code)); + TSDB_ERROR_LOG(TD_VID(reader->tsdb->pVnode), code, lino); } else { - tsdbDebug("vgId:%d %s done", TD_VID(pReader->pTsdb->pVnode), __func__); + tsdbDebug("vgId:%d %s done", TD_VID(reader->tsdb->pVnode), __func__); } return code; } // STsdbSnapWriter ======================================== struct STsdbSnapWriter { - STsdb* pTsdb; + STsdb* tsdb; int64_t sver; int64_t ever; int32_t minutes; @@ -569,973 +535,595 @@ struct STsdbSnapWriter { int32_t maxRow; int8_t cmprAlg; int64_t commitID; + int32_t szPage; + int64_t compactVersion; + int64_t now; uint8_t* aBuf[5]; - STsdbFS fs; - TABLEID tbid; + TFileSetArray* fsetArr; + TFileOpArray 
fopArr[1]; - // time-series data - SBlockData inData; + struct { + bool fsetWriteBegin; + int32_t fid; + STFileSet* fset; + SDiskID did; + bool hasData; + bool hasTomb; - int32_t fid; - SSkmInfo skmTable; + // reader + SDataFileReader* dataReader; + TSttFileReaderArray sttReaderArr[1]; - /* reader */ - SDataFReader* pDataFReader; - STsdbDataIter2* iterList; - STsdbDataIter2* pDIter; - STsdbDataIter2* pSIter; - SRBTree rbt; // SRBTree + // iter/merger + TTsdbIterArray dataIterArr[1]; + SIterMerger* dataIterMerger; + TTsdbIterArray tombIterArr[1]; + SIterMerger* tombIterMerger; - /* writer */ - SDataFWriter* pDataFWriter; - SArray* aBlockIdx; - SMapData mDataBlk; // SMapData - SArray* aSttBlk; // SArray - SBlockData bData; - SBlockData sData; - - // tombstone data - /* reader */ - SDelFReader* pDelFReader; - STsdbDataIter2* pTIter; - - /* writer */ - SDelFWriter* pDelFWriter; - SArray* aDelIdx; - SArray* aDelData; + // writer + SFSetWriter* fsetWriter; + } ctx[1]; }; -// SNAP_DATA_TSDB -static int32_t tsdbSnapWriteTableDataStart(STsdbSnapWriter* pWriter, TABLEID* pId) { +// APIs +static int32_t tsdbSnapWriteTimeSeriesRow(STsdbSnapWriter* writer, SRowInfo* row) { int32_t code = 0; int32_t lino = 0; - if (pId) { - pWriter->tbid = *pId; - } else { - pWriter->tbid = (TABLEID){INT64_MAX, INT64_MAX}; - } - - if (pWriter->pDIter) { - STsdbDataIter2* pIter = pWriter->pDIter; - - // assert last table data end - ASSERT(pIter->dIter.iRow >= pIter->dIter.bData.nRow); - ASSERT(pIter->dIter.iDataBlk >= pIter->dIter.mDataBlk.nItem); - - for (;;) { - if (pIter->dIter.iBlockIdx >= taosArrayGetSize(pIter->dIter.aBlockIdx)) { - pWriter->pDIter = NULL; - break; - } - - SBlockIdx* pBlockIdx = (SBlockIdx*)taosArrayGet(pIter->dIter.aBlockIdx, pIter->dIter.iBlockIdx); - - int32_t c = tTABLEIDCmprFn(pBlockIdx, &pWriter->tbid); - if (c < 0) { - code = tsdbReadDataBlk(pIter->dIter.pReader, pBlockIdx, &pIter->dIter.mDataBlk); - TSDB_CHECK_CODE(code, lino, _exit); - - SBlockIdx* pNewBlockIdx = 
taosArrayReserve(pWriter->aBlockIdx, 1); - if (pNewBlockIdx == NULL) { - code = TSDB_CODE_OUT_OF_MEMORY; - TSDB_CHECK_CODE(code, lino, _exit); - } - - pNewBlockIdx->suid = pBlockIdx->suid; - pNewBlockIdx->uid = pBlockIdx->uid; - - code = tsdbWriteDataBlk(pWriter->pDataFWriter, &pIter->dIter.mDataBlk, pNewBlockIdx); - TSDB_CHECK_CODE(code, lino, _exit); - - pIter->dIter.iBlockIdx++; - } else if (c == 0) { - code = tsdbReadDataBlk(pIter->dIter.pReader, pBlockIdx, &pIter->dIter.mDataBlk); - TSDB_CHECK_CODE(code, lino, _exit); - - pIter->dIter.iDataBlk = 0; - pIter->dIter.iBlockIdx++; - - break; - } else { - pIter->dIter.iDataBlk = pIter->dIter.mDataBlk.nItem; - break; - } + while (writer->ctx->hasData) { + SRowInfo* row1 = tsdbIterMergerGetData(writer->ctx->dataIterMerger); + if (row1 == NULL) { + writer->ctx->hasData = false; + break; } - } - if (pId) { - code = tsdbUpdateTableSchema(pWriter->pTsdb->pVnode->pMeta, pId->suid, pId->uid, &pWriter->skmTable); - TSDB_CHECK_CODE(code, lino, _exit); - - tMapDataReset(&pWriter->mDataBlk); - - code = tBlockDataInit(&pWriter->bData, pId, pWriter->skmTable.pTSchema, NULL, 0); - TSDB_CHECK_CODE(code, lino, _exit); - } - - if (!TABLE_SAME_SCHEMA(pWriter->tbid.suid, pWriter->tbid.uid, pWriter->sData.suid, pWriter->sData.uid)) { - if ((pWriter->sData.nRow > 0)) { - code = tsdbWriteSttBlock(pWriter->pDataFWriter, &pWriter->sData, pWriter->aSttBlk, pWriter->cmprAlg); + int32_t c = tRowInfoCmprFn(row1, row); + if (c <= 0) { + code = tsdbFSetWriteRow(writer->ctx->fsetWriter, row1); TSDB_CHECK_CODE(code, lino, _exit); - } - if (pId) { - TABLEID id = {.suid = pWriter->tbid.suid, .uid = pWriter->tbid.suid ? 
0 : pWriter->tbid.uid}; - code = tBlockDataInit(&pWriter->sData, &id, pWriter->skmTable.pTSchema, NULL, 0); + code = tsdbIterMergerNext(writer->ctx->dataIterMerger); TSDB_CHECK_CODE(code, lino, _exit); - } - } - -_exit: - if (code) { - tsdbError("vgId:%d %s failed at line %d since %s", TD_VID(pWriter->pTsdb->pVnode), __func__, lino, tstrerror(code)); - } else { - tsdbTrace("vgId:%d %s done, suid:%" PRId64 " uid:%" PRId64, TD_VID(pWriter->pTsdb->pVnode), __func__, - pWriter->tbid.suid, pWriter->tbid.uid); - } - return code; -} - -static int32_t tsdbSnapWriteTableRowImpl(STsdbSnapWriter* pWriter, TSDBROW* pRow) { - int32_t code = 0; - int32_t lino = 0; - - code = tBlockDataAppendRow(&pWriter->bData, pRow, pWriter->skmTable.pTSchema, pWriter->tbid.uid); - TSDB_CHECK_CODE(code, lino, _exit); - - if (pWriter->bData.nRow >= pWriter->maxRow) { - code = tsdbWriteDataBlock(pWriter->pDataFWriter, &pWriter->bData, &pWriter->mDataBlk, pWriter->cmprAlg); - TSDB_CHECK_CODE(code, lino, _exit); - } - -_exit: - if (code) { - tsdbError("vgId:%d %s failed at line %d since %s", TD_VID(pWriter->pTsdb->pVnode), __func__, lino, tstrerror(code)); - } - return code; -} - -static int32_t tsdbSnapWriteTableRow(STsdbSnapWriter* pWriter, TSDBROW* pRow) { - int32_t code = 0; - int32_t lino = 0; - - TSDBKEY inKey = pRow ? 
TSDBROW_KEY(pRow) : TSDBKEY_MAX; - - if (pWriter->pDIter == NULL || (pWriter->pDIter->dIter.iRow >= pWriter->pDIter->dIter.bData.nRow && - pWriter->pDIter->dIter.iDataBlk >= pWriter->pDIter->dIter.mDataBlk.nItem)) { - goto _write_row; - } else { - for (;;) { - while (pWriter->pDIter->dIter.iRow < pWriter->pDIter->dIter.bData.nRow) { - TSDBROW row = tsdbRowFromBlockData(&pWriter->pDIter->dIter.bData, pWriter->pDIter->dIter.iRow); - - int32_t c = tsdbKeyCmprFn(&inKey, &TSDBROW_KEY(&row)); - if (c < 0) { - goto _write_row; - } else if (c > 0) { - code = tsdbSnapWriteTableRowImpl(pWriter, &row); - TSDB_CHECK_CODE(code, lino, _exit); - - pWriter->pDIter->dIter.iRow++; - } else { - ASSERT(0); - } - } - - for (;;) { - if (pWriter->pDIter->dIter.iDataBlk >= pWriter->pDIter->dIter.mDataBlk.nItem) goto _write_row; - - // FIXME: Here can be slow, use array instead - SDataBlk dataBlk; - tMapDataGetItemByIdx(&pWriter->pDIter->dIter.mDataBlk, pWriter->pDIter->dIter.iDataBlk, &dataBlk, tGetDataBlk); - - int32_t c = tDataBlkCmprFn(&dataBlk, &(SDataBlk){.minKey = inKey, .maxKey = inKey}); - if (c > 0) { - goto _write_row; - } else if (c < 0) { - if (pWriter->bData.nRow > 0) { - code = tsdbWriteDataBlock(pWriter->pDataFWriter, &pWriter->bData, &pWriter->mDataBlk, pWriter->cmprAlg); - TSDB_CHECK_CODE(code, lino, _exit); - } - - tMapDataPutItem(&pWriter->mDataBlk, &dataBlk, tPutDataBlk); - pWriter->pDIter->dIter.iDataBlk++; - } else { - code = tsdbReadDataBlockEx(pWriter->pDataFReader, &dataBlk, &pWriter->pDIter->dIter.bData); - TSDB_CHECK_CODE(code, lino, _exit); - - pWriter->pDIter->dIter.iRow = 0; - pWriter->pDIter->dIter.iDataBlk++; - break; - } - } - } - } - -_write_row: - if (pRow) { - code = tsdbSnapWriteTableRowImpl(pWriter, pRow); - TSDB_CHECK_CODE(code, lino, _exit); - } - -_exit: - if (code) { - tsdbError("vgId:%d %s failed at line %d since %s", TD_VID(pWriter->pTsdb->pVnode), __func__, lino, tstrerror(code)); - } - return code; -} - -static int32_t 
tsdbSnapWriteTableDataEnd(STsdbSnapWriter* pWriter) { - int32_t code = 0; - int32_t lino = 0; - - // write a NULL row to end current table data write - code = tsdbSnapWriteTableRow(pWriter, NULL); - TSDB_CHECK_CODE(code, lino, _exit); - - if (pWriter->bData.nRow > 0) { - if (pWriter->bData.nRow < pWriter->minRow) { - ASSERT(TABLE_SAME_SCHEMA(pWriter->sData.suid, pWriter->sData.uid, pWriter->tbid.suid, pWriter->tbid.uid)); - for (int32_t iRow = 0; iRow < pWriter->bData.nRow; iRow++) { - code = - tBlockDataAppendRow(&pWriter->sData, &tsdbRowFromBlockData(&pWriter->bData, iRow), NULL, pWriter->tbid.uid); - TSDB_CHECK_CODE(code, lino, _exit); - - if (pWriter->sData.nRow >= pWriter->maxRow) { - code = tsdbWriteSttBlock(pWriter->pDataFWriter, &pWriter->sData, pWriter->aSttBlk, pWriter->cmprAlg); - TSDB_CHECK_CODE(code, lino, _exit); - } - } - - tBlockDataClear(&pWriter->bData); } else { - code = tsdbWriteDataBlock(pWriter->pDataFWriter, &pWriter->bData, &pWriter->mDataBlk, pWriter->cmprAlg); - TSDB_CHECK_CODE(code, lino, _exit); + break; } } - if (pWriter->mDataBlk.nItem) { - SBlockIdx* pBlockIdx = taosArrayReserve(pWriter->aBlockIdx, 1); - if (pBlockIdx == NULL) { - code = TSDB_CODE_OUT_OF_MEMORY; - TSDB_CHECK_CODE(code, lino, _exit); - } - - pBlockIdx->suid = pWriter->tbid.suid; - pBlockIdx->uid = pWriter->tbid.uid; - - code = tsdbWriteDataBlk(pWriter->pDataFWriter, &pWriter->mDataBlk, pBlockIdx); - TSDB_CHECK_CODE(code, lino, _exit); - } - -_exit: - if (code) { - tsdbError("vgId:%d %s failed at line %d since %s", TD_VID(pWriter->pTsdb->pVnode), __func__, lino, tstrerror(code)); - } - return code; -} - -static int32_t tsdbSnapWriteFileDataStart(STsdbSnapWriter* pWriter, int32_t fid) { - int32_t code = 0; - int32_t lino = 0; - - ASSERT(pWriter->pDataFWriter == NULL && pWriter->fid < fid); - - STsdb* pTsdb = pWriter->pTsdb; - - pWriter->fid = fid; - pWriter->tbid = (TABLEID){0}; - SDFileSet* pSet = taosArraySearch(pWriter->fs.aDFileSet, &(SDFileSet){.fid = fid}, 
tDFileSetCmprFn, TD_EQ); - - // open reader - pWriter->pDataFReader = NULL; - pWriter->iterList = NULL; - pWriter->pDIter = NULL; - pWriter->pSIter = NULL; - tRBTreeCreate(&pWriter->rbt, tsdbDataIterCmprFn); - if (pSet) { - code = tsdbDataFReaderOpen(&pWriter->pDataFReader, pTsdb, pSet); - TSDB_CHECK_CODE(code, lino, _exit); - - code = tsdbOpenDataFileDataIter(pWriter->pDataFReader, &pWriter->pDIter); - TSDB_CHECK_CODE(code, lino, _exit); - if (pWriter->pDIter) { - pWriter->pDIter->next = pWriter->iterList; - pWriter->iterList = pWriter->pDIter; - } - - for (int32_t iStt = 0; iStt < pSet->nSttF; iStt++) { - code = tsdbOpenSttFileDataIter(pWriter->pDataFReader, iStt, &pWriter->pSIter); - TSDB_CHECK_CODE(code, lino, _exit); - - if (pWriter->pSIter) { - code = tsdbDataIterNext2(pWriter->pSIter, NULL); - TSDB_CHECK_CODE(code, lino, _exit); - - // add to tree - tRBTreePut(&pWriter->rbt, &pWriter->pSIter->rbtn); - - // add to list - pWriter->pSIter->next = pWriter->iterList; - pWriter->iterList = pWriter->pSIter; - } - } - - pWriter->pSIter = NULL; - } - - // open writer - SDiskID diskId; - if (pSet) { - diskId = pSet->diskId; - } else { - code = tfsAllocDisk(pTsdb->pVnode->pTfs, 0 /*TODO*/, &diskId); - TSDB_CHECK_CODE(code, lino, _exit); - code = tfsMkdirRecurAt(pTsdb->pVnode->pTfs, pTsdb->path, diskId); - TSDB_CHECK_CODE(code, lino, _exit); - } - SDFileSet wSet = {.diskId = diskId, - .fid = fid, - .pHeadF = &(SHeadFile){.commitID = pWriter->commitID}, - .pDataF = (pSet) ? pSet->pDataF : &(SDataFile){.commitID = pWriter->commitID}, - .pSmaF = (pSet) ? 
pSet->pSmaF : &(SSmaFile){.commitID = pWriter->commitID}, - .nSttF = 1, - .aSttF = {&(SSttFile){.commitID = pWriter->commitID}}}; - code = tsdbDataFWriterOpen(&pWriter->pDataFWriter, pTsdb, &wSet); - TSDB_CHECK_CODE(code, lino, _exit); - - if (pWriter->aBlockIdx) { - taosArrayClear(pWriter->aBlockIdx); - } else if ((pWriter->aBlockIdx = taosArrayInit(0, sizeof(SBlockIdx))) == NULL) { - code = TSDB_CODE_OUT_OF_MEMORY; - TSDB_CHECK_CODE(code, lino, _exit); - } - - tMapDataReset(&pWriter->mDataBlk); - - if (pWriter->aSttBlk) { - taosArrayClear(pWriter->aSttBlk); - } else if ((pWriter->aSttBlk = taosArrayInit(0, sizeof(SSttBlk))) == NULL) { - code = TSDB_CODE_OUT_OF_MEMORY; - TSDB_CHECK_CODE(code, lino, _exit); - } - - tBlockDataReset(&pWriter->bData); - tBlockDataReset(&pWriter->sData); - -_exit: - if (code) { - tsdbError("vgId:%d %s failed at line %d since %s, fid:%d", TD_VID(pTsdb->pVnode), __func__, lino, tstrerror(code), - fid); - } else { - tsdbDebug("vgId:%d %s done, fid:%d", TD_VID(pTsdb->pVnode), __func__, fid); - } - return code; -} - -static int32_t tsdbSnapWriteTableData(STsdbSnapWriter* pWriter, SRowInfo* pRowInfo) { - int32_t code = 0; - int32_t lino = 0; - - // switch to new table if need - if (pRowInfo == NULL || pRowInfo->uid != pWriter->tbid.uid) { - if (pWriter->tbid.uid) { - code = tsdbSnapWriteTableDataEnd(pWriter); - TSDB_CHECK_CODE(code, lino, _exit); - } - - code = tsdbSnapWriteTableDataStart(pWriter, (TABLEID*)pRowInfo); - TSDB_CHECK_CODE(code, lino, _exit); - } - - if (pRowInfo == NULL) goto _exit; - - code = tsdbSnapWriteTableRow(pWriter, &pRowInfo->row); - TSDB_CHECK_CODE(code, lino, _exit); - -_exit: - if (code) { - tsdbError("vgId:%d %s failed at line %d since %s", TD_VID(pWriter->pTsdb->pVnode), __func__, lino, tstrerror(code)); - } - return code; -} - -static int32_t tsdbSnapWriteNextRow(STsdbSnapWriter* pWriter, SRowInfo** ppRowInfo) { - int32_t code = 0; - int32_t lino = 0; - - if (pWriter->pSIter) { - code = 
tsdbDataIterNext2(pWriter->pSIter, NULL); - TSDB_CHECK_CODE(code, lino, _exit); - - if (pWriter->pSIter->rowInfo.suid == 0 && pWriter->pSIter->rowInfo.uid == 0) { - pWriter->pSIter = NULL; - } else { - SRBTreeNode* pNode = tRBTreeMin(&pWriter->rbt); - if (pNode) { - int32_t c = tsdbDataIterCmprFn(&pWriter->pSIter->rbtn, pNode); - if (c > 0) { - tRBTreePut(&pWriter->rbt, &pWriter->pSIter->rbtn); - pWriter->pSIter = NULL; - } else if (c == 0) { - ASSERT(0); - } - } - } - } - - if (pWriter->pSIter == NULL) { - SRBTreeNode* pNode = tRBTreeMin(&pWriter->rbt); - if (pNode) { - tRBTreeDrop(&pWriter->rbt, pNode); - pWriter->pSIter = TSDB_RBTN_TO_DATA_ITER(pNode); - } - } - - if (ppRowInfo) { - if (pWriter->pSIter) { - *ppRowInfo = &pWriter->pSIter->rowInfo; - } else { - *ppRowInfo = NULL; - } - } - -_exit: - if (code) { - tsdbError("vgId:%d %s failed at line %d since %s", TD_VID(pWriter->pTsdb->pVnode), __func__, lino, tstrerror(code)); - } - return code; -} - -static int32_t tsdbSnapWriteGetRow(STsdbSnapWriter* pWriter, SRowInfo** ppRowInfo) { - int32_t code = 0; - int32_t lino = 0; - - if (pWriter->pSIter) { - *ppRowInfo = &pWriter->pSIter->rowInfo; + if (row->suid == INT64_MAX) { + ASSERT(writer->ctx->hasData == false); goto _exit; } - code = tsdbSnapWriteNextRow(pWriter, ppRowInfo); + code = tsdbFSetWriteRow(writer->ctx->fsetWriter, row); TSDB_CHECK_CODE(code, lino, _exit); _exit: if (code) { - tsdbError("vgId:%d %s failed at line %d since %s", TD_VID(pWriter->pTsdb->pVnode), __func__, lino, tstrerror(code)); + TSDB_ERROR_LOG(TD_VID(writer->tsdb->pVnode), lino, code); } return code; } -static int32_t tsdbSnapWriteFileDataEnd(STsdbSnapWriter* pWriter) { +static int32_t tsdbSnapWriteFileSetOpenReader(STsdbSnapWriter* writer) { int32_t code = 0; int32_t lino = 0; - ASSERT(pWriter->pDataFWriter); + ASSERT(writer->ctx->dataReader == NULL); + ASSERT(TARRAY2_SIZE(writer->ctx->sttReaderArr) == 0); - // consume remain data and end with a NULL table row - SRowInfo* pRowInfo; - 
code = tsdbSnapWriteGetRow(pWriter, &pRowInfo); - TSDB_CHECK_CODE(code, lino, _exit); - for (;;) { - code = tsdbSnapWriteTableData(pWriter, pRowInfo); - TSDB_CHECK_CODE(code, lino, _exit); + if (writer->ctx->fset) { + // open data reader + SDataFileReaderConfig dataFileReaderConfig = { + .tsdb = writer->tsdb, + .bufArr = writer->aBuf, + .szPage = writer->szPage, + }; - if (pRowInfo == NULL) break; + for (int32_t ftype = 0; ftype < TSDB_FTYPE_MAX; ++ftype) { + if (writer->ctx->fset->farr[ftype] == NULL) { + continue; + } - code = tsdbSnapWriteNextRow(pWriter, &pRowInfo); - TSDB_CHECK_CODE(code, lino, _exit); - } - - // do file-level updates - code = tsdbWriteSttBlk(pWriter->pDataFWriter, pWriter->aSttBlk); - TSDB_CHECK_CODE(code, lino, _exit); - - code = tsdbWriteBlockIdx(pWriter->pDataFWriter, pWriter->aBlockIdx); - TSDB_CHECK_CODE(code, lino, _exit); - - code = tsdbUpdateDFileSetHeader(pWriter->pDataFWriter); - TSDB_CHECK_CODE(code, lino, _exit); - - code = tsdbFSUpsertFSet(&pWriter->fs, &pWriter->pDataFWriter->wSet); - TSDB_CHECK_CODE(code, lino, _exit); - - code = tsdbDataFWriterClose(&pWriter->pDataFWriter, 1); - TSDB_CHECK_CODE(code, lino, _exit); - - if (pWriter->pDataFReader) { - code = tsdbDataFReaderClose(&pWriter->pDataFReader); - TSDB_CHECK_CODE(code, lino, _exit); - } - - // clear sources - while (pWriter->iterList) { - STsdbDataIter2* pIter = pWriter->iterList; - pWriter->iterList = pIter->next; - tsdbCloseDataIter2(pIter); - } - -_exit: - if (code) { - tsdbError("vgId:%d %s failed since %s", TD_VID(pWriter->pTsdb->pVnode), __func__, tstrerror(code)); - } else { - tsdbDebug("vgId:%d %s is done", TD_VID(pWriter->pTsdb->pVnode), __func__); - } - return code; -} - -static int32_t tsdbSnapWriteTimeSeriesData(STsdbSnapWriter* pWriter, SSnapDataHdr* pHdr) { - int32_t code = 0; - int32_t lino = 0; - - code = tDecmprBlockData(pHdr->data, pHdr->size, &pWriter->inData, pWriter->aBuf); - TSDB_CHECK_CODE(code, lino, _exit); - - ASSERT(pWriter->inData.nRow > 0); - 
- // switch to new data file if need - int32_t fid = tsdbKeyFid(pWriter->inData.aTSKEY[0], pWriter->minutes, pWriter->precision); - if (pWriter->fid != fid) { - if (pWriter->pDataFWriter) { - code = tsdbSnapWriteFileDataEnd(pWriter); - TSDB_CHECK_CODE(code, lino, _exit); + dataFileReaderConfig.files[ftype].exist = true; + dataFileReaderConfig.files[ftype].file = writer->ctx->fset->farr[ftype]->f[0]; } - code = tsdbSnapWriteFileDataStart(pWriter, fid); + code = tsdbDataFileReaderOpen(NULL, &dataFileReaderConfig, &writer->ctx->dataReader); TSDB_CHECK_CODE(code, lino, _exit); - } - // loop write each row - SRowInfo* pRowInfo; - code = tsdbSnapWriteGetRow(pWriter, &pRowInfo); - TSDB_CHECK_CODE(code, lino, _exit); - for (int32_t iRow = 0; iRow < pWriter->inData.nRow; ++iRow) { - SRowInfo rInfo = {.suid = pWriter->inData.suid, - .uid = pWriter->inData.uid ? pWriter->inData.uid : pWriter->inData.aUid[iRow], - .row = tsdbRowFromBlockData(&pWriter->inData, iRow)}; + // open stt reader array + SSttLvl* lvl; + TARRAY2_FOREACH(writer->ctx->fset->lvlArr, lvl) { + STFileObj* fobj; + TARRAY2_FOREACH(lvl->fobjArr, fobj) { + SSttFileReader* reader; + SSttFileReaderConfig sttFileReaderConfig = { + .tsdb = writer->tsdb, + .szPage = writer->szPage, + .bufArr = writer->aBuf, + .file = fobj->f[0], + }; - for (;;) { - if (pRowInfo == NULL) { - code = tsdbSnapWriteTableData(pWriter, &rInfo); + code = tsdbSttFileReaderOpen(fobj->fname, &sttFileReaderConfig, &reader); TSDB_CHECK_CODE(code, lino, _exit); - break; - } else { - int32_t c = tRowInfoCmprFn(&rInfo, pRowInfo); - if (c < 0) { - code = tsdbSnapWriteTableData(pWriter, &rInfo); - TSDB_CHECK_CODE(code, lino, _exit); - break; - } else if (c > 0) { - code = tsdbSnapWriteTableData(pWriter, pRowInfo); - TSDB_CHECK_CODE(code, lino, _exit); - code = tsdbSnapWriteNextRow(pWriter, &pRowInfo); - TSDB_CHECK_CODE(code, lino, _exit); - } else { - ASSERT(0); - } + code = TARRAY2_APPEND(writer->ctx->sttReaderArr, reader); + TSDB_CHECK_CODE(code, 
lino, _exit); } } } _exit: if (code) { - tsdbError("vgId:%d %s failed at line %d since %s", TD_VID(pWriter->pTsdb->pVnode), __func__, lino, tstrerror(code)); - } else { - tsdbDebug("vgId:%d %s done, suid:%" PRId64 " uid:%" PRId64 " nRow:%d", TD_VID(pWriter->pTsdb->pVnode), __func__, - pWriter->inData.suid, pWriter->inData.uid, pWriter->inData.nRow); + TSDB_ERROR_LOG(TD_VID(writer->tsdb->pVnode), lino, code); } return code; } -// SNAP_DATA_DEL -static int32_t tsdbSnapWriteDelTableDataStart(STsdbSnapWriter* pWriter, TABLEID* pId) { - int32_t code = 0; - int32_t lino = 0; - - if (pId) { - pWriter->tbid = *pId; - } else { - pWriter->tbid = (TABLEID){.suid = INT64_MAX, .uid = INT64_MAX}; - } - - taosArrayClear(pWriter->aDelData); - - if (pWriter->pTIter) { - while (pWriter->pTIter->tIter.iDelIdx < taosArrayGetSize(pWriter->pTIter->tIter.aDelIdx)) { - SDelIdx* pDelIdx = taosArrayGet(pWriter->pTIter->tIter.aDelIdx, pWriter->pTIter->tIter.iDelIdx); - - int32_t c = tTABLEIDCmprFn(pDelIdx, &pWriter->tbid); - if (c < 0) { - code = tsdbReadDelDatav1(pWriter->pDelFReader, pDelIdx, pWriter->pTIter->tIter.aDelData, INT64_MAX); - TSDB_CHECK_CODE(code, lino, _exit); - - SDelIdx* pDelIdxNew = taosArrayReserve(pWriter->aDelIdx, 1); - if (pDelIdxNew == NULL) { - code = TSDB_CODE_OUT_OF_MEMORY; - TSDB_CHECK_CODE(code, lino, _exit); - } - - pDelIdxNew->suid = pDelIdx->suid; - pDelIdxNew->uid = pDelIdx->uid; - - code = tsdbWriteDelData(pWriter->pDelFWriter, pWriter->pTIter->tIter.aDelData, pDelIdxNew); - TSDB_CHECK_CODE(code, lino, _exit); - - pWriter->pTIter->tIter.iDelIdx++; - } else if (c == 0) { - code = tsdbReadDelDatav1(pWriter->pDelFReader, pDelIdx, pWriter->aDelData, INT64_MAX); - TSDB_CHECK_CODE(code, lino, _exit); - - pWriter->pTIter->tIter.iDelIdx++; - break; - } else { - break; - } - } - } - -_exit: - if (code) { - tsdbError("vgId:%d %s failed at line %d since %s", TD_VID(pWriter->pTsdb->pVnode), __func__, lino, tstrerror(code)); - } else { - tsdbTrace("vgId:%d %s done, 
suid:%" PRId64 " uid:%" PRId64, TD_VID(pWriter->pTsdb->pVnode), __func__, - pWriter->tbid.suid, pWriter->tbid.uid); - } - return code; +static int32_t tsdbSnapWriteFileSetCloseReader(STsdbSnapWriter* writer) { + TARRAY2_CLEAR(writer->ctx->sttReaderArr, tsdbSttFileReaderClose); + tsdbDataFileReaderClose(&writer->ctx->dataReader); + return 0; } -static int32_t tsdbSnapWriteDelTableDataEnd(STsdbSnapWriter* pWriter) { +static int32_t tsdbSnapWriteFileSetOpenIter(STsdbSnapWriter* writer) { int32_t code = 0; int32_t lino = 0; - if (taosArrayGetSize(pWriter->aDelData) > 0) { - SDelIdx* pDelIdx = taosArrayReserve(pWriter->aDelIdx, 1); - if (pDelIdx == NULL) { - code = TSDB_CODE_OUT_OF_MEMORY; - TSDB_CHECK_CODE(code, lino, _exit); - } + // data ieter + if (writer->ctx->dataReader) { + STsdbIter* iter; + STsdbIterConfig config = {0}; - pDelIdx->suid = pWriter->tbid.suid; - pDelIdx->uid = pWriter->tbid.uid; + // data + config.type = TSDB_ITER_TYPE_DATA; + config.dataReader = writer->ctx->dataReader; - code = tsdbWriteDelData(pWriter->pDelFWriter, pWriter->aDelData, pDelIdx); + code = tsdbIterOpen(&config, &iter); + TSDB_CHECK_CODE(code, lino, _exit); + + code = TARRAY2_APPEND(writer->ctx->dataIterArr, iter); + TSDB_CHECK_CODE(code, lino, _exit); + + // tome + config.type = TSDB_ITER_TYPE_DATA_TOMB; + config.dataReader = writer->ctx->dataReader; + + code = tsdbIterOpen(&config, &iter); + TSDB_CHECK_CODE(code, lino, _exit); + + code = TARRAY2_APPEND(writer->ctx->tombIterArr, iter); TSDB_CHECK_CODE(code, lino, _exit); } -_exit: - if (code) { - tsdbError("vgId:%d %s failed at line %d since %s", TD_VID(pWriter->pTsdb->pVnode), __func__, lino, tstrerror(code)); - } else { - tsdbTrace("vgId:%d %s done", TD_VID(pWriter->pTsdb->pVnode), __func__); - } - return code; -} + // stt iter + SSttFileReader* sttFileReader; + TARRAY2_FOREACH(writer->ctx->sttReaderArr, sttFileReader) { + STsdbIter* iter; + STsdbIterConfig config = {0}; -static int32_t tsdbSnapWriteDelTableData(STsdbSnapWriter* 
pWriter, TABLEID* pId, uint8_t* pData, int64_t size) { - int32_t code = 0; - int32_t lino = 0; + // data + config.type = TSDB_ITER_TYPE_STT; + config.sttReader = sttFileReader; - if (pId == NULL || pId->uid != pWriter->tbid.uid) { - if (pWriter->tbid.uid) { - code = tsdbSnapWriteDelTableDataEnd(pWriter); - TSDB_CHECK_CODE(code, lino, _exit); - } + code = tsdbIterOpen(&config, &iter); + TSDB_CHECK_CODE(code, lino, _exit); - code = tsdbSnapWriteDelTableDataStart(pWriter, pId); + code = TARRAY2_APPEND(writer->ctx->dataIterArr, iter); + TSDB_CHECK_CODE(code, lino, _exit); + + // tomb + config.type = TSDB_ITER_TYPE_STT_TOMB; + config.sttReader = sttFileReader; + + code = tsdbIterOpen(&config, &iter); + TSDB_CHECK_CODE(code, lino, _exit); + + code = TARRAY2_APPEND(writer->ctx->tombIterArr, iter); TSDB_CHECK_CODE(code, lino, _exit); } - if (pId == NULL) goto _exit; - - int64_t n = 0; - while (n < size) { - SDelData delData; - n += tGetDelData(pData + n, &delData); - - if (taosArrayPush(pWriter->aDelData, &delData) == NULL) { - code = TSDB_CODE_OUT_OF_MEMORY; - TSDB_CHECK_CODE(code, lino, _exit); - } - } - ASSERT(n == size); - -_exit: - if (code) { - tsdbError("vgId:%d %s failed at line %d since %s", TD_VID(pWriter->pTsdb->pVnode), __func__, lino, tstrerror(code)); - } - return code; -} - -static int32_t tsdbSnapWriteDelDataStart(STsdbSnapWriter* pWriter) { - int32_t code = 0; - int32_t lino = 0; - - STsdb* pTsdb = pWriter->pTsdb; - SDelFile* pDelFile = pWriter->fs.pDelFile; - - pWriter->tbid = (TABLEID){0}; - - // reader - if (pDelFile) { - code = tsdbDelFReaderOpen(&pWriter->pDelFReader, pDelFile, pTsdb); - TSDB_CHECK_CODE(code, lino, _exit); - - code = tsdbOpenTombFileDataIter(pWriter->pDelFReader, &pWriter->pTIter); - TSDB_CHECK_CODE(code, lino, _exit); - } - - // writer - code = tsdbDelFWriterOpen(&pWriter->pDelFWriter, &(SDelFile){.commitID = pWriter->commitID}, pTsdb); + // open merger + code = tsdbIterMergerOpen(writer->ctx->dataIterArr, 
&writer->ctx->dataIterMerger, false); TSDB_CHECK_CODE(code, lino, _exit); - if ((pWriter->aDelIdx = taosArrayInit(0, sizeof(SDelIdx))) == NULL) { - code = TSDB_CODE_OUT_OF_MEMORY; + code = tsdbIterMergerOpen(writer->ctx->tombIterArr, &writer->ctx->tombIterMerger, true); + TSDB_CHECK_CODE(code, lino, _exit); + +_exit: + if (code) { + TSDB_ERROR_LOG(TD_VID(writer->tsdb->pVnode), lino, code); + } + return code; +} + +static int32_t tsdbSnapWriteFileSetCloseIter(STsdbSnapWriter* writer) { + tsdbIterMergerClose(&writer->ctx->dataIterMerger); + tsdbIterMergerClose(&writer->ctx->tombIterMerger); + TARRAY2_CLEAR(writer->ctx->dataIterArr, tsdbIterClose); + TARRAY2_CLEAR(writer->ctx->tombIterArr, tsdbIterClose); + return 0; +} + +static int32_t tsdbSnapWriteFileSetOpenWriter(STsdbSnapWriter* writer) { + int32_t code = 0; + int32_t lino = 0; + + SFSetWriterConfig config = { + .tsdb = writer->tsdb, + .toSttOnly = false, + .compactVersion = writer->compactVersion, + .minRow = writer->minRow, + .maxRow = writer->maxRow, + .szPage = writer->szPage, + .cmprAlg = writer->cmprAlg, + .fid = writer->ctx->fid, + .cid = writer->commitID, + .did = writer->ctx->did, + .level = 0, + }; + + code = tsdbFSetWriterOpen(&config, &writer->ctx->fsetWriter); + TSDB_CHECK_CODE(code, lino, _exit); + +_exit: + if (code) { + TSDB_ERROR_LOG(TD_VID(writer->tsdb->pVnode), lino, code); + } + return code; +} + +static int32_t tsdbSnapWriteFileSetCloseWriter(STsdbSnapWriter* writer) { + return tsdbFSetWriterClose(&writer->ctx->fsetWriter, 0, writer->fopArr); +} + +static int32_t tsdbSnapWriteFileSetBegin(STsdbSnapWriter* writer, int32_t fid) { + int32_t code = 0; + int32_t lino = 0; + + ASSERT(writer->ctx->fsetWriteBegin == false); + + STFileSet* fset = &(STFileSet){.fid = fid}; + + writer->ctx->fid = fid; + STFileSet** fsetPtr = TARRAY2_SEARCH(writer->fsetArr, &fset, tsdbTFileSetCmprFn, TD_EQ); + writer->ctx->fset = (fsetPtr == NULL) ? 
NULL : *fsetPtr; + + int32_t level = tsdbFidLevel(fid, &writer->tsdb->keepCfg, taosGetTimestampSec()); + if (tfsAllocDisk(writer->tsdb->pVnode->pTfs, level, &writer->ctx->did)) { + code = TSDB_CODE_NO_AVAIL_DISK; TSDB_CHECK_CODE(code, lino, _exit); } - if ((pWriter->aDelData = taosArrayInit(0, sizeof(SDelData))) == NULL) { - code = TSDB_CODE_OUT_OF_MEMORY; + tfsMkdirRecurAt(writer->tsdb->pVnode->pTfs, writer->tsdb->path, writer->ctx->did); + + writer->ctx->hasData = true; + writer->ctx->hasTomb = true; + + code = tsdbSnapWriteFileSetOpenReader(writer); + TSDB_CHECK_CODE(code, lino, _exit); + + code = tsdbSnapWriteFileSetOpenIter(writer); + TSDB_CHECK_CODE(code, lino, _exit); + + code = tsdbSnapWriteFileSetOpenWriter(writer); + TSDB_CHECK_CODE(code, lino, _exit); + + writer->ctx->fsetWriteBegin = true; + +_exit: + if (code) { + TSDB_ERROR_LOG(TD_VID(writer->tsdb->pVnode), lino, code); + } + return code; +} + +static int32_t tsdbSnapWriteTombRecord(STsdbSnapWriter* writer, const STombRecord* record) { + int32_t code = 0; + int32_t lino = 0; + + while (writer->ctx->hasTomb) { + STombRecord* record1 = tsdbIterMergerGetTombRecord(writer->ctx->tombIterMerger); + if (record1 == NULL) { + writer->ctx->hasTomb = false; + break; + } + + int32_t c = tTombRecordCompare(record1, record); + if (c <= 0) { + code = tsdbFSetWriteTombRecord(writer->ctx->fsetWriter, record1); + TSDB_CHECK_CODE(code, lino, _exit); + } else { + break; + } + } + + if (record->suid == INT64_MAX) { + ASSERT(writer->ctx->hasTomb == false); + goto _exit; + } + + code = tsdbFSetWriteTombRecord(writer->ctx->fsetWriter, record); + TSDB_CHECK_CODE(code, lino, _exit); + +_exit: + if (code) { + TSDB_ERROR_LOG(TD_VID(writer->tsdb->pVnode), lino, code); + } + return code; +} + +static int32_t tsdbSnapWriteFileSetEnd(STsdbSnapWriter* writer) { + if (!writer->ctx->fsetWriteBegin) return 0; + + int32_t code = 0; + int32_t lino = 0; + + SRowInfo row = { + .suid = INT64_MAX, + .uid = INT64_MAX, + }; + + code = 
tsdbSnapWriteTimeSeriesRow(writer, &row); + TSDB_CHECK_CODE(code, lino, _exit); + + STombRecord record = { + .suid = INT64_MAX, + .uid = INT64_MAX, + }; + + code = tsdbSnapWriteTombRecord(writer, &record); + TSDB_CHECK_CODE(code, lino, _exit); + + // close write + code = tsdbSnapWriteFileSetCloseWriter(writer); + TSDB_CHECK_CODE(code, lino, _exit); + + code = tsdbSnapWriteFileSetCloseIter(writer); + TSDB_CHECK_CODE(code, lino, _exit); + + code = tsdbSnapWriteFileSetCloseReader(writer); + TSDB_CHECK_CODE(code, lino, _exit); + + writer->ctx->fsetWriteBegin = false; + +_exit: + if (code) { + TSDB_ERROR_LOG(TD_VID(writer->tsdb->pVnode), lino, code); + } + return code; +} + +static int32_t tsdbSnapWriteTimeSeriesData(STsdbSnapWriter* writer, SSnapDataHdr* hdr) { + int32_t code = 0; + int32_t lino = 0; + + SBlockData blockData[1] = {0}; + + code = tDecmprBlockData(hdr->data, hdr->size - sizeof(*hdr), blockData, writer->aBuf); + TSDB_CHECK_CODE(code, lino, _exit); + + int32_t fid = tsdbKeyFid(blockData->aTSKEY[0], writer->minutes, writer->precision); + if (!writer->ctx->fsetWriteBegin || fid != writer->ctx->fid) { + code = tsdbSnapWriteFileSetEnd(writer); + TSDB_CHECK_CODE(code, lino, _exit); + + code = tsdbSnapWriteFileSetBegin(writer, fid); TSDB_CHECK_CODE(code, lino, _exit); } + for (int32_t i = 0; i < blockData->nRow; ++i) { + SRowInfo rowInfo = { + .suid = blockData->suid, + .uid = blockData->uid ? 
blockData->uid : blockData->aUid[i], + .row = tsdbRowFromBlockData(blockData, i), + }; + + code = tsdbSnapWriteTimeSeriesRow(writer, &rowInfo); + TSDB_CHECK_CODE(code, lino, _exit); + } + +_exit: + if (code) { + TSDB_ERROR_LOG(TD_VID(writer->tsdb->pVnode), lino, code); + } else { + tsdbDebug("vgId:%d %s done, suid:%" PRId64 " uid:%" PRId64 " nRow:%d", TD_VID(writer->tsdb->pVnode), __func__, + blockData->suid, blockData->uid, blockData->nRow); + } + tBlockDataDestroy(blockData); + return code; +} + +static int32_t tsdbSnapWriteDecmprTombBlock(SSnapDataHdr* hdr, STombBlock* tombBlock) { + int32_t code = 0; + int32_t lino = 0; + + int64_t size = hdr->size - sizeof(*hdr); + ASSERT(size % TOMB_RECORD_ELEM_NUM == 0); + size = size / TOMB_RECORD_ELEM_NUM; + ASSERT(size % sizeof(int64_t) == 0); + + int64_t* data = (int64_t*)hdr->data; + for (int32_t i = 0; i < TOMB_RECORD_ELEM_NUM; ++i) { + code = TARRAY2_APPEND_BATCH(&tombBlock->dataArr[i], hdr->data + i * size, size / sizeof(int64_t)); + TSDB_CHECK_CODE(code, lino, _exit); + } + +_exit: + return code; +} + +static int32_t tsdbSnapWriteTombData(STsdbSnapWriter* writer, SSnapDataHdr* hdr) { + int32_t code = 0; + int32_t lino = 0; + + STombRecord record; + STombBlock tombBlock[1] = {0}; + + code = tsdbSnapWriteDecmprTombBlock(hdr, tombBlock); + TSDB_CHECK_CODE(code, lino, _exit); + + tTombBlockGet(tombBlock, 0, &record); + int32_t fid = tsdbKeyFid(record.skey, writer->minutes, writer->precision); + if (!writer->ctx->fsetWriteBegin || fid != writer->ctx->fid) { + code = tsdbSnapWriteFileSetEnd(writer); + TSDB_CHECK_CODE(code, lino, _exit); + + code = tsdbSnapWriteFileSetBegin(writer, fid); + TSDB_CHECK_CODE(code, lino, _exit); + } + + if (writer->ctx->hasData) { + SRowInfo row = { + .suid = INT64_MAX, + .uid = INT64_MAX, + }; + + code = tsdbSnapWriteTimeSeriesRow(writer, &row); + TSDB_CHECK_CODE(code, lino, _exit); + } + + ASSERT(writer->ctx->hasData == false); + + for (int32_t i = 0; i < TOMB_BLOCK_SIZE(tombBlock); ++i) { + 
tTombBlockGet(tombBlock, i, &record); + + code = tsdbSnapWriteTombRecord(writer, &record); + TSDB_CHECK_CODE(code, lino, _exit); + } + + tTombBlockDestroy(tombBlock); + +_exit: + if (code) { + TSDB_ERROR_LOG(TD_VID(writer->tsdb->pVnode), lino, code); + } + return code; +} + +int32_t tsdbSnapWriterOpen(STsdb* pTsdb, int64_t sver, int64_t ever, STsdbSnapWriter** writer) { + int32_t code = 0; + int32_t lino = 0; + + writer[0] = taosMemoryCalloc(1, sizeof(*writer[0])); + if (writer[0] == NULL) return TSDB_CODE_OUT_OF_MEMORY; + + writer[0]->tsdb = pTsdb; + writer[0]->sver = sver; + writer[0]->ever = ever; + writer[0]->minutes = pTsdb->keepCfg.days; + writer[0]->precision = pTsdb->keepCfg.precision; + writer[0]->minRow = pTsdb->pVnode->config.tsdbCfg.minRows; + writer[0]->maxRow = pTsdb->pVnode->config.tsdbCfg.maxRows; + writer[0]->commitID = tsdbFSAllocEid(pTsdb->pFS); + writer[0]->szPage = pTsdb->pVnode->config.tsdbPageSize; + writer[0]->compactVersion = INT64_MAX; + writer[0]->now = taosGetTimestampMs(); + + code = tsdbFSCreateCopySnapshot(pTsdb->pFS, &writer[0]->fsetArr); + TSDB_CHECK_CODE(code, lino, _exit); + + tsdbFSDisableBgTask(pTsdb->pFS); + _exit: if (code) { tsdbError("vgId:%d %s failed at line %d since %s", TD_VID(pTsdb->pVnode), __func__, lino, tstrerror(code)); - } else { - tsdbDebug("vgId:%d %s done", TD_VID(pTsdb->pVnode), __func__); - } - return code; -} - -static int32_t tsdbSnapWriteDelDataEnd(STsdbSnapWriter* pWriter) { - int32_t code = 0; - int32_t lino = 0; - - STsdb* pTsdb = pWriter->pTsdb; - - // end remaining table with NULL data - code = tsdbSnapWriteDelTableData(pWriter, NULL, NULL, 0); - TSDB_CHECK_CODE(code, lino, _exit); - - // update file-level info - code = tsdbWriteDelIdx(pWriter->pDelFWriter, pWriter->aDelIdx); - TSDB_CHECK_CODE(code, lino, _exit); - - code = tsdbUpdateDelFileHdr(pWriter->pDelFWriter); - TSDB_CHECK_CODE(code, lino, _exit); - - code = tsdbFSUpsertDelFile(&pWriter->fs, &pWriter->pDelFWriter->fDel); - TSDB_CHECK_CODE(code, 
lino, _exit); - - code = tsdbDelFWriterClose(&pWriter->pDelFWriter, 1); - TSDB_CHECK_CODE(code, lino, _exit); - - if (pWriter->pDelFReader) { - code = tsdbDelFReaderClose(&pWriter->pDelFReader); - TSDB_CHECK_CODE(code, lino, _exit); - } - - if (pWriter->pTIter) { - tsdbCloseDataIter2(pWriter->pTIter); - pWriter->pTIter = NULL; - } - -_exit: - if (code) { - tsdbError("vgId:%d %s failed at line %d since %s", TD_VID(pTsdb->pVnode), __func__, lino, tstrerror(code)); - } else { - tsdbInfo("vgId:%d %s done", TD_VID(pTsdb->pVnode), __func__); - } - return code; -} - -static int32_t tsdbSnapWriteDelData(STsdbSnapWriter* pWriter, SSnapDataHdr* pHdr) { - int32_t code = 0; - int32_t lino = 0; - - STsdb* pTsdb = pWriter->pTsdb; - - // start to write del data if need - if (pWriter->pDelFWriter == NULL) { - code = tsdbSnapWriteDelDataStart(pWriter); - TSDB_CHECK_CODE(code, lino, _exit); - } - - // do write del data - code = tsdbSnapWriteDelTableData(pWriter, (TABLEID*)pHdr->data, pHdr->data + sizeof(TABLEID), - pHdr->size - sizeof(TABLEID)); - TSDB_CHECK_CODE(code, lino, _exit); - -_exit: - if (code) { - tsdbError("vgId:%d %s failed since %s", TD_VID(pTsdb->pVnode), __func__, tstrerror(code)); - } else { - tsdbTrace("vgId:%d %s done", TD_VID(pTsdb->pVnode), __func__); - } - return code; -} - -// APIs -int32_t tsdbSnapWriterOpen(STsdb* pTsdb, int64_t sver, int64_t ever, STsdbSnapWriter** ppWriter) { - int32_t code = 0; - int32_t lino = 0; - - // alloc - STsdbSnapWriter* pWriter = (STsdbSnapWriter*)taosMemoryCalloc(1, sizeof(*pWriter)); - if (pWriter == NULL) { - code = TSDB_CODE_OUT_OF_MEMORY; - TSDB_CHECK_CODE(code, lino, _exit); - } - pWriter->pTsdb = pTsdb; - pWriter->sver = sver; - pWriter->ever = ever; - pWriter->minutes = pTsdb->keepCfg.days; - pWriter->precision = pTsdb->keepCfg.precision; - pWriter->minRow = pTsdb->pVnode->config.tsdbCfg.minRows; - pWriter->maxRow = pTsdb->pVnode->config.tsdbCfg.maxRows; - pWriter->cmprAlg = pTsdb->pVnode->config.tsdbCfg.compression; - 
pWriter->commitID = pTsdb->pVnode->state.commitID; - - code = tsdbFSCopy(pTsdb, &pWriter->fs); - TSDB_CHECK_CODE(code, lino, _exit); - - // SNAP_DATA_TSDB - code = tBlockDataCreate(&pWriter->inData); - TSDB_CHECK_CODE(code, lino, _exit); - - pWriter->fid = INT32_MIN; - - code = tBlockDataCreate(&pWriter->bData); - TSDB_CHECK_CODE(code, lino, _exit); - - code = tBlockDataCreate(&pWriter->sData); - TSDB_CHECK_CODE(code, lino, _exit); - - // SNAP_DATA_DEL - -_exit: - if (code) { - tsdbError("vgId:%d %s failed at line %d since %s", TD_VID(pTsdb->pVnode), __func__, lino, tstrerror(code)); - if (pWriter) { - tBlockDataDestroy(&pWriter->sData); - tBlockDataDestroy(&pWriter->bData); - tBlockDataDestroy(&pWriter->inData); - tsdbFSDestroy(&pWriter->fs); - taosMemoryFree(pWriter); - pWriter = NULL; - } } else { tsdbInfo("vgId:%d %s done, sver:%" PRId64 " ever:%" PRId64, TD_VID(pTsdb->pVnode), __func__, sver, ever); } - *ppWriter = pWriter; return code; } -int32_t tsdbSnapWriterPrepareClose(STsdbSnapWriter* pWriter) { +int32_t tsdbSnapWriterPrepareClose(STsdbSnapWriter* writer) { int32_t code = 0; int32_t lino = 0; - if (pWriter->pDataFWriter) { - code = tsdbSnapWriteFileDataEnd(pWriter); - TSDB_CHECK_CODE(code, lino, _exit); - } + code = tsdbSnapWriteFileSetEnd(writer); + TSDB_CHECK_CODE(code, lino, _exit); - if (pWriter->pDelFWriter) { - code = tsdbSnapWriteDelDataEnd(pWriter); - TSDB_CHECK_CODE(code, lino, _exit); - } - - code = tsdbFSPrepareCommit(pWriter->pTsdb, &pWriter->fs); + code = tsdbFSEditBegin(writer->tsdb->pFS, writer->fopArr, TSDB_FEDIT_COMMIT); TSDB_CHECK_CODE(code, lino, _exit); _exit: if (code) { - tsdbError("vgId:%d %s failed at line %d since %s", TD_VID(pWriter->pTsdb->pVnode), __func__, lino, tstrerror(code)); + TSDB_ERROR_LOG(TD_VID(writer->tsdb->pVnode), lino, code); } else { - tsdbDebug("vgId:%d %s done", TD_VID(pWriter->pTsdb->pVnode), __func__); + tsdbDebug("vgId:%d %s done", TD_VID(writer->tsdb->pVnode), __func__); } return code; } -int32_t 
tsdbSnapWriterClose(STsdbSnapWriter** ppWriter, int8_t rollback) { +int32_t tsdbSnapWriterClose(STsdbSnapWriter** writer, int8_t rollback) { + if (writer[0] == NULL) return 0; + int32_t code = 0; int32_t lino = 0; - STsdbSnapWriter* pWriter = *ppWriter; - STsdb* pTsdb = pWriter->pTsdb; + STsdb* tsdb = writer[0]->tsdb; if (rollback) { - tsdbRollbackCommit(pWriter->pTsdb); + code = tsdbFSEditAbort(writer[0]->tsdb->pFS); + TSDB_CHECK_CODE(code, lino, _exit); } else { - // lock - taosThreadRwlockWrlock(&pTsdb->rwLock); + taosThreadRwlockWrlock(&writer[0]->tsdb->rwLock); - code = tsdbFSCommit(pWriter->pTsdb); + code = tsdbFSEditCommit(writer[0]->tsdb->pFS); if (code) { - taosThreadRwlockUnlock(&pTsdb->rwLock); + taosThreadRwlockUnlock(&writer[0]->tsdb->rwLock); TSDB_CHECK_CODE(code, lino, _exit); } - // unlock - taosThreadRwlockUnlock(&pTsdb->rwLock); + taosThreadRwlockUnlock(&writer[0]->tsdb->rwLock); + } + tsdbFSEnableBgTask(tsdb->pFS); + + tsdbIterMergerClose(&writer[0]->ctx->tombIterMerger); + tsdbIterMergerClose(&writer[0]->ctx->dataIterMerger); + TARRAY2_DESTROY(writer[0]->ctx->tombIterArr, tsdbIterClose); + TARRAY2_DESTROY(writer[0]->ctx->dataIterArr, tsdbIterClose); + TARRAY2_DESTROY(writer[0]->ctx->sttReaderArr, tsdbSttFileReaderClose); + tsdbDataFileReaderClose(&writer[0]->ctx->dataReader); + + TARRAY2_DESTROY(writer[0]->fopArr, NULL); + tsdbFSDestroyCopySnapshot(&writer[0]->fsetArr); + + for (int32_t i = 0; i < ARRAY_SIZE(writer[0]->aBuf); ++i) { + tFree(writer[0]->aBuf[i]); } - // SNAP_DATA_DEL - taosArrayDestroy(pWriter->aDelData); - taosArrayDestroy(pWriter->aDelIdx); - - // SNAP_DATA_TSDB - tBlockDataDestroy(&pWriter->sData); - tBlockDataDestroy(&pWriter->bData); - taosArrayDestroy(pWriter->aSttBlk); - tMapDataClear(&pWriter->mDataBlk); - taosArrayDestroy(pWriter->aBlockIdx); - tDestroyTSchema(pWriter->skmTable.pTSchema); - tBlockDataDestroy(&pWriter->inData); - - for (int32_t iBuf = 0; iBuf < sizeof(pWriter->aBuf) / sizeof(uint8_t*); iBuf++) { - 
tFree(pWriter->aBuf[iBuf]); - } - tsdbFSDestroy(&pWriter->fs); - taosMemoryFree(pWriter); - *ppWriter = NULL; + taosMemoryFree(writer[0]); + writer[0] = NULL; _exit: if (code) { - tsdbError("vgId:%d %s failed at line %d since %s", TD_VID(pTsdb->pVnode), __func__, lino, tstrerror(code)); + TSDB_ERROR_LOG(TD_VID(tsdb->pVnode), lino, code); } else { - tsdbInfo("vgId:%d %s done", TD_VID(pTsdb->pVnode), __func__); + tsdbInfo("vgId:%d %s done", TD_VID(tsdb->pVnode), __func__); } return code; } -int32_t tsdbSnapWrite(STsdbSnapWriter* pWriter, SSnapDataHdr* pHdr) { +int32_t tsdbSnapWrite(STsdbSnapWriter* writer, SSnapDataHdr* hdr) { int32_t code = 0; int32_t lino = 0; - if (pHdr->type == SNAP_DATA_TSDB) { - code = tsdbSnapWriteTimeSeriesData(pWriter, pHdr); + if (hdr->type == SNAP_DATA_TSDB) { + code = tsdbSnapWriteTimeSeriesData(writer, hdr); TSDB_CHECK_CODE(code, lino, _exit); - goto _exit; - } else if (pWriter->pDataFWriter) { - code = tsdbSnapWriteFileDataEnd(pWriter); + } else if (hdr->type == SNAP_DATA_DEL) { + code = tsdbSnapWriteTombData(writer, hdr); TSDB_CHECK_CODE(code, lino, _exit); - } - - if (pHdr->type == SNAP_DATA_DEL) { - code = tsdbSnapWriteDelData(pWriter, pHdr); - TSDB_CHECK_CODE(code, lino, _exit); - goto _exit; + } else { + ASSERT(0); } _exit: if (code) { tsdbError("vgId:%d %s failed at line %d since %s, type:%d index:%" PRId64 " size:%" PRId64, - TD_VID(pWriter->pTsdb->pVnode), __func__, lino, tstrerror(code), pHdr->type, pHdr->index, pHdr->size); + TD_VID(writer->tsdb->pVnode), __func__, lino, tstrerror(code), hdr->type, hdr->index, hdr->size); } else { - tsdbDebug("vgId:%d %s done, type:%d index:%" PRId64 " size:%" PRId64, TD_VID(pWriter->pTsdb->pVnode), __func__, - pHdr->type, pHdr->index, pHdr->size); + tsdbDebug("vgId:%d %s done, type:%d index:%" PRId64 " size:%" PRId64, TD_VID(writer->tsdb->pVnode), __func__, + hdr->type, hdr->index, hdr->size); } return code; } diff --git a/source/dnode/vnode/src/tsdb/tsdbSttFileRW.c 
b/source/dnode/vnode/src/tsdb/tsdbSttFileRW.c new file mode 100644 index 0000000000..db8d14d228 --- /dev/null +++ b/source/dnode/vnode/src/tsdb/tsdbSttFileRW.c @@ -0,0 +1,982 @@ +/* + * Copyright (c) 2019 TAOS Data, Inc. + * + * This program is free software: you can use, redistribute, and/or modify + * it under the terms of the GNU Affero General Public License, version 3 + * or later ("AGPL"), as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see . + */ + +#include "tsdbSttFileRW.h" + +// SSttFReader ============================================================ +struct SSttFileReader { + SSttFileReaderConfig config[1]; + STsdbFD *fd; + SSttFooter footer[1]; + struct { + bool sttBlkLoaded; + bool statisBlkLoaded; + bool tombBlkLoaded; + } ctx[1]; + TSttBlkArray sttBlkArray[1]; + TStatisBlkArray statisBlkArray[1]; + TTombBlkArray tombBlkArray[1]; + uint8_t *bufArr[5]; +}; + +// SSttFileReader +int32_t tsdbSttFileReaderOpen(const char *fname, const SSttFileReaderConfig *config, SSttFileReader **reader) { + int32_t code = 0; + int32_t lino = 0; + + reader[0] = taosMemoryCalloc(1, sizeof(*reader[0])); + if (reader[0] == NULL) return TSDB_CODE_OUT_OF_MEMORY; + + reader[0]->config[0] = config[0]; + if (reader[0]->config->bufArr == NULL) { + reader[0]->config->bufArr = reader[0]->bufArr; + } + + // open file + if (fname) { + code = tsdbOpenFile(fname, config->szPage, TD_FILE_READ, &reader[0]->fd); + TSDB_CHECK_CODE(code, lino, _exit); + } else { + char fname1[TSDB_FILENAME_LEN]; + tsdbTFileName(config->tsdb, config->file, fname1); + code = tsdbOpenFile(fname1, config->szPage, TD_FILE_READ, &reader[0]->fd); + TSDB_CHECK_CODE(code, lino, _exit); + } + + // // open each 
segment reader + int64_t offset = config->file->size - sizeof(SSttFooter); + ASSERT(offset >= TSDB_FHDR_SIZE); + + code = tsdbReadFile(reader[0]->fd, offset, (uint8_t *)(reader[0]->footer), sizeof(SSttFooter)); + TSDB_CHECK_CODE(code, lino, _exit); + +_exit: + if (code) { + TSDB_ERROR_LOG(TD_VID(config->tsdb->pVnode), lino, code); + tsdbSttFileReaderClose(reader); + } + return code; +} + +int32_t tsdbSttFileReaderClose(SSttFileReader **reader) { + if (reader[0]) { + for (int32_t i = 0; i < ARRAY_SIZE(reader[0]->bufArr); ++i) { + tFree(reader[0]->bufArr[i]); + } + tsdbCloseFile(&reader[0]->fd); + TARRAY2_DESTROY(reader[0]->tombBlkArray, NULL); + TARRAY2_DESTROY(reader[0]->statisBlkArray, NULL); + TARRAY2_DESTROY(reader[0]->sttBlkArray, NULL); + taosMemoryFree(reader[0]); + reader[0] = NULL; + } + return 0; +} + +// SSttFSegReader +int32_t tsdbSttFileReadStatisBlk(SSttFileReader *reader, const TStatisBlkArray **statisBlkArray) { + if (!reader->ctx->statisBlkLoaded) { + if (reader->footer->statisBlkPtr->size > 0) { + ASSERT(reader->footer->statisBlkPtr->size % sizeof(SStatisBlk) == 0); + + int32_t size = reader->footer->statisBlkPtr->size / sizeof(SStatisBlk); + void *data = taosMemoryMalloc(reader->footer->statisBlkPtr->size); + if (!data) return TSDB_CODE_OUT_OF_MEMORY; + + int32_t code = + tsdbReadFile(reader->fd, reader->footer->statisBlkPtr->offset, data, reader->footer->statisBlkPtr->size); + if (code) { + taosMemoryFree(data); + return code; + } + + TARRAY2_INIT_EX(reader->statisBlkArray, size, size, data); + } else { + TARRAY2_INIT(reader->statisBlkArray); + } + + reader->ctx->statisBlkLoaded = true; + } + + statisBlkArray[0] = reader->statisBlkArray; + return 0; +} + +int32_t tsdbSttFileReadTombBlk(SSttFileReader *reader, const TTombBlkArray **tombBlkArray) { + if (!reader->ctx->tombBlkLoaded) { + if (reader->footer->tombBlkPtr->size > 0) { + ASSERT(reader->footer->tombBlkPtr->size % sizeof(STombBlk) == 0); + + int32_t size = reader->footer->tombBlkPtr->size 
/ sizeof(STombBlk); + void *data = taosMemoryMalloc(reader->footer->tombBlkPtr->size); + if (!data) return TSDB_CODE_OUT_OF_MEMORY; + + int32_t code = + tsdbReadFile(reader->fd, reader->footer->tombBlkPtr->offset, data, reader->footer->tombBlkPtr->size); + if (code) { + taosMemoryFree(data); + return code; + } + + TARRAY2_INIT_EX(reader->tombBlkArray, size, size, data); + } else { + TARRAY2_INIT(reader->tombBlkArray); + } + + reader->ctx->tombBlkLoaded = true; + } + + tombBlkArray[0] = reader->tombBlkArray; + return 0; +} + +int32_t tsdbSttFileReadSttBlk(SSttFileReader *reader, const TSttBlkArray **sttBlkArray) { + if (!reader->ctx->sttBlkLoaded) { + if (reader->footer->sttBlkPtr->size > 0) { + ASSERT(reader->footer->sttBlkPtr->size % sizeof(SSttBlk) == 0); + + int32_t size = reader->footer->sttBlkPtr->size / sizeof(SSttBlk); + void *data = taosMemoryMalloc(reader->footer->sttBlkPtr->size); + if (!data) return TSDB_CODE_OUT_OF_MEMORY; + + int32_t code = tsdbReadFile(reader->fd, reader->footer->sttBlkPtr->offset, data, reader->footer->sttBlkPtr->size); + if (code) { + taosMemoryFree(data); + return code; + } + + TARRAY2_INIT_EX(reader->sttBlkArray, size, size, data); + } else { + TARRAY2_INIT(reader->sttBlkArray); + } + + reader->ctx->sttBlkLoaded = true; + } + + sttBlkArray[0] = reader->sttBlkArray; + return 0; +} + +int32_t tsdbSttFileReadBlockData(SSttFileReader *reader, const SSttBlk *sttBlk, SBlockData *bData) { + int32_t code = 0; + int32_t lino = 0; + + code = tRealloc(&reader->config->bufArr[0], sttBlk->bInfo.szBlock); + TSDB_CHECK_CODE(code, lino, _exit); + + code = tsdbReadFile(reader->fd, sttBlk->bInfo.offset, reader->config->bufArr[0], sttBlk->bInfo.szBlock); + TSDB_CHECK_CODE(code, lino, _exit); + + code = tDecmprBlockData(reader->config->bufArr[0], sttBlk->bInfo.szBlock, bData, &reader->config->bufArr[1]); + TSDB_CHECK_CODE(code, lino, _exit); + +_exit: + if (code) { + TSDB_ERROR_LOG(TD_VID(reader->config->tsdb->pVnode), lino, code); + } + return code; 
+} + +int32_t tsdbSttFileReadBlockDataByColumn(SSttFileReader *reader, const SSttBlk *sttBlk, SBlockData *bData, + STSchema *pTSchema, int16_t cids[], int32_t ncid) { + int32_t code = 0; + int32_t lino = 0; + + TABLEID tbid = {.suid = sttBlk->suid}; + if (tbid.suid == 0) { + tbid.uid = sttBlk->minUid; + } else { + tbid.uid = 0; + } + + code = tBlockDataInit(bData, &tbid, pTSchema, cids, ncid); + TSDB_CHECK_CODE(code, lino, _exit); + + // uid + version + tskey + code = tRealloc(&reader->config->bufArr[0], sttBlk->bInfo.szKey); + TSDB_CHECK_CODE(code, lino, _exit); + + code = tsdbReadFile(reader->fd, sttBlk->bInfo.offset, reader->config->bufArr[0], sttBlk->bInfo.szKey); + TSDB_CHECK_CODE(code, lino, _exit); + + // hdr + SDiskDataHdr hdr[1]; + int32_t size = 0; + + size += tGetDiskDataHdr(reader->config->bufArr[0] + size, hdr); + + ASSERT(hdr->delimiter == TSDB_FILE_DLMT); + + bData->nRow = hdr->nRow; + bData->uid = hdr->uid; + + // uid + if (hdr->uid == 0) { + ASSERT(hdr->szUid); + code = tsdbDecmprData(reader->config->bufArr[0] + size, hdr->szUid, TSDB_DATA_TYPE_BIGINT, hdr->cmprAlg, + (uint8_t **)&bData->aUid, sizeof(int64_t) * hdr->nRow, &reader->config->bufArr[1]); + TSDB_CHECK_CODE(code, lino, _exit); + } else { + ASSERT(hdr->szUid == 0); + } + size += hdr->szUid; + + // version + code = tsdbDecmprData(reader->config->bufArr[0] + size, hdr->szVer, TSDB_DATA_TYPE_BIGINT, hdr->cmprAlg, + (uint8_t **)&bData->aVersion, sizeof(int64_t) * hdr->nRow, &reader->config->bufArr[1]); + TSDB_CHECK_CODE(code, lino, _exit); + size += hdr->szVer; + + // ts + code = tsdbDecmprData(reader->config->bufArr[0] + size, hdr->szKey, TSDB_DATA_TYPE_TIMESTAMP, hdr->cmprAlg, + (uint8_t **)&bData->aTSKEY, sizeof(TSKEY) * hdr->nRow, &reader->config->bufArr[1]); + TSDB_CHECK_CODE(code, lino, _exit); + size += hdr->szKey; + + ASSERT(size == sttBlk->bInfo.szKey); + + // other columns + if (bData->nColData > 0) { + if (hdr->szBlkCol > 0) { + code = tRealloc(&reader->config->bufArr[0], 
hdr->szBlkCol); + TSDB_CHECK_CODE(code, lino, _exit); + + code = tsdbReadFile(reader->fd, sttBlk->bInfo.offset + sttBlk->bInfo.szKey, reader->config->bufArr[0], + hdr->szBlkCol); + TSDB_CHECK_CODE(code, lino, _exit); + } + + SBlockCol bc[1] = {{.cid = 0}}; + SBlockCol *blockCol = bc; + + size = 0; + for (int32_t i = 0; i < bData->nColData; i++) { + SColData *colData = tBlockDataGetColDataByIdx(bData, i); + + while (blockCol && blockCol->cid < colData->cid) { + if (size < hdr->szBlkCol) { + size += tGetBlockCol(reader->config->bufArr[0] + size, blockCol); + } else { + ASSERT(size == hdr->szBlkCol); + blockCol = NULL; + } + } + + if (blockCol == NULL || blockCol->cid > colData->cid) { + for (int32_t iRow = 0; iRow < hdr->nRow; iRow++) { + code = tColDataAppendValue(colData, &COL_VAL_NONE(colData->cid, colData->type)); + TSDB_CHECK_CODE(code, lino, _exit); + } + } else { + ASSERT(blockCol->type == colData->type); + ASSERT(blockCol->flag && blockCol->flag != HAS_NONE); + + if (blockCol->flag == HAS_NULL) { + for (int32_t iRow = 0; iRow < hdr->nRow; iRow++) { + code = tColDataAppendValue(colData, &COL_VAL_NULL(blockCol->cid, blockCol->type)); + TSDB_CHECK_CODE(code, lino, _exit); + } + } else { + int32_t size1 = blockCol->szBitmap + blockCol->szOffset + blockCol->szValue; + + code = tRealloc(&reader->config->bufArr[1], size1); + TSDB_CHECK_CODE(code, lino, _exit); + + code = tsdbReadFile(reader->fd, sttBlk->bInfo.offset + sttBlk->bInfo.szKey + hdr->szBlkCol + blockCol->offset, + reader->config->bufArr[1], size1); + TSDB_CHECK_CODE(code, lino, _exit); + + code = tsdbDecmprColData(reader->config->bufArr[1], blockCol, hdr->cmprAlg, hdr->nRow, colData, + &reader->config->bufArr[2]); + TSDB_CHECK_CODE(code, lino, _exit); + } + } + } + } + +_exit: + if (code) { + TSDB_ERROR_LOG(TD_VID(reader->config->tsdb->pVnode), lino, code); + } + return code; +} + +int32_t tsdbSttFileReadTombBlock(SSttFileReader *reader, const STombBlk *tombBlk, STombBlock *tombBlock) { + int32_t code = 
0; + int32_t lino = 0; + + code = tRealloc(&reader->config->bufArr[0], tombBlk->dp->size); + TSDB_CHECK_CODE(code, lino, _exit); + + code = tsdbReadFile(reader->fd, tombBlk->dp->offset, reader->config->bufArr[0], tombBlk->dp->size); + if (code) TSDB_CHECK_CODE(code, lino, _exit); + + int64_t size = 0; + tTombBlockClear(tombBlock); + for (int32_t i = 0; i < ARRAY_SIZE(tombBlock->dataArr); ++i) { + code = tsdbDecmprData(reader->config->bufArr[0] + size, tombBlk->size[i], TSDB_DATA_TYPE_BIGINT, tombBlk->cmprAlg, + &reader->config->bufArr[1], sizeof(int64_t) * tombBlk->numRec, &reader->config->bufArr[2]); + TSDB_CHECK_CODE(code, lino, _exit); + + code = TARRAY2_APPEND_BATCH(&tombBlock->dataArr[i], reader->config->bufArr[1], tombBlk->numRec); + TSDB_CHECK_CODE(code, lino, _exit); + + size += tombBlk->size[i]; + } + + ASSERT(size == tombBlk->dp->size); +_exit: + if (code) { + TSDB_ERROR_LOG(TD_VID(reader->config->tsdb->pVnode), lino, code); + } + return code; +} + +int32_t tsdbSttFileReadStatisBlock(SSttFileReader *reader, const SStatisBlk *statisBlk, STbStatisBlock *statisBlock) { + int32_t code = 0; + int32_t lino = 0; + + code = tRealloc(&reader->config->bufArr[0], statisBlk->dp->size); + TSDB_CHECK_CODE(code, lino, _exit); + + code = tsdbReadFile(reader->fd, statisBlk->dp->offset, reader->config->bufArr[0], statisBlk->dp->size); + TSDB_CHECK_CODE(code, lino, _exit); + + int64_t size = 0; + tStatisBlockClear(statisBlock); + for (int32_t i = 0; i < ARRAY_SIZE(statisBlock->dataArr); ++i) { + code = + tsdbDecmprData(reader->config->bufArr[0] + size, statisBlk->size[i], TSDB_DATA_TYPE_BIGINT, statisBlk->cmprAlg, + &reader->config->bufArr[1], sizeof(int64_t) * statisBlk->numRec, &reader->config->bufArr[2]); + TSDB_CHECK_CODE(code, lino, _exit); + + code = TARRAY2_APPEND_BATCH(statisBlock->dataArr + i, reader->config->bufArr[1], statisBlk->numRec); + TSDB_CHECK_CODE(code, lino, _exit); + + size += statisBlk->size[i]; + } + + ASSERT(size == statisBlk->dp->size); + +_exit: + 
if (code) { + TSDB_ERROR_LOG(TD_VID(reader->config->tsdb->pVnode), lino, code); + } + return code; +} + +// SSttFWriter ============================================================ +struct SSttFileWriter { + SSttFileWriterConfig config[1]; + struct { + bool opened; + TABLEID tbid[1]; + } ctx[1]; + // file + STsdbFD *fd; + STFile file[1]; + // data + SSttFooter footer[1]; + TTombBlkArray tombBlkArray[1]; + TSttBlkArray sttBlkArray[1]; + TStatisBlkArray statisBlkArray[1]; + STombBlock tombBlock[1]; + STbStatisBlock staticBlock[1]; + SBlockData blockData[1]; + // helper data + SSkmInfo skmTb[1]; + SSkmInfo skmRow[1]; + uint8_t *bufArr[5]; +}; + +int32_t tsdbFileDoWriteBlockData(STsdbFD *fd, SBlockData *blockData, int8_t cmprAlg, int64_t *fileSize, + TSttBlkArray *sttBlkArray, uint8_t **bufArr) { + if (blockData->nRow == 0) return 0; + + int32_t code = 0; + + SSttBlk sttBlk[1] = {{ + .suid = blockData->suid, + .minUid = blockData->uid ? blockData->uid : blockData->aUid[0], + .maxUid = blockData->uid ? 
blockData->uid : blockData->aUid[blockData->nRow - 1], + .minKey = blockData->aTSKEY[0], + .maxKey = blockData->aTSKEY[0], + .minVer = blockData->aVersion[0], + .maxVer = blockData->aVersion[0], + .nRow = blockData->nRow, + }}; + + for (int32_t iRow = 1; iRow < blockData->nRow; iRow++) { + if (sttBlk->minKey > blockData->aTSKEY[iRow]) sttBlk->minKey = blockData->aTSKEY[iRow]; + if (sttBlk->maxKey < blockData->aTSKEY[iRow]) sttBlk->maxKey = blockData->aTSKEY[iRow]; + if (sttBlk->minVer > blockData->aVersion[iRow]) sttBlk->minVer = blockData->aVersion[iRow]; + if (sttBlk->maxVer < blockData->aVersion[iRow]) sttBlk->maxVer = blockData->aVersion[iRow]; + } + + int32_t sizeArr[5] = {0}; + code = tCmprBlockData(blockData, cmprAlg, NULL, NULL, bufArr, sizeArr); + if (code) return code; + + sttBlk->bInfo.offset = *fileSize; + sttBlk->bInfo.szKey = sizeArr[2] + sizeArr[3]; + sttBlk->bInfo.szBlock = sizeArr[0] + sizeArr[1] + sttBlk->bInfo.szKey; + + for (int32_t i = 3; i >= 0; i--) { + if (sizeArr[i]) { + code = tsdbWriteFile(fd, *fileSize, bufArr[i], sizeArr[i]); + if (code) return code; + *fileSize += sizeArr[i]; + } + } + + code = TARRAY2_APPEND_PTR(sttBlkArray, sttBlk); + if (code) return code; + + tBlockDataClear(blockData); + + return 0; +} + +static int32_t tsdbSttFileDoWriteBlockData(SSttFileWriter *writer) { + if (writer->blockData->nRow == 0) return 0; + + int32_t code = 0; + int32_t lino = 0; + + code = tsdbFileDoWriteBlockData(writer->fd, writer->blockData, writer->config->cmprAlg, &writer->file->size, + writer->sttBlkArray, writer->config->bufArr); + TSDB_CHECK_CODE(code, lino, _exit); + +_exit: + if (code) { + TSDB_ERROR_LOG(TD_VID(writer->config->tsdb->pVnode), lino, code); + } + return code; +} + +static int32_t tsdbSttFileDoWriteStatisBlock(SSttFileWriter *writer) { + if (STATIS_BLOCK_SIZE(writer->staticBlock) == 0) return 0; + + int32_t code = 0; + int32_t lino = 0; + + SStatisBlk statisBlk[1] = {{ + .dp[0] = + { + .offset = writer->file->size, + .size = 0, 
+ }, + .minTbid = + { + .suid = TARRAY2_FIRST(writer->staticBlock->suid), + .uid = TARRAY2_FIRST(writer->staticBlock->uid), + }, + .maxTbid = + { + .suid = TARRAY2_LAST(writer->staticBlock->suid), + .uid = TARRAY2_LAST(writer->staticBlock->uid), + }, + .numRec = STATIS_BLOCK_SIZE(writer->staticBlock), + .cmprAlg = writer->config->cmprAlg, + }}; + + for (int32_t i = 0; i < STATIS_RECORD_NUM_ELEM; i++) { + code = tsdbCmprData((uint8_t *)TARRAY2_DATA(writer->staticBlock->dataArr + i), + TARRAY2_DATA_LEN(&writer->staticBlock->dataArr[i]), TSDB_DATA_TYPE_BIGINT, statisBlk->cmprAlg, + &writer->config->bufArr[0], 0, &statisBlk->size[i], &writer->config->bufArr[1]); + TSDB_CHECK_CODE(code, lino, _exit); + + code = tsdbWriteFile(writer->fd, writer->file->size, writer->config->bufArr[0], statisBlk->size[i]); + TSDB_CHECK_CODE(code, lino, _exit); + + statisBlk->dp->size += statisBlk->size[i]; + writer->file->size += statisBlk->size[i]; + } + + code = TARRAY2_APPEND_PTR(writer->statisBlkArray, statisBlk); + TSDB_CHECK_CODE(code, lino, _exit); + + tStatisBlockClear(writer->staticBlock); + +_exit: + if (code) { + TSDB_ERROR_LOG(TD_VID(writer->config->tsdb->pVnode), lino, code); + } + return code; +} + +int32_t tsdbFileWriteTombBlock(STsdbFD *fd, STombBlock *tombBlock, int8_t cmprAlg, int64_t *fileSize, + TTombBlkArray *tombBlkArray, uint8_t **bufArr) { + int32_t code; + + if (TOMB_BLOCK_SIZE(tombBlock) == 0) return 0; + + STombBlk tombBlk[1] = {{ + .dp[0] = + { + .offset = *fileSize, + .size = 0, + }, + .minTbid = + { + .suid = TARRAY2_FIRST(tombBlock->suid), + .uid = TARRAY2_FIRST(tombBlock->uid), + }, + .maxTbid = + { + .suid = TARRAY2_LAST(tombBlock->suid), + .uid = TARRAY2_LAST(tombBlock->uid), + }, + .minVer = TARRAY2_FIRST(tombBlock->version), + .maxVer = TARRAY2_FIRST(tombBlock->version), + .numRec = TOMB_BLOCK_SIZE(tombBlock), + .cmprAlg = cmprAlg, + }}; + + for (int32_t i = 1; i < TOMB_BLOCK_SIZE(tombBlock); i++) { + if (tombBlk->minVer > TARRAY2_GET(tombBlock->version, 
i)) { + tombBlk->minVer = TARRAY2_GET(tombBlock->version, i); + } + if (tombBlk->maxVer < TARRAY2_GET(tombBlock->version, i)) { + tombBlk->maxVer = TARRAY2_GET(tombBlock->version, i); + } + } + + for (int32_t i = 0; i < ARRAY_SIZE(tombBlock->dataArr); i++) { + code = tsdbCmprData((uint8_t *)TARRAY2_DATA(&tombBlock->dataArr[i]), TARRAY2_DATA_LEN(&tombBlock->dataArr[i]), + TSDB_DATA_TYPE_BIGINT, tombBlk->cmprAlg, &bufArr[0], 0, &tombBlk->size[i], &bufArr[1]); + if (code) return code; + + code = tsdbWriteFile(fd, *fileSize, bufArr[0], tombBlk->size[i]); + if (code) return code; + + tombBlk->dp->size += tombBlk->size[i]; + *fileSize += tombBlk->size[i]; + } + + code = TARRAY2_APPEND_PTR(tombBlkArray, tombBlk); + if (code) return code; + + tTombBlockClear(tombBlock); + return 0; +} + +static int32_t tsdbSttFileDoWriteTombBlock(SSttFileWriter *writer) { + if (TOMB_BLOCK_SIZE(writer->tombBlock) == 0) return 0; + + int32_t code = 0; + int32_t lino = 0; + + code = tsdbFileWriteTombBlock(writer->fd, writer->tombBlock, writer->config->cmprAlg, &writer->file->size, + writer->tombBlkArray, writer->config->bufArr); + TSDB_CHECK_CODE(code, lino, _exit); + +_exit: + if (code) { + TSDB_ERROR_LOG(TD_VID(writer->config->tsdb->pVnode), lino, code); + } + return code; +} + +int32_t tsdbFileWriteSttBlk(STsdbFD *fd, const TSttBlkArray *sttBlkArray, SFDataPtr *ptr, int64_t *fileSize) { + ptr->size = TARRAY2_DATA_LEN(sttBlkArray); + if (ptr->size > 0) { + ptr->offset = *fileSize; + + int32_t code = tsdbWriteFile(fd, *fileSize, (const uint8_t *)TARRAY2_DATA(sttBlkArray), ptr->size); + if (code) { + return code; + } + + *fileSize += ptr->size; + } + return 0; +} + +static int32_t tsdbSttFileDoWriteSttBlk(SSttFileWriter *writer) { + int32_t code = 0; + int32_t lino; + + code = tsdbFileWriteSttBlk(writer->fd, writer->sttBlkArray, writer->footer->sttBlkPtr, &writer->file->size); + TSDB_CHECK_CODE(code, lino, _exit); + +_exit: + if (code) { + TSDB_ERROR_LOG(TD_VID(writer->config->tsdb->pVnode), 
lino, code); + } + return code; +} + +static int32_t tsdbSttFileDoWriteStatisBlk(SSttFileWriter *writer) { + int32_t code = 0; + int32_t lino; + + writer->footer->statisBlkPtr->size = TARRAY2_DATA_LEN(writer->statisBlkArray); + if (writer->footer->statisBlkPtr->size) { + writer->footer->statisBlkPtr->offset = writer->file->size; + code = tsdbWriteFile(writer->fd, writer->file->size, (const uint8_t *)TARRAY2_DATA(writer->statisBlkArray), + writer->footer->statisBlkPtr->size); + TSDB_CHECK_CODE(code, lino, _exit); + writer->file->size += writer->footer->statisBlkPtr->size; + } + +_exit: + if (code) { + TSDB_ERROR_LOG(TD_VID(writer->config->tsdb->pVnode), lino, code); + } + return code; +} + +int32_t tsdbFileWriteTombBlk(STsdbFD *fd, const TTombBlkArray *tombBlkArray, SFDataPtr *ptr, int64_t *fileSize) { + ptr->size = TARRAY2_DATA_LEN(tombBlkArray); + if (ptr->size > 0) { + ptr->offset = *fileSize; + + int32_t code = tsdbWriteFile(fd, *fileSize, (const uint8_t *)TARRAY2_DATA(tombBlkArray), ptr->size); + if (code) { + return code; + } + + *fileSize += ptr->size; + } + return 0; +} + +static int32_t tsdbSttFileDoWriteTombBlk(SSttFileWriter *writer) { + int32_t code = 0; + int32_t lino = 0; + + code = tsdbFileWriteTombBlk(writer->fd, writer->tombBlkArray, writer->footer->tombBlkPtr, &writer->file->size); + TSDB_CHECK_CODE(code, lino, _exit); + +_exit: + if (code) { + TSDB_ERROR_LOG(TD_VID(writer->config->tsdb->pVnode), lino, code); + } + return code; +} + +int32_t tsdbFileWriteSttFooter(STsdbFD *fd, const SSttFooter *footer, int64_t *fileSize) { + int32_t code = tsdbWriteFile(fd, *fileSize, (const uint8_t *)footer, sizeof(*footer)); + if (code) return code; + *fileSize += sizeof(*footer); + return 0; +} + +static int32_t tsdbSttFileDoWriteFooter(SSttFileWriter *writer) { + return tsdbFileWriteSttFooter(writer->fd, writer->footer, &writer->file->size); +} + +static int32_t tsdbSttFWriterDoOpen(SSttFileWriter *writer) { + int32_t code = 0; + int32_t lino = 0; + + // set + 
if (!writer->config->skmTb) writer->config->skmTb = writer->skmTb; + if (!writer->config->skmRow) writer->config->skmRow = writer->skmRow; + if (!writer->config->bufArr) writer->config->bufArr = writer->bufArr; + + writer->file[0] = (STFile){ + .type = TSDB_FTYPE_STT, + .did = writer->config->did, + .fid = writer->config->fid, + .cid = writer->config->cid, + .size = 0, + .stt[0] = + { + .level = writer->config->level, + }, + }; + + // open file + int32_t flag = TD_FILE_READ | TD_FILE_WRITE | TD_FILE_CREATE | TD_FILE_TRUNC; + char fname[TSDB_FILENAME_LEN]; + + tsdbTFileName(writer->config->tsdb, writer->file, fname); + code = tsdbOpenFile(fname, writer->config->szPage, flag, &writer->fd); + TSDB_CHECK_CODE(code, lino, _exit); + + uint8_t hdr[TSDB_FHDR_SIZE] = {0}; + code = tsdbWriteFile(writer->fd, 0, hdr, sizeof(hdr)); + TSDB_CHECK_CODE(code, lino, _exit); + writer->file->size += sizeof(hdr); + + writer->ctx->opened = true; + +_exit: + if (code) { + TSDB_ERROR_LOG(TD_VID(writer->config->tsdb->pVnode), lino, code); + } + return code; +} + +static void tsdbSttFWriterDoClose(SSttFileWriter *writer) { + ASSERT(writer->fd == NULL); + + for (int32_t i = 0; i < ARRAY_SIZE(writer->bufArr); ++i) { + tFree(writer->bufArr[i]); + } + tDestroyTSchema(writer->skmRow->pTSchema); + tDestroyTSchema(writer->skmTb->pTSchema); + tTombBlockDestroy(writer->tombBlock); + tStatisBlockDestroy(writer->staticBlock); + tBlockDataDestroy(writer->blockData); + TARRAY2_DESTROY(writer->tombBlkArray, NULL); + TARRAY2_DESTROY(writer->statisBlkArray, NULL); + TARRAY2_DESTROY(writer->sttBlkArray, NULL); +} + +static int32_t tsdbSttFileDoUpdateHeader(SSttFileWriter *writer) { + // TODO + return 0; +} + +static int32_t tsdbSttFWriterCloseCommit(SSttFileWriter *writer, TFileOpArray *opArray) { + int32_t lino; + int32_t code; + + code = tsdbSttFileDoWriteBlockData(writer); + TSDB_CHECK_CODE(code, lino, _exit); + + code = tsdbSttFileDoWriteStatisBlock(writer); + TSDB_CHECK_CODE(code, lino, _exit); + + 
code = tsdbSttFileDoWriteTombBlock(writer); + TSDB_CHECK_CODE(code, lino, _exit); + + code = tsdbSttFileDoWriteSttBlk(writer); + TSDB_CHECK_CODE(code, lino, _exit); + + code = tsdbSttFileDoWriteStatisBlk(writer); + TSDB_CHECK_CODE(code, lino, _exit); + + code = tsdbSttFileDoWriteTombBlk(writer); + TSDB_CHECK_CODE(code, lino, _exit); + + code = tsdbSttFileDoWriteFooter(writer); + TSDB_CHECK_CODE(code, lino, _exit); + + code = tsdbSttFileDoUpdateHeader(writer); + TSDB_CHECK_CODE(code, lino, _exit); + + code = tsdbFsyncFile(writer->fd); + TSDB_CHECK_CODE(code, lino, _exit); + + tsdbCloseFile(&writer->fd); + + ASSERT(writer->file->size > 0); + STFileOp op = (STFileOp){ + .optype = TSDB_FOP_CREATE, + .fid = writer->config->fid, + .nf = writer->file[0], + }; + + code = TARRAY2_APPEND(opArray, op); + TSDB_CHECK_CODE(code, lino, _exit); + +_exit: + if (code) { + TSDB_ERROR_LOG(TD_VID(writer->config->tsdb->pVnode), lino, code); + } + return code; +} + +static int32_t tsdbSttFWriterCloseAbort(SSttFileWriter *writer) { + char fname[TSDB_FILENAME_LEN]; + tsdbTFileName(writer->config->tsdb, writer->file, fname); + tsdbCloseFile(&writer->fd); + taosRemoveFile(fname); + return 0; +} + +int32_t tsdbSttFileWriterOpen(const SSttFileWriterConfig *config, SSttFileWriter **writer) { + writer[0] = taosMemoryCalloc(1, sizeof(*writer[0])); + if (writer[0] == NULL) return TSDB_CODE_OUT_OF_MEMORY; + + writer[0]->config[0] = config[0]; + writer[0]->ctx->opened = false; + return 0; +} + +int32_t tsdbSttFileWriterClose(SSttFileWriter **writer, int8_t abort, TFileOpArray *opArray) { + int32_t code = 0; + int32_t lino = 0; + + if (writer[0]->ctx->opened) { + if (abort) { + code = tsdbSttFWriterCloseAbort(writer[0]); + TSDB_CHECK_CODE(code, lino, _exit); + } else { + code = tsdbSttFWriterCloseCommit(writer[0], opArray); + TSDB_CHECK_CODE(code, lino, _exit); + } + tsdbSttFWriterDoClose(writer[0]); + } + taosMemoryFree(writer[0]); + writer[0] = NULL; + +_exit: + if (code) { + 
TSDB_ERROR_LOG(TD_VID(writer[0]->config->tsdb->pVnode), lino, code); + } + return code; +} + +int32_t tsdbSttFileWriteRow(SSttFileWriter *writer, SRowInfo *row) { + int32_t code = 0; + int32_t lino = 0; + + if (!writer->ctx->opened) { + code = tsdbSttFWriterDoOpen(writer); + TSDB_CHECK_CODE(code, lino, _exit); + } + + if (!TABLE_SAME_SCHEMA(row->suid, row->uid, writer->ctx->tbid->suid, writer->ctx->tbid->uid)) { + code = tsdbSttFileDoWriteBlockData(writer); + TSDB_CHECK_CODE(code, lino, _exit); + + code = tsdbUpdateSkmTb(writer->config->tsdb, (TABLEID *)row, writer->config->skmTb); + TSDB_CHECK_CODE(code, lino, _exit); + + TABLEID id = {.suid = row->suid, .uid = row->suid ? 0 : row->uid}; + code = tBlockDataInit(writer->blockData, &id, writer->config->skmTb->pTSchema, NULL, 0); + TSDB_CHECK_CODE(code, lino, _exit); + } + + TSDBKEY key[1]; + if (row->row.type == TSDBROW_ROW_FMT) { + key->ts = row->row.pTSRow->ts; + key->version = row->row.version; + } else { + key->ts = row->row.pBlockData->aTSKEY[row->row.iRow]; + key->version = row->row.pBlockData->aVersion[row->row.iRow]; + } + + if (writer->ctx->tbid->uid != row->uid) { + writer->ctx->tbid->suid = row->suid; + writer->ctx->tbid->uid = row->uid; + + if (STATIS_BLOCK_SIZE(writer->staticBlock) >= writer->config->maxRow) { + code = tsdbSttFileDoWriteStatisBlock(writer); + TSDB_CHECK_CODE(code, lino, _exit); + } + + STbStatisRecord record = { + .suid = row->suid, + .uid = row->uid, + .firstKey = key->ts, + .lastKey = key->ts, + .count = 1, + }; + code = tStatisBlockPut(writer->staticBlock, &record); + TSDB_CHECK_CODE(code, lino, _exit); + } else { + ASSERT(key->ts >= TARRAY2_LAST(writer->staticBlock->lastKey)); + + if (key->ts > TARRAY2_LAST(writer->staticBlock->lastKey)) { + TARRAY2_LAST(writer->staticBlock->count)++; + TARRAY2_LAST(writer->staticBlock->lastKey) = key->ts; + } + } + + if (row->row.type == TSDBROW_ROW_FMT) { + code = tsdbUpdateSkmRow(writer->config->tsdb, writer->ctx->tbid, // + 
TSDBROW_SVERSION(&row->row), writer->config->skmRow); + TSDB_CHECK_CODE(code, lino, _exit); + } + + // row to col conversion + if (key->version <= writer->config->compactVersion // + && writer->blockData->nRow > 0 // + && writer->blockData->aTSKEY[writer->blockData->nRow - 1] == key->ts // + && (writer->blockData->uid // + ? writer->blockData->uid // + : writer->blockData->aUid[writer->blockData->nRow - 1]) == row->uid // + ) { + code = tBlockDataUpdateRow(writer->blockData, &row->row, writer->config->skmRow->pTSchema); + TSDB_CHECK_CODE(code, lino, _exit); + } else { + if (writer->blockData->nRow >= writer->config->maxRow) { + code = tsdbSttFileDoWriteBlockData(writer); + TSDB_CHECK_CODE(code, lino, _exit); + } + + code = tBlockDataAppendRow(writer->blockData, &row->row, writer->config->skmRow->pTSchema, row->uid); + TSDB_CHECK_CODE(code, lino, _exit); + } + +_exit: + if (code) { + TSDB_ERROR_LOG(TD_VID(writer->config->tsdb->pVnode), lino, code); + } + return code; +} + +int32_t tsdbSttFileWriteBlockData(SSttFileWriter *writer, SBlockData *bdata) { + int32_t code = 0; + int32_t lino = 0; + + SRowInfo row[1]; + row->suid = bdata->suid; + for (int32_t i = 0; i < bdata->nRow; i++) { + row->uid = bdata->uid ? 
bdata->uid : bdata->aUid[i]; + row->row = tsdbRowFromBlockData(bdata, i); + + code = tsdbSttFileWriteRow(writer, row); + TSDB_CHECK_CODE(code, lino, _exit); + } + +_exit: + if (code) { + TSDB_ERROR_LOG(TD_VID(writer->config->tsdb->pVnode), lino, code); + } + return code; +} + +int32_t tsdbSttFileWriteTombRecord(SSttFileWriter *writer, const STombRecord *record) { + int32_t code; + int32_t lino; + + if (!writer->ctx->opened) { + code = tsdbSttFWriterDoOpen(writer); + TSDB_CHECK_CODE(code, lino, _exit); + } else { + if (writer->blockData->nRow > 0) { + code = tsdbSttFileDoWriteBlockData(writer); + TSDB_CHECK_CODE(code, lino, _exit); + } + + if (STATIS_BLOCK_SIZE(writer->staticBlock) > 0) { + code = tsdbSttFileDoWriteStatisBlock(writer); + TSDB_CHECK_CODE(code, lino, _exit); + } + } + + code = tTombBlockPut(writer->tombBlock, record); + TSDB_CHECK_CODE(code, lino, _exit); + + if (TOMB_BLOCK_SIZE(writer->tombBlock) >= writer->config->maxRow) { + code = tsdbSttFileDoWriteTombBlock(writer); + TSDB_CHECK_CODE(code, lino, _exit); + } + +_exit: + if (code) { + TSDB_ERROR_LOG(TD_VID(writer->config->tsdb->pVnode), lino, code); + } + return code; +} + +bool tsdbSttFileWriterIsOpened(SSttFileWriter *writer) { return writer->ctx->opened; } \ No newline at end of file diff --git a/source/dnode/vnode/src/tsdb/tsdbSttFileRW.h b/source/dnode/vnode/src/tsdb/tsdbSttFileRW.h new file mode 100644 index 0000000000..242b55795c --- /dev/null +++ b/source/dnode/vnode/src/tsdb/tsdbSttFileRW.h @@ -0,0 +1,93 @@ +/* + * Copyright (c) 2019 TAOS Data, Inc. + * + * This program is free software: you can use, redistribute, and/or modify + * it under the terms of the GNU Affero General Public License, version 3 + * or later ("AGPL"), as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. 
+ * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see . + */ + +#include "tsdbFS2.h" +#include "tsdbUtil2.h" + +#ifndef _TSDB_STT_FILE_RW_H +#define _TSDB_STT_FILE_RW_H + +#ifdef __cplusplus +extern "C" { +#endif + +typedef TARRAY2(SSttBlk) TSttBlkArray; +typedef TARRAY2(SStatisBlk) TStatisBlkArray; + +typedef struct { + SFDataPtr sttBlkPtr[1]; + SFDataPtr statisBlkPtr[1]; + SFDataPtr tombBlkPtr[1]; + SFDataPtr rsrvd[2]; +} SSttFooter; + +// SSttFileReader ========================================== +typedef struct SSttFileReader SSttFileReader; +typedef struct SSttFileReaderConfig SSttFileReaderConfig; +typedef TARRAY2(SSttFileReader *) TSttFileReaderArray; + +// SSttFileReader +int32_t tsdbSttFileReaderOpen(const char *fname, const SSttFileReaderConfig *config, SSttFileReader **reader); +int32_t tsdbSttFileReaderClose(SSttFileReader **reader); + +// SSttSegReader +int32_t tsdbSttFileReadSttBlk(SSttFileReader *reader, const TSttBlkArray **sttBlkArray); +int32_t tsdbSttFileReadStatisBlk(SSttFileReader *reader, const TStatisBlkArray **statisBlkArray); +int32_t tsdbSttFileReadTombBlk(SSttFileReader *reader, const TTombBlkArray **delBlkArray); + +int32_t tsdbSttFileReadBlockData(SSttFileReader *reader, const SSttBlk *sttBlk, SBlockData *bData); +int32_t tsdbSttFileReadBlockDataByColumn(SSttFileReader *reader, const SSttBlk *sttBlk, SBlockData *bData, + STSchema *pTSchema, int16_t cids[], int32_t ncid); +int32_t tsdbSttFileReadStatisBlock(SSttFileReader *reader, const SStatisBlk *statisBlk, STbStatisBlock *sData); +int32_t tsdbSttFileReadTombBlock(SSttFileReader *reader, const STombBlk *delBlk, STombBlock *dData); + +struct SSttFileReaderConfig { + STsdb *tsdb; + int32_t szPage; + STFile file[1]; + uint8_t **bufArr; +}; + +// SSttFileWriter ========================================== +typedef struct SSttFileWriter SSttFileWriter; +typedef struct SSttFileWriterConfig SSttFileWriterConfig; + +int32_t 
tsdbSttFileWriterOpen(const SSttFileWriterConfig *config, SSttFileWriter **writer); +int32_t tsdbSttFileWriterClose(SSttFileWriter **writer, int8_t abort, TFileOpArray *opArray); +int32_t tsdbSttFileWriteRow(SSttFileWriter *writer, SRowInfo *row); +int32_t tsdbSttFileWriteBlockData(SSttFileWriter *writer, SBlockData *pBlockData); +int32_t tsdbSttFileWriteTombRecord(SSttFileWriter *writer, const STombRecord *record); +bool tsdbSttFileWriterIsOpened(SSttFileWriter *writer); + +struct SSttFileWriterConfig { + STsdb *tsdb; + int32_t maxRow; + int32_t szPage; + int8_t cmprAlg; + int64_t compactVersion; + SDiskID did; + int32_t fid; + int64_t cid; + int32_t level; + SSkmInfo *skmTb; + SSkmInfo *skmRow; + uint8_t **bufArr; +}; + +#ifdef __cplusplus +} +#endif + +#endif /*_TSDB_STT_FILE_RW_H*/ \ No newline at end of file diff --git a/source/dnode/vnode/src/tsdb/tsdbUpgrade.c b/source/dnode/vnode/src/tsdb/tsdbUpgrade.c new file mode 100644 index 0000000000..59ba51c371 --- /dev/null +++ b/source/dnode/vnode/src/tsdb/tsdbUpgrade.c @@ -0,0 +1,640 @@ +/* + * Copyright (c) 2019 TAOS Data, Inc. + * + * This program is free software: you can use, redistribute, and/or modify + * it under the terms of the GNU Affero General Public License, version 3 + * or later ("AGPL"), as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see . 
+ */ + +#include "tsdbUpgrade.h" + +// old +extern void tsdbGetCurrentFName(STsdb *pTsdb, char *current, char *current_t); +extern int32_t tsdbReadDataBlockEx(SDataFReader *pReader, SDataBlk *pDataBlk, SBlockData *pBlockData); + +// new +extern int32_t save_fs(const TFileSetArray *arr, const char *fname); +extern int32_t current_fname(STsdb *pTsdb, char *fname, EFCurrentT ftype); +extern int32_t tsdbFileWriteBrinBlock(STsdbFD *fd, SBrinBlock *brinBlock, int8_t cmprAlg, int64_t *fileSize, + TBrinBlkArray *brinBlkArray, uint8_t **bufArr); +extern int32_t tsdbFileWriteBrinBlk(STsdbFD *fd, TBrinBlkArray *brinBlkArray, SFDataPtr *ptr, int64_t *fileSize); +extern int32_t tsdbFileWriteHeadFooter(STsdbFD *fd, int64_t *fileSize, const SHeadFooter *footer); +extern int32_t tsdbSttLvlInit(int32_t level, SSttLvl **lvl); +extern int32_t tsdbSttLvlClear(SSttLvl **lvl); +extern int32_t tsdbFileWriteSttBlk(STsdbFD *fd, const TSttBlkArray *sttBlkArray, SFDataPtr *ptr, int64_t *fileSize); +extern int32_t tsdbFileWriteSttFooter(STsdbFD *fd, const SSttFooter *footer, int64_t *fileSize); +extern int32_t tsdbFileWriteTombBlock(STsdbFD *fd, STombBlock *tombBlock, int8_t cmprAlg, int64_t *fileSize, + TTombBlkArray *tombBlkArray, uint8_t **bufArr); +extern int32_t tsdbFileWriteTombBlk(STsdbFD *fd, const TTombBlkArray *tombBlkArray, SFDataPtr *ptr, int64_t *fileSize); +extern int32_t tsdbFileWriteTombFooter(STsdbFD *fd, const STombFooter *footer, int64_t *fileSize); + +static int32_t tsdbUpgradeHead(STsdb *tsdb, SDFileSet *pDFileSet, SDataFReader *reader, STFileSet *fset) { + int32_t code = 0; + int32_t lino = 0; + + // init + struct { + // config + int32_t maxRow; + int8_t cmprAlg; + int32_t szPage; + uint8_t *bufArr[8]; + // reader + SArray *aBlockIdx; + SMapData mDataBlk[1]; + SBlockData blockData[1]; + // writer + STsdbFD *fd; + SBrinBlock brinBlock[1]; + TBrinBlkArray brinBlkArray[1]; + SHeadFooter footer[1]; + } ctx[1] = {{ + .maxRow = tsdb->pVnode->config.tsdbCfg.maxRows, + .cmprAlg 
= tsdb->pVnode->config.tsdbCfg.compression, + .szPage = tsdb->pVnode->config.tsdbPageSize, + }}; + + // read SBlockIdx array + if ((ctx->aBlockIdx = taosArrayInit(0, sizeof(SBlockIdx))) == NULL) { + code = TSDB_CODE_OUT_OF_MEMORY; + TSDB_CHECK_CODE(code, lino, _exit); + } + + code = tsdbReadBlockIdx(reader, ctx->aBlockIdx); + TSDB_CHECK_CODE(code, lino, _exit); + + if (taosArrayGetSize(ctx->aBlockIdx) > 0) { + // init/open file fd + STFile file = { + .type = TSDB_FTYPE_HEAD, + .did = pDFileSet->diskId, + .fid = fset->fid, + .cid = pDFileSet->pHeadF->commitID, + .size = pDFileSet->pHeadF->size, + }; + + code = tsdbTFileObjInit(tsdb, &file, &fset->farr[TSDB_FTYPE_HEAD]); + TSDB_CHECK_CODE(code, lino, _exit); + + // open fd + char fname[TSDB_FILENAME_LEN]; + tsdbTFileName(tsdb, &file, fname); + + code = tsdbOpenFile(fname, ctx->szPage, TD_FILE_READ | TD_FILE_WRITE, &ctx->fd); + TSDB_CHECK_CODE(code, lino, _exit); + + // convert + for (int32_t iBlockIdx = 0; iBlockIdx < taosArrayGetSize(ctx->aBlockIdx); ++iBlockIdx) { + SBlockIdx *pBlockIdx = taosArrayGet(ctx->aBlockIdx, iBlockIdx); + + code = tsdbReadDataBlk(reader, pBlockIdx, ctx->mDataBlk); + TSDB_CHECK_CODE(code, lino, _exit); + + for (int32_t iDataBlk = 0; iDataBlk < ctx->mDataBlk->nItem; ++iDataBlk) { + SDataBlk dataBlk[1]; + tMapDataGetItemByIdx(ctx->mDataBlk, iDataBlk, dataBlk, tGetDataBlk); + + SBrinRecord record = { + .suid = pBlockIdx->suid, + .uid = pBlockIdx->uid, + .firstKey = dataBlk->minKey.ts, + .firstKeyVer = dataBlk->minKey.version, + .lastKey = dataBlk->maxKey.ts, + .lastKeyVer = dataBlk->maxKey.version, + .minVer = dataBlk->minVer, + .maxVer = dataBlk->maxVer, + .blockOffset = dataBlk->aSubBlock->offset, + .smaOffset = dataBlk->smaInfo.offset, + .blockSize = dataBlk->aSubBlock->szBlock, + .blockKeySize = dataBlk->aSubBlock->szKey, + .smaSize = dataBlk->smaInfo.size, + .numRow = dataBlk->nRow, + .count = dataBlk->nRow, + }; + + if (dataBlk->hasDup) { + code = tsdbReadDataBlockEx(reader, dataBlk, 
ctx->blockData); + TSDB_CHECK_CODE(code, lino, _exit); + + record.count = 1; + for (int32_t i = 1; i < ctx->blockData->nRow; ++i) { + if (ctx->blockData->aTSKEY[i] != ctx->blockData->aTSKEY[i - 1]) { + record.count++; + } + } + } + + code = tBrinBlockPut(ctx->brinBlock, &record); + TSDB_CHECK_CODE(code, lino, _exit); + + if (BRIN_BLOCK_SIZE(ctx->brinBlock) >= ctx->maxRow) { + code = tsdbFileWriteBrinBlock(ctx->fd, ctx->brinBlock, ctx->cmprAlg, &fset->farr[TSDB_FTYPE_HEAD]->f->size, + ctx->brinBlkArray, ctx->bufArr); + TSDB_CHECK_CODE(code, lino, _exit); + } + } + } + + if (BRIN_BLOCK_SIZE(ctx->brinBlock) > 0) { + code = tsdbFileWriteBrinBlock(ctx->fd, ctx->brinBlock, ctx->cmprAlg, &fset->farr[TSDB_FTYPE_HEAD]->f->size, + ctx->brinBlkArray, ctx->bufArr); + TSDB_CHECK_CODE(code, lino, _exit); + } + + code = tsdbFileWriteBrinBlk(ctx->fd, ctx->brinBlkArray, ctx->footer->brinBlkPtr, + &fset->farr[TSDB_FTYPE_HEAD]->f->size); + TSDB_CHECK_CODE(code, lino, _exit); + + code = tsdbFileWriteHeadFooter(ctx->fd, &fset->farr[TSDB_FTYPE_HEAD]->f->size, ctx->footer); + TSDB_CHECK_CODE(code, lino, _exit); + + code = tsdbFsyncFile(ctx->fd); + TSDB_CHECK_CODE(code, lino, _exit); + + tsdbCloseFile(&ctx->fd); + } + +_exit: + if (code) { + TSDB_ERROR_LOG(TD_VID(tsdb->pVnode), lino, code); + } + TARRAY2_DESTROY(ctx->brinBlkArray, NULL); + tBrinBlockDestroy(ctx->brinBlock); + tBlockDataDestroy(ctx->blockData); + tMapDataClear(ctx->mDataBlk); + taosArrayDestroy(ctx->aBlockIdx); + for (int32_t i = 0; i < ARRAY_SIZE(ctx->bufArr); ++i) { + tFree(ctx->bufArr[i]); + } + return code; +} + +static int32_t tsdbUpgradeData(STsdb *tsdb, SDFileSet *pDFileSet, SDataFReader *reader, STFileSet *fset) { + int32_t code = 0; + int32_t lino = 0; + + if (fset->farr[TSDB_FTYPE_HEAD] == NULL) { + return 0; + } + + STFile file = { + .type = TSDB_FTYPE_DATA, + .did = pDFileSet->diskId, + .fid = fset->fid, + .cid = pDFileSet->pDataF->commitID, + .size = pDFileSet->pDataF->size, + }; + + code = 
tsdbTFileObjInit(tsdb, &file, &fset->farr[TSDB_FTYPE_DATA]); + TSDB_CHECK_CODE(code, lino, _exit); + +_exit: + if (code) { + TSDB_ERROR_LOG(TD_VID(tsdb->pVnode), lino, code); + } + return code; +} + +static int32_t tsdbUpgradeSma(STsdb *tsdb, SDFileSet *pDFileSet, SDataFReader *reader, STFileSet *fset) { + int32_t code = 0; + int32_t lino = 0; + + if (fset->farr[TSDB_FTYPE_HEAD] == NULL) { + return 0; + } + + STFile file = { + .type = TSDB_FTYPE_SMA, + .did = pDFileSet->diskId, + .fid = fset->fid, + .cid = pDFileSet->pSmaF->commitID, + .size = pDFileSet->pSmaF->size, + }; + + code = tsdbTFileObjInit(tsdb, &file, &fset->farr[TSDB_FTYPE_SMA]); + TSDB_CHECK_CODE(code, lino, _exit); + +_exit: + if (code) { + TSDB_ERROR_LOG(TD_VID(tsdb->pVnode), lino, code); + } + return code; +} + +static int32_t tsdbUpgradeSttFile(STsdb *tsdb, SDFileSet *pDFileSet, SDataFReader *reader, STFileSet *fset, + int32_t iStt, SSttLvl *lvl) { + int32_t code = 0; + int32_t lino = 0; + + SArray *aSttBlk = taosArrayInit(0, sizeof(SSttBlk)); + if (aSttBlk == NULL) { + code = TSDB_CODE_OUT_OF_MEMORY; + TSDB_CHECK_CODE(code, lino, _exit); + } + + code = tsdbReadSttBlk(reader, iStt, aSttBlk); + TSDB_CHECK_CODE(code, lino, _exit); + + if (taosArrayGetSize(aSttBlk) > 0) { + SSttFile *pSttF = pDFileSet->aSttF[iStt]; + STFileObj *fobj; + struct { + int32_t szPage; + // writer + STsdbFD *fd; + TSttBlkArray sttBlkArray[1]; + SSttFooter footer[1]; + } ctx[1] = {{ + .szPage = tsdb->pVnode->config.tsdbPageSize, + }}; + + STFile file = { + .type = TSDB_FTYPE_STT, + .did = pDFileSet->diskId, + .fid = fset->fid, + .cid = pSttF->commitID, + .size = pSttF->size, + }; + code = tsdbTFileObjInit(tsdb, &file, &fobj); + TSDB_CHECK_CODE(code, lino, _exit1); + + code = tsdbOpenFile(fobj->fname, ctx->szPage, TD_FILE_READ | TD_FILE_WRITE, &ctx->fd); + TSDB_CHECK_CODE(code, lino, _exit1); + + for (int32_t iSttBlk = 0; iSttBlk < taosArrayGetSize(aSttBlk); iSttBlk++) { + code = TARRAY2_APPEND_PTR(ctx->sttBlkArray, (SSttBlk 
*)taosArrayGet(aSttBlk, iSttBlk)); + TSDB_CHECK_CODE(code, lino, _exit1); + } + + code = tsdbFileWriteSttBlk(ctx->fd, ctx->sttBlkArray, ctx->footer->sttBlkPtr, &fobj->f->size); + TSDB_CHECK_CODE(code, lino, _exit1); + + code = tsdbFileWriteSttFooter(ctx->fd, ctx->footer, &fobj->f->size); + TSDB_CHECK_CODE(code, lino, _exit1); + + code = tsdbFsyncFile(ctx->fd); + TSDB_CHECK_CODE(code, lino, _exit1); + + tsdbCloseFile(&ctx->fd); + + code = TARRAY2_APPEND(lvl->fobjArr, fobj); + TSDB_CHECK_CODE(code, lino, _exit1); + + _exit1: + TARRAY2_DESTROY(ctx->sttBlkArray, NULL); + } + +_exit: + if (code) { + TSDB_ERROR_LOG(TD_VID(tsdb->pVnode), lino, code); + } + taosArrayDestroy(aSttBlk); + return code; +} + +static int32_t tsdbUpgradeStt(STsdb *tsdb, SDFileSet *pDFileSet, SDataFReader *reader, STFileSet *fset) { + int32_t code = 0; + int32_t lino = 0; + + if (pDFileSet->nSttF == 0) { + return 0; + } + + SSttLvl *lvl; + code = tsdbSttLvlInit(0, &lvl); + TSDB_CHECK_CODE(code, lino, _exit); + + for (int32_t iStt = 0; iStt < pDFileSet->nSttF; ++iStt) { + code = tsdbUpgradeSttFile(tsdb, pDFileSet, reader, fset, iStt, lvl); + TSDB_CHECK_CODE(code, lino, _exit); + } + + if (TARRAY2_SIZE(lvl->fobjArr) > 0) { + code = TARRAY2_APPEND(fset->lvlArr, lvl); + TSDB_CHECK_CODE(code, lino, _exit); + } else { + tsdbSttLvlClear(&lvl); + } + +_exit: + if (code) { + TSDB_ERROR_LOG(TD_VID(tsdb->pVnode), lino, code); + } + return code; +} + +static int32_t tsdbUpgradeFileSet(STsdb *tsdb, SDFileSet *pDFileSet, TFileSetArray *fileSetArray) { + int32_t code = 0; + int32_t lino = 0; + + SDataFReader *reader; + STFileSet *fset; + + code = tsdbTFileSetInit(pDFileSet->fid, &fset); + TSDB_CHECK_CODE(code, lino, _exit); + + code = tsdbDataFReaderOpen(&reader, tsdb, pDFileSet); + TSDB_CHECK_CODE(code, lino, _exit); + + // .head + code = tsdbUpgradeHead(tsdb, pDFileSet, reader, fset); + TSDB_CHECK_CODE(code, lino, _exit); + + // .data + code = tsdbUpgradeData(tsdb, pDFileSet, reader, fset); + 
TSDB_CHECK_CODE(code, lino, _exit); + + // .sma + code = tsdbUpgradeSma(tsdb, pDFileSet, reader, fset); + TSDB_CHECK_CODE(code, lino, _exit); + + // .stt + if (pDFileSet->nSttF > 0) { + code = tsdbUpgradeStt(tsdb, pDFileSet, reader, fset); + TSDB_CHECK_CODE(code, lino, _exit); + } + + tsdbDataFReaderClose(&reader); + + code = TARRAY2_APPEND(fileSetArray, fset); + TSDB_CHECK_CODE(code, lino, _exit); + +_exit: + if (code) { + TSDB_ERROR_LOG(TD_VID(tsdb->pVnode), lino, code); + } + return code; +} + +static int32_t tsdbUpgradeOpenTombFile(STsdb *tsdb, STFileSet *fset, STsdbFD **fd, STFileObj **fobj, bool *toStt) { + int32_t code = 0; + int32_t lino = 0; + + if (TARRAY2_SIZE(fset->lvlArr) == 0) { // to .tomb file + *toStt = false; + + STFile file = { + .type = TSDB_FTYPE_TOMB, + .did = fset->farr[TSDB_FTYPE_HEAD]->f->did, + .fid = fset->fid, + .cid = 0, + .size = 0, + }; + + code = tsdbTFileObjInit(tsdb, &file, fobj); + TSDB_CHECK_CODE(code, lino, _exit); + + fset->farr[TSDB_FTYPE_TOMB] = *fobj; + } else { // to .stt file + *toStt = true; + SSttLvl *lvl = TARRAY2_GET(fset->lvlArr, 0); + + STFile file = { + .type = TSDB_FTYPE_STT, + .did = TARRAY2_GET(lvl->fobjArr, 0)->f->did, + .fid = fset->fid, + .cid = 0, + .size = 0, + }; + + code = tsdbTFileObjInit(tsdb, &file, fobj); + TSDB_CHECK_CODE(code, lino, _exit); + + code = TARRAY2_APPEND(lvl->fobjArr, fobj[0]); + TSDB_CHECK_CODE(code, lino, _exit); + } + + char fname[TSDB_FILENAME_LEN] = {0}; + code = tsdbOpenFile(fobj[0]->fname, tsdb->pVnode->config.tsdbPageSize, + TD_FILE_READ | TD_FILE_WRITE | TD_FILE_TRUNC | TD_FILE_CREATE, fd); + TSDB_CHECK_CODE(code, lino, _exit); + + uint8_t hdr[TSDB_FHDR_SIZE] = {0}; + code = tsdbWriteFile(fd[0], 0, hdr, TSDB_FHDR_SIZE); + TSDB_CHECK_CODE(code, lino, _exit); + fobj[0]->f->size += TSDB_FHDR_SIZE; + +_exit: + if (code) { + TSDB_ERROR_LOG(TD_VID(tsdb->pVnode), lino, code); + } + return code; +} + +static int32_t tsdbDumpTombDataToFSet(STsdb *tsdb, SDelFReader *reader, SArray 
*aDelIdx, STFileSet *fset) { + int32_t code = 0; + int32_t lino = 0; + + struct { + // context + bool toStt; + int8_t cmprAlg; + int32_t maxRow; + int64_t minKey; + int64_t maxKey; + uint8_t *bufArr[8]; + // reader + SArray *aDelData; + // writer + STsdbFD *fd; + STFileObj *fobj; + STombBlock tombBlock[1]; + TTombBlkArray tombBlkArray[1]; + STombFooter tombFooter[1]; + SSttFooter sttFooter[1]; + } ctx[1] = {{ + .maxRow = tsdb->pVnode->config.tsdbCfg.maxRows, + .cmprAlg = tsdb->pVnode->config.tsdbCfg.compression, + }}; + + tsdbFidKeyRange(fset->fid, tsdb->keepCfg.days, tsdb->keepCfg.precision, &ctx->minKey, &ctx->maxKey); + + if ((ctx->aDelData = taosArrayInit(0, sizeof(SDelData))) == NULL) { + code = TSDB_CODE_OUT_OF_MEMORY; + TSDB_CHECK_CODE(code, lino, _exit); + } + + for (int32_t iDelIdx = 0; iDelIdx < taosArrayGetSize(aDelIdx); iDelIdx++) { + SDelIdx *pDelIdx = (SDelIdx *)taosArrayGet(aDelIdx, iDelIdx); + + code = tsdbReadDelData(reader, pDelIdx, ctx->aDelData); + TSDB_CHECK_CODE(code, lino, _exit); + + for (int32_t iDelData = 0; iDelData < taosArrayGetSize(ctx->aDelData); iDelData++) { + SDelData *pDelData = (SDelData *)taosArrayGet(ctx->aDelData, iDelData); + + STombRecord record = { + .suid = pDelIdx->suid, + .uid = pDelIdx->uid, + .version = pDelData->version, + .skey = pDelData->sKey, + .ekey = pDelData->eKey, + }; + + code = tTombBlockPut(ctx->tombBlock, &record); + TSDB_CHECK_CODE(code, lino, _exit); + + if (TOMB_BLOCK_SIZE(ctx->tombBlock) > ctx->maxRow) { + if (ctx->fd == NULL) { + code = tsdbUpgradeOpenTombFile(tsdb, fset, &ctx->fd, &ctx->fobj, &ctx->toStt); + TSDB_CHECK_CODE(code, lino, _exit); + } + code = tsdbFileWriteTombBlock(ctx->fd, ctx->tombBlock, ctx->cmprAlg, &ctx->fobj->f->size, ctx->tombBlkArray, + ctx->bufArr); + TSDB_CHECK_CODE(code, lino, _exit); + } + } + } + + if (TOMB_BLOCK_SIZE(ctx->tombBlock) > 0) { + if (ctx->fd == NULL) { + code = tsdbUpgradeOpenTombFile(tsdb, fset, &ctx->fd, &ctx->fobj, &ctx->toStt); + TSDB_CHECK_CODE(code, lino, 
_exit); + } + code = tsdbFileWriteTombBlock(ctx->fd, ctx->tombBlock, ctx->cmprAlg, &ctx->fobj->f->size, ctx->tombBlkArray, + ctx->bufArr); + TSDB_CHECK_CODE(code, lino, _exit); + } + + if (ctx->fd != NULL) { + if (ctx->toStt) { + code = tsdbFileWriteTombBlk(ctx->fd, ctx->tombBlkArray, ctx->sttFooter->tombBlkPtr, &ctx->fobj->f->size); + TSDB_CHECK_CODE(code, lino, _exit); + + code = tsdbFileWriteSttFooter(ctx->fd, ctx->sttFooter, &ctx->fobj->f->size); + TSDB_CHECK_CODE(code, lino, _exit); + } else { + code = tsdbFileWriteTombBlk(ctx->fd, ctx->tombBlkArray, ctx->tombFooter->tombBlkPtr, &ctx->fobj->f->size); + TSDB_CHECK_CODE(code, lino, _exit); + + code = tsdbFileWriteTombFooter(ctx->fd, ctx->tombFooter, &ctx->fobj->f->size); + TSDB_CHECK_CODE(code, lino, _exit); + } + + code = tsdbFsyncFile(ctx->fd); + TSDB_CHECK_CODE(code, lino, _exit); + + tsdbCloseFile(&ctx->fd); + } + +_exit: + if (code) { + TSDB_ERROR_LOG(TD_VID(tsdb->pVnode), lino, code); + } + for (int32_t i = 0; i < ARRAY_SIZE(ctx->bufArr); i++) { + tFree(ctx->bufArr[i]); + } + TARRAY2_DESTROY(ctx->tombBlkArray, NULL); + tTombBlockDestroy(ctx->tombBlock); + taosArrayDestroy(ctx->aDelData); + return code; +} + +static int32_t tsdbUpgradeTombFile(STsdb *tsdb, SDelFile *pDelFile, TFileSetArray *fileSetArray) { + int32_t code = 0; + int32_t lino = 0; + + SDelFReader *reader = NULL; + SArray *aDelIdx = NULL; + + if ((aDelIdx = taosArrayInit(0, sizeof(SDelIdx))) == NULL) { + code = TSDB_CODE_OUT_OF_MEMORY; + TSDB_CHECK_CODE(code, lino, _exit); + } + + code = tsdbDelFReaderOpen(&reader, pDelFile, tsdb); + TSDB_CHECK_CODE(code, lino, _exit); + + code = tsdbReadDelIdx(reader, aDelIdx); + TSDB_CHECK_CODE(code, lino, _exit); + + if (taosArrayGetSize(aDelIdx) > 0) { + STFileSet *fset; + TARRAY2_FOREACH(fileSetArray, fset) { + code = tsdbDumpTombDataToFSet(tsdb, reader, aDelIdx, fset); + TSDB_CHECK_CODE(code, lino, _exit); + } + } + +_exit: + if (code) { + TSDB_ERROR_LOG(TD_VID(tsdb->pVnode), lino, code); + } + 
tsdbDelFReaderClose(&reader); + taosArrayDestroy(aDelIdx); + return code; +} + +static int32_t tsdbDoUpgradeFileSystem(STsdb *tsdb, TFileSetArray *fileSetArray) { + int32_t code = 0; + int32_t lino = 0; + + // upgrade each file set + for (int32_t i = 0; i < taosArrayGetSize(tsdb->fs.aDFileSet); i++) { + code = tsdbUpgradeFileSet(tsdb, taosArrayGet(tsdb->fs.aDFileSet, i), fileSetArray); + TSDB_CHECK_CODE(code, lino, _exit); + } + + // upgrade tomb file + if (tsdb->fs.pDelFile != NULL) { + code = tsdbUpgradeTombFile(tsdb, tsdb->fs.pDelFile, fileSetArray); + TSDB_CHECK_CODE(code, lino, _exit); + } + +_exit: + if (code) { + TSDB_ERROR_LOG(TD_VID(tsdb->pVnode), lino, code); + } + return code; +} + +static int32_t tsdbUpgradeFileSystem(STsdb *tsdb, int8_t rollback) { + int32_t code = 0; + int32_t lino = 0; + + TFileSetArray fileSetArray[1] = {0}; + + // open old file system + code = tsdbFSOpen(tsdb, rollback); + TSDB_CHECK_CODE(code, lino, _exit); + + code = tsdbDoUpgradeFileSystem(tsdb, fileSetArray); + TSDB_CHECK_CODE(code, lino, _exit); + + // close file system + code = tsdbFSClose(tsdb); + TSDB_CHECK_CODE(code, lino, _exit); + + // save new file system + char fname[TSDB_FILENAME_LEN]; + current_fname(tsdb, fname, TSDB_FCURRENT); + code = save_fs(fileSetArray, fname); + TSDB_CHECK_CODE(code, lino, _exit); + +_exit: + if (code) { + TSDB_ERROR_LOG(TD_VID(tsdb->pVnode), lino, code); + } + TARRAY2_DESTROY(fileSetArray, tsdbTFileSetClear); + return code; +} + +int32_t tsdbCheckAndUpgradeFileSystem(STsdb *tsdb, int8_t rollback) { + char fname[TSDB_FILENAME_LEN]; + + tsdbGetCurrentFName(tsdb, fname, NULL); + if (!taosCheckExistFile(fname)) return 0; + + int32_t code = tsdbUpgradeFileSystem(tsdb, rollback); + if (code) return code; + + taosRemoveFile(fname); + return 0; +} \ No newline at end of file diff --git a/source/dnode/vnode/src/tsdb/tsdbUpgrade.h b/source/dnode/vnode/src/tsdb/tsdbUpgrade.h new file mode 100644 index 0000000000..f9aac94e00 --- /dev/null +++ 
b/source/dnode/vnode/src/tsdb/tsdbUpgrade.h @@ -0,0 +1,35 @@ +/* + * Copyright (c) 2019 TAOS Data, Inc. + * + * This program is free software: you can use, redistribute, and/or modify + * it under the terms of the GNU Affero General Public License, version 3 + * or later ("AGPL"), as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see . + */ + +#include "tsdb.h" +#include "tsdbDataFileRW.h" +#include "tsdbDef.h" +#include "tsdbFS2.h" +#include "tsdbUtil2.h" + +#ifndef _TSDB_UPGRADE_H_ +#define _TSDB_UPGRADE_H_ + +#ifdef __cplusplus +extern "C" { +#endif + +int32_t tsdbCheckAndUpgradeFileSystem(STsdb *tsdb, int8_t rollback); + +#ifdef __cplusplus +} +#endif + +#endif /*_TSDB_UPGRADE_H_*/ \ No newline at end of file diff --git a/source/dnode/vnode/src/tsdb/tsdbUtil2.c b/source/dnode/vnode/src/tsdb/tsdbUtil2.c new file mode 100644 index 0000000000..e938caa118 --- /dev/null +++ b/source/dnode/vnode/src/tsdb/tsdbUtil2.c @@ -0,0 +1,191 @@ +/* + * Copyright (c) 2019 TAOS Data, Inc. + * + * This program is free software: you can use, redistribute, and/or modify + * it under the terms of the GNU Affero General Public License, version 3 + * or later ("AGPL"), as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see . 
+ */ + +#include "tsdbUtil2.h" + +// SDelBlock ---------- +int32_t tTombBlockInit(STombBlock *tombBlock) { + for (int32_t i = 0; i < TOMB_RECORD_ELEM_NUM; ++i) { + TARRAY2_INIT(&tombBlock->dataArr[i]); + } + return 0; +} + +int32_t tTombBlockDestroy(STombBlock *tombBlock) { + for (int32_t i = 0; i < TOMB_RECORD_ELEM_NUM; ++i) { + TARRAY2_DESTROY(&tombBlock->dataArr[i], NULL); + } + return 0; +} + +int32_t tTombBlockClear(STombBlock *tombBlock) { + for (int32_t i = 0; i < TOMB_RECORD_ELEM_NUM; ++i) { + TARRAY2_CLEAR(&tombBlock->dataArr[i], NULL); + } + return 0; +} + +int32_t tTombBlockPut(STombBlock *tombBlock, const STombRecord *record) { + int32_t code; + for (int32_t i = 0; i < TOMB_RECORD_ELEM_NUM; ++i) { + code = TARRAY2_APPEND(&tombBlock->dataArr[i], record->dataArr[i]); + if (code) return code; + } + return 0; +} + +int32_t tTombBlockGet(STombBlock *tombBlock, int32_t idx, STombRecord *record) { + if (idx >= TOMB_BLOCK_SIZE(tombBlock)) return TSDB_CODE_OUT_OF_RANGE; + for (int32_t i = 0; i < TOMB_RECORD_ELEM_NUM; ++i) { + record->dataArr[i] = TARRAY2_GET(&tombBlock->dataArr[i], idx); + } + return 0; +} + +int32_t tTombRecordCompare(const STombRecord *r1, const STombRecord *r2) { + if (r1->suid < r2->suid) return -1; + if (r1->suid > r2->suid) return 1; + if (r1->uid < r2->uid) return -1; + if (r1->uid > r2->uid) return 1; + if (r1->version < r2->version) return -1; + if (r1->version > r2->version) return 1; + return 0; +} + +// STbStatisBlock ---------- +int32_t tStatisBlockInit(STbStatisBlock *statisBlock) { + for (int32_t i = 0; i < STATIS_RECORD_NUM_ELEM; ++i) { + TARRAY2_INIT(&statisBlock->dataArr[i]); + } + return 0; +} + +int32_t tStatisBlockDestroy(STbStatisBlock *statisBlock) { + for (int32_t i = 0; i < STATIS_RECORD_NUM_ELEM; ++i) { + TARRAY2_DESTROY(&statisBlock->dataArr[i], NULL); + } + return 0; +} + +int32_t tStatisBlockClear(STbStatisBlock *statisBlock) { + for (int32_t i = 0; i < STATIS_RECORD_NUM_ELEM; ++i) { + 
TARRAY2_CLEAR(&statisBlock->dataArr[i], NULL); + } + return 0; +} + +int32_t tStatisBlockPut(STbStatisBlock *statisBlock, const STbStatisRecord *record) { + int32_t code; + for (int32_t i = 0; i < STATIS_RECORD_NUM_ELEM; ++i) { + code = TARRAY2_APPEND(&statisBlock->dataArr[i], record->dataArr[i]); + if (code) return code; + } + return 0; +} + +int32_t tStatisBlockGet(STbStatisBlock *statisBlock, int32_t idx, STbStatisRecord *record) { + if (idx >= STATIS_BLOCK_SIZE(statisBlock)) return TSDB_CODE_OUT_OF_RANGE; + for (int32_t i = 0; i < STATIS_RECORD_NUM_ELEM; ++i) { + record->dataArr[i] = TARRAY2_GET(&statisBlock->dataArr[i], idx); + } + return 0; +} + +// SBrinRecord ---------- +int32_t tBrinBlockInit(SBrinBlock *brinBlock) { + for (int32_t i = 0; i < ARRAY_SIZE(brinBlock->dataArr1); ++i) { + TARRAY2_INIT(&brinBlock->dataArr1[i]); + } + for (int32_t i = 0; i < ARRAY_SIZE(brinBlock->dataArr2); ++i) { + TARRAY2_INIT(&brinBlock->dataArr2[i]); + } + return 0; +} + +int32_t tBrinBlockDestroy(SBrinBlock *brinBlock) { + for (int32_t i = 0; i < ARRAY_SIZE(brinBlock->dataArr1); ++i) { + TARRAY2_DESTROY(&brinBlock->dataArr1[i], NULL); + } + for (int32_t i = 0; i < ARRAY_SIZE(brinBlock->dataArr2); ++i) { + TARRAY2_DESTROY(&brinBlock->dataArr2[i], NULL); + } + return 0; +} + +int32_t tBrinBlockClear(SBrinBlock *brinBlock) { + for (int32_t i = 0; i < ARRAY_SIZE(brinBlock->dataArr1); ++i) { + TARRAY2_CLEAR(&brinBlock->dataArr1[i], NULL); + } + for (int32_t i = 0; i < ARRAY_SIZE(brinBlock->dataArr2); ++i) { + TARRAY2_CLEAR(&brinBlock->dataArr2[i], NULL); + } + return 0; +} + +int32_t tBrinBlockPut(SBrinBlock *brinBlock, const SBrinRecord *record) { + int32_t code; + for (int32_t i = 0; i < ARRAY_SIZE(brinBlock->dataArr1); ++i) { + code = TARRAY2_APPEND(&brinBlock->dataArr1[i], record->dataArr1[i]); + if (code) return code; + } + for (int32_t i = 0; i < ARRAY_SIZE(brinBlock->dataArr2); ++i) { + code = TARRAY2_APPEND(&brinBlock->dataArr2[i], record->dataArr2[i]); + if (code) return 
code; + } + return 0; +} + +int32_t tBrinBlockGet(SBrinBlock *brinBlock, int32_t idx, SBrinRecord *record) { + if (idx >= BRIN_BLOCK_SIZE(brinBlock)) return TSDB_CODE_OUT_OF_RANGE; + for (int32_t i = 0; i < ARRAY_SIZE(brinBlock->dataArr1); ++i) { + record->dataArr1[i] = TARRAY2_GET(&brinBlock->dataArr1[i], idx); + } + for (int32_t i = 0; i < ARRAY_SIZE(brinBlock->dataArr2); ++i) { + record->dataArr2[i] = TARRAY2_GET(&brinBlock->dataArr2[i], idx); + } + return 0; +} + +// other apis ---------- +int32_t tsdbUpdateSkmTb(STsdb *pTsdb, const TABLEID *tbid, SSkmInfo *pSkmTb) { + if (tbid->suid) { + if (pSkmTb->suid == tbid->suid) { + pSkmTb->uid = tbid->uid; + return 0; + } + } else if (pSkmTb->uid == tbid->uid) { + return 0; + } + + pSkmTb->suid = tbid->suid; + pSkmTb->uid = tbid->uid; + tDestroyTSchema(pSkmTb->pTSchema); + return metaGetTbTSchemaEx(pTsdb->pVnode->pMeta, tbid->suid, tbid->uid, -1, &pSkmTb->pTSchema); +} + +int32_t tsdbUpdateSkmRow(STsdb *pTsdb, const TABLEID *tbid, int32_t sver, SSkmInfo *pSkmRow) { + if (pSkmRow->pTSchema && pSkmRow->suid == tbid->suid) { + if (pSkmRow->suid) { + if (sver == pSkmRow->pTSchema->version) return 0; + } else if (pSkmRow->uid == tbid->uid && pSkmRow->pTSchema->version == sver) { + return 0; + } + } + + pSkmRow->suid = tbid->suid; + pSkmRow->uid = tbid->uid; + tDestroyTSchema(pSkmRow->pTSchema); + return metaGetTbTSchemaEx(pTsdb->pVnode->pMeta, tbid->suid, tbid->uid, sver, &pSkmRow->pTSchema); +} \ No newline at end of file diff --git a/source/dnode/vnode/src/tsdb/tsdbUtil2.h b/source/dnode/vnode/src/tsdb/tsdbUtil2.h new file mode 100644 index 0000000000..fa06368341 --- /dev/null +++ b/source/dnode/vnode/src/tsdb/tsdbUtil2.h @@ -0,0 +1,193 @@ +/* + * Copyright (c) 2019 TAOS Data, Inc. + * + * This program is free software: you can use, redistribute, and/or modify + * it under the terms of the GNU Affero General Public License, version 3 + * or later ("AGPL"), as published by the Free Software Foundation. 
+ * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see . + */ + +#ifndef _TSDB_UTIL_H +#define _TSDB_UTIL_H + +#include "tsdbDef.h" + +#ifdef __cplusplus +extern "C" { +#endif + +// STombRecord ---------- +#define TOMB_RECORD_ELEM_NUM 5 +typedef union { + int64_t dataArr[TOMB_RECORD_ELEM_NUM]; + struct { + int64_t suid; + int64_t uid; + int64_t version; + int64_t skey; + int64_t ekey; + }; +} STombRecord; + +typedef union { + TARRAY2(int64_t) dataArr[TOMB_RECORD_ELEM_NUM]; + struct { + TARRAY2(int64_t) suid[1]; + TARRAY2(int64_t) uid[1]; + TARRAY2(int64_t) version[1]; + TARRAY2(int64_t) skey[1]; + TARRAY2(int64_t) ekey[1]; + }; +} STombBlock; + +typedef struct { + SFDataPtr dp[1]; + TABLEID minTbid; + TABLEID maxTbid; + int64_t minVer; + int64_t maxVer; + int32_t numRec; + int32_t size[TOMB_RECORD_ELEM_NUM]; + int8_t cmprAlg; + int8_t rsvd[7]; +} STombBlk; + +typedef TARRAY2(STombBlk) TTombBlkArray; + +#define TOMB_BLOCK_SIZE(db) TARRAY2_SIZE((db)->suid) + +int32_t tTombBlockInit(STombBlock *tombBlock); +int32_t tTombBlockDestroy(STombBlock *tombBlock); +int32_t tTombBlockClear(STombBlock *tombBlock); +int32_t tTombBlockPut(STombBlock *tombBlock, const STombRecord *record); +int32_t tTombBlockGet(STombBlock *tombBlock, int32_t idx, STombRecord *record); +int32_t tTombRecordCompare(const STombRecord *record1, const STombRecord *record2); + +// STbStatisRecord ---------- +#define STATIS_RECORD_NUM_ELEM 5 +typedef union { + int64_t dataArr[STATIS_RECORD_NUM_ELEM]; + struct { + int64_t suid; + int64_t uid; + int64_t firstKey; + int64_t lastKey; + int64_t count; + }; +} STbStatisRecord; + +typedef union { + TARRAY2(int64_t) dataArr[STATIS_RECORD_NUM_ELEM]; + struct { + TARRAY2(int64_t) suid[1]; + 
TARRAY2(int64_t) uid[1]; + TARRAY2(int64_t) firstKey[1]; + TARRAY2(int64_t) lastKey[1]; + TARRAY2(int64_t) count[1]; + }; +} STbStatisBlock; + +typedef struct { + SFDataPtr dp[1]; + TABLEID minTbid; + TABLEID maxTbid; + int32_t numRec; + int32_t size[STATIS_RECORD_NUM_ELEM]; + int8_t cmprAlg; + int8_t rsvd[7]; +} SStatisBlk; + +#define STATIS_BLOCK_SIZE(db) TARRAY2_SIZE((db)->suid) + +int32_t tStatisBlockInit(STbStatisBlock *statisBlock); +int32_t tStatisBlockDestroy(STbStatisBlock *statisBlock); +int32_t tStatisBlockClear(STbStatisBlock *statisBlock); +int32_t tStatisBlockPut(STbStatisBlock *statisBlock, const STbStatisRecord *record); +int32_t tStatisBlockGet(STbStatisBlock *statisBlock, int32_t idx, STbStatisRecord *record); + +// SBrinRecord ---------- +typedef union { + struct { + int64_t dataArr1[10]; + int32_t dataArr2[5]; + }; + struct { + int64_t suid; + int64_t uid; + int64_t firstKey; + int64_t firstKeyVer; + int64_t lastKey; + int64_t lastKeyVer; + int64_t minVer; + int64_t maxVer; + int64_t blockOffset; + int64_t smaOffset; + int32_t blockSize; + int32_t blockKeySize; + int32_t smaSize; + int32_t numRow; + int32_t count; + }; +} SBrinRecord; + +typedef union { + struct { + TARRAY2(int64_t) dataArr1[10]; + TARRAY2(int32_t) dataArr2[5]; + }; + struct { + TARRAY2(int64_t) suid[1]; + TARRAY2(int64_t) uid[1]; + TARRAY2(int64_t) firstKey[1]; + TARRAY2(int64_t) firstKeyVer[1]; + TARRAY2(int64_t) lastKey[1]; + TARRAY2(int64_t) lastKeyVer[1]; + TARRAY2(int64_t) minVer[1]; + TARRAY2(int64_t) maxVer[1]; + TARRAY2(int64_t) blockOffset[1]; + TARRAY2(int64_t) smaOffset[1]; + TARRAY2(int32_t) blockSize[1]; + TARRAY2(int32_t) blockKeySize[1]; + TARRAY2(int32_t) smaSize[1]; + TARRAY2(int32_t) numRow[1]; + TARRAY2(int32_t) count[1]; + }; +} SBrinBlock; + +typedef struct { + SFDataPtr dp[1]; + TABLEID minTbid; + TABLEID maxTbid; + int64_t minVer; + int64_t maxVer; + int32_t numRec; + int32_t size[15]; + int8_t cmprAlg; + int8_t rsvd[7]; +} SBrinBlk; + +typedef 
TARRAY2(SBrinBlk) TBrinBlkArray; + +#define BRIN_BLOCK_SIZE(db) TARRAY2_SIZE((db)->suid) + +int32_t tBrinBlockInit(SBrinBlock *brinBlock); +int32_t tBrinBlockDestroy(SBrinBlock *brinBlock); +int32_t tBrinBlockClear(SBrinBlock *brinBlock); +int32_t tBrinBlockPut(SBrinBlock *brinBlock, const SBrinRecord *record); +int32_t tBrinBlockGet(SBrinBlock *brinBlock, int32_t idx, SBrinRecord *record); + +// other apis +int32_t tsdbUpdateSkmTb(STsdb *pTsdb, const TABLEID *tbid, SSkmInfo *pSkmTb); +int32_t tsdbUpdateSkmRow(STsdb *pTsdb, const TABLEID *tbid, int32_t sver, SSkmInfo *pSkmRow); + +#ifdef __cplusplus +} +#endif + +#endif /*_TSDB_UTIL_H*/ \ No newline at end of file diff --git a/source/dnode/vnode/src/vnd/vnodeCfg.c b/source/dnode/vnode/src/vnd/vnodeCfg.c index efe82e1783..2e161d728f 100644 --- a/source/dnode/vnode/src/vnd/vnodeCfg.c +++ b/source/dnode/vnode/src/vnd/vnodeCfg.c @@ -49,7 +49,7 @@ const SVnodeCfg vnodeCfgDefault = {.vgId = -1, .hashBegin = 0, .hashEnd = 0, .hashMethod = 0, - .sttTrigger = TSDB_DEFAULT_STT_FILE, + .sttTrigger = TSDB_DEFAULT_SST_TRIGGER, .tsdbPageSize = TSDB_DEFAULT_PAGE_SIZE}; int vnodeCheckCfg(const SVnodeCfg *pCfg) { @@ -57,7 +57,7 @@ int vnodeCheckCfg(const SVnodeCfg *pCfg) { return 0; } -const char* vnodeRoleToStr(ESyncRole role) { +const char *vnodeRoleToStr(ESyncRole role) { switch (role) { case TAOS_SYNC_ROLE_VOTER: return "true"; @@ -68,11 +68,11 @@ const char* vnodeRoleToStr(ESyncRole role) { } } -const ESyncRole vnodeStrToRole(char* str) { - if(strcmp(str, "true") == 0){ +const ESyncRole vnodeStrToRole(char *str) { + if (strcmp(str, "true") == 0) { return TAOS_SYNC_ROLE_VOTER; } - if(strcmp(str, "false") == 0){ + if (strcmp(str, "false") == 0) { return TAOS_SYNC_ROLE_LEARNER; } @@ -295,10 +295,9 @@ int vnodeDecodeConfig(const SJson *pJson, void *pObj) { char role[10] = {0}; code = tjsonGetStringValue(info, "isReplica", role); if (code < 0) return -1; - if(strlen(role) != 0){ + if (strlen(role) != 0) { pNode->nodeRole = 
vnodeStrToRole(role); - } - else{ + } else { pNode->nodeRole = TAOS_SYNC_ROLE_VOTER; } vDebug("vgId:%d, decode config, replica:%d ep:%s:%u dnode:%d", pCfg->vgId, i, pNode->nodeFqdn, pNode->nodePort, diff --git a/source/dnode/vnode/src/vnd/vnodeCommit.c b/source/dnode/vnode/src/vnd/vnodeCommit.c index 7109f8466e..33c6f9d533 100644 --- a/source/dnode/vnode/src/vnd/vnodeCommit.c +++ b/source/dnode/vnode/src/vnd/vnodeCommit.c @@ -16,6 +16,13 @@ #include "vnd.h" #include "vnodeInt.h" +extern int32_t tsdbPreCommit(STsdb *pTsdb); +extern int32_t tsdbCommitBegin(STsdb *pTsdb, SCommitInfo *pInfo); +extern int32_t tsdbCommitCommit(STsdb *pTsdb); +extern int32_t tsdbCommitAbort(STsdb *pTsdb); + +#define VND_INFO_FNAME_TMP "vnode_tmp.json" + static int vnodeEncodeInfo(const SVnodeInfo *pInfo, char **ppData); static int vnodeCommitImpl(SCommitInfo *pInfo); @@ -296,7 +303,7 @@ static int32_t vnodePrepareCommit(SVnode *pVnode, SCommitInfo *pInfo) { TSDB_CHECK_CODE(code, lino, _exit); } - tsdbPrepareCommit(pVnode->pTsdb); + tsdbPreCommit(pVnode->pTsdb); metaPrepareAsyncCommit(pVnode->pMeta); @@ -430,8 +437,7 @@ static int vnodeCommitImpl(SCommitInfo *pInfo) { syncBeginSnapshot(pVnode->sync, pInfo->info.state.committed); - // commit each sub-system - code = tsdbCommit(pVnode->pTsdb, pInfo); + code = tsdbCommitBegin(pVnode->pTsdb, pInfo); TSDB_CHECK_CODE(code, lino, _exit); if (!TSDB_CACHE_NO(pVnode->config)) { @@ -455,7 +461,7 @@ static int vnodeCommitImpl(SCommitInfo *pInfo) { TSDB_CHECK_CODE(code, lino, _exit); } - code = tsdbFinishCommit(pVnode->pTsdb); + code = tsdbCommitCommit(pVnode->pTsdb); TSDB_CHECK_CODE(code, lino, _exit); if (VND_IS_RSMA(pVnode)) { diff --git a/source/dnode/vnode/src/vnd/vnodeInitApi.c b/source/dnode/vnode/src/vnd/vnodeInitApi.c index 28a88561af..e2f0fe8f82 100644 --- a/source/dnode/vnode/src/vnd/vnodeInitApi.c +++ b/source/dnode/vnode/src/vnd/vnodeInitApi.c @@ -42,24 +42,24 @@ void initStorageAPI(SStorageAPI* pAPI) { void initTsdbReaderAPI(TsdReader* 
pReader) { pReader->tsdReaderOpen = (int32_t(*)(void*, SQueryTableDataCond*, void*, int32_t, SSDataBlock*, void**, const char*, - bool, SHashObj**))tsdbReaderOpen; - pReader->tsdReaderClose = tsdbReaderClose; + bool, SHashObj**))tsdbReaderOpen2; + pReader->tsdReaderClose = tsdbReaderClose2; - pReader->tsdNextDataBlock = tsdbNextDataBlock; + pReader->tsdNextDataBlock = tsdbNextDataBlock2; - pReader->tsdReaderRetrieveDataBlock = tsdbRetrieveDataBlock; - pReader->tsdReaderReleaseDataBlock = tsdbReleaseDataBlock; + pReader->tsdReaderRetrieveDataBlock = tsdbRetrieveDataBlock2; + pReader->tsdReaderReleaseDataBlock = tsdbReleaseDataBlock2; - pReader->tsdReaderRetrieveBlockSMAInfo = tsdbRetrieveDatablockSMA; + pReader->tsdReaderRetrieveBlockSMAInfo = tsdbRetrieveDatablockSMA2; pReader->tsdReaderNotifyClosing = tsdbReaderSetCloseFlag; - pReader->tsdReaderResetStatus = tsdbReaderReset; + pReader->tsdReaderResetStatus = tsdbReaderReset2; - pReader->tsdReaderGetDataBlockDistInfo = tsdbGetFileBlocksDistInfo; - pReader->tsdReaderGetNumOfInMemRows = tsdbGetNumOfRowsInMemTable; // todo this function should be moved away + pReader->tsdReaderGetDataBlockDistInfo = tsdbGetFileBlocksDistInfo2; + pReader->tsdReaderGetNumOfInMemRows = tsdbGetNumOfRowsInMemTable2; // todo this function should be moved away - pReader->tsdSetQueryTableList = tsdbSetTableList; - pReader->tsdSetReaderTaskId = (void (*)(void*, const char*))tsdbReaderSetId; + pReader->tsdSetQueryTableList = tsdbSetTableList2; + pReader->tsdSetReaderTaskId = (void (*)(void*, const char*))tsdbReaderSetId2; } void initMetadataAPI(SStoreMeta* pMeta) { diff --git a/source/dnode/vnode/src/vnd/vnodeModule.c b/source/dnode/vnode/src/vnd/vnodeModule.c index 782ffd788d..74a8d14a86 100644 --- a/source/dnode/vnode/src/vnd/vnodeModule.c +++ b/source/dnode/vnode/src/vnd/vnodeModule.c @@ -23,26 +23,24 @@ struct SVnodeTask { void* arg; }; -struct SVnodeGlobal { - int8_t init; - int8_t stop; +typedef struct { int nthreads; TdThread* threads; 
TdThreadMutex mutex; TdThreadCond hasTask; SVnodeTask queue; +} SVnodeThreadPool; + +struct SVnodeGlobal { + int8_t init; + int8_t stop; + SVnodeThreadPool tp[2]; }; struct SVnodeGlobal vnodeGlobal; static void* loop(void* arg); -static tsem_t canCommit = {0}; - -static void vnodeInitCommit() { tsem_init(&canCommit, 0, 4); }; -void vnode_wait_commit() { tsem_wait(&canCommit); } -void vnode_done_commit() { tsem_wait(&canCommit); } - int vnodeInit(int nthreads) { int8_t init; int ret; @@ -51,28 +49,30 @@ int vnodeInit(int nthreads) { if (init) { return 0; } - - taosThreadMutexInit(&vnodeGlobal.mutex, NULL); - taosThreadCondInit(&vnodeGlobal.hasTask, NULL); - - taosThreadMutexLock(&vnodeGlobal.mutex); - vnodeGlobal.stop = 0; - vnodeGlobal.queue.next = &vnodeGlobal.queue; - vnodeGlobal.queue.prev = &vnodeGlobal.queue; - taosThreadMutexUnlock(&(vnodeGlobal.mutex)); + for (int32_t i = 0; i < ARRAY_SIZE(vnodeGlobal.tp); i++) { + taosThreadMutexInit(&vnodeGlobal.tp[i].mutex, NULL); + taosThreadCondInit(&vnodeGlobal.tp[i].hasTask, NULL); - vnodeGlobal.nthreads = nthreads; - vnodeGlobal.threads = taosMemoryCalloc(nthreads, sizeof(TdThread)); - if (vnodeGlobal.threads == NULL) { - terrno = TSDB_CODE_OUT_OF_MEMORY; - vError("failed to init vnode module since:%s", tstrerror(terrno)); - return -1; - } + taosThreadMutexLock(&vnodeGlobal.tp[i].mutex); - for (int i = 0; i < nthreads; i++) { - taosThreadCreate(&(vnodeGlobal.threads[i]), NULL, loop, NULL); + vnodeGlobal.tp[i].queue.next = &vnodeGlobal.tp[i].queue; + vnodeGlobal.tp[i].queue.prev = &vnodeGlobal.tp[i].queue; + + taosThreadMutexUnlock(&(vnodeGlobal.tp[i].mutex)); + + vnodeGlobal.tp[i].nthreads = nthreads; + vnodeGlobal.tp[i].threads = taosMemoryCalloc(nthreads, sizeof(TdThread)); + if (vnodeGlobal.tp[i].threads == NULL) { + terrno = TSDB_CODE_OUT_OF_MEMORY; + vError("failed to init vnode module since:%s", tstrerror(terrno)); + return -1; + } + + for (int j = 0; j < nthreads; j++) { + 
taosThreadCreate(&(vnodeGlobal.tp[i].threads[j]), NULL, loop, &vnodeGlobal.tp[i]); + } } if (walInit() < 0) { @@ -92,27 +92,29 @@ void vnodeCleanup() { if (init == 0) return; // set stop - taosThreadMutexLock(&(vnodeGlobal.mutex)); vnodeGlobal.stop = 1; - taosThreadCondBroadcast(&(vnodeGlobal.hasTask)); - taosThreadMutexUnlock(&(vnodeGlobal.mutex)); + for (int32_t i = 0; i < ARRAY_SIZE(vnodeGlobal.tp); i++) { + taosThreadMutexLock(&(vnodeGlobal.tp[i].mutex)); + taosThreadCondBroadcast(&(vnodeGlobal.tp[i].hasTask)); + taosThreadMutexUnlock(&(vnodeGlobal.tp[i].mutex)); - // wait for threads - for (int i = 0; i < vnodeGlobal.nthreads; i++) { - taosThreadJoin(vnodeGlobal.threads[i], NULL); + // wait for threads + for (int j = 0; j < vnodeGlobal.tp[i].nthreads; j++) { + taosThreadJoin(vnodeGlobal.tp[i].threads[j], NULL); + } + + // clear source + taosMemoryFreeClear(vnodeGlobal.tp[i].threads); + taosThreadCondDestroy(&(vnodeGlobal.tp[i].hasTask)); + taosThreadMutexDestroy(&(vnodeGlobal.tp[i].mutex)); } - // clear source - taosMemoryFreeClear(vnodeGlobal.threads); - taosThreadCondDestroy(&(vnodeGlobal.hasTask)); - taosThreadMutexDestroy(&(vnodeGlobal.mutex)); - walCleanUp(); tqCleanUp(); smaCleanUp(); } -int vnodeScheduleTask(int (*execute)(void*), void* arg) { +int vnodeScheduleTaskEx(int tpid, int (*execute)(void*), void* arg) { SVnodeTask* pTask; ASSERT(!vnodeGlobal.stop); @@ -126,35 +128,42 @@ int vnodeScheduleTask(int (*execute)(void*), void* arg) { pTask->execute = execute; pTask->arg = arg; - taosThreadMutexLock(&(vnodeGlobal.mutex)); - pTask->next = &vnodeGlobal.queue; - pTask->prev = vnodeGlobal.queue.prev; - vnodeGlobal.queue.prev->next = pTask; - vnodeGlobal.queue.prev = pTask; - taosThreadCondSignal(&(vnodeGlobal.hasTask)); - taosThreadMutexUnlock(&(vnodeGlobal.mutex)); + taosThreadMutexLock(&(vnodeGlobal.tp[tpid].mutex)); + pTask->next = &vnodeGlobal.tp[tpid].queue; + pTask->prev = vnodeGlobal.tp[tpid].queue.prev; + vnodeGlobal.tp[tpid].queue.prev->next = 
pTask; + vnodeGlobal.tp[tpid].queue.prev = pTask; + taosThreadCondSignal(&(vnodeGlobal.tp[tpid].hasTask)); + taosThreadMutexUnlock(&(vnodeGlobal.tp[tpid].mutex)); return 0; } +int vnodeScheduleTask(int (*execute)(void*), void* arg) { return vnodeScheduleTaskEx(0, execute, arg); } + /* ------------------------ STATIC METHODS ------------------------ */ static void* loop(void* arg) { - SVnodeTask* pTask; - int ret; + SVnodeThreadPool* tp = (SVnodeThreadPool*)arg; + SVnodeTask* pTask; + int ret; - setThreadName("vnode-commit"); + if (tp == &vnodeGlobal.tp[0]) { + setThreadName("vnode-commit"); + } else if (tp == &vnodeGlobal.tp[1]) { + setThreadName("vnode-merge"); + } for (;;) { - taosThreadMutexLock(&(vnodeGlobal.mutex)); + taosThreadMutexLock(&(tp->mutex)); for (;;) { - pTask = vnodeGlobal.queue.next; - if (pTask == &vnodeGlobal.queue) { + pTask = tp->queue.next; + if (pTask == &tp->queue) { // no task if (vnodeGlobal.stop) { - taosThreadMutexUnlock(&(vnodeGlobal.mutex)); + taosThreadMutexUnlock(&(tp->mutex)); return NULL; } else { - taosThreadCondWait(&(vnodeGlobal.hasTask), &(vnodeGlobal.mutex)); + taosThreadCondWait(&(tp->hasTask), &(tp->mutex)); } } else { // has task @@ -164,7 +173,7 @@ static void* loop(void* arg) { } } - taosThreadMutexUnlock(&(vnodeGlobal.mutex)); + taosThreadMutexUnlock(&(tp->mutex)); pTask->execute(pTask->arg); taosMemoryFree(pTask); diff --git a/source/dnode/vnode/src/vnd/vnodeRetention.c b/source/dnode/vnode/src/vnd/vnodeRetention.c index f582d5e4a4..7af1f8e28f 100644 --- a/source/dnode/vnode/src/vnd/vnodeRetention.c +++ b/source/dnode/vnode/src/vnd/vnodeRetention.c @@ -15,111 +15,27 @@ #include "vnd.h" -typedef struct { - SVnode *pVnode; - int64_t now; - int64_t commitID; - SVnodeInfo info; -} SRetentionInfo; +extern int32_t tsdbSyncRetention(STsdb *tsdb, int64_t now); +extern int32_t tsdbAsyncRetention(STsdb *tsdb, int64_t now, int64_t *taskid); -extern bool tsdbShouldDoRetention(STsdb *pTsdb, int64_t now); -extern int32_t 
tsdbDoRetention(STsdb *pTsdb, int64_t now); -extern int32_t tsdbCommitRetention(STsdb *pTsdb); +int32_t vnodeDoRetention(SVnode *pVnode, int64_t now) { + int32_t code; + int32_t lino; -static int32_t vnodePrepareRentention(SVnode *pVnode, SRetentionInfo *pInfo) { - int32_t code = 0; - int32_t lino = 0; - - tsem_wait(&pVnode->canCommit); - - pInfo->commitID = ++pVnode->state.commitID; - - char dir[TSDB_FILENAME_LEN] = {0}; - vnodeGetPrimaryDir(pVnode->path, pVnode->diskPrimary, pVnode->pTfs, dir, TSDB_FILENAME_LEN); - - if (vnodeLoadInfo(dir, &pInfo->info) < 0) { - code = terrno; + if (pVnode->config.sttTrigger == 1) { + tsem_wait(&pVnode->canCommit); + code = tsdbSyncRetention(pVnode->pTsdb, now); TSDB_CHECK_CODE(code, lino, _exit); - } -_exit: - if (code) { - vError("vgId:%d %s failed at line %d since %s", TD_VID(pVnode), __func__, lino, tstrerror(code)); + // code = smaDoRetention(pVnode->pSma, now); + // TSDB_CHECK_CODE(code, lino, _exit); tsem_post(&pVnode->canCommit); } else { - vInfo("vgId:%d %s done", TD_VID(pVnode), __func__); - } - return code; -} - -static int32_t vnodeRetentionTask(void *param) { - int32_t code = 0; - int32_t lino = 0; - - SRetentionInfo *pInfo = (SRetentionInfo *)param; - SVnode *pVnode = pInfo->pVnode; - char dir[TSDB_FILENAME_LEN] = {0}; - - vnodeGetPrimaryDir(pVnode->path, pVnode->diskPrimary, pVnode->pTfs, dir, TSDB_FILENAME_LEN); - - // save info - pInfo->info.state.commitID = pInfo->commitID; - - if (vnodeSaveInfo(dir, &pInfo->info) < 0) { - code = terrno; + int64_t taskid; + code = tsdbAsyncRetention(pVnode->pTsdb, now, &taskid); TSDB_CHECK_CODE(code, lino, _exit); } - // do job - code = tsdbDoRetention(pInfo->pVnode->pTsdb, pInfo->now); - TSDB_CHECK_CODE(code, lino, _exit); - - code = smaDoRetention(pInfo->pVnode->pSma, pInfo->now); - TSDB_CHECK_CODE(code, lino, _exit); - - // commit info - vnodeCommitInfo(dir); - - // commit sub-job - tsdbCommitRetention(pVnode->pTsdb); - _exit: - if (code) { - vError("vgId:%d %s failed at line 
%d since %s", TD_VID(pInfo->pVnode), __func__, lino, tstrerror(code)); - } else { - vInfo("vgId:%d %s done", TD_VID(pInfo->pVnode), __func__); - } - tsem_post(&pInfo->pVnode->canCommit); - taosMemoryFree(pInfo); return code; -} - -int32_t vnodeAsyncRentention(SVnode *pVnode, int64_t now) { - int32_t code = 0; - int32_t lino = 0; - - if (!tsdbShouldDoRetention(pVnode->pTsdb, now)) return code; - - SRetentionInfo *pInfo = (SRetentionInfo *)taosMemoryCalloc(1, sizeof(*pInfo)); - if (pInfo == NULL) { - code = TSDB_CODE_OUT_OF_MEMORY; - TSDB_CHECK_CODE(code, lino, _exit); - } - - pInfo->pVnode = pVnode; - pInfo->now = now; - - code = vnodePrepareRentention(pVnode, pInfo); - TSDB_CHECK_CODE(code, lino, _exit); - - vnodeScheduleTask(vnodeRetentionTask, pInfo); - -_exit: - if (code) { - vError("vgId:%d %s failed at line %d since %s", TD_VID(pVnode), __func__, lino, tstrerror(code)); - if (pInfo) taosMemoryFree(pInfo); - } else { - vInfo("vgId:%d %s done", TD_VID(pInfo->pVnode), __func__); - } - return 0; -} +} \ No newline at end of file diff --git a/source/dnode/vnode/src/vnd/vnodeSvr.c b/source/dnode/vnode/src/vnd/vnodeSvr.c index 33af7631de..cac4a54921 100644 --- a/source/dnode/vnode/src/vnd/vnodeSvr.c +++ b/source/dnode/vnode/src/vnd/vnodeSvr.c @@ -689,7 +689,8 @@ void vnodeUpdateMetaRsp(SVnode *pVnode, STableMetaRsp *pMetaRsp) { pMetaRsp->precision = pVnode->config.tsdbCfg.precision; } -extern int32_t vnodeAsyncRentention(SVnode *pVnode, int64_t now); +extern int32_t vnodeDoRetention(SVnode *pVnode, int64_t now); + static int32_t vnodeProcessTrimReq(SVnode *pVnode, int64_t ver, void *pReq, int32_t len, SRpcMsg *pRsp) { int32_t code = 0; SVTrimDbReq trimReq = {0}; @@ -702,10 +703,7 @@ static int32_t vnodeProcessTrimReq(SVnode *pVnode, int64_t ver, void *pReq, int3 vInfo("vgId:%d, trim vnode request will be processed, time:%d", pVnode->config.vgId, trimReq.timestamp); - // process - vnodeAsyncRentention(pVnode, trimReq.timestamp); - tsem_wait(&pVnode->canCommit); - 
tsem_post(&pVnode->canCommit); + code = vnodeDoRetention(pVnode, trimReq.timestamp); _exit: return code; @@ -730,7 +728,7 @@ static int32_t vnodeProcessDropTtlTbReq(SVnode *pVnode, int64_t ver, void *pReq, tqUpdateTbUidList(pVnode->pTq, tbUids, false); } - vnodeAsyncRentention(pVnode, ttlReq.timestampSec); + vnodeDoRetention(pVnode, ttlReq.timestampSec); end: taosArrayDestroy(tbUids); diff --git a/source/libs/catalog/inc/catalogInt.h b/source/libs/catalog/inc/catalogInt.h index 5746ea2340..7d47e82164 100644 --- a/source/libs/catalog/inc/catalogInt.h +++ b/source/libs/catalog/inc/catalogInt.h @@ -938,7 +938,7 @@ int32_t ctgInitJob(SCatalog* pCtg, SRequestConnInfo* pConn, SCtgJob** job, const void* param); int32_t ctgLaunchJob(SCtgJob* pJob); int32_t ctgMakeAsyncRes(SCtgJob* pJob); -int32_t ctgLaunchSubTask(SCtgTask* pTask, CTG_TASK_TYPE type, ctgSubTaskCbFp fp, void* param); +int32_t ctgLaunchSubTask(SCtgTask** ppTask, CTG_TASK_TYPE type, ctgSubTaskCbFp fp, void* param); int32_t ctgGetTbCfgCb(SCtgTask* pTask); void ctgFreeHandle(SCatalog* pCatalog); diff --git a/source/libs/catalog/src/ctgAsync.c b/source/libs/catalog/src/ctgAsync.c index 784126eee9..fb5ecf7ad2 100644 --- a/source/libs/catalog/src/ctgAsync.c +++ b/source/libs/catalog/src/ctgAsync.c @@ -2097,7 +2097,7 @@ int32_t ctgLaunchGetTbCfgTask(SCtgTask* pTask) { SCtgTbMetaParam param; param.pName = pCtx->pName; param.flag = 0; - CTG_ERR_JRET(ctgLaunchSubTask(pTask, CTG_TASK_GET_TB_META, ctgGetTbCfgCb, ¶m)); + CTG_ERR_JRET(ctgLaunchSubTask(&pTask, CTG_TASK_GET_TB_META, ctgGetTbCfgCb, ¶m)); return TSDB_CODE_SUCCESS; } } @@ -2108,7 +2108,7 @@ int32_t ctgLaunchGetTbCfgTask(SCtgTask* pTask) { if (NULL == pCtx->pVgInfo) { CTG_ERR_JRET(ctgGetTbHashVgroupFromCache(pCtg, pCtx->pName, &pCtx->pVgInfo)); if (NULL == pCtx->pVgInfo) { - CTG_ERR_JRET(ctgLaunchSubTask(pTask, CTG_TASK_GET_DB_VGROUP, ctgGetTbCfgCb, dbFName)); + CTG_ERR_JRET(ctgLaunchSubTask(&pTask, CTG_TASK_GET_DB_VGROUP, ctgGetTbCfgCb, dbFName)); return 
TSDB_CODE_SUCCESS; } } @@ -2145,7 +2145,7 @@ int32_t ctgLaunchGetTbTagTask(SCtgTask* pTask) { if (NULL == pCtx->pVgInfo) { CTG_ERR_JRET(ctgGetTbHashVgroupFromCache(pCtg, pCtx->pName, &pCtx->pVgInfo)); if (NULL == pCtx->pVgInfo) { - CTG_ERR_JRET(ctgLaunchSubTask(pTask, CTG_TASK_GET_DB_VGROUP, ctgGetTbTagCb, dbFName)); + CTG_ERR_JRET(ctgLaunchSubTask(&pTask, CTG_TASK_GET_DB_VGROUP, ctgGetTbTagCb, dbFName)); return TSDB_CODE_SUCCESS; } } @@ -2331,7 +2331,7 @@ int32_t ctgLaunchGetUserTask(SCtgTask* pTask) { SCtgTbMetaParam param; param.pName = &pCtx->user.tbName; param.flag = CTG_FLAG_SYNC_OP; - CTG_ERR_RET(ctgLaunchSubTask(pTask, CTG_TASK_GET_TB_META, ctgGetUserCb, ¶m)); + CTG_ERR_RET(ctgLaunchSubTask(&pTask, CTG_TASK_GET_TB_META, ctgGetUserCb, ¶m)); } else { CTG_ERR_RET(ctgGetUserDbAuthFromMnode(pCtg, pConn, pCtx->user.user, NULL, pTask)); } @@ -2541,19 +2541,35 @@ _return: CTG_RET(code); } -int32_t ctgLaunchSubTask(SCtgTask* pTask, CTG_TASK_TYPE type, ctgSubTaskCbFp fp, void* param) { - SCtgJob* pJob = pTask->pJob; +SCtgTask* ctgGetTask(SCtgJob* pJob, int32_t taskId) { + int32_t taskNum = taosArrayGetSize(pJob->pTasks); + + for (int32_t i = 0; i < taskNum; ++i) { + SCtgTask* pTask = taosArrayGet(pJob->pTasks, i); + if (pTask->taskId == taskId) { + return pTask; + } + } + + return NULL; +} + + +int32_t ctgLaunchSubTask(SCtgTask** ppTask, CTG_TASK_TYPE type, ctgSubTaskCbFp fp, void* param) { + SCtgJob* pJob = (*ppTask)->pJob; int32_t subTaskId = -1; bool newTask = false; + int32_t taskId = (*ppTask)->taskId; - ctgClearSubTaskRes(&pTask->subRes); - pTask->subRes.type = type; - pTask->subRes.fp = fp; + ctgClearSubTaskRes(&(*ppTask)->subRes); + (*ppTask)->subRes.type = type; + (*ppTask)->subRes.fp = fp; CTG_ERR_RET(ctgSearchExistingTask(pJob, type, param, &subTaskId)); if (subTaskId < 0) { CTG_ERR_RET(ctgInitTask(pJob, type, param, &subTaskId)); newTask = true; + *ppTask = ctgGetTask(pJob, taskId); } SCtgTask* pSub = taosArrayGet(pJob->pTasks, subTaskId); @@ -2561,10 
+2577,10 @@ int32_t ctgLaunchSubTask(SCtgTask* pTask, CTG_TASK_TYPE type, ctgSubTaskCbFp fp, pSub->subTask = true; } - CTG_ERR_RET(ctgSetSubTaskCb(pSub, pTask)); + CTG_ERR_RET(ctgSetSubTaskCb(pSub, *ppTask)); if (newTask) { - SCtgMsgCtx* pMsgCtx = CTG_GET_TASK_MSGCTX(pTask, -1); + SCtgMsgCtx* pMsgCtx = CTG_GET_TASK_MSGCTX(*ppTask, -1); SCtgMsgCtx* pSubMsgCtx = CTG_GET_TASK_MSGCTX(pSub, -1); pSubMsgCtx->pBatchs = pMsgCtx->pBatchs; @@ -2584,6 +2600,7 @@ int32_t ctgLaunchJob(SCtgJob* pJob) { qDebug("QID:0x%" PRIx64 " ctg launch [%dth] task", pJob->queryId, pTask->taskId); CTG_ERR_RET((*gCtgAsyncFps[pTask->type].launchFp)(pTask)); + pTask = taosArrayGet(pJob->pTasks, i); pTask->status = CTG_TASK_LAUNCHED; } diff --git a/source/libs/executor/src/scanoperator.c b/source/libs/executor/src/scanoperator.c index 73cc09cfca..6c8d9ed59f 100644 --- a/source/libs/executor/src/scanoperator.c +++ b/source/libs/executor/src/scanoperator.c @@ -2880,15 +2880,23 @@ int32_t startGroupTableMergeScan(SOperatorInfo* pOperator) { int32_t tableStartIdx = pInfo->tableStartIndex; int32_t tableEndIdx = pInfo->tableEndIndex; - pInfo->sortBufSize = 2048 * pInfo->bufPageSize; - int32_t numOfBufPage = pInfo->sortBufSize / pInfo->bufPageSize; - pInfo->pSortHandle = tsortCreateSortHandle(pInfo->pSortInfo, SORT_BLOCK_TS_MERGE, pInfo->bufPageSize, numOfBufPage, - pInfo->pSortInputBlock, pTaskInfo->id.str, 0, 0, 0); + bool hasLimit = pInfo->limitInfo.limit.limit != -1 || pInfo->limitInfo.limit.offset != -1; int64_t mergeLimit = -1; - if (pInfo->limitInfo.limit.limit != -1 || pInfo->limitInfo.limit.offset != -1) { - mergeLimit = pInfo->limitInfo.limit.limit + pInfo->limitInfo.limit.offset; - } - tsortSetMergeLimit(pInfo->pSortHandle, mergeLimit); + if (hasLimit) { + mergeLimit = pInfo->limitInfo.limit.limit + pInfo->limitInfo.limit.offset; + } + size_t szRow = blockDataGetRowSize(pInfo->pResBlock); + if (hasLimit) { + pInfo->pSortHandle = tsortCreateSortHandle(pInfo->pSortInfo, SORT_SINGLESOURCE_SORT, 
-1, -1, + NULL, pTaskInfo->id.str, mergeLimit, szRow+8, tsPQSortMemThreshold * 1024* 1024); + } else { + pInfo->sortBufSize = 2048 * pInfo->bufPageSize; + int32_t numOfBufPage = pInfo->sortBufSize / pInfo->bufPageSize; + pInfo->pSortHandle = tsortCreateSortHandle(pInfo->pSortInfo, SORT_BLOCK_TS_MERGE, pInfo->bufPageSize, numOfBufPage, + pInfo->pSortInputBlock, pTaskInfo->id.str, 0, 0, 0); + + tsortSetMergeLimit(pInfo->pSortHandle, mergeLimit); + } tsortSetFetchRawDataFp(pInfo->pSortHandle, getTableDataBlockImpl, NULL, NULL); // one table has one data block diff --git a/source/libs/executor/src/sortoperator.c b/source/libs/executor/src/sortoperator.c index 9c70a95389..459474d06e 100644 --- a/source/libs/executor/src/sortoperator.c +++ b/source/libs/executor/src/sortoperator.c @@ -54,19 +54,19 @@ SOperatorInfo* createSortOperatorInfo(SOperatorInfo* downstream, SSortPhysiNode* int32_t numOfCols = 0; pOperator->exprSupp.pExprInfo = createExprInfo(pSortNode->pExprs, NULL, &numOfCols); pOperator->exprSupp.numOfExprs = numOfCols; - calcSortOperMaxTupleLength(pInfo, pSortNode->pSortKeys); - pInfo->maxRows = -1; - if (pSortNode->node.pLimit) { - SLimitNode* pLimit = (SLimitNode*)pSortNode->node.pLimit; - if (pLimit->limit > 0) pInfo->maxRows = pLimit->limit; - } - int32_t numOfOutputCols = 0; int32_t code = extractColMatchInfo(pSortNode->pTargets, pDescNode, &numOfOutputCols, COL_MATCH_FROM_SLOT_ID, &pInfo->matchInfo); if (code != TSDB_CODE_SUCCESS) { goto _error; } + + calcSortOperMaxTupleLength(pInfo, pSortNode->pSortKeys); + pInfo->maxRows = -1; + if (pSortNode->node.pLimit) { + SLimitNode* pLimit = (SLimitNode*)pSortNode->node.pLimit; + if (pLimit->limit > 0) pInfo->maxRows = pLimit->limit + pLimit->offset; + } pOperator->exprSupp.pCtx = createSqlFunctionCtx(pOperator->exprSupp.pExprInfo, numOfCols, &pOperator->exprSupp.rowEntryInfoOffset, &pTaskInfo->storageAPI.functionStore); diff --git a/source/libs/executor/src/tfill.c b/source/libs/executor/src/tfill.c index 
55ef019d76..4e0dff9d4f 100644 --- a/source/libs/executor/src/tfill.c +++ b/source/libs/executor/src/tfill.c @@ -66,20 +66,25 @@ static void setNullRow(SSDataBlock* pBlock, SFillInfo* pFillInfo, int32_t rowInd } static void doSetUserSpecifiedValue(SColumnInfoData* pDst, SVariant* pVar, int32_t rowIndex, int64_t currentKey) { + bool isNull = (TSDB_DATA_TYPE_NULL == pVar->nType) ? true : false; if (pDst->info.type == TSDB_DATA_TYPE_FLOAT) { float v = 0; - GET_TYPED_DATA(v, float, pVar->nType, &pVar->i); - colDataSetVal(pDst, rowIndex, (char*)&v, false); + GET_TYPED_DATA(v, float, pVar->nType, &pVar->f); + colDataSetVal(pDst, rowIndex, (char*)&v, isNull); } else if (pDst->info.type == TSDB_DATA_TYPE_DOUBLE) { double v = 0; - GET_TYPED_DATA(v, double, pVar->nType, &pVar->i); - colDataSetVal(pDst, rowIndex, (char*)&v, false); + GET_TYPED_DATA(v, double, pVar->nType, &pVar->d); + colDataSetVal(pDst, rowIndex, (char*)&v, isNull); } else if (IS_SIGNED_NUMERIC_TYPE(pDst->info.type)) { int64_t v = 0; GET_TYPED_DATA(v, int64_t, pVar->nType, &pVar->i); - colDataSetVal(pDst, rowIndex, (char*)&v, false); + colDataSetVal(pDst, rowIndex, (char*)&v, isNull); + } else if (IS_UNSIGNED_NUMERIC_TYPE(pDst->info.type)) { + uint64_t v = 0; + GET_TYPED_DATA(v, uint64_t, pVar->nType, &pVar->u); + colDataSetVal(pDst, rowIndex, (char*)&v, isNull); } else if (pDst->info.type == TSDB_DATA_TYPE_TIMESTAMP) { - colDataSetVal(pDst, rowIndex, (const char*)¤tKey, false); + colDataSetVal(pDst, rowIndex, (const char*)¤tKey, isNull); } else { // varchar/nchar data colDataSetNULL(pDst, rowIndex); } diff --git a/source/libs/executor/src/timesliceoperator.c b/source/libs/executor/src/timesliceoperator.c index cb74392a10..b019985645 100644 --- a/source/libs/executor/src/timesliceoperator.c +++ b/source/libs/executor/src/timesliceoperator.c @@ -312,6 +312,7 @@ static bool genInterpolationResult(STimeSliceOperatorInfo* pSliceInfo, SExprSupp case TSDB_FILL_SET_VALUE_F: { SVariant* pVar = 
&pSliceInfo->pFillColInfo[fillColIndex].fillVal; + bool isNull = (TSDB_DATA_TYPE_NULL == pVar->nType) ? true : false; if (pDst->info.type == TSDB_DATA_TYPE_FLOAT) { float v = 0; if (!IS_VAR_DATA_TYPE(pVar->nType)) { @@ -319,7 +320,7 @@ static bool genInterpolationResult(STimeSliceOperatorInfo* pSliceInfo, SExprSupp } else { v = taosStr2Float(varDataVal(pVar->pz), NULL); } - colDataSetVal(pDst, rows, (char*)&v, false); + colDataSetVal(pDst, rows, (char*)&v, isNull); } else if (pDst->info.type == TSDB_DATA_TYPE_DOUBLE) { double v = 0; if (!IS_VAR_DATA_TYPE(pVar->nType)) { @@ -327,7 +328,7 @@ static bool genInterpolationResult(STimeSliceOperatorInfo* pSliceInfo, SExprSupp } else { v = taosStr2Double(varDataVal(pVar->pz), NULL); } - colDataSetVal(pDst, rows, (char*)&v, false); + colDataSetVal(pDst, rows, (char*)&v, isNull); } else if (IS_SIGNED_NUMERIC_TYPE(pDst->info.type)) { int64_t v = 0; if (!IS_VAR_DATA_TYPE(pVar->nType)) { @@ -335,7 +336,7 @@ static bool genInterpolationResult(STimeSliceOperatorInfo* pSliceInfo, SExprSupp } else { v = taosStr2Int64(varDataVal(pVar->pz), NULL, 10); } - colDataSetVal(pDst, rows, (char*)&v, false); + colDataSetVal(pDst, rows, (char*)&v, isNull); } else if (IS_UNSIGNED_NUMERIC_TYPE(pDst->info.type)) { uint64_t v = 0; if (!IS_VAR_DATA_TYPE(pVar->nType)) { @@ -343,7 +344,7 @@ static bool genInterpolationResult(STimeSliceOperatorInfo* pSliceInfo, SExprSupp } else { v = taosStr2UInt64(varDataVal(pVar->pz), NULL, 10); } - colDataSetVal(pDst, rows, (char*)&v, false); + colDataSetVal(pDst, rows, (char*)&v, isNull); } else if (IS_BOOLEAN_TYPE(pDst->info.type)) { bool v = false; if (!IS_VAR_DATA_TYPE(pVar->nType)) { @@ -351,7 +352,7 @@ static bool genInterpolationResult(STimeSliceOperatorInfo* pSliceInfo, SExprSupp } else { v = taosStr2Int8(varDataVal(pVar->pz), NULL, 10); } - colDataSetVal(pDst, rows, (char*)&v, false); + colDataSetVal(pDst, rows, (char*)&v, isNull); } ++fillColIndex; diff --git 
a/source/libs/executor/src/timewindowoperator.c b/source/libs/executor/src/timewindowoperator.c index e8059afb2f..0a46def23d 100644 --- a/source/libs/executor/src/timewindowoperator.c +++ b/source/libs/executor/src/timewindowoperator.c @@ -2753,6 +2753,7 @@ void streamIntervalReloadState(SOperatorInfo* pOperator) { strlen(STREAM_INTERVAL_OP_STATE_NAME), &pBuf, &size); TSKEY ts = *(TSKEY*)pBuf; taosMemoryFree(pBuf); + pInfo->twAggSup.maxTs = TMAX(pInfo->twAggSup.maxTs, ts); pInfo->statestore.streamStateReloadInfo(pInfo->pState, ts); } SOperatorInfo* downstream = pOperator->pDownstream[0]; diff --git a/source/libs/executor/src/tsort.c b/source/libs/executor/src/tsort.c index 30e7148736..7784bc0c94 100644 --- a/source/libs/executor/src/tsort.c +++ b/source/libs/executor/src/tsort.c @@ -51,6 +51,7 @@ struct SSortHandle { uint32_t tmpRowIdx; int64_t mergeLimit; + int64_t currMergeLimitTs; int32_t sourceId; SSDataBlock* pDataBlock; @@ -921,7 +922,8 @@ static int32_t sortBlocksToExtSource(SSortHandle* pHandle, SArray* aBlk, SBlockO int32_t nMergedRows = 0; bool mergeLimitReached = false; size_t blkPgSz = pgHeaderSz; - + int64_t lastPageBufTs = (order->order == TSDB_ORDER_ASC) ? INT64_MAX : INT64_MIN; + int64_t currTs = (order->order == TSDB_ORDER_ASC) ? 
INT64_MAX : INT64_MIN; while (nRows < totalRows) { int32_t minIdx = tMergeTreeGetChosenIndex(pTree); SSDataBlock* minBlk = taosArrayGetP(aBlk, minIdx); @@ -929,14 +931,21 @@ static int32_t sortBlocksToExtSource(SSortHandle* pHandle, SArray* aBlk, SBlockO int32_t bufInc = getPageBufIncForRow(minBlk, minRow, pHandle->pDataBlock->info.rows); if (blkPgSz <= pHandle->pageSize && blkPgSz + bufInc > pHandle->pageSize) { + SColumnInfoData* tsCol = taosArrayGet(pHandle->pDataBlock->pDataBlock, order->slotId); + lastPageBufTs = ((int64_t*)tsCol->pData)[pHandle->pDataBlock->info.rows - 1]; appendDataBlockToPageBuf(pHandle, pHandle->pDataBlock, aPgId); nMergedRows += pHandle->pDataBlock->info.rows; blockDataCleanup(pHandle->pDataBlock); blkPgSz = pgHeaderSz; bufInc = getPageBufIncForRow(minBlk, minRow, 0); + if ((pHandle->mergeLimit != -1) && (nMergedRows >= pHandle->mergeLimit)) { mergeLimitReached = true; + if ((lastPageBufTs < pHandle->currMergeLimitTs && order->order == TSDB_ORDER_ASC) || + (lastPageBufTs > pHandle->currMergeLimitTs && order->order == TSDB_ORDER_DESC)) { + pHandle->currMergeLimitTs = lastPageBufTs; + } break; } } @@ -955,8 +964,17 @@ static int32_t sortBlocksToExtSource(SSortHandle* pHandle, SArray* aBlk, SBlockO } if (pHandle->pDataBlock->info.rows > 0) { if (!mergeLimitReached) { + SColumnInfoData* tsCol = taosArrayGet(pHandle->pDataBlock->pDataBlock, order->slotId); + lastPageBufTs = ((int64_t*)tsCol->pData)[pHandle->pDataBlock->info.rows - 1]; appendDataBlockToPageBuf(pHandle, pHandle->pDataBlock, aPgId); nMergedRows += pHandle->pDataBlock->info.rows; + if ((pHandle->mergeLimit != -1) && (nMergedRows >= pHandle->mergeLimit)) { + mergeLimitReached = true; + if ((lastPageBufTs < pHandle->currMergeLimitTs && order->order == TSDB_ORDER_ASC) || + (lastPageBufTs > pHandle->currMergeLimitTs && order->order == TSDB_ORDER_DESC)) { + pHandle->currMergeLimitTs = lastPageBufTs; + } + } } blockDataCleanup(pHandle->pDataBlock); } @@ -982,11 +1000,24 @@ static 
int32_t createBlocksMergeSortInitialSources(SSortHandle* pHandle) { SSortSource* pSrc = taosArrayGetP(pHandle->pOrderedSource, 0); int32_t szSort = 0; + if (pOrder->order == TSDB_ORDER_ASC) { + pHandle->currMergeLimitTs = INT64_MAX; + } else { + pHandle->currMergeLimitTs = INT64_MIN; + } + SArray* aBlkSort = taosArrayInit(8, POINTER_BYTES); SSHashObj* mUidBlk = tSimpleHashInit(64, taosGetDefaultHashFunction(TSDB_DATA_TYPE_UBIGINT)); while (1) { SSDataBlock* pBlk = pHandle->fetchfp(pSrc->param); - + if (pBlk != NULL) { + SColumnInfoData* tsCol = taosArrayGet(pBlk->pDataBlock, pOrder->slotId); + int64_t firstRowTs = *(int64_t*)tsCol->pData; + if ((pOrder->order == TSDB_ORDER_ASC && firstRowTs > pHandle->currMergeLimitTs) || + (pOrder->order == TSDB_ORDER_DESC && firstRowTs < pHandle->currMergeLimitTs)) { + continue; + } + } if (pBlk != NULL) { szSort += blockDataGetSize(pBlk); @@ -1374,6 +1405,9 @@ static int32_t tsortOpenForPQSort(SSortHandle* pHandle) { } static STupleHandle* tsortPQSortNextTuple(SSortHandle* pHandle) { + if (pHandle->pDataBlock == NULL) { // when no input stream datablock + return NULL; + } blockDataCleanup(pHandle->pDataBlock); blockDataEnsureCapacity(pHandle->pDataBlock, 1); // abandon the top tuple if queue size bigger than max size diff --git a/source/libs/function/src/builtinsimpl.c b/source/libs/function/src/builtinsimpl.c index 3e16a40575..fad8c9ca5b 100644 --- a/source/libs/function/src/builtinsimpl.c +++ b/source/libs/function/src/builtinsimpl.c @@ -920,6 +920,7 @@ void appendSelectivityValue(SqlFunctionCtx* pCtx, int32_t rowIndex, int32_t pos) void replaceTupleData(STuplePos* pDestPos, STuplePos* pSourcePos) { *pDestPos = *pSourcePos; } +#define COMPARE_MINMAX_DATA(type) (( (*(type*)&pDBuf->v) < (*(type*)&pSBuf->v) ) ^ isMinFunc) int32_t minMaxCombine(SqlFunctionCtx* pDestCtx, SqlFunctionCtx* pSourceCtx, int32_t isMinFunc) { SResultRowEntryInfo* pDResInfo = GET_RES_INFO(pDestCtx); SMinmaxResInfo* pDBuf = GET_ROWCELL_INTERBUF(pDResInfo); 
@@ -927,18 +928,57 @@ int32_t minMaxCombine(SqlFunctionCtx* pDestCtx, SqlFunctionCtx* pSourceCtx, int3 SResultRowEntryInfo* pSResInfo = GET_RES_INFO(pSourceCtx); SMinmaxResInfo* pSBuf = GET_ROWCELL_INTERBUF(pSResInfo); int16_t type = pDBuf->type == TSDB_DATA_TYPE_NULL ? pSBuf->type : pDBuf->type; - if (IS_FLOAT_TYPE(type)) { - if (pSBuf->assign && ((((*(double*)&pDBuf->v) < (*(double*)&pSBuf->v)) ^ isMinFunc) || !pDBuf->assign)) { - *(double*)&pDBuf->v = *(double*)&pSBuf->v; - replaceTupleData(&pDBuf->tuplePos, &pSBuf->tuplePos); - pDBuf->assign = true; - } - } else { - if (pSBuf->assign && (((pDBuf->v < pSBuf->v) ^ isMinFunc) || !pDBuf->assign)) { - pDBuf->v = pSBuf->v; - replaceTupleData(&pDBuf->tuplePos, &pSBuf->tuplePos); - pDBuf->assign = true; + + switch (type) { + case TSDB_DATA_TYPE_DOUBLE: + case TSDB_DATA_TYPE_UBIGINT: + case TSDB_DATA_TYPE_BIGINT: + if (pSBuf->assign && (COMPARE_MINMAX_DATA(int64_t) || !pDBuf->assign)) { + pDBuf->v = pSBuf->v; + replaceTupleData(&pDBuf->tuplePos, &pSBuf->tuplePos); + pDBuf->assign = true; + } + break; + case TSDB_DATA_TYPE_UINT: + case TSDB_DATA_TYPE_INT: + if (pSBuf->assign && (COMPARE_MINMAX_DATA(int32_t) || !pDBuf->assign)) { + pDBuf->v = pSBuf->v; + replaceTupleData(&pDBuf->tuplePos, &pSBuf->tuplePos); + pDBuf->assign = true; + } + break; + case TSDB_DATA_TYPE_USMALLINT: + case TSDB_DATA_TYPE_SMALLINT: + if (pSBuf->assign && (COMPARE_MINMAX_DATA(int16_t) || !pDBuf->assign)) { + pDBuf->v = pSBuf->v; + replaceTupleData(&pDBuf->tuplePos, &pSBuf->tuplePos); + pDBuf->assign = true; + } + break; + case TSDB_DATA_TYPE_BOOL: + case TSDB_DATA_TYPE_UTINYINT: + case TSDB_DATA_TYPE_TINYINT: + if (pSBuf->assign && (COMPARE_MINMAX_DATA(int8_t) || !pDBuf->assign)) { + pDBuf->v = pSBuf->v; + replaceTupleData(&pDBuf->tuplePos, &pSBuf->tuplePos); + pDBuf->assign = true; + } + break; + case TSDB_DATA_TYPE_FLOAT: { + if (pSBuf->assign && (COMPARE_MINMAX_DATA(double) || !pDBuf->assign)) { + pDBuf->v = pSBuf->v; + 
replaceTupleData(&pDBuf->tuplePos, &pSBuf->tuplePos); + pDBuf->assign = true; + } + break; } + default: + if (pSBuf->assign && (strcmp((char*)&pDBuf->v, (char*)&pSBuf->v) || !pDBuf->assign)) { + pDBuf->v = pSBuf->v; + replaceTupleData(&pDBuf->tuplePos, &pSBuf->tuplePos); + pDBuf->assign = true; + } + break; } pDResInfo->numOfRes = TMAX(pDResInfo->numOfRes, pSResInfo->numOfRes); pDResInfo->isNullRes &= pSResInfo->isNullRes; diff --git a/source/libs/monitor/src/monMain.c b/source/libs/monitor/src/monMain.c index 949e91198a..8f94bfdb96 100644 --- a/source/libs/monitor/src/monMain.c +++ b/source/libs/monitor/src/monMain.c @@ -547,7 +547,7 @@ void monSendReport() { monGenGrantJson(pMonitor); monGenDnodeJson(pMonitor); monGenDiskJson(pMonitor); - monGenLogJson(pMonitor); + //monGenLogJson(pMonitor); // TS-3691 char *pCont = tjsonToString(pMonitor->pJson); // uDebugL("report cont:%s\n", pCont); diff --git a/source/libs/parser/src/parTranslater.c b/source/libs/parser/src/parTranslater.c index a03dab2480..4ea7fe5b63 100644 --- a/source/libs/parser/src/parTranslater.c +++ b/source/libs/parser/src/parTranslater.c @@ -2934,14 +2934,14 @@ static int32_t createMultiResFuncsFromStar(STranslateContext* pCxt, SFunctionNod static int32_t createTags(STranslateContext* pCxt, SNodeList** pOutput) { if (QUERY_NODE_REAL_TABLE != nodeType(((SSelectStmt*)pCxt->pCurrStmt)->pFromTable)) { return generateSyntaxErrMsgExt(&pCxt->msgBuf, TSDB_CODE_PAR_INVALID_TAGS_PC, - "The _TAGS pseudo column can only be used for subtable and supertable queries"); + "The _TAGS pseudo column can only be used for child table and super table queries"); } SRealTableNode* pTable = (SRealTableNode*)(((SSelectStmt*)pCxt->pCurrStmt)->pFromTable); const STableMeta* pMeta = pTable->pMeta; if (TSDB_SUPER_TABLE != pMeta->tableType && TSDB_CHILD_TABLE != pMeta->tableType) { return generateSyntaxErrMsg(&pCxt->msgBuf, TSDB_CODE_PAR_INVALID_TAGS_PC, - "The _TAGS pseudo column can only be used for subtable and supertable 
queries"); + "The _TAGS pseudo column can only be used for child table and super table queries"); } SSchema* pTagsSchema = getTableTagSchema(pMeta); diff --git a/source/libs/parser/src/parUtil.c b/source/libs/parser/src/parUtil.c index 263318b92f..42a0d1282a 100644 --- a/source/libs/parser/src/parUtil.c +++ b/source/libs/parser/src/parUtil.c @@ -164,6 +164,8 @@ static char* getSyntaxErrFormat(int32_t errCode) { return "%s function is not supported in fill query"; case TSDB_CODE_PAR_INVALID_WINDOW_PC: return "_WSTART, _WEND and _WDURATION can only be used in window query"; + case TSDB_CODE_PAR_INVALID_TAGS_PC: + return "Tags can only applied to super table and child table"; case TSDB_CODE_PAR_WINDOW_NOT_ALLOWED_FUNC: return "%s function is not supported in time window query"; case TSDB_CODE_PAR_STREAM_NOT_ALLOWED_FUNC: diff --git a/source/os/src/osSysinfo.c b/source/os/src/osSysinfo.c index 6f87f6b75b..5f73251e3b 100644 --- a/source/os/src/osSysinfo.c +++ b/source/os/src/osSysinfo.c @@ -961,6 +961,18 @@ char *taosGetCmdlineByPID(int pid) { #endif } +int64_t taosGetOsUptime() { +#ifdef WINDOWS +#elif defined(_TD_DARWIN_64) +#else + struct sysinfo info; + if (0 == sysinfo(&info)) { + return (int64_t)info.uptime * 1000; + } +#endif + return 0; +} + void taosSetCoreDump(bool enable) { if (!enable) return; #ifdef WINDOWS diff --git a/source/util/src/tarray.c b/source/util/src/tarray.c index 8906391a9a..f5e15e7436 100644 --- a/source/util/src/tarray.c +++ b/source/util/src/tarray.c @@ -191,7 +191,7 @@ void* taosArrayGet(const SArray* pArray, size_t index) { } if (index >= pArray->size) { - uError("index is out of range, current:%"PRIzu" max:%d", index, pArray->capacity); + uError("index is out of range, current:%" PRIzu " max:%d", index, pArray->capacity); return NULL; } @@ -221,7 +221,7 @@ size_t taosArrayGetSize(const SArray* pArray) { return TARRAY_SIZE(pArray); } -void* taosArrayInsert(SArray* pArray, size_t index, void* pData) { +void* taosArrayInsert(SArray* pArray, 
size_t index, const void* pData) { if (pArray == NULL || pData == NULL) { return NULL; } @@ -492,7 +492,7 @@ void* taosDecodeArray(const void* buf, SArray** pArray, FDecode decode, int32_t // order array void taosArraySortPWithExt(SArray* pArray, __ext_compar_fn_t fn, const void* param) { taosqsort(pArray->pData, pArray->size, pArray->elemSize, param, fn); -// taosArrayGetSize(pArray) > 8 ? taosArrayQuickSort(pArray, fn, param) : taosArrayInsertSort(pArray, fn, param); + // taosArrayGetSize(pArray) > 8 ? taosArrayQuickSort(pArray, fn, param) : taosArrayInsertSort(pArray, fn, param); } void taosArraySwap(SArray* a, SArray* b) { diff --git a/source/util/src/trbtree.c b/source/util/src/trbtree.c index e7386d5912..e1000f7bc1 100644 --- a/source/util/src/trbtree.c +++ b/source/util/src/trbtree.c @@ -105,7 +105,7 @@ static void tRBTreeTransplant(SRBTree *pTree, SRBTreeNode *u, SRBTreeNode *v) { v->parent = u->parent; } -static SRBTreeNode *tRBTreeSuccessor(SRBTree *pTree, SRBTreeNode *pNode) { +static SRBTreeNode *tRBTreeSuccessor(const SRBTree *pTree, SRBTreeNode *pNode) { if (pNode->right != pTree->NIL) { pNode = pNode->right; while (pNode->left != pTree->NIL) { @@ -125,7 +125,7 @@ static SRBTreeNode *tRBTreeSuccessor(SRBTree *pTree, SRBTreeNode *pNode) { return pNode; } -static SRBTreeNode *tRBTreePredecessor(SRBTree *pTree, SRBTreeNode *pNode) { +static SRBTreeNode *tRBTreePredecessor(const SRBTree *pTree, SRBTreeNode *pNode) { if (pNode->left != pTree->NIL) { pNode = pNode->left; while (pNode->right != pTree->NIL) { @@ -443,7 +443,7 @@ SRBTreeNode *tRBTreeDropMax(SRBTree *pTree) { return pNode; } -SRBTreeNode *tRBTreeGet(SRBTree *pTree, const SRBTreeNode *pKeyNode) { +SRBTreeNode *tRBTreeGet(const SRBTree *pTree, const SRBTreeNode *pKeyNode) { SRBTreeNode *pNode = pTree->root; while (pNode != pTree->NIL) { diff --git a/source/util/src/tutil.c b/source/util/src/tutil.c index 6d95660103..6b6878ec83 100644 --- a/source/util/src/tutil.c +++ b/source/util/src/tutil.c @@ 
-351,10 +351,10 @@ int32_t titoa(uint64_t val, size_t radix, char str[]) { int32_t i = 0; uint64_t v = val; - while(v > 0) { + do { buf[i++] = s[v % radix]; v /= radix; - } + } while (v > 0); // reverse order for(int32_t j = 0; j < i; ++j) { diff --git a/tests/develop-test/2-query/show_create_db.py b/tests/develop-test/2-query/show_create_db.py index d4bff819c9..07ca357f9e 100644 --- a/tests/develop-test/2-query/show_create_db.py +++ b/tests/develop-test/2-query/show_create_db.py @@ -42,7 +42,7 @@ class TDTestCase: tdSql.query('show create database scd;') tdSql.checkRows(1) tdSql.checkData(0, 0, 'scd') - tdSql.checkData(0, 1, "CREATE DATABASE `scd` BUFFER 256 CACHESIZE 1 CACHEMODEL 'none' COMP 2 DURATION 14400m WAL_FSYNC_PERIOD 3000 MAXROWS 4096 MINROWS 100 STT_TRIGGER 1 KEEP 5256000m,5256000m,5256000m PAGES 256 PAGESIZE 4 PRECISION 'ms' REPLICA 1 WAL_LEVEL 1 VGROUPS 2 SINGLE_STABLE 0 TABLE_PREFIX 0 TABLE_SUFFIX 0 TSDB_PAGESIZE 4 WAL_RETENTION_PERIOD 0 WAL_RETENTION_SIZE 0") + tdSql.checkData(0, 1, "CREATE DATABASE `scd` BUFFER 256 CACHESIZE 1 CACHEMODEL 'none' COMP 2 DURATION 14400m WAL_FSYNC_PERIOD 3000 MAXROWS 4096 MINROWS 100 STT_TRIGGER 2 KEEP 5256000m,5256000m,5256000m PAGES 256 PAGESIZE 4 PRECISION 'ms' REPLICA 1 WAL_LEVEL 1 VGROUPS 2 SINGLE_STABLE 0 TABLE_PREFIX 0 TABLE_SUFFIX 0 TSDB_PAGESIZE 4 WAL_RETENTION_PERIOD 0 WAL_RETENTION_SIZE 0") tdSql.query('show create database scd2;') tdSql.checkRows(1) @@ -60,7 +60,7 @@ class TDTestCase: tdSql.query('show create database scd;') tdSql.checkRows(1) tdSql.checkData(0, 0, 'scd') - tdSql.checkData(0, 1, "CREATE DATABASE `scd` BUFFER 256 CACHESIZE 1 CACHEMODEL 'none' COMP 2 DURATION 14400m WAL_FSYNC_PERIOD 3000 MAXROWS 4096 MINROWS 100 STT_TRIGGER 1 KEEP 5256000m,5256000m,5256000m PAGES 256 PAGESIZE 4 PRECISION 'ms' REPLICA 1 WAL_LEVEL 1 VGROUPS 2 SINGLE_STABLE 0 TABLE_PREFIX 0 TABLE_SUFFIX 0 TSDB_PAGESIZE 4 WAL_RETENTION_PERIOD 0 WAL_RETENTION_SIZE 0") + tdSql.checkData(0, 1, "CREATE DATABASE `scd` BUFFER 256 
CACHESIZE 1 CACHEMODEL 'none' COMP 2 DURATION 14400m WAL_FSYNC_PERIOD 3000 MAXROWS 4096 MINROWS 100 STT_TRIGGER 2 KEEP 5256000m,5256000m,5256000m PAGES 256 PAGESIZE 4 PRECISION 'ms' REPLICA 1 WAL_LEVEL 1 VGROUPS 2 SINGLE_STABLE 0 TABLE_PREFIX 0 TABLE_SUFFIX 0 TSDB_PAGESIZE 4 WAL_RETENTION_PERIOD 0 WAL_RETENTION_SIZE 0") tdSql.query('show create database scd2;') tdSql.checkRows(1) diff --git a/tests/script/tsim/parser/fill.sim b/tests/script/tsim/parser/fill.sim index a66e7d6ab7..0534aa5d5b 100644 --- a/tests/script/tsim/parser/fill.sim +++ b/tests/script/tsim/parser/fill.sim @@ -1224,4 +1224,104 @@ if $data42 != NULL then return -1 endi +print ===================== TD-3625 test fill value NULL +sql use $db + +sql select _wstart,_wend,count(*) from tm0 where ts >= '2020-01-01 01:03:06.000' and ts <= '2020-01-01 01:03:10.000' interval(1s) fill(value, NULL); + +if $rows != 5 then + return -1 +endi + +if $data02 != NULL then + return -1 +endi + +if $data12 != 1 then + return -1 +endi + +if $data22 != 1 then + return -1 +endi + +if $data32 != 1 then + return -1 +endi + +if $data42 != NULL then + return -1 +endi + +sql select _wstart,_wend,count(*),sum(k),avg(k) from tm0 where ts >= '2020-01-01 01:03:06.000' and ts <= '2020-01-01 01:03:10.000' interval(1s) fill(value, 1, NULL, 1); + +if $rows != 5 then + return -1 +endi + +if $data02 != 1 then + return -1 +endi + +if $data12 != 1 then + return -1 +endi + +if $data22 != 1 then + return -1 +endi + +if $data32 != 1 then + return -1 +endi + +if $data42 != 1 then + return -1 +endi + + +if $data03 != NULL then + return -1 +endi + +if $data13 != 7 then + return -1 +endi + +if $data23 != 8 then + return -1 +endi + +if $data33 != 9 then + return -1 +endi + +if $data43 != NULL then + return -1 +endi + + +if $data04 != 1.000000000 then + return -1 +endi + +if $data14 != 7.000000000 then + return -1 +endi + +if $data24 != 8.000000000 then + return -1 +endi + +if $data34 != 9.000000000 then + return -1 +endi + +if $data44 != 
1.000000000 then + return -1 +endi + + system sh/exec.sh -n dnode1 -s stop -x SIGINT + + diff --git a/tests/system-test/0-others/splitVGroup.py b/tests/system-test/0-others/splitVGroup.py index 4509961066..9fd00892e4 100644 --- a/tests/system-test/0-others/splitVGroup.py +++ b/tests/system-test/0-others/splitVGroup.py @@ -283,14 +283,14 @@ class TDTestCase: # normal table - # all rows - sql = "select * from @db_name.ta" - self.queryDouble(sql) - # count sql = "select count(*) from @db_name.ta" self.queryDouble(sql) + # all rows + sql = "select * from @db_name.ta" + self.queryDouble(sql) + # sum sql = "select sum(c1) from @db_name.ta" self.queryDouble(sql) @@ -316,7 +316,8 @@ class TDTestCase: tdSql.execute(sql) # wait end - for i in range(100): + seconds = 300 + for i in range(seconds): sql ="show transactions;" rows = tdSql.query(sql) if rows == 0: @@ -325,7 +326,7 @@ class TDTestCase: #tdLog.info(f"i={i} wait split vgroup ...") time.sleep(1) - tdLog.exit("split vgroup transaction is not finished after executing 50s") + tdLog.exit(f"split vgroup transaction is not finished after executing {seconds}s") return False # split error @@ -382,6 +383,14 @@ class TDTestCase: self.expectSplitError("topicdb") tdSql.execute("drop topic toa;") self.expectSplitOk("topicdb") + + # compact and check db2 + def compactAndCheck(self): + tdLog.info("compact db2 and check result ...") + # compact + tdSql.execute(f"compact database {self.db2};") + # check result + self.checkResult() # run def run(self): @@ -390,12 +399,15 @@ class TDTestCase: for i in range(5): # split vgroup on db2 + start = time.time() self.splitVGroup(self.db2) + end = time.time() self.vgroups2 += 1 - + # check two db query result same self.checkResult() - tdLog.info(f"split vgroup i={i} passed.") + spend = "%.3f"%(end-start) + tdLog.info(f"split vgroup i={i} passed. 
spend = {spend}s") # split empty db self.splitEmptyDB() @@ -403,6 +415,9 @@ class TDTestCase: # check topic and stream forib self.checkForbid() + # compact database + self.compactAndCheck() + # stop def stop(self): tdSql.close() diff --git a/tests/system-test/0-others/taosdMonitor.py b/tests/system-test/0-others/taosdMonitor.py index 8094c4e0f5..6c21eb8daa 100644 --- a/tests/system-test/0-others/taosdMonitor.py +++ b/tests/system-test/0-others/taosdMonitor.py @@ -186,33 +186,6 @@ class RequestHandlerImpl(http.server.BaseHTTPRequestHandler): tdLog.exit("total is null!") - # log_infos ==================================== - - if "log_infos" not in infoDict or infoDict["log_infos"]== None: - tdLog.exit("log_infos is null!") - - if "logs" not in infoDict["log_infos"] or len(infoDict["log_infos"]["logs"]) < 8:#!= 10: - tdLog.exit("logs is null!") - - if "ts" not in infoDict["log_infos"]["logs"][0] or len(infoDict["log_infos"]["logs"][0]["ts"]) <= 10: - tdLog.exit("ts is null!") - - if "level" not in infoDict["log_infos"]["logs"][0] or infoDict["log_infos"]["logs"][0]["level"] not in ["error" ,"info" , "debug" ,"trace"]: - tdLog.exit("level is null!") - - if "content" not in infoDict["log_infos"]["logs"][0] or len(infoDict["log_infos"]["logs"][0]["ts"]) <= 1: - tdLog.exit("content is null!") - - if "summary" not in infoDict["log_infos"] or len(infoDict["log_infos"]["summary"])!= 4: - tdLog.exit("summary is null!") - - - if "total" not in infoDict["log_infos"]["summary"][0] or infoDict["log_infos"]["summary"][0]["total"] < 0 : - tdLog.exit("total is null!") - - if "level" not in infoDict["log_infos"]["summary"][0] or infoDict["log_infos"]["summary"][0]["level"] not in ["error" ,"info" , "debug" ,"trace"]: - tdLog.exit("level is null!") - def do_GET(self): """ process GET request @@ -315,4 +288,3 @@ class TDTestCase: tdCases.addLinux(__file__, TDTestCase()) tdCases.addWindows(__file__, TDTestCase()) - diff --git a/tests/system-test/1-insert/delete_stable.py 
b/tests/system-test/1-insert/delete_stable.py index 8ebe7b6692..67561c51e5 100644 --- a/tests/system-test/1-insert/delete_stable.py +++ b/tests/system-test/1-insert/delete_stable.py @@ -24,10 +24,11 @@ from util.common import * from util.sqlset import TDSetSql class TDTestCase: + updatecfgDict = {'tsdbdebugFlag': 143} def init(self, conn, logSql, replicaVar=1): self.replicaVar = int(replicaVar) tdLog.debug("start to execute %s" % __file__) - tdSql.init(conn.cursor()) + tdSql.init(conn.cursor(), True) self.dbname = 'db_test' self.ns_dbname = 'ns_test' self.us_dbname = 'us_test' diff --git a/tests/system-test/2-query/interp.py b/tests/system-test/2-query/interp.py index b6cefbe36f..986c63839b 100644 --- a/tests/system-test/2-query/interp.py +++ b/tests/system-test/2-query/interp.py @@ -147,6 +147,57 @@ class TDTestCase: tdSql.checkData(11, 0, 15) tdSql.checkData(12, 0, 1) + for col in col_list: + tdSql.query(f"select interp({col}) from {dbname}.{tbname} range('2020-02-01 00:00:04', '2020-02-01 00:00:16') every(1s) fill(value, 1.0)") + tdSql.checkRows(13) + tdSql.checkData(0, 0, 1) + tdSql.checkData(1, 0, 5) + tdSql.checkData(2, 0, 1) + tdSql.checkData(3, 0, 1) + tdSql.checkData(4, 0, 1) + tdSql.checkData(5, 0, 1) + tdSql.checkData(6, 0, 10) + tdSql.checkData(7, 0, 1) + tdSql.checkData(8, 0, 1) + tdSql.checkData(9, 0, 1) + tdSql.checkData(10, 0, 1) + tdSql.checkData(11, 0, 15) + tdSql.checkData(12, 0, 1) + + for col in col_list: + tdSql.query(f"select interp({col}) from {dbname}.{tbname} range('2020-02-01 00:00:04', '2020-02-01 00:00:16') every(1s) fill(value, true)") + tdSql.checkRows(13) + tdSql.checkData(0, 0, 1) + tdSql.checkData(1, 0, 5) + tdSql.checkData(2, 0, 1) + tdSql.checkData(3, 0, 1) + tdSql.checkData(4, 0, 1) + tdSql.checkData(5, 0, 1) + tdSql.checkData(6, 0, 10) + tdSql.checkData(7, 0, 1) + tdSql.checkData(8, 0, 1) + tdSql.checkData(9, 0, 1) + tdSql.checkData(10, 0, 1) + tdSql.checkData(11, 0, 15) + tdSql.checkData(12, 0, 1) + + for col in col_list: + 
tdSql.query(f"select interp({col}) from {dbname}.{tbname} range('2020-02-01 00:00:04', '2020-02-01 00:00:16') every(1s) fill(value, NULL)") + tdSql.checkRows(13) + tdSql.checkData(0, 0, None) + tdSql.checkData(1, 0, 5) + tdSql.checkData(2, 0, None) + tdSql.checkData(3, 0, None) + tdSql.checkData(4, 0, None) + tdSql.checkData(5, 0, None) + tdSql.checkData(6, 0, 10) + tdSql.checkData(7, 0, None) + tdSql.checkData(8, 0, None) + tdSql.checkData(9, 0, None) + tdSql.checkData(10, 0, None) + tdSql.checkData(11, 0, 15) + tdSql.checkData(12, 0, None) + tdSql.query(f"select interp(c4) from {dbname}.{tbname} range('2020-02-01 00:00:04', '2020-02-01 00:00:16') every(1s) fill(value, 1)") tdSql.checkRows(13) tdSql.checkData(0, 0, 1.0) @@ -163,6 +214,54 @@ class TDTestCase: tdSql.checkData(11, 0, 15.0) tdSql.checkData(12, 0, 1.0) + tdSql.query(f"select interp(c4) from {dbname}.{tbname} range('2020-02-01 00:00:04', '2020-02-01 00:00:16') every(1s) fill(value, 1.0)") + tdSql.checkRows(13) + tdSql.checkData(0, 0, 1.0) + tdSql.checkData(1, 0, 5.0) + tdSql.checkData(2, 0, 1.0) + tdSql.checkData(3, 0, 1.0) + tdSql.checkData(4, 0, 1.0) + tdSql.checkData(5, 0, 1.0) + tdSql.checkData(6, 0, 10.0) + tdSql.checkData(7, 0, 1.0) + tdSql.checkData(8, 0, 1.0) + tdSql.checkData(9, 0, 1.0) + tdSql.checkData(10, 0, 1.0) + tdSql.checkData(11, 0, 15.0) + tdSql.checkData(12, 0, 1.0) + + tdSql.query(f"select interp(c4) from {dbname}.{tbname} range('2020-02-01 00:00:04', '2020-02-01 00:00:16') every(1s) fill(value, true)") + tdSql.checkRows(13) + tdSql.checkData(0, 0, 1.0) + tdSql.checkData(1, 0, 5.0) + tdSql.checkData(2, 0, 1.0) + tdSql.checkData(3, 0, 1.0) + tdSql.checkData(4, 0, 1.0) + tdSql.checkData(5, 0, 1.0) + tdSql.checkData(6, 0, 10.0) + tdSql.checkData(7, 0, 1.0) + tdSql.checkData(8, 0, 1.0) + tdSql.checkData(9, 0, 1.0) + tdSql.checkData(10, 0, 1.0) + tdSql.checkData(11, 0, 15.0) + tdSql.checkData(12, 0, 1.0) + + tdSql.query(f"select interp(c4) from {dbname}.{tbname} range('2020-02-01 
00:00:04', '2020-02-01 00:00:16') every(1s) fill(value, NULL)") + tdSql.checkRows(13) + tdSql.checkData(0, 0, None) + tdSql.checkData(1, 0, 5.0) + tdSql.checkData(2, 0, None) + tdSql.checkData(3, 0, None) + tdSql.checkData(4, 0, None) + tdSql.checkData(5, 0, None) + tdSql.checkData(6, 0, 10.0) + tdSql.checkData(7, 0, None) + tdSql.checkData(8, 0, None) + tdSql.checkData(9, 0, None) + tdSql.checkData(10, 0, None) + tdSql.checkData(11, 0, 15.0) + tdSql.checkData(12, 0, None) + tdSql.query(f"select interp(c5) from {dbname}.{tbname} range('2020-02-01 00:00:04', '2020-02-01 00:00:16') every(1s) fill(value, 1)") tdSql.checkRows(13) tdSql.checkData(0, 0, 1.0) @@ -179,6 +278,54 @@ class TDTestCase: tdSql.checkData(11, 0, 15.0) tdSql.checkData(12, 0, 1.0) + tdSql.query(f"select interp(c5) from {dbname}.{tbname} range('2020-02-01 00:00:04', '2020-02-01 00:00:16') every(1s) fill(value, 1.0)") + tdSql.checkRows(13) + tdSql.checkData(0, 0, 1.0) + tdSql.checkData(1, 0, 5.0) + tdSql.checkData(2, 0, 1.0) + tdSql.checkData(3, 0, 1.0) + tdSql.checkData(4, 0, 1.0) + tdSql.checkData(5, 0, 1.0) + tdSql.checkData(6, 0, 10.0) + tdSql.checkData(7, 0, 1.0) + tdSql.checkData(8, 0, 1.0) + tdSql.checkData(9, 0, 1.0) + tdSql.checkData(10, 0, 1.0) + tdSql.checkData(11, 0, 15.0) + tdSql.checkData(12, 0, 1.0) + + tdSql.query(f"select interp(c5) from {dbname}.{tbname} range('2020-02-01 00:00:04', '2020-02-01 00:00:16') every(1s) fill(value, true)") + tdSql.checkRows(13) + tdSql.checkData(0, 0, 1.0) + tdSql.checkData(1, 0, 5.0) + tdSql.checkData(2, 0, 1.0) + tdSql.checkData(3, 0, 1.0) + tdSql.checkData(4, 0, 1.0) + tdSql.checkData(5, 0, 1.0) + tdSql.checkData(6, 0, 10.0) + tdSql.checkData(7, 0, 1.0) + tdSql.checkData(8, 0, 1.0) + tdSql.checkData(9, 0, 1.0) + tdSql.checkData(10, 0, 1.0) + tdSql.checkData(11, 0, 15.0) + tdSql.checkData(12, 0, 1.0) + + tdSql.query(f"select interp(c5) from {dbname}.{tbname} range('2020-02-01 00:00:04', '2020-02-01 00:00:16') every(1s) fill(value, NULL)") + 
tdSql.checkRows(13) + tdSql.checkData(0, 0, None) + tdSql.checkData(1, 0, 5.0) + tdSql.checkData(2, 0, None) + tdSql.checkData(3, 0, None) + tdSql.checkData(4, 0, None) + tdSql.checkData(5, 0, None) + tdSql.checkData(6, 0, 10.0) + tdSql.checkData(7, 0, None) + tdSql.checkData(8, 0, None) + tdSql.checkData(9, 0, None) + tdSql.checkData(10, 0, None) + tdSql.checkData(11, 0, 15.0) + tdSql.checkData(12, 0, None) + tdSql.query(f"select interp(c6) from {dbname}.{tbname} range('2020-02-01 00:00:04', '2020-02-01 00:00:16') every(1s) fill(value, 1)") tdSql.checkRows(13) tdSql.checkData(0, 0, True) @@ -195,6 +342,54 @@ class TDTestCase: tdSql.checkData(11, 0, True) tdSql.checkData(12, 0, True) + tdSql.query(f"select interp(c6) from {dbname}.{tbname} range('2020-02-01 00:00:04', '2020-02-01 00:00:16') every(1s) fill(value, 1.0)") + tdSql.checkRows(13) + tdSql.checkData(0, 0, True) + tdSql.checkData(1, 0, True) + tdSql.checkData(2, 0, True) + tdSql.checkData(3, 0, True) + tdSql.checkData(4, 0, True) + tdSql.checkData(5, 0, True) + tdSql.checkData(6, 0, True) + tdSql.checkData(7, 0, True) + tdSql.checkData(8, 0, True) + tdSql.checkData(9, 0, True) + tdSql.checkData(10, 0, True) + tdSql.checkData(11, 0, True) + tdSql.checkData(12, 0, True) + + tdSql.query(f"select interp(c6) from {dbname}.{tbname} range('2020-02-01 00:00:04', '2020-02-01 00:00:16') every(1s) fill(value, true)") + tdSql.checkRows(13) + tdSql.checkData(0, 0, True) + tdSql.checkData(1, 0, True) + tdSql.checkData(2, 0, True) + tdSql.checkData(3, 0, True) + tdSql.checkData(4, 0, True) + tdSql.checkData(5, 0, True) + tdSql.checkData(6, 0, True) + tdSql.checkData(7, 0, True) + tdSql.checkData(8, 0, True) + tdSql.checkData(9, 0, True) + tdSql.checkData(10, 0, True) + tdSql.checkData(11, 0, True) + tdSql.checkData(12, 0, True) + + tdSql.query(f"select interp(c6) from {dbname}.{tbname} range('2020-02-01 00:00:04', '2020-02-01 00:00:16') every(1s) fill(value, NULL)") + tdSql.checkRows(13) + tdSql.checkData(0, 0, None) + 
tdSql.checkData(1, 0, True) + tdSql.checkData(2, 0, None) + tdSql.checkData(3, 0, None) + tdSql.checkData(4, 0, None) + tdSql.checkData(5, 0, None) + tdSql.checkData(6, 0, True) + tdSql.checkData(7, 0, None) + tdSql.checkData(8, 0, None) + tdSql.checkData(9, 0, None) + tdSql.checkData(10, 0, None) + tdSql.checkData(11, 0, True) + tdSql.checkData(12, 0, None) + ## {} ... tdSql.query(f"select interp(c0) from {dbname}.{tbname} range('2020-02-01 00:00:01', '2020-02-01 00:00:04') every(1s) fill(value, 1)") tdSql.checkRows(4) @@ -2587,25 +2782,25 @@ class TDTestCase: tdSql.checkData(0, 0, '2020-02-02 00:00:00.000') - tdSql.checkData(0, 2, False) + tdSql.checkData(0, 2, None) tdSql.checkData(1, 2, False) - tdSql.checkData(2, 2, False) + tdSql.checkData(2, 2, None) tdSql.checkData(3, 2, True) - tdSql.checkData(4, 2, False) + tdSql.checkData(4, 2, None) tdSql.checkData(5, 2, False) - tdSql.checkData(6, 2, False) + tdSql.checkData(6, 2, None) tdSql.checkData(7, 2, True) - tdSql.checkData(8, 2, False) + tdSql.checkData(8, 2, None) tdSql.checkData(9, 2, True) - tdSql.checkData(10, 2, False) + tdSql.checkData(10, 2, None) tdSql.checkData(11, 2, False) - tdSql.checkData(12, 2, False) + tdSql.checkData(12, 2, None) tdSql.checkData(13, 2, False) - tdSql.checkData(14, 2, False) + tdSql.checkData(14, 2, None) tdSql.checkData(15, 2, None) - tdSql.checkData(16, 2, False) + tdSql.checkData(16, 2, None) tdSql.checkData(17, 2, None) - tdSql.checkData(18, 2, False) + tdSql.checkData(18, 2, None) tdSql.checkData(18, 0, '2020-02-02 00:00:18.000') diff --git a/tests/system-test/2-query/smaBasic.py b/tests/system-test/2-query/smaBasic.py index 43c379ee53..c221a70605 100644 --- a/tests/system-test/2-query/smaBasic.py +++ b/tests/system-test/2-query/smaBasic.py @@ -127,7 +127,7 @@ class TDTestCase: self.c2Sum = None # create database db - sql = f"create database db vgroups 5 replica 3" + sql = f"create database db vgroups 5 replica 3 stt_trigger 1" tdLog.info(sql) tdSql.execute(sql) sql = f"use 
db"