Merge pull request #21794 from taosdata/enh/tsdb_optimize

for test purpose
This commit is contained in:
wade zhang 2023-07-21 14:18:32 +08:00 committed by GitHub
commit 7b1e809d98
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
64 changed files with 16867 additions and 2871 deletions

View File

@ -22,7 +22,7 @@
extern "C" {
#endif
#define TARRAY_MIN_SIZE 8
#define TARRAY_MIN_SIZE 4
#define TARRAY_GET_ELEM(array, index) ((void*)((char*)((array)->pData) + (index) * (array)->elemSize))
#define TARRAY_ELEM_IDX(array, ele) (POINTER_DISTANCE(ele, (array)->pData) / (array)->elemSize)
@ -138,7 +138,7 @@ size_t taosArrayGetSize(const SArray* pArray);
* @param index
* @param pData
*/
void* taosArrayInsert(SArray* pArray, size_t index, void* pData);
void* taosArrayInsert(SArray* pArray, size_t index, const void* pData);
/**
* set data in array
@ -204,9 +204,9 @@ void taosArrayClearEx(SArray* pArray, void (*fp)(void*));
void* taosArrayDestroy(SArray* pArray);
void taosArrayDestroyP(SArray* pArray, FDelete fp);
void taosArrayDestroyP(SArray* pArray, FDelete fp);
void taosArrayDestroyEx(SArray* pArray, FDelete fp);
void taosArrayDestroyEx(SArray* pArray, FDelete fp);
void taosArraySwap(SArray* a, SArray* b);

173
include/util/tarray2.h Normal file
View File

@ -0,0 +1,173 @@
/*
* Copyright (c) 2019 TAOS Data, Inc. <jhtao@taosdata.com>
*
* This program is free software: you can use, redistribute, and/or modify
* it under the terms of the GNU Affero General Public License, version 3
* or later ("AGPL"), as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include "talgo.h"
#ifndef _TD_UTIL_TARRAY2_H_
#define _TD_UTIL_TARRAY2_H_
#ifdef __cplusplus
extern "C" {
#endif
// a: a
// e: element
// ep: element pointer
// cmp: compare function
// idx: index
// cb: callback function
#define TARRAY2(TYPE) \
struct { \
int32_t size; \
int32_t capacity; \
TYPE *data; \
}
typedef void (*TArray2Cb)(void *);
#define TARRAY2_SIZE(a) ((a)->size)
#define TARRAY2_CAPACITY(a) ((a)->capacity)
#define TARRAY2_DATA(a) ((a)->data)
#define TARRAY2_GET(a, i) ((a)->data[i])
#define TARRAY2_GET_PTR(a, i) ((a)->data + i)
#define TARRAY2_FIRST(a) ((a)->data[0])
#define TARRAY2_LAST(a) ((a)->data[(a)->size - 1])
#define TARRAY2_DATA_LEN(a) ((a)->size * sizeof(typeof((a)->data[0])))
static FORCE_INLINE int32_t tarray2_make_room(void *arr, int32_t expSize, int32_t eleSize) {
TARRAY2(void) *a = arr;
int32_t capacity = (a->capacity > 0) ? (a->capacity << 1) : 32;
while (capacity < expSize) {
capacity <<= 1;
}
void *p = taosMemoryRealloc(a->data, capacity * eleSize);
if (p == NULL) return TSDB_CODE_OUT_OF_MEMORY;
a->capacity = capacity;
a->data = p;
return 0;
}
static FORCE_INLINE int32_t tarray2InsertBatch(void *arr, int32_t idx, const void *elePtr, int32_t numEle,
int32_t eleSize) {
TARRAY2(uint8_t) *a = arr;
int32_t ret = 0;
if (a->size + numEle > a->capacity) {
ret = tarray2_make_room(a, a->size + numEle, eleSize);
}
if (ret == 0) {
if (idx < a->size) {
memmove(a->data + (idx + numEle) * eleSize, a->data + idx * eleSize, (a->size - idx) * eleSize);
}
memcpy(a->data + idx * eleSize, elePtr, numEle * eleSize);
a->size += numEle;
}
return ret;
}
static FORCE_INLINE void *tarray2Search(void *arr, const void *elePtr, int32_t eleSize, __compar_fn_t compar,
int32_t flag) {
TARRAY2(void) *a = arr;
return taosbsearch(elePtr, a->data, a->size, eleSize, compar, flag);
}
static FORCE_INLINE int32_t tarray2SearchIdx(void *arr, const void *elePtr, int32_t eleSize, __compar_fn_t compar,
int32_t flag) {
TARRAY2(void) *a = arr;
void *p = taosbsearch(elePtr, a->data, a->size, eleSize, compar, flag);
if (p == NULL) {
return -1;
} else {
return (int32_t)(((uint8_t *)p - (uint8_t *)a->data) / eleSize);
}
}
static FORCE_INLINE int32_t tarray2SortInsert(void *arr, const void *elePtr, int32_t eleSize, __compar_fn_t compar) {
TARRAY2(void) *a = arr;
int32_t idx = tarray2SearchIdx(arr, elePtr, eleSize, compar, TD_GT);
return tarray2InsertBatch(arr, idx < 0 ? a->size : idx, elePtr, 1, eleSize);
}
#define TARRAY2_INIT_EX(a, size_, capacity_, data_) \
do { \
(a)->size = (size_); \
(a)->capacity = (capacity_); \
(a)->data = (data_); \
} while (0)
#define TARRAY2_INIT(a) TARRAY2_INIT_EX(a, 0, 0, NULL)
#define TARRAY2_CLEAR(a, cb) \
do { \
if ((cb) && (a)->size > 0) { \
TArray2Cb cb_ = (TArray2Cb)(cb); \
for (int32_t i = 0; i < (a)->size; ++i) { \
cb_((a)->data + i); \
} \
} \
(a)->size = 0; \
} while (0)
#define TARRAY2_DESTROY(a, cb) \
do { \
TARRAY2_CLEAR(a, cb); \
if ((a)->data) { \
taosMemoryFree((a)->data); \
(a)->data = NULL; \
} \
(a)->capacity = 0; \
} while (0)
#define TARRAY2_INSERT_PTR(a, idx, ep) tarray2InsertBatch(a, idx, ep, 1, sizeof((a)->data[0]))
#define TARRAY2_APPEND_PTR(a, ep) tarray2InsertBatch(a, (a)->size, ep, 1, sizeof((a)->data[0]))
#define TARRAY2_APPEND_BATCH(a, ep, n) tarray2InsertBatch(a, (a)->size, ep, n, sizeof((a)->data[0]))
#define TARRAY2_APPEND(a, e) TARRAY2_APPEND_PTR(a, &(e))
// return (TYPE *)
#define TARRAY2_SEARCH(a, ep, cmp, flag) tarray2Search(a, ep, sizeof(((a)->data[0])), (__compar_fn_t)cmp, flag)
#define TARRAY2_SEARCH_IDX(a, ep, cmp, flag) \
tarray2SearchIdx(a, ep, sizeof(typeof((a)->data[0])), (__compar_fn_t)cmp, flag)
#define TARRAY2_SORT_INSERT(a, e, cmp) tarray2SortInsert(a, &(e), sizeof(((a)->data[0])), (__compar_fn_t)cmp)
#define TARRAY2_SORT_INSERT_P(a, ep, cmp) tarray2SortInsert(a, ep, sizeof(((a)->data[0])), (__compar_fn_t)cmp)
#define TARRAY2_REMOVE(a, idx, cb) \
do { \
if ((idx) < (a)->size) { \
if (cb) { \
TArray2Cb cb_ = (TArray2Cb)(cb); \
cb_((a)->data + (idx)); \
} \
if ((idx) < (a)->size - 1) { \
memmove((a)->data + (idx), (a)->data + (idx) + 1, sizeof(typeof(*(a)->data)) * ((a)->size - (idx)-1)); \
} \
(a)->size--; \
} \
} while (0)
#define TARRAY2_FOREACH(a, e) for (int32_t __i = 0; __i < (a)->size && ((e) = (a)->data[__i], 1); __i++)
#define TARRAY2_FOREACH_REVERSE(a, e) for (int32_t __i = (a)->size - 1; __i >= 0 && ((e) = (a)->data[__i], 1); __i--)
#define TARRAY2_FOREACH_PTR(a, ep) for (int32_t __i = 0; __i < (a)->size && ((ep) = &(a)->data[__i], 1); __i++)
#define TARRAY2_FOREACH_PTR_REVERSE(a, ep) \
for (int32_t __i = (a)->size - 1; __i >= 0 && ((ep) = &(a)->data[__i], 1); __i--)
#ifdef __cplusplus
}
#endif
#endif /*_TD_UTIL_TARRAY2_H_*/

View File

@ -191,16 +191,16 @@ typedef enum ELogicConditionType {
#define TSDB_MAX_COLUMNS 4096
#define TSDB_MIN_COLUMNS 2 // PRIMARY COLUMN(timestamp) + other columns
#define TSDB_NODE_NAME_LEN 64
#define TSDB_TABLE_NAME_LEN 193 // it is a null-terminated string
#define TSDB_TOPIC_NAME_LEN 193 // it is a null-terminated string
#define TSDB_CGROUP_LEN 193 // it is a null-terminated string
#define TSDB_OFFSET_LEN 64 // it is a null-terminated string
#define TSDB_USER_CGROUP_LEN (TSDB_USER_LEN + TSDB_CGROUP_LEN) // it is a null-terminated string
#define TSDB_STREAM_NAME_LEN 193 // it is a null-terminated string
#define TSDB_DB_NAME_LEN 65
#define TSDB_DB_FNAME_LEN (TSDB_ACCT_ID_LEN + TSDB_DB_NAME_LEN + TSDB_NAME_DELIMITER_LEN)
#define TSDB_PRIVILEDGE_CONDITION_LEN 200
#define TSDB_NODE_NAME_LEN 64
#define TSDB_TABLE_NAME_LEN 193 // it is a null-terminated string
#define TSDB_TOPIC_NAME_LEN 193 // it is a null-terminated string
#define TSDB_CGROUP_LEN 193 // it is a null-terminated string
#define TSDB_OFFSET_LEN 64 // it is a null-terminated string
#define TSDB_USER_CGROUP_LEN (TSDB_USER_LEN + TSDB_CGROUP_LEN) // it is a null-terminated string
#define TSDB_STREAM_NAME_LEN 193 // it is a null-terminated string
#define TSDB_DB_NAME_LEN 65
#define TSDB_DB_FNAME_LEN (TSDB_ACCT_ID_LEN + TSDB_DB_NAME_LEN + TSDB_NAME_DELIMITER_LEN)
#define TSDB_PRIVILEDGE_CONDITION_LEN 200
#define TSDB_FUNC_NAME_LEN 65
#define TSDB_FUNC_COMMENT_LEN 1024 * 1024
@ -249,15 +249,15 @@ typedef enum ELogicConditionType {
#define TSDB_LABEL_LEN 8
#define TSDB_JOB_STATUS_LEN 32
#define TSDB_CLUSTER_ID_LEN 40
#define TSDB_FQDN_LEN 128
#define TSDB_EP_LEN (TSDB_FQDN_LEN + 6)
#define TSDB_IPv4ADDR_LEN 16
#define TSDB_FILENAME_LEN 128
#define TSDB_SHOW_SQL_LEN 2048
#define TSDB_CLUSTER_ID_LEN 40
#define TSDB_FQDN_LEN 128
#define TSDB_EP_LEN (TSDB_FQDN_LEN + 6)
#define TSDB_IPv4ADDR_LEN 16
#define TSDB_FILENAME_LEN 128
#define TSDB_SHOW_SQL_LEN 2048
#define TSDB_SHOW_SCHEMA_JSON_LEN TSDB_MAX_COLUMNS * 256
#define TSDB_SLOW_QUERY_SQL_LEN 512
#define TSDB_SHOW_SUBQUERY_LEN 1000
#define TSDB_SLOW_QUERY_SQL_LEN 512
#define TSDB_SHOW_SUBQUERY_LEN 1000
#define TSDB_TRANS_STAGE_LEN 12
#define TSDB_TRANS_TYPE_LEN 16
@ -370,7 +370,7 @@ typedef enum ELogicConditionType {
#define TSDB_DEFAULT_DB_SCHEMALESS TSDB_DB_SCHEMALESS_OFF
#define TSDB_MIN_STT_TRIGGER 1
#define TSDB_MAX_STT_TRIGGER 16
#define TSDB_DEFAULT_SST_TRIGGER 1
#define TSDB_DEFAULT_SST_TRIGGER 2
#define TSDB_MIN_HASH_PREFIX (2 - TSDB_TABLE_NAME_LEN)
#define TSDB_MAX_HASH_PREFIX (TSDB_TABLE_NAME_LEN - 2)
#define TSDB_DEFAULT_HASH_PREFIX 0
@ -410,10 +410,10 @@ typedef enum ELogicConditionType {
#define TSDB_EXPLAIN_RESULT_ROW_SIZE (16 * 1024)
#define TSDB_EXPLAIN_RESULT_COLUMN_NAME "QUERY_PLAN"
#define TSDB_MAX_FIELD_LEN 65519 // 16384:65519
#define TSDB_MAX_BINARY_LEN TSDB_MAX_FIELD_LEN // 16384-8:65519
#define TSDB_MAX_NCHAR_LEN TSDB_MAX_FIELD_LEN // 16384-8:65519
#define TSDB_MAX_GEOMETRY_LEN TSDB_MAX_FIELD_LEN // 16384-8:65519
#define TSDB_MAX_FIELD_LEN 65519 // 16384:65519
#define TSDB_MAX_BINARY_LEN TSDB_MAX_FIELD_LEN // 16384-8:65519
#define TSDB_MAX_NCHAR_LEN TSDB_MAX_FIELD_LEN // 16384-8:65519
#define TSDB_MAX_GEOMETRY_LEN TSDB_MAX_FIELD_LEN // 16384-8:65519
#define PRIMARYKEY_TIMESTAMP_COL_ID 1
#define COL_REACH_END(colId, maxColId) ((colId) > (maxColId))

View File

@ -241,6 +241,54 @@ void tdListNodeGetData(SList *list, SListNode *node, void *target);
void tdListInitIter(SList *list, SListIter *pIter, TD_LIST_DIRECTION_T direction);
SListNode *tdListNext(SListIter *pIter);
// macros ====================================================================================
// q: for queue
// n: for node
// m: for member
#define LISTD(TYPE) \
struct { \
TYPE *next, *prev; \
}
#define LISTD_NEXT(n, m) ((n)->m.next)
#define LISTD_PREV(n, m) ((n)->m.prev)
#define LISTD_INIT(q, m) (LISTD_NEXT(q, m) = LISTD_PREV(q, m) = (q))
#define LISTD_HEAD(q, m) (LISTD_NEXT(q, m))
#define LISTD_TAIL(q, m) (LISTD_PREV(q, m))
#define LISTD_PREV_NEXT(n, m) (LISTD_NEXT(LISTD_PREV(n, m), m))
#define LISTD_NEXT_PREV(n, m) (LISTD_PREV(LISTD_NEXT(n, m), m))
#define LISTD_INSERT_HEAD(q, n, m) \
do { \
LISTD_NEXT(n, m) = LISTD_NEXT(q, m); \
LISTD_PREV(n, m) = (q); \
LISTD_NEXT_PREV(n, m) = (n); \
LISTD_NEXT(q, m) = (n); \
} while (0)
#define LISTD_INSERT_TAIL(q, n, m) \
do { \
LISTD_NEXT(n, m) = (q); \
LISTD_PREV(n, m) = LISTD_PREV(q, m); \
LISTD_PREV_NEXT(n, m) = (n); \
LISTD_PREV(q, m) = (n); \
} while (0)
#define LISTD_REMOVE(n, m) \
do { \
LISTD_PREV_NEXT(n, m) = LISTD_NEXT(n, m); \
LISTD_NEXT_PREV(n, m) = LISTD_PREV(n, m); \
} while (0)
#define LISTD_FOREACH(q, n, m) for ((n) = LISTD_HEAD(q, m); (n) != (q); (n) = LISTD_NEXT(n, m))
#define LISTD_FOREACH_REVERSE(q, n, m) for ((n) = LISTD_TAIL(q, m); (n) != (q); (n) = LISTD_PREV(n, m))
#define LISTD_FOREACH_SAFE(q, n, t, m) \
for ((n) = LISTD_HEAD(q, m), (t) = LISTD_NEXT(n, m); (n) != (q); (n) = (t), (t) = LISTD_NEXT(n, m))
#define LISTD_FOREACH_REVERSE_SAFE(q, n, t, m) \
for ((n) = LISTD_TAIL(q, m), (t) = LISTD_PREV(n, m); (n) != (q); (n) = (t), (t) = LISTD_PREV(n, m))
#ifdef __cplusplus
}
#endif

View File

@ -39,7 +39,7 @@ void tRBTreeDrop(SRBTree *pTree, SRBTreeNode *z);
SRBTreeNode *tRBTreeDropByKey(SRBTree *pTree, void *pKey);
SRBTreeNode *tRBTreeDropMin(SRBTree *pTree);
SRBTreeNode *tRBTreeDropMax(SRBTree *pTree);
SRBTreeNode *tRBTreeGet(SRBTree *pTree, const SRBTreeNode *pKeyNode);
SRBTreeNode *tRBTreeGet(const SRBTree *pTree, const SRBTreeNode *pKeyNode);
// SRBTreeIter =============================================
#define tRBTreeIterCreate(tree, ascend) \
@ -67,9 +67,9 @@ struct SRBTree {
};
struct SRBTreeIter {
int8_t asc;
SRBTree *pTree;
SRBTreeNode *pNode;
int8_t asc;
const SRBTree *pTree;
SRBTreeNode *pNode;
};
#ifdef __cplusplus

View File

@ -29,7 +29,7 @@ extern "C" {
int32_t strdequote(char *src);
size_t strtrim(char *src);
char *strnchr(const char *haystack, char needle, int32_t len, bool skipquote);
TdUcs4* wcsnchr(const TdUcs4* haystack, TdUcs4 needle, size_t len);
TdUcs4 *wcsnchr(const TdUcs4 *haystack, TdUcs4 needle, size_t len);
char **strsplit(char *src, const char *delim, int32_t *num);
char *strtolower(char *dst, const char *src);
@ -37,11 +37,11 @@ char *strntolower(char *dst, const char *src, int32_t n);
char *strntolower_s(char *dst, const char *src, int32_t n);
int64_t strnatoi(char *num, int32_t len);
size_t tstrncspn(const char *str, size_t ssize, const char *reject, size_t rsize);
size_t twcsncspn(const TdUcs4 *wcs, size_t size, const TdUcs4 *reject, size_t rsize);
size_t tstrncspn(const char *str, size_t ssize, const char *reject, size_t rsize);
size_t twcsncspn(const TdUcs4 *wcs, size_t size, const TdUcs4 *reject, size_t rsize);
char *strbetween(char *string, char *begin, char *end);
char *paGetToken(char *src, char **token, int32_t *tokenLen);
char *strbetween(char *string, char *begin, char *end);
char *paGetToken(char *src, char **token, int32_t *tokenLen);
int32_t taosByteArrayToHexStr(char bytes[], int32_t len, char hexstr[]);
int32_t taosHexStrToByteArray(char hexstr[], char bytes[]);
@ -81,12 +81,13 @@ static FORCE_INLINE void taosEncryptPass_c(uint8_t *inBuf, size_t len, char *tar
static FORCE_INLINE int32_t taosGetTbHashVal(const char *tbname, int32_t tblen, int32_t method, int32_t prefix,
int32_t suffix) {
if ((prefix == 0 && suffix == 0) || (tblen <= (prefix + suffix)) || (tblen <= -1 * (prefix + suffix)) || prefix * suffix < 0) {
if ((prefix == 0 && suffix == 0) || (tblen <= (prefix + suffix)) || (tblen <= -1 * (prefix + suffix)) ||
prefix * suffix < 0) {
return MurmurHash3_32(tbname, tblen);
} else if (prefix > 0 || suffix > 0) {
return MurmurHash3_32(tbname + prefix, tblen - prefix - suffix);
} else {
char tbName[TSDB_TABLE_FNAME_LEN];
char tbName[TSDB_TABLE_FNAME_LEN];
int32_t offset = 0;
if (prefix < 0) {
offset = -1 * prefix;
@ -94,20 +95,33 @@ static FORCE_INLINE int32_t taosGetTbHashVal(const char *tbname, int32_t tblen,
}
if (suffix < 0) {
strncpy(tbName + offset, tbname + tblen + suffix, -1 * suffix);
offset += -1 *suffix;
offset += -1 * suffix;
}
return MurmurHash3_32(tbName, offset);
}
}
#define TSDB_CHECK_CODE(CODE, LINO, LABEL) \
if (CODE) { \
LINO = __LINE__; \
goto LABEL; \
do { \
if ((CODE)) { \
LINO = __LINE__; \
goto LABEL; \
} \
} while (0)
#define TSDB_CHECK_NULL(ptr, CODE, LINO, LABEL, ERRNO) \
if ((ptr) == NULL) { \
(CODE) = (ERRNO); \
(LINO) = __LINE__; \
goto LABEL; \
}
#define ARRAY_SIZE(a) (sizeof(a) / sizeof(a[0]))
#define VND_CHECK_CODE(CODE, LINO, LABEL) TSDB_CHECK_CODE(CODE, LINO, LABEL)
#define TCONTAINER_OF(ptr, type, member) ((type *)((char *)(ptr)-offsetof(type, member)))
#ifdef __cplusplus
}
#endif

View File

@ -2245,15 +2245,18 @@ static int32_t tColDataUpdateValue72(SColData *pColData, uint8_t *pData, uint32_
}
return 0;
}
static FORCE_INLINE int32_t tColDataUpdateNothing(SColData *pColData, uint8_t *pData, uint32_t nData, bool forward) {
return 0;
}
static int32_t (*tColDataUpdateValueImpl[8][3])(SColData *pColData, uint8_t *pData, uint32_t nData, bool forward) = {
{NULL, NULL, NULL}, // 0
{tColDataUpdateValue10, NULL, tColDataUpdateValue12}, // HAS_NONE
{tColDataUpdateValue20, NULL, NULL}, // HAS_NULL
{tColDataUpdateValue30, NULL, tColDataUpdateValue32}, // HAS_NULL|HAS_NONE
{tColDataUpdateValue40, NULL, tColDataUpdateValue42}, // HAS_VALUE
{tColDataUpdateValue50, NULL, tColDataUpdateValue52}, // HAS_VALUE|HAS_NONE
{tColDataUpdateValue60, NULL, tColDataUpdateValue62}, // HAS_VALUE|HAS_NULL
{tColDataUpdateValue70, NULL, tColDataUpdateValue72}, // HAS_VALUE|HAS_NULL|HAS_NONE
{NULL, NULL, NULL}, // 0
{tColDataUpdateValue10, tColDataUpdateNothing, tColDataUpdateValue12}, // HAS_NONE
{tColDataUpdateValue20, tColDataUpdateNothing, tColDataUpdateNothing}, // HAS_NULL
{tColDataUpdateValue30, tColDataUpdateNothing, tColDataUpdateValue32}, // HAS_NULL|HAS_NONE
{tColDataUpdateValue40, tColDataUpdateNothing, tColDataUpdateValue42}, // HAS_VALUE
{tColDataUpdateValue50, tColDataUpdateNothing, tColDataUpdateValue52}, // HAS_VALUE|HAS_NONE
{tColDataUpdateValue60, tColDataUpdateNothing, tColDataUpdateValue62}, // HAS_VALUE|HAS_NULL
{tColDataUpdateValue70, tColDataUpdateNothing, tColDataUpdateValue72}, // HAS_VALUE|HAS_NULL|HAS_NONE
// VALUE NONE NULL
};

View File

@ -1,21 +1,18 @@
# vnode
add_library(vnode STATIC "")
target_sources(
vnode
PRIVATE
# vnode
"src/vnd/vnodeOpen.c"
"src/vnd/vnodeBufPool.c"
"src/vnd/vnodeCfg.c"
"src/vnd/vnodeCommit.c"
"src/vnd/vnodeQuery.c"
"src/vnd/vnodeModule.c"
"src/vnd/vnodeSvr.c"
"src/vnd/vnodeSync.c"
"src/vnd/vnodeSnapshot.c"
"src/vnd/vnodeRetention.c"
"src/vnd/vnodeInitApi.c"
set(
VNODE_SOURCE_FILES
"src/vnd/vnodeOpen.c"
"src/vnd/vnodeBufPool.c"
"src/vnd/vnodeCfg.c"
"src/vnd/vnodeCommit.c"
"src/vnd/vnodeQuery.c"
"src/vnd/vnodeModule.c"
"src/vnd/vnodeSvr.c"
"src/vnd/vnodeSync.c"
"src/vnd/vnodeSnapshot.c"
"src/vnd/vnodeRetention.c"
"src/vnd/vnodeInitApi.c"
# meta
"src/meta/metaOpen.c"
@ -38,23 +35,23 @@ target_sources(
"src/sma/smaSnapshot.c"
"src/sma/smaTimeRange.c"
# tsdb
"src/tsdb/tsdbCommit.c"
"src/tsdb/tsdbFile.c"
"src/tsdb/tsdbFS.c"
"src/tsdb/tsdbOpen.c"
"src/tsdb/tsdbMemTable.c"
"src/tsdb/tsdbRead.c"
"src/tsdb/tsdbCache.c"
"src/tsdb/tsdbWrite.c"
"src/tsdb/tsdbReaderWriter.c"
"src/tsdb/tsdbUtil.c"
"src/tsdb/tsdbSnapshot.c"
"src/tsdb/tsdbCacheRead.c"
"src/tsdb/tsdbRetention.c"
"src/tsdb/tsdbDiskData.c"
"src/tsdb/tsdbMergeTree.c"
"src/tsdb/tsdbDataIter.c"
# # tsdb
# "src/tsdb/tsdbCommit.c"
# "src/tsdb/tsdbFile.c"
# "src/tsdb/tsdbFS.c"
# "src/tsdb/tsdbOpen.c"
# "src/tsdb/tsdbMemTable.c"
# "src/tsdb/tsdbRead.c"
# "src/tsdb/tsdbCache.c"
# "src/tsdb/tsdbWrite.c"
# "src/tsdb/tsdbReaderWriter.c"
# "src/tsdb/tsdbUtil.c"
# "src/tsdb/tsdbSnapshot.c"
# "src/tsdb/tsdbCacheRead.c"
# "src/tsdb/tsdbRetention.c"
# "src/tsdb/tsdbDiskData.c"
# "src/tsdb/tsdbMergeTree.c"
# "src/tsdb/tsdbDataIter.c"
# tq
"src/tq/tq.c"
@ -71,6 +68,19 @@ target_sources(
"src/tq/tqOffsetSnapshot.c"
)
aux_source_directory("src/tsdb/" TSDB_SOURCE_FILES)
list(
APPEND
VNODE_SOURCE_FILES
${TSDB_SOURCE_FILES}
)
target_sources(
vnode
PRIVATE
${VNODE_SOURCE_FILES}
)
IF (TD_VNODE_PLUGINS)
target_sources(
vnode

View File

@ -168,6 +168,27 @@ uint64_t tsdbGetReaderMaxVersion(STsdbReader *pReader);
void tsdbReaderSetCloseFlag(STsdbReader *pReader);
int64_t tsdbGetLastTimestamp(SVnode *pVnode, void *pTableList, int32_t numOfTables, const char *pIdStr);
//======================================================================================================================
int32_t tsdbReaderOpen2(void *pVnode, SQueryTableDataCond *pCond, void *pTableList, int32_t numOfTables,
SSDataBlock *pResBlock, void **ppReader, const char *idstr, bool countOnly,
SHashObj **pIgnoreTables);
int32_t tsdbSetTableList2(STsdbReader *pReader, const void *pTableList, int32_t num);
void tsdbReaderSetId2(STsdbReader *pReader, const char *idstr);
void tsdbReaderClose2(STsdbReader *pReader);
int32_t tsdbNextDataBlock2(STsdbReader *pReader, bool *hasNext);
int32_t tsdbRetrieveDatablockSMA2(STsdbReader *pReader, SSDataBlock *pDataBlock, bool *allHave, bool *hasNullSMA);
void tsdbReleaseDataBlock2(STsdbReader *pReader);
SSDataBlock *tsdbRetrieveDataBlock2(STsdbReader *pTsdbReadHandle, SArray *pColumnIdList);
int32_t tsdbReaderReset2(STsdbReader *pReader, SQueryTableDataCond *pCond);
int32_t tsdbGetFileBlocksDistInfo2(STsdbReader *pReader, STableBlockDistInfo *pTableBlockInfo);
int64_t tsdbGetNumOfRowsInMemTable2(STsdbReader *pHandle);
void *tsdbGetIdx2(SMeta *pMeta);
void *tsdbGetIvtIdx2(SMeta *pMeta);
uint64_t tsdbGetReaderMaxVersion2(STsdbReader *pReader);
void tsdbReaderSetCloseFlag2(STsdbReader *pReader);
int64_t tsdbGetLastTimestamp2(SVnode *pVnode, void *pTableList, int32_t numOfTables, const char *pIdStr);
//======================================================================================================================
int32_t tsdbReuseCacherowsReader(void *pReader, void *pTableIdList, int32_t numOfTables);
int32_t tsdbCacherowsReaderOpen(void *pVnode, int32_t type, void *pTableIdList, int32_t numOfTables, int32_t numOfCols,
SArray *pCidList, int32_t *pSlotIds, uint64_t suid, void **pReader, const char *idstr);

View File

@ -16,6 +16,9 @@
#ifndef _TD_VNODE_TSDB_H_
#define _TD_VNODE_TSDB_H_
// #include "../tsdb/tsdbFile2.h"
// #include "../tsdb/tsdbMerge.h"
// #include "../tsdb/tsdbSttFileRW.h"
#include "tsimplehash.h"
#include "vnodeInt.h"
@ -75,9 +78,8 @@ typedef struct STsdbFilterInfo STsdbFilterInfo;
#define TSDBROW_ROW_FMT ((int8_t)0x0)
#define TSDBROW_COL_FMT ((int8_t)0x1)
#define TSDB_FILE_DLMT ((uint32_t)0xF00AFA0F)
#define TSDB_MAX_SUBBLOCKS 8
#define TSDB_FHDR_SIZE 512
#define TSDB_FILE_DLMT ((uint32_t)0xF00AFA0F)
#define TSDB_FHDR_SIZE 512
#define VERSION_MIN 0
#define VERSION_MAX INT64_MAX
@ -165,6 +167,7 @@ void tBlockDataDestroy(SBlockData *pBlockData);
int32_t tBlockDataInit(SBlockData *pBlockData, TABLEID *pId, STSchema *pTSchema, int16_t *aCid, int32_t nCid);
void tBlockDataReset(SBlockData *pBlockData);
int32_t tBlockDataAppendRow(SBlockData *pBlockData, TSDBROW *pRow, STSchema *pTSchema, int64_t uid);
int32_t tBlockDataUpdateRow(SBlockData *pBlockData, TSDBROW *pRow, STSchema *pTSchema);
int32_t tBlockDataTryUpsertRow(SBlockData *pBlockData, TSDBROW *pRow, int64_t uid);
int32_t tBlockDataUpsertRow(SBlockData *pBlockData, TSDBROW *pRow, STSchema *pTSchema, int64_t uid);
void tBlockDataClear(SBlockData *pBlockData);
@ -198,7 +201,7 @@ int32_t tMapDataToArray(SMapData *pMapData, int32_t itemSize, int32_t (*tGetItem
// other
int32_t tsdbKeyFid(TSKEY key, int32_t minutes, int8_t precision);
void tsdbFidKeyRange(int32_t fid, int32_t minutes, int8_t precision, TSKEY *minKey, TSKEY *maxKey);
int32_t tsdbFidLevel(int32_t fid, STsdbKeepCfg *pKeepCfg, int64_t now);
int32_t tsdbFidLevel(int32_t fid, STsdbKeepCfg *pKeepCfg, int64_t nowSec);
int32_t tsdbBuildDeleteSkyline(SArray *aDelData, int32_t sidx, int32_t eidx, SArray *aSkyline);
int32_t tPutColumnDataAgg(uint8_t *p, SColumnDataAgg *pColAgg);
int32_t tGetColumnDataAgg(uint8_t *p, SColumnDataAgg *pColAgg);
@ -302,8 +305,11 @@ int32_t tsdbReadDelIdx(SDelFReader *pReader, SArray *aDelIdx);
// tsdbRead.c ==============================================================================================
int32_t tsdbTakeReadSnap(STsdbReader *pReader, _query_reseek_func_t reseek, STsdbReadSnap **ppSnap);
void tsdbUntakeReadSnap(STsdbReader *pReader, STsdbReadSnap *pSnap, bool proactive);
int32_t tsdbTakeReadSnap2(STsdbReader *pReader, _query_reseek_func_t reseek, STsdbReadSnap **ppSnap);
void tsdbUntakeReadSnap2(STsdbReader *pReader, STsdbReadSnap *pSnap, bool proactive);
// tsdbMerge.c ==============================================================================================
int32_t tsdbMerge(STsdb *pTsdb);
int32_t tsdbMerge(void *arg);
// tsdbDiskData ==============================================================================================
int32_t tDiskDataBuilderCreate(SDiskDataBuilder **ppBuilder);
@ -362,19 +368,20 @@ typedef struct {
} SCacheFlushState;
struct STsdb {
char *path;
SVnode *pVnode;
STsdbKeepCfg keepCfg;
TdThreadRwlock rwLock;
SMemTable *mem;
SMemTable *imem;
STsdbFS fs;
SLRUCache *lruCache;
SCacheFlushState flushState;
TdThreadMutex lruMutex;
SLRUCache *biCache;
TdThreadMutex biMutex;
SRocksCache rCache;
char *path;
SVnode *pVnode;
STsdbKeepCfg keepCfg;
TdThreadRwlock rwLock;
SMemTable *mem;
SMemTable *imem;
STsdbFS fs; // old
SLRUCache *lruCache;
SCacheFlushState flushState;
TdThreadMutex lruMutex;
SLRUCache *biCache;
TdThreadMutex biMutex;
struct STFileSystem *pFS; // new
SRocksCache rCache;
};
struct TSDBKEY {
@ -410,6 +417,7 @@ struct STbData {
SDelData *pTail;
SMemSkipList sl;
STbData *next;
SRBTreeNode rbtn[1];
};
struct SMemTable {
@ -423,11 +431,10 @@ struct SMemTable {
TSKEY maxKey;
int64_t nRow;
int64_t nDel;
struct {
int32_t nTbData;
int32_t nBucket;
STbData **aBucket;
};
int32_t nTbData;
int32_t nBucket;
STbData **aBucket;
SRBTree tbDataTree[1];
};
struct TSDBROW {
@ -500,7 +507,7 @@ struct SDataBlk {
int32_t nRow;
int8_t hasDup;
int8_t nSubBlock;
SBlockInfo aSubBlock[TSDB_MAX_SUBBLOCKS];
SBlockInfo aSubBlock[1];
SSmaInfo smaInfo;
};
@ -652,12 +659,19 @@ struct SDelFWriter {
uint8_t *aBuf[1];
};
#include "tarray2.h"
//#include "tsdbFS2.h"
// struct STFileSet;
typedef struct STFileSet STFileSet;
typedef TARRAY2(STFileSet *) TFileSetArray;
struct STsdbReadSnap {
SMemTable *pMem;
SQueryNode *pNode;
SMemTable *pIMem;
SQueryNode *pINode;
STsdbFS fs;
SMemTable *pMem;
SQueryNode *pNode;
SMemTable *pIMem;
SQueryNode *pINode;
TFileSetArray *pfSetArray;
STsdbFS fs;
};
struct SDataFWriter {
@ -696,6 +710,7 @@ typedef struct {
typedef struct SSttBlockLoadInfo {
SBlockData blockData[2];
void *pSttStatisBlkArray;
SArray *aSttBlk;
int32_t blockIndex[2]; // to denote the loaded block in the corresponding position.
int32_t currentLoadBlockIndex;
@ -704,10 +719,9 @@ typedef struct SSttBlockLoadInfo {
STSchema *pSchema;
int16_t *colIds;
int32_t numOfCols;
bool checkRemainingRow;
bool checkRemainingRow; // todo: no assign value?
bool isLast;
bool sttBlockLoaded;
int32_t numOfStt;
// keep the last access position, this position may be used to reduce the binary times for
// starting last block data for a new table
@ -766,60 +780,106 @@ struct SDiskDataBuilder {
};
typedef struct SLDataIter {
SRBTreeNode node;
SSttBlk *pSttBlk;
SDataFReader *pReader;
int32_t iStt;
int8_t backward;
int32_t iSttBlk;
int32_t iRow;
SRowInfo rInfo;
uint64_t uid;
STimeWindow timeWindow;
SVersionRange verRange;
SSttBlockLoadInfo *pBlockLoadInfo;
bool ignoreEarlierTs;
SRBTreeNode node;
SSttBlk *pSttBlk;
int32_t iStt; // for debug purpose
int8_t backward;
int32_t iSttBlk;
int32_t iRow;
SRowInfo rInfo;
uint64_t uid;
STimeWindow timeWindow;
SVersionRange verRange;
SSttBlockLoadInfo *pBlockLoadInfo;
bool ignoreEarlierTs;
struct SSttFileReader *pReader;
} SLDataIter;
#define tMergeTreeGetRow(_t) (&((_t)->pIter->rInfo.row))
int32_t tMergeTreeOpen(SMergeTree *pMTree, int8_t backward, SDataFReader *pFReader, uint64_t suid, uint64_t uid,
STimeWindow *pTimeWindow, SVersionRange *pVerRange, SSttBlockLoadInfo *pBlockLoadInfo,
bool destroyLoadInfo, const char *idStr, bool strictTimeRange, SLDataIter *pLDataIter);
void tMergeTreeAddIter(SMergeTree *pMTree, SLDataIter *pIter);
bool tMergeTreeNext(SMergeTree *pMTree);
bool tMergeTreeIgnoreEarlierTs(SMergeTree *pMTree);
void tMergeTreeClose(SMergeTree *pMTree);
struct SSttFileReader;
typedef int32_t (*_load_tomb_fn)(STsdbReader *pReader, struct SSttFileReader *pSttFileReader,
SSttBlockLoadInfo *pLoadInfo);
typedef struct {
int8_t backward;
STsdb *pTsdb;
uint64_t suid;
uint64_t uid;
STimeWindow timewindow;
SVersionRange verRange;
bool strictTimeRange;
SArray *pSttFileBlockIterArray;
void *pCurrentFileset;
STSchema *pSchema;
int16_t *pCols;
int32_t numOfCols;
_load_tomb_fn loadTombFn;
void *pReader;
void *idstr;
} SMergeTreeConf;
int32_t tMergeTreeOpen2(SMergeTree *pMTree, SMergeTreeConf *pConf);
void tMergeTreeAddIter(SMergeTree *pMTree, SLDataIter *pIter);
bool tMergeTreeNext(SMergeTree *pMTree);
bool tMergeTreeIgnoreEarlierTs(SMergeTree *pMTree);
void tMergeTreeClose(SMergeTree *pMTree);
SSttBlockLoadInfo *tCreateLastBlockLoadInfo(STSchema *pSchema, int16_t *colList, int32_t numOfCols, int32_t numOfStt);
SSttBlockLoadInfo *tCreateOneLastBlockLoadInfo(STSchema *pSchema, int16_t *colList, int32_t numOfCols);
void resetLastBlockLoadInfo(SSttBlockLoadInfo *pLoadInfo);
void getLastBlockLoadInfo(SSttBlockLoadInfo *pLoadInfo, int64_t *blocks, double *el);
void *destroyLastBlockLoadInfo(SSttBlockLoadInfo *pLoadInfo);
void *destroySttBlockReader(SArray *pLDataIterArray, int64_t *blocks, double *el);
// tsdbCache ==============================================================================================
typedef enum {
READ_MODE_COUNT_ONLY = 0x1,
READ_MODE_ALL,
} EReadMode;
typedef struct STsdbReaderInfo {
uint64_t suid;
STSchema *pSchema;
EReadMode readMode;
uint64_t rowsNum;
STimeWindow window;
SVersionRange verRange;
int16_t order;
} STsdbReaderInfo;
typedef struct {
SArray *pTombData;
} STableLoadInfo;
struct SDataFileReader;
typedef struct SCacheRowsReader {
STsdb *pTsdb;
SVersionRange verRange;
TdThreadMutex readerMutex;
SVnode *pVnode;
STSchema *pSchema;
STSchema *pCurrSchema;
uint64_t uid;
uint64_t suid;
char **transferBuf; // todo remove it soon
int32_t numOfCols;
SArray *pCidList;
int32_t *pSlotIds;
int32_t type;
int32_t tableIndex; // currently returned result tables
STableKeyInfo *pTableList; // table id list
int32_t numOfTables;
SSttBlockLoadInfo *pLoadInfo;
SLDataIter *pDataIter;
STsdbReadSnap *pReadSnap;
SDataFReader *pDataFReader;
SDataFReader *pDataFReaderLast;
const char *idstr;
int64_t lastTs;
STsdb *pTsdb;
STsdbReaderInfo info;
TdThreadMutex readerMutex;
SVnode *pVnode;
STSchema *pSchema;
STSchema *pCurrSchema;
uint64_t uid;
char **transferBuf; // todo remove it soon
int32_t numOfCols;
SArray *pCidList;
int32_t *pSlotIds;
int32_t type;
int32_t tableIndex; // currently returned result tables
STableKeyInfo *pTableList; // table id list
int32_t numOfTables;
uint64_t *uidList;
SSHashObj *pTableMap;
SArray *pLDataIterArray;
struct SDataFileReader *pFileReader;
STsdbReadSnap *pReadSnap;
char *idstr;
int64_t lastTs;
} SCacheRowsReader;
typedef struct {

View File

@ -49,7 +49,8 @@ int32_t vnodeEncodeConfig(const void* pObj, SJson* pJson);
int32_t vnodeDecodeConfig(const SJson* pJson, void* pObj);
// vnodeModule.c
int32_t vnodeScheduleTask(int32_t (*execute)(void*), void* arg);
int vnodeScheduleTask(int (*execute)(void*), void* arg);
int vnodeScheduleTaskEx(int tpid, int (*execute)(void*), void* arg);
// vnodeBufPool.c
typedef struct SVBufPoolNode SVBufPoolNode;

View File

@ -180,8 +180,8 @@ SArray* metaGetSmaTbUids(SMeta* pMeta);
void* metaGetIdx(SMeta* pMeta);
void* metaGetIvtIdx(SMeta* pMeta);
int64_t metaGetTbNum(SMeta *pMeta);
void metaReaderDoInit(SMetaReader *pReader, SMeta *pMeta, int32_t flags);
int64_t metaGetTbNum(SMeta* pMeta);
void metaReaderDoInit(SMetaReader* pReader, SMeta* pMeta, int32_t flags);
int32_t metaCreateTSma(SMeta* pMeta, int64_t version, SSmaCfg* pCfg);
int32_t metaDropTSma(SMeta* pMeta, int64_t indexUid);
@ -198,12 +198,12 @@ int32_t metaGetInfo(SMeta* pMeta, int64_t uid, SMetaInfo* pInfo, SMetaReader* pR
int tsdbOpen(SVnode* pVnode, STsdb** ppTsdb, const char* dir, STsdbKeepCfg* pKeepCfg, int8_t rollback);
int tsdbClose(STsdb** pTsdb);
int32_t tsdbBegin(STsdb* pTsdb);
int32_t tsdbPrepareCommit(STsdb* pTsdb);
int32_t tsdbCommit(STsdb* pTsdb, SCommitInfo* pInfo);
// int32_t tsdbPrepareCommit(STsdb* pTsdb);
// int32_t tsdbCommit(STsdb* pTsdb, SCommitInfo* pInfo);
int32_t tsdbCacheCommit(STsdb* pTsdb);
int32_t tsdbCompact(STsdb* pTsdb, SCompactInfo* pInfo);
int32_t tsdbFinishCommit(STsdb* pTsdb);
int32_t tsdbRollbackCommit(STsdb* pTsdb);
// int32_t tsdbFinishCommit(STsdb* pTsdb);
// int32_t tsdbRollbackCommit(STsdb* pTsdb);
int tsdbScanAndConvertSubmitMsg(STsdb* pTsdb, SSubmitReq2* pMsg);
int tsdbInsertData(STsdb* pTsdb, int64_t version, SSubmitReq2* pMsg, SSubmitRsp2* pRsp);
int32_t tsdbInsertTableData(STsdb* pTsdb, int64_t version, SSubmitTbData* pSubmitTbData, int32_t* affectedRows);

View File

@ -103,15 +103,16 @@ _exit:
return code;
}
int32_t smaFinishCommit(SSma *pSma) {
extern int32_t tsdbCommitCommit(STsdb *tsdb);
int32_t smaFinishCommit(SSma *pSma) {
int32_t code = 0;
int32_t lino = 0;
SVnode *pVnode = pSma->pVnode;
if (VND_RSMA1(pVnode) && (code = tsdbFinishCommit(VND_RSMA1(pVnode))) < 0) {
if (VND_RSMA1(pVnode) && (code = tsdbCommitCommit(VND_RSMA1(pVnode))) < 0) {
TSDB_CHECK_CODE(code, lino, _exit);
}
if (VND_RSMA2(pVnode) && (code = tsdbFinishCommit(VND_RSMA2(pVnode))) < 0) {
if (VND_RSMA2(pVnode) && (code = tsdbCommitCommit(VND_RSMA2(pVnode))) < 0) {
TSDB_CHECK_CODE(code, lino, _exit);
}
_exit:
@ -130,6 +131,7 @@ _exit:
* @param isCommit
* @return int32_t
*/
extern int32_t tsdbPreCommit(STsdb *tsdb);
static int32_t tdProcessRSmaAsyncPreCommitImpl(SSma *pSma, bool isCommit) {
int32_t code = 0;
int32_t lino = 0;
@ -186,11 +188,11 @@ static int32_t tdProcessRSmaAsyncPreCommitImpl(SSma *pSma, bool isCommit) {
// all rsma results are written completely
STsdb *pTsdb = NULL;
if ((pTsdb = VND_RSMA1(pSma->pVnode))) {
code = tsdbPrepareCommit(pTsdb);
code = tsdbPreCommit(pTsdb);
TSDB_CHECK_CODE(code, lino, _exit);
}
if ((pTsdb = VND_RSMA2(pSma->pVnode))) {
code = tsdbPrepareCommit(pTsdb);
code = tsdbPreCommit(pTsdb);
TSDB_CHECK_CODE(code, lino, _exit);
}
@ -207,6 +209,7 @@ _exit:
* @param pSma
* @return int32_t
*/
extern int32_t tsdbCommitBegin(STsdb *tsdb, SCommitInfo *info);
static int32_t tdProcessRSmaAsyncCommitImpl(SSma *pSma, SCommitInfo *pInfo) {
int32_t code = 0;
int32_t lino = 0;
@ -217,10 +220,10 @@ static int32_t tdProcessRSmaAsyncCommitImpl(SSma *pSma, SCommitInfo *pInfo) {
goto _exit;
}
code = tsdbCommit(VND_RSMA1(pVnode), pInfo);
code = tsdbCommitBegin(VND_RSMA1(pVnode), pInfo);
TSDB_CHECK_CODE(code, lino, _exit);
code = tsdbCommit(VND_RSMA2(pVnode), pInfo);
code = tsdbCommitBegin(VND_RSMA2(pVnode), pInfo);
TSDB_CHECK_CODE(code, lino, _exit);
_exit:

View File

@ -264,7 +264,7 @@ static int32_t tdSetRSmaInfoItemParams(SSma *pSma, SRSmaParam *param, SRSmaStat
return TSDB_CODE_FAILED;
}
SReadHandle handle = { .vnode = pVnode, .initTqReader = 1, .pStateBackend = pStreamState };
SReadHandle handle = {.vnode = pVnode, .initTqReader = 1, .pStateBackend = pStreamState};
initStorageAPI(&handle.api);
pRSmaInfo->taskInfo[idx] = qCreateStreamExecTaskInfo(param->qmsg[idx], &handle, TD_VID(pVnode));
@ -572,8 +572,8 @@ int32_t smaDoRetention(SSma *pSma, int64_t now) {
for (int32_t i = 0; i < TSDB_RETENTION_L2; ++i) {
if (pSma->pRSmaTsdb[i]) {
code = tsdbDoRetention(pSma->pRSmaTsdb[i], now);
if (code) goto _end;
// code = tsdbDoRetention(pSma->pRSmaTsdb[i], now);
// if (code) goto _end;
}
}
@ -612,7 +612,6 @@ static int32_t tdRSmaExecAndSubmitResult(SSma *pSma, qTaskInfo_t taskInfo, SRSma
blockDebugShowDataBlocks(pResList, flag);
#endif
for (int32_t i = 0; i < taosArrayGetSize(pResList); ++i) {
output = taosArrayGetP(pResList, i);
smaDebug("vgId:%d, result block, uid:%" PRIu64 ", groupid:%" PRIu64 ", rows:%" PRIi64, SMA_VID(pSma),
output->info.id.uid, output->info.id.groupId, output->info.rows);
@ -1114,8 +1113,8 @@ static void tdRSmaFetchTrigger(void *param, void *tmrId) {
}
if (!(pStat = (SRSmaStat *)tdAcquireSmaRef(smaMgmt.rsetId, pRSmaRef->refId))) {
smaWarn("rsma fetch task not start since rsma stat already destroyed, rsetId:%d refId:%" PRIi64 ")",
smaMgmt.rsetId, pRSmaRef->refId); // pRSmaRef freed in taosHashRemove
smaWarn("rsma fetch task not start since rsma stat already destroyed, rsetId:%d refId:%" PRIi64 ")", smaMgmt.rsetId,
pRSmaRef->refId); // pRSmaRef freed in taosHashRemove
taosHashRemove(smaMgmt.refHash, &param, POINTER_BYTES);
return;
}

File diff suppressed because it is too large Load Diff

View File

@ -17,6 +17,7 @@
#include "tarray.h"
#include "tcommon.h"
#include "tsdb.h"
#include "tsdbDataFileRW.h"
#define HASTYPE(_type, _t) (((_type) & (_t)) == (_t))
@ -124,11 +125,29 @@ int32_t tsdbReuseCacherowsReader(void* reader, void* pTableIdList, int32_t numOf
pReader->numOfTables = numOfTables;
pReader->lastTs = INT64_MIN;
resetLastBlockLoadInfo(pReader->pLoadInfo);
int64_t blocks;
double elapse;
pReader->pLDataIterArray = destroySttBlockReader(pReader->pLDataIterArray, &blocks, &elapse);
pReader->pLDataIterArray = taosArrayInit(4, POINTER_BYTES);
return TSDB_CODE_SUCCESS;
}
static int32_t uidComparFunc(const void* p1, const void* p2) {
uint64_t pu1 = *(uint64_t*)p1;
uint64_t pu2 = *(uint64_t*)p2;
if (pu1 == pu2) {
return 0;
} else {
return (pu1 < pu2) ? -1 : 1;
}
}
static void freeTableInfoFunc(void* param) {
void** p = (void**)param;
taosMemoryFreeClear(*p);
}
int32_t tsdbCacherowsReaderOpen(void* pVnode, int32_t type, void* pTableIdList, int32_t numOfTables, int32_t numOfCols,
SArray* pCidList, int32_t* pSlotIds, uint64_t suid, void** pReader, const char* idstr) {
*pReader = NULL;
@ -140,11 +159,11 @@ int32_t tsdbCacherowsReaderOpen(void* pVnode, int32_t type, void* pTableIdList,
p->type = type;
p->pVnode = pVnode;
p->pTsdb = p->pVnode->pTsdb;
p->verRange = (SVersionRange){.minVer = 0, .maxVer = UINT64_MAX};
p->info.verRange = (SVersionRange){.minVer = 0, .maxVer = UINT64_MAX};
p->info.suid = suid;
p->numOfCols = numOfCols;
p->pCidList = pCidList;
p->pSlotIds = pSlotIds;
p->suid = suid;
if (numOfTables == 0) {
*pReader = p;
@ -154,6 +173,27 @@ int32_t tsdbCacherowsReaderOpen(void* pVnode, int32_t type, void* pTableIdList,
p->pTableList = pTableIdList;
p->numOfTables = numOfTables;
p->pTableMap = tSimpleHashInit(numOfTables, taosGetDefaultHashFunction(TSDB_DATA_TYPE_BIGINT));
if (p->pTableMap == NULL) {
tsdbCacherowsReaderClose(p);
return TSDB_CODE_OUT_OF_MEMORY;
}
p->uidList = taosMemoryMalloc(numOfTables * sizeof(uint64_t));
if (p->uidList == NULL) {
tsdbCacherowsReaderClose(p);
return TSDB_CODE_OUT_OF_MEMORY;
}
for (int32_t i = 0; i < numOfTables; ++i) {
uint64_t uid = p->pTableList[i].uid;
p->uidList[i] = uid;
STableLoadInfo* pInfo = taosMemoryCalloc(1, sizeof(STableLoadInfo));
tSimpleHashPut(p->pTableMap, &uid, sizeof(uint64_t), &pInfo, POINTER_BYTES);
}
tSimpleHashSetFreeFp(p->pTableMap, freeTableInfoFunc);
taosSort(p->uidList, numOfTables, sizeof(uint64_t), uidComparFunc);
int32_t code = setTableSchema(p, suid, idstr);
if (code != TSDB_CODE_SUCCESS) {
tsdbCacherowsReaderClose(p);
@ -178,14 +218,8 @@ int32_t tsdbCacherowsReaderOpen(void* pVnode, int32_t type, void* pTableIdList,
SVnodeCfg* pCfg = &((SVnode*)pVnode)->config;
int32_t numOfStt = pCfg->sttTrigger;
p->pLoadInfo = tCreateLastBlockLoadInfo(p->pSchema, NULL, 0, numOfStt);
if (p->pLoadInfo == NULL) {
tsdbCacherowsReaderClose(p);
return TSDB_CODE_OUT_OF_MEMORY;
}
p->pDataIter = taosMemoryCalloc(pCfg->sttTrigger, sizeof(SLDataIter));
if (p->pDataIter == NULL) {
p->pLDataIterArray = taosArrayInit(4, POINTER_BYTES);
if (p->pLDataIterArray == NULL) {
tsdbCacherowsReaderClose(p);
return TSDB_CODE_OUT_OF_MEMORY;
}
@ -214,14 +248,34 @@ void* tsdbCacherowsReaderClose(void* pReader) {
taosMemoryFree(p->pSchema);
}
taosMemoryFree(p->pDataIter);
taosMemoryFree(p->pCurrSchema);
destroyLastBlockLoadInfo(p->pLoadInfo);
int64_t loadBlocks = 0;
double elapse = 0;
destroySttBlockReader(p->pLDataIterArray, &loadBlocks, &elapse);
if (p->pFileReader) {
tsdbDataFileReaderClose(&p->pFileReader);
p->pFileReader = NULL;
}
taosMemoryFree((void*)p->idstr);
taosThreadMutexDestroy(&p->readerMutex);
if (p->pTableMap) {
void* pe = NULL;
int32_t iter = 0;
while ((pe = tSimpleHashIterate(p->pTableMap, pe, &iter)) != NULL) {
STableLoadInfo* pInfo = *(STableLoadInfo**)pe;
pInfo->pTombData = taosArrayDestroy(pInfo->pTombData);
}
tSimpleHashCleanup(p->pTableMap);
}
if (p->uidList) {
taosMemoryFree(p->uidList);
}
taosMemoryFree(pReader);
return NULL;
}
@ -298,12 +352,10 @@ int32_t tsdbRetrieveCacheRows(void* pReader, SSDataBlock* pResBlock, const int32
}
taosThreadMutexLock(&pr->readerMutex);
code = tsdbTakeReadSnap((STsdbReader*)pr, tsdbCacheQueryReseek, &pr->pReadSnap);
code = tsdbTakeReadSnap2((STsdbReader*)pr, tsdbCacheQueryReseek, &pr->pReadSnap);
if (code != TSDB_CODE_SUCCESS) {
goto _end;
}
pr->pDataFReader = NULL;
pr->pDataFReaderLast = NULL;
int8_t ltype = (pr->type & CACHESCAN_RETRIEVE_LAST) >> 3;
@ -424,11 +476,13 @@ int32_t tsdbRetrieveCacheRows(void* pReader, SSDataBlock* pResBlock, const int32
}
_end:
tsdbDataFReaderClose(&pr->pDataFReaderLast);
tsdbDataFReaderClose(&pr->pDataFReader);
tsdbUntakeReadSnap2((STsdbReader*)pr, pr->pReadSnap, true);
int64_t loadBlocks = 0;
double elapse = 0;
pr->pLDataIterArray = destroySttBlockReader(pr->pLDataIterArray, &loadBlocks, &elapse);
pr->pLDataIterArray = taosArrayInit(4, POINTER_BYTES);
resetLastBlockLoadInfo(pr->pLoadInfo);
tsdbUntakeReadSnap((STsdbReader*)pr, pr->pReadSnap, true);
taosThreadMutexUnlock(&pr->readerMutex);
if (pRes != NULL) {

View File

@ -0,0 +1,605 @@
/*
* Copyright (c) 2019 TAOS Data, Inc. <jhtao@taosdata.com>
*
* This program is free software: you can use, redistribute, and/or modify
* it under the terms of the GNU Affero General Public License, version 3
* or later ("AGPL"), as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include "tsdbCommit2.h"
// extern dependencies
typedef struct {
STsdb *tsdb;
TFileSetArray *fsetArr;
TFileOpArray fopArray[1];
// SSkmInfo skmTb[1];
// SSkmInfo skmRow[1];
int32_t minutes;
int8_t precision;
int32_t minRow;
int32_t maxRow;
int8_t cmprAlg;
int32_t sttTrigger;
int32_t szPage;
int64_t compactVersion;
struct {
int64_t cid;
int64_t now;
TSKEY nextKey;
int32_t fid;
int32_t expLevel;
SDiskID did;
TSKEY minKey;
TSKEY maxKey;
STFileSet *fset;
TABLEID tbid[1];
bool hasTSData;
} ctx[1];
// reader
SSttFileReader *sttReader;
// iter
TTsdbIterArray dataIterArray[1];
SIterMerger *dataIterMerger;
TTsdbIterArray tombIterArray[1];
SIterMerger *tombIterMerger;
// writer
SFSetWriter *writer;
} SCommitter2;
static int32_t tsdbCommitOpenWriter(SCommitter2 *committer) {
int32_t code = 0;
int32_t lino = 0;
SFSetWriterConfig config = {
.tsdb = committer->tsdb,
.toSttOnly = true,
.compactVersion = committer->compactVersion,
.minRow = committer->minRow,
.maxRow = committer->maxRow,
.szPage = committer->szPage,
.cmprAlg = committer->cmprAlg,
.fid = committer->ctx->fid,
.cid = committer->ctx->cid,
.did = committer->ctx->did,
.level = 0,
};
if (committer->sttTrigger == 1) {
config.toSttOnly = false;
if (committer->ctx->fset) {
for (int32_t ftype = TSDB_FTYPE_MIN; ftype < TSDB_FTYPE_MAX; ftype++) {
if (committer->ctx->fset->farr[ftype] != NULL) {
config.files[ftype].exist = true;
config.files[ftype].file = committer->ctx->fset->farr[ftype]->f[0];
}
}
}
}
code = tsdbFSetWriterOpen(&config, &committer->writer);
TSDB_CHECK_CODE(code, lino, _exit);
_exit:
if (code) {
TSDB_ERROR_LOG(TD_VID(committer->tsdb->pVnode), lino, code);
}
return code;
}
static int32_t tsdbCommitCloseWriter(SCommitter2 *committer) {
return tsdbFSetWriterClose(&committer->writer, 0, committer->fopArray);
}
static int32_t tsdbCommitTSData(SCommitter2 *committer) {
int32_t code = 0;
int32_t lino = 0;
int64_t numOfRow = 0;
SMetaInfo info;
committer->ctx->hasTSData = false;
committer->ctx->tbid->suid = 0;
committer->ctx->tbid->uid = 0;
for (SRowInfo *row; (row = tsdbIterMergerGetData(committer->dataIterMerger)) != NULL;) {
if (row->uid != committer->ctx->tbid->uid) {
committer->ctx->tbid->suid = row->suid;
committer->ctx->tbid->uid = row->uid;
if (metaGetInfo(committer->tsdb->pVnode->pMeta, row->uid, &info, NULL) != 0) {
code = tsdbIterMergerSkipTableData(committer->dataIterMerger, committer->ctx->tbid);
TSDB_CHECK_CODE(code, lino, _exit);
continue;
}
}
int64_t ts = TSDBROW_TS(&row->row);
if (ts > committer->ctx->maxKey) {
committer->ctx->nextKey = TMIN(committer->ctx->nextKey, ts);
code = tsdbIterMergerSkipTableData(committer->dataIterMerger, committer->ctx->tbid);
TSDB_CHECK_CODE(code, lino, _exit);
continue;
}
committer->ctx->hasTSData = true;
numOfRow++;
code = tsdbFSetWriteRow(committer->writer, row);
TSDB_CHECK_CODE(code, lino, _exit);
code = tsdbIterMergerNext(committer->dataIterMerger);
TSDB_CHECK_CODE(code, lino, _exit);
}
_exit:
if (code) {
TSDB_ERROR_LOG(TD_VID(committer->tsdb->pVnode), lino, code);
} else {
tsdbDebug("vgId:%d fid:%d commit %" PRId64 " rows", TD_VID(committer->tsdb->pVnode), committer->ctx->fid, numOfRow);
}
return code;
}
static int32_t tsdbCommitTombData(SCommitter2 *committer) {
int32_t code = 0;
int32_t lino = 0;
int64_t numRecord = 0;
SMetaInfo info;
if (committer->ctx->fset == NULL && !committer->ctx->hasTSData) {
return 0;
}
committer->ctx->tbid->suid = 0;
committer->ctx->tbid->uid = 0;
for (STombRecord *record; (record = tsdbIterMergerGetTombRecord(committer->tombIterMerger));) {
if (record->uid != committer->ctx->tbid->uid) {
committer->ctx->tbid->suid = record->suid;
committer->ctx->tbid->uid = record->uid;
if (metaGetInfo(committer->tsdb->pVnode->pMeta, record->uid, &info, NULL) != 0) {
code = tsdbIterMergerSkipTableData(committer->dataIterMerger, committer->ctx->tbid);
TSDB_CHECK_CODE(code, lino, _exit);
continue;
}
}
if (record->ekey < committer->ctx->minKey) {
goto _next;
} else if (record->skey > committer->ctx->maxKey) {
committer->ctx->maxKey = TMIN(record->skey, committer->ctx->maxKey);
goto _next;
}
if (record->ekey > committer->ctx->maxKey) {
committer->ctx->maxKey = committer->ctx->maxKey + 1;
}
record->skey = TMAX(record->skey, committer->ctx->minKey);
record->ekey = TMIN(record->ekey, committer->ctx->maxKey);
numRecord++;
code = tsdbFSetWriteTombRecord(committer->writer, record);
TSDB_CHECK_CODE(code, lino, _exit);
_next:
code = tsdbIterMergerNext(committer->tombIterMerger);
TSDB_CHECK_CODE(code, lino, _exit);
}
_exit:
if (code) {
TSDB_ERROR_LOG(TD_VID(committer->tsdb->pVnode), lino, code);
} else {
tsdbDebug("vgId:%d fid:%d commit %" PRId64 " tomb records", TD_VID(committer->tsdb->pVnode), committer->ctx->fid,
numRecord);
}
return code;
}
static int32_t tsdbCommitOpenReader(SCommitter2 *committer) {
int32_t code = 0;
int32_t lino = 0;
ASSERT(committer->sttReader == NULL);
if (committer->ctx->fset == NULL //
|| committer->sttTrigger > 1 //
|| TARRAY2_SIZE(committer->ctx->fset->lvlArr) == 0 //
) {
return 0;
}
ASSERT(TARRAY2_SIZE(committer->ctx->fset->lvlArr) == 1);
SSttLvl *lvl = TARRAY2_FIRST(committer->ctx->fset->lvlArr);
ASSERT(lvl->level == 0);
if (TARRAY2_SIZE(lvl->fobjArr) == 0) {
return 0;
}
ASSERT(TARRAY2_SIZE(lvl->fobjArr) == 1);
STFileObj *fobj = TARRAY2_FIRST(lvl->fobjArr);
SSttFileReaderConfig config = {
.tsdb = committer->tsdb,
.szPage = committer->szPage,
.file = fobj->f[0],
};
code = tsdbSttFileReaderOpen(fobj->fname, &config, &committer->sttReader);
TSDB_CHECK_CODE(code, lino, _exit);
STFileOp op = {
.optype = TSDB_FOP_REMOVE,
.fid = fobj->f->fid,
.of = fobj->f[0],
};
code = TARRAY2_APPEND(committer->fopArray, op);
TSDB_CHECK_CODE(code, lino, _exit);
_exit:
if (code) {
TSDB_ERROR_LOG(TD_VID(committer->tsdb->pVnode), lino, code);
}
return code;
}
static int32_t tsdbCommitCloseReader(SCommitter2 *committer) { return tsdbSttFileReaderClose(&committer->sttReader); }
static int32_t tsdbCommitOpenIter(SCommitter2 *committer) {
int32_t code = 0;
int32_t lino = 0;
ASSERT(TARRAY2_SIZE(committer->dataIterArray) == 0);
ASSERT(committer->dataIterMerger == NULL);
ASSERT(TARRAY2_SIZE(committer->tombIterArray) == 0);
ASSERT(committer->tombIterMerger == NULL);
STsdbIter *iter;
STsdbIterConfig config = {0};
// mem data iter
config.type = TSDB_ITER_TYPE_MEMT;
config.memt = committer->tsdb->imem;
config.from->ts = committer->ctx->minKey;
config.from->version = VERSION_MIN;
code = tsdbIterOpen(&config, &iter);
TSDB_CHECK_CODE(code, lino, _exit);
code = TARRAY2_APPEND(committer->dataIterArray, iter);
TSDB_CHECK_CODE(code, lino, _exit);
// mem tomb iter
config.type = TSDB_ITER_TYPE_MEMT_TOMB;
config.memt = committer->tsdb->imem;
code = tsdbIterOpen(&config, &iter);
TSDB_CHECK_CODE(code, lino, _exit);
code = TARRAY2_APPEND(committer->tombIterArray, iter);
TSDB_CHECK_CODE(code, lino, _exit);
// STT
if (committer->sttReader) {
// data iter
config.type = TSDB_ITER_TYPE_STT;
config.sttReader = committer->sttReader;
code = tsdbIterOpen(&config, &iter);
TSDB_CHECK_CODE(code, lino, _exit);
code = TARRAY2_APPEND(committer->dataIterArray, iter);
TSDB_CHECK_CODE(code, lino, _exit);
// tomb iter
config.type = TSDB_ITER_TYPE_STT_TOMB;
config.sttReader = committer->sttReader;
code = tsdbIterOpen(&config, &iter);
TSDB_CHECK_CODE(code, lino, _exit);
code = TARRAY2_APPEND(committer->tombIterArray, iter);
TSDB_CHECK_CODE(code, lino, _exit);
}
// open merger
code = tsdbIterMergerOpen(committer->dataIterArray, &committer->dataIterMerger, false);
TSDB_CHECK_CODE(code, lino, _exit);
code = tsdbIterMergerOpen(committer->tombIterArray, &committer->tombIterMerger, true);
TSDB_CHECK_CODE(code, lino, _exit);
_exit:
if (code) {
TSDB_ERROR_LOG(TD_VID(committer->tsdb->pVnode), lino, code);
}
return code;
}
static int32_t tsdbCommitCloseIter(SCommitter2 *committer) {
tsdbIterMergerClose(&committer->tombIterMerger);
tsdbIterMergerClose(&committer->dataIterMerger);
TARRAY2_CLEAR(committer->tombIterArray, tsdbIterClose);
TARRAY2_CLEAR(committer->dataIterArray, tsdbIterClose);
return 0;
}
static int32_t tsdbCommitFileSetBegin(SCommitter2 *committer) {
int32_t code = 0;
int32_t lino = 0;
STsdb *tsdb = committer->tsdb;
committer->ctx->fid = tsdbKeyFid(committer->ctx->nextKey, committer->minutes, committer->precision);
committer->ctx->expLevel = tsdbFidLevel(committer->ctx->fid, &tsdb->keepCfg, committer->ctx->now);
tsdbFidKeyRange(committer->ctx->fid, committer->minutes, committer->precision, &committer->ctx->minKey,
&committer->ctx->maxKey);
code = tfsAllocDisk(committer->tsdb->pVnode->pTfs, committer->ctx->expLevel, &committer->ctx->did);
TSDB_CHECK_CODE(code, lino, _exit);
tfsMkdirRecurAt(committer->tsdb->pVnode->pTfs, committer->tsdb->path, committer->ctx->did);
STFileSet fset = {.fid = committer->ctx->fid};
committer->ctx->fset = &fset;
STFileSet **fsetPtr = TARRAY2_SEARCH(committer->fsetArr, &committer->ctx->fset, tsdbTFileSetCmprFn, TD_EQ);
committer->ctx->fset = (fsetPtr == NULL) ? NULL : *fsetPtr;
committer->ctx->tbid->suid = 0;
committer->ctx->tbid->uid = 0;
ASSERT(TARRAY2_SIZE(committer->dataIterArray) == 0);
ASSERT(committer->dataIterMerger == NULL);
ASSERT(committer->writer == NULL);
code = tsdbCommitOpenReader(committer);
TSDB_CHECK_CODE(code, lino, _exit);
code = tsdbCommitOpenIter(committer);
TSDB_CHECK_CODE(code, lino, _exit);
code = tsdbCommitOpenWriter(committer);
TSDB_CHECK_CODE(code, lino, _exit);
// reset nextKey
committer->ctx->nextKey = TSKEY_MAX;
_exit:
if (code) {
TSDB_ERROR_LOG(TD_VID(tsdb->pVnode), lino, code);
} else {
tsdbDebug("vgId:%d %s done, fid:%d minKey:%" PRId64 " maxKey:%" PRId64 " expLevel:%d", TD_VID(tsdb->pVnode),
__func__, committer->ctx->fid, committer->ctx->minKey, committer->ctx->maxKey, committer->ctx->expLevel);
}
return 0;
}
static int32_t tsdbCommitFileSetEnd(SCommitter2 *committer) {
int32_t code = 0;
int32_t lino = 0;
code = tsdbCommitCloseWriter(committer);
TSDB_CHECK_CODE(code, lino, _exit);
code = tsdbCommitCloseIter(committer);
TSDB_CHECK_CODE(code, lino, _exit);
code = tsdbCommitCloseReader(committer);
TSDB_CHECK_CODE(code, lino, _exit);
_exit:
if (code) {
TSDB_ERROR_LOG(TD_VID(committer->tsdb->pVnode), lino, code);
} else {
tsdbDebug("vgId:%d %s done, fid:%d", TD_VID(committer->tsdb->pVnode), __func__, committer->ctx->fid);
}
return code;
}
static int32_t tsdbCommitFileSet(SCommitter2 *committer) {
int32_t code = 0;
int32_t lino = 0;
// fset commit start
code = tsdbCommitFileSetBegin(committer);
TSDB_CHECK_CODE(code, lino, _exit);
// commit fset
code = tsdbCommitTSData(committer);
TSDB_CHECK_CODE(code, lino, _exit);
code = tsdbCommitTombData(committer);
TSDB_CHECK_CODE(code, lino, _exit);
// fset commit end
code = tsdbCommitFileSetEnd(committer);
TSDB_CHECK_CODE(code, lino, _exit);
_exit:
if (code) {
TSDB_ERROR_LOG(TD_VID(committer->tsdb->pVnode), lino, code);
} else {
tsdbDebug("vgId:%d %s done, fid:%d", TD_VID(committer->tsdb->pVnode), __func__, committer->ctx->fid);
}
return code;
}
static int32_t tsdbOpenCommitter(STsdb *tsdb, SCommitInfo *info, SCommitter2 *committer) {
int32_t code = 0;
int32_t lino = 0;
memset(committer, 0, sizeof(committer[0]));
committer->tsdb = tsdb;
code = tsdbFSCreateCopySnapshot(tsdb->pFS, &committer->fsetArr);
TSDB_CHECK_CODE(code, lino, _exit);
committer->minutes = tsdb->keepCfg.days;
committer->precision = tsdb->keepCfg.precision;
committer->minRow = info->info.config.tsdbCfg.minRows;
committer->maxRow = info->info.config.tsdbCfg.maxRows;
committer->cmprAlg = info->info.config.tsdbCfg.compression;
committer->sttTrigger = info->info.config.sttTrigger;
committer->szPage = info->info.config.tsdbPageSize;
committer->compactVersion = INT64_MAX;
committer->ctx->cid = tsdbFSAllocEid(tsdb->pFS);
committer->ctx->now = taosGetTimestampSec();
committer->ctx->nextKey = tsdb->imem->minKey;
if (tsdb->imem->nDel > 0) {
SRBTreeIter iter[1] = {tRBTreeIterCreate(tsdb->imem->tbDataTree, 1)};
for (SRBTreeNode *node = tRBTreeIterNext(iter); node; node = tRBTreeIterNext(iter)) {
STbData *tbData = TCONTAINER_OF(node, STbData, rbtn);
for (SDelData *delData = tbData->pHead; delData; delData = delData->pNext) {
if (delData->sKey < committer->ctx->nextKey) {
committer->ctx->nextKey = delData->sKey;
}
}
}
}
_exit:
if (code) {
TSDB_ERROR_LOG(TD_VID(tsdb->pVnode), lino, code);
} else {
tsdbDebug("vgId:%d %s done", TD_VID(tsdb->pVnode), __func__);
}
return code;
}
static int32_t tsdbCloseCommitter(SCommitter2 *committer, int32_t eno) {
int32_t code = 0;
int32_t lino = 0;
if (eno == 0) {
code = tsdbFSEditBegin(committer->tsdb->pFS, committer->fopArray, TSDB_FEDIT_COMMIT);
TSDB_CHECK_CODE(code, lino, _exit);
} else {
// TODO
ASSERT(0);
}
ASSERT(committer->writer == NULL);
ASSERT(committer->dataIterMerger == NULL);
ASSERT(committer->tombIterMerger == NULL);
TARRAY2_DESTROY(committer->dataIterArray, NULL);
TARRAY2_DESTROY(committer->tombIterArray, NULL);
TARRAY2_DESTROY(committer->fopArray, NULL);
tsdbFSDestroyCopySnapshot(&committer->fsetArr);
_exit:
if (code) {
tsdbError("vgId:%d %s failed at line %d since %s, eid:%" PRId64, TD_VID(committer->tsdb->pVnode), __func__, lino,
tstrerror(code), committer->ctx->cid);
} else {
tsdbDebug("vgId:%d %s done, eid:%" PRId64, TD_VID(committer->tsdb->pVnode), __func__, committer->ctx->cid);
}
return code;
}
int32_t tsdbPreCommit(STsdb *tsdb) {
taosThreadRwlockWrlock(&tsdb->rwLock);
ASSERT(tsdb->imem == NULL);
tsdb->imem = tsdb->mem;
tsdb->mem = NULL;
taosThreadRwlockUnlock(&tsdb->rwLock);
return 0;
}
int32_t tsdbCommitBegin(STsdb *tsdb, SCommitInfo *info) {
if (!tsdb) return 0;
int32_t code = 0;
int32_t lino = 0;
SMemTable *imem = tsdb->imem;
int64_t nRow = imem->nRow;
int64_t nDel = imem->nDel;
if (nRow == 0 && nDel == 0) {
taosThreadRwlockWrlock(&tsdb->rwLock);
tsdb->imem = NULL;
taosThreadRwlockUnlock(&tsdb->rwLock);
tsdbUnrefMemTable(imem, NULL, true);
} else {
SCommitter2 committer[1];
code = tsdbOpenCommitter(tsdb, info, committer);
TSDB_CHECK_CODE(code, lino, _exit);
while (committer->ctx->nextKey != TSKEY_MAX) {
code = tsdbCommitFileSet(committer);
TSDB_CHECK_CODE(code, lino, _exit);
}
code = tsdbCloseCommitter(committer, code);
TSDB_CHECK_CODE(code, lino, _exit);
}
_exit:
if (code) {
TSDB_ERROR_LOG(TD_VID(tsdb->pVnode), lino, code);
} else {
tsdbInfo("vgId:%d %s done, nRow:%" PRId64 " nDel:%" PRId64, TD_VID(tsdb->pVnode), __func__, nRow, nDel);
}
return code;
}
int32_t tsdbCommitCommit(STsdb *tsdb) {
int32_t code = 0;
int32_t lino = 0;
if (tsdb->imem == NULL) goto _exit;
SMemTable *pMemTable = tsdb->imem;
taosThreadRwlockWrlock(&tsdb->rwLock);
code = tsdbFSEditCommit(tsdb->pFS);
if (code) {
taosThreadRwlockUnlock(&tsdb->rwLock);
TSDB_CHECK_CODE(code, lino, _exit);
}
tsdb->imem = NULL;
taosThreadRwlockUnlock(&tsdb->rwLock);
tsdbUnrefMemTable(pMemTable, NULL, true);
_exit:
if (code) {
TSDB_ERROR_LOG(TD_VID(tsdb->pVnode), lino, code);
} else {
tsdbInfo("vgId:%d %s done", TD_VID(tsdb->pVnode), __func__);
}
return code;
}
int32_t tsdbCommitAbort(STsdb *pTsdb) {
int32_t code = 0;
int32_t lino = 0;
if (pTsdb->imem == NULL) goto _exit;
code = tsdbFSEditAbort(pTsdb->pFS);
TSDB_CHECK_CODE(code, lino, _exit);
_exit:
if (code) {
tsdbError("vgId:%d, %s failed at line %d since %s", TD_VID(pTsdb->pVnode), __func__, lino, tstrerror(code));
} else {
tsdbInfo("vgId:%d %s done", TD_VID(pTsdb->pVnode), __func__);
}
return code;
}

View File

@ -0,0 +1,33 @@
/*
* Copyright (c) 2019 TAOS Data, Inc. <jhtao@taosdata.com>
*
* This program is free software: you can use, redistribute, and/or modify
* it under the terms of the GNU Affero General Public License, version 3
* or later ("AGPL"), as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include "tsdbDataFileRW.h"
#include "tsdbFS2.h"
#include "tsdbFSetRW.h"
#include "tsdbIter.h"
#include "tsdbSttFileRW.h"
#ifndef _TSDB_COMMIT_H_
#define _TSDB_COMMIT_H_
#ifdef __cplusplus
extern "C" {
#endif
#ifdef __cplusplus
}
#endif
#endif /*_TSDB_COMMIT_H_*/

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,104 @@
/*
* Copyright (c) 2019 TAOS Data, Inc. <jhtao@taosdata.com>
*
* This program is free software: you can use, redistribute, and/or modify
* it under the terms of the GNU Affero General Public License, version 3
* or later ("AGPL"), as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include "tsdbDef.h"
#include "tsdbFSet2.h"
#include "tsdbSttFileRW.h"
#include "tsdbUtil2.h"
#ifndef _TSDB_DATA_FILE_RW_H
#define _TSDB_DATA_FILE_RW_H
#ifdef __cplusplus
extern "C" {
#endif
typedef TARRAY2(SBlockIdx) TBlockIdxArray;
typedef TARRAY2(SDataBlk) TDataBlkArray;
typedef TARRAY2(SColumnDataAgg) TColumnDataAggArray;
typedef struct {
SFDataPtr brinBlkPtr[1];
SFDataPtr rsrvd[2];
} SHeadFooter;
typedef struct {
SFDataPtr tombBlkPtr[1];
SFDataPtr rsrvd[2];
} STombFooter;
// SDataFileReader =============================================
typedef struct SDataFileReader SDataFileReader;
typedef struct SDataFileReaderConfig {
STsdb *tsdb;
int32_t szPage;
struct {
bool exist;
STFile file;
} files[TSDB_FTYPE_MAX];
uint8_t **bufArr;
} SDataFileReaderConfig;
int32_t tsdbDataFileReaderOpen(const char *fname[/* TSDB_FTYPE_MAX */], const SDataFileReaderConfig *config,
SDataFileReader **reader);
int32_t tsdbDataFileReaderClose(SDataFileReader **reader);
// .head
int32_t tsdbDataFileReadBrinBlk(SDataFileReader *reader, const TBrinBlkArray **brinBlkArray);
int32_t tsdbDataFileReadBrinBlock(SDataFileReader *reader, const SBrinBlk *brinBlk, SBrinBlock *brinBlock);
// .data
int32_t tsdbDataFileReadBlockData(SDataFileReader *reader, const SBrinRecord *record, SBlockData *bData);
int32_t tsdbDataFileReadBlockDataByColumn(SDataFileReader *reader, const SBrinRecord *record, SBlockData *bData,
STSchema *pTSchema, int16_t cids[], int32_t ncid);
// .sma
int32_t tsdbDataFileReadBlockSma(SDataFileReader *reader, const SBrinRecord *record,
TColumnDataAggArray *columnDataAggArray);
// .tomb
int32_t tsdbDataFileReadTombBlk(SDataFileReader *reader, const TTombBlkArray **tombBlkArray);
int32_t tsdbDataFileReadTombBlock(SDataFileReader *reader, const STombBlk *tombBlk, STombBlock *tData);
// SDataFileWriter =============================================
typedef struct SDataFileWriter SDataFileWriter;
typedef struct SDataFileWriterConfig {
STsdb *tsdb;
int8_t cmprAlg;
int32_t maxRow;
int32_t szPage;
int32_t fid;
int64_t cid;
SDiskID did;
int64_t compactVersion;
struct {
bool exist;
STFile file;
} files[TSDB_FTYPE_MAX];
SSkmInfo *skmTb;
SSkmInfo *skmRow;
uint8_t **bufArr;
} SDataFileWriterConfig;
int32_t tsdbDataFileWriterOpen(const SDataFileWriterConfig *config, SDataFileWriter **writer);
int32_t tsdbDataFileWriterClose(SDataFileWriter **writer, bool abort, TFileOpArray *opArr);
int32_t tsdbDataFileWriteRow(SDataFileWriter *writer, SRowInfo *row);
int32_t tsdbDataFileWriteBlockData(SDataFileWriter *writer, SBlockData *bData);
int32_t tsdbDataFileFlush(SDataFileWriter *writer);
int32_t tsdbDataFileWriteTombRecord(SDataFileWriter *writer, const STombRecord *record);
#ifdef __cplusplus
}
#endif
#endif /*_TSDB_DATA_FILE_RW_H*/

View File

@ -0,0 +1,44 @@
/*
* Copyright (c) 2019 TAOS Data, Inc. <jhtao@taosdata.com>
*
* This program is free software: you can use, redistribute, and/or modify
* it under the terms of the GNU Affero General Public License, version 3
* or later ("AGPL"), as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include "tarray2.h"
#include "tsdb.h"
#ifndef _TD_TSDB_DEF_H_
#define _TD_TSDB_DEF_H_
#ifdef __cplusplus
extern "C" {
#endif
#define TSDB_ERROR_LOG(vid, lino, code) \
tsdbError("vgId:%d %s failed at line %d since %s", vid, __func__, lino, tstrerror(code))
typedef struct SFDataPtr {
int64_t offset;
int64_t size;
} SFDataPtr;
extern int32_t tsdbOpenFile(const char *path, int32_t szPage, int32_t flag, STsdbFD **ppFD);
extern void tsdbCloseFile(STsdbFD **ppFD);
extern int32_t tsdbWriteFile(STsdbFD *pFD, int64_t offset, const uint8_t *pBuf, int64_t size);
extern int32_t tsdbReadFile(STsdbFD *pFD, int64_t offset, uint8_t *pBuf, int64_t size);
extern int32_t tsdbFsyncFile(STsdbFD *pFD);
#ifdef __cplusplus
}
#endif
#endif /*_TD_TSDB_DEF_H_*/

View File

@ -181,10 +181,10 @@ static int32_t tsdbScanAndTryFixFS(STsdb *pTsdb) {
TSDB_CHECK_CODE(code, lino, _exit);
}
if (size != tsdbLogicToFileSize(pTsdb->fs.pDelFile->size, pTsdb->pVnode->config.tsdbPageSize)) {
code = TSDB_CODE_FILE_CORRUPTED;
TSDB_CHECK_CODE(code, lino, _exit);
}
// if (size != tsdbLogicToFileSize(pTsdb->fs.pDelFile->size, pTsdb->pVnode->config.tsdbPageSize)) {
// code = TSDB_CODE_FILE_CORRUPTED;
// TSDB_CHECK_CODE(code, lino, _exit);
// }
}
// SArray<SDFileSet>
@ -199,10 +199,10 @@ static int32_t tsdbScanAndTryFixFS(STsdb *pTsdb) {
code = TAOS_SYSTEM_ERROR(errno);
TSDB_CHECK_CODE(code, lino, _exit);
}
if (size != tsdbLogicToFileSize(pSet->pHeadF->size, pTsdb->pVnode->config.tsdbPageSize)) {
code = TSDB_CODE_FILE_CORRUPTED;
TSDB_CHECK_CODE(code, lino, _exit);
}
// if (size != tsdbLogicToFileSize(pSet->pHeadF->size, pTsdb->pVnode->config.tsdbPageSize)) {
// code = TSDB_CODE_FILE_CORRUPTED;
// TSDB_CHECK_CODE(code, lino, _exit);
// }
// data =========
tsdbDataFileName(pTsdb, pSet->diskId, pSet->fid, pSet->pDataF, fname);
@ -210,10 +210,10 @@ static int32_t tsdbScanAndTryFixFS(STsdb *pTsdb) {
code = TAOS_SYSTEM_ERROR(errno);
TSDB_CHECK_CODE(code, lino, _exit);
}
if (size < tsdbLogicToFileSize(pSet->pDataF->size, pTsdb->pVnode->config.tsdbPageSize)) {
code = TSDB_CODE_FILE_CORRUPTED;
TSDB_CHECK_CODE(code, lino, _exit);
}
// if (size < tsdbLogicToFileSize(pSet->pDataF->size, pTsdb->pVnode->config.tsdbPageSize)) {
// code = TSDB_CODE_FILE_CORRUPTED;
// TSDB_CHECK_CODE(code, lino, _exit);
// }
// else if (size > tsdbLogicToFileSize(pSet->pDataF->size, pTsdb->pVnode->config.tsdbPageSize)) {
// code = tsdbDFileRollback(pTsdb, pSet, TSDB_DATA_FILE);
// TSDB_CHECK_CODE(code, lino, _exit);
@ -225,10 +225,10 @@ static int32_t tsdbScanAndTryFixFS(STsdb *pTsdb) {
code = TAOS_SYSTEM_ERROR(errno);
TSDB_CHECK_CODE(code, lino, _exit);
}
if (size < tsdbLogicToFileSize(pSet->pSmaF->size, pTsdb->pVnode->config.tsdbPageSize)) {
code = TSDB_CODE_FILE_CORRUPTED;
TSDB_CHECK_CODE(code, lino, _exit);
}
// if (size < tsdbLogicToFileSize(pSet->pSmaF->size, pTsdb->pVnode->config.tsdbPageSize)) {
// code = TSDB_CODE_FILE_CORRUPTED;
// TSDB_CHECK_CODE(code, lino, _exit);
// }
// else if (size > tsdbLogicToFileSize(pSet->pSmaF->size, pTsdb->pVnode->config.tsdbPageSize)) {
// code = tsdbDFileRollback(pTsdb, pSet, TSDB_SMA_FILE);
// TSDB_CHECK_CODE(code, lino, _exit);
@ -241,10 +241,10 @@ static int32_t tsdbScanAndTryFixFS(STsdb *pTsdb) {
code = TAOS_SYSTEM_ERROR(errno);
TSDB_CHECK_CODE(code, lino, _exit);
}
if (size != tsdbLogicToFileSize(pSet->aSttF[iStt]->size, pTsdb->pVnode->config.tsdbPageSize)) {
code = TSDB_CODE_FILE_CORRUPTED;
TSDB_CHECK_CODE(code, lino, _exit);
}
// if (size != tsdbLogicToFileSize(pSet->aSttF[iStt]->size, pTsdb->pVnode->config.tsdbPageSize)) {
// code = TSDB_CODE_FILE_CORRUPTED;
// TSDB_CHECK_CODE(code, lino, _exit);
// }
}
}
@ -270,7 +270,7 @@ int32_t tDFileSetCmprFn(const void *p1, const void *p2) {
return 0;
}
static void tsdbGetCurrentFName(STsdb *pTsdb, char *current, char *current_t) {
void tsdbGetCurrentFName(STsdb *pTsdb, char *current, char *current_t) {
SVnode *pVnode = pTsdb->pVnode;
int32_t offset = 0;
@ -289,7 +289,7 @@ static void tsdbGetCurrentFName(STsdb *pTsdb, char *current, char *current_t) {
}
}
static int32_t tsdbLoadFSFromFile(const char *fname, STsdbFS *pFS) {
static int32_t load_fs(const char *fname, STsdbFS *pFS) {
int32_t code = 0;
int32_t lino = 0;
uint8_t *pData = NULL;
@ -666,7 +666,7 @@ static int32_t tsdbFSApplyChange(STsdb *pTsdb, STsdbFS *pFS) {
taosArrayRemove(pTsdb->fs.aDFileSet, iOld);
} else {
code = tsdbNewFileSet(pTsdb, &fSet, pSetNew);
TSDB_CHECK_CODE(code, lino, _exit)
TSDB_CHECK_CODE(code, lino, _exit);
if (taosArrayInsert(pTsdb->fs.aDFileSet, iOld, &fSet) == NULL) {
code = TSDB_CODE_OUT_OF_MEMORY;
@ -682,7 +682,7 @@ static int32_t tsdbFSApplyChange(STsdb *pTsdb, STsdbFS *pFS) {
taosArrayRemove(pTsdb->fs.aDFileSet, iOld);
} else {
code = tsdbNewFileSet(pTsdb, &fSet, pSetNew);
TSDB_CHECK_CODE(code, lino, _exit)
TSDB_CHECK_CODE(code, lino, _exit);
if (taosArrayInsert(pTsdb->fs.aDFileSet, iOld, &fSet) == NULL) {
code = TSDB_CODE_OUT_OF_MEMORY;
@ -723,7 +723,7 @@ int32_t tsdbFSCommit(STsdb *pTsdb) {
code = tsdbFSCreate(&fs);
TSDB_CHECK_CODE(code, lino, _exit);
code = tsdbLoadFSFromFile(current, &fs);
code = load_fs(current, &fs);
TSDB_CHECK_CODE(code, lino, _exit);
// apply file change
@ -768,7 +768,7 @@ int32_t tsdbFSOpen(STsdb *pTsdb, int8_t rollback) {
tsdbGetCurrentFName(pTsdb, current, current_t);
if (taosCheckExistFile(current)) {
code = tsdbLoadFSFromFile(current, &pTsdb->fs);
code = load_fs(current, &pTsdb->fs);
TSDB_CHECK_CODE(code, lino, _exit);
if (taosCheckExistFile(current_t)) {

View File

@ -0,0 +1,885 @@
/*
* Copyright (c) 2019 TAOS Data, Inc. <jhtao@taosdata.com>
*
* This program is free software: you can use, redistribute, and/or modify
* it under the terms of the GNU Affero General Public License, version 3
* or later ("AGPL"), as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include "tsdbFS2.h"
#include "tsdbUpgrade.h"
#include "vnd.h"
extern int vnodeScheduleTask(int (*execute)(void *), void *arg);
extern int vnodeScheduleTaskEx(int tpid, int (*execute)(void *), void *arg);
#define TSDB_FS_EDIT_MIN TSDB_FEDIT_COMMIT
#define TSDB_FS_EDIT_MAX (TSDB_FEDIT_MERGE + 1)
enum {
TSDB_FS_STATE_NONE = 0,
TSDB_FS_STATE_OPEN,
TSDB_FS_STATE_EDIT,
TSDB_FS_STATE_CLOSE,
};
static const char *gCurrentFname[] = {
[TSDB_FCURRENT] = "current.json",
[TSDB_FCURRENT_C] = "current.c.json",
[TSDB_FCURRENT_M] = "current.m.json",
};
static int32_t create_fs(STsdb *pTsdb, STFileSystem **fs) {
fs[0] = taosMemoryCalloc(1, sizeof(*fs[0]));
if (fs[0] == NULL) return TSDB_CODE_OUT_OF_MEMORY;
fs[0]->tsdb = pTsdb;
tsem_init(&fs[0]->canEdit, 0, 1);
fs[0]->state = TSDB_FS_STATE_NONE;
fs[0]->neid = 0;
TARRAY2_INIT(fs[0]->fSetArr);
TARRAY2_INIT(fs[0]->fSetArrTmp);
// background task queue
taosThreadMutexInit(fs[0]->mutex, NULL);
fs[0]->bgTaskQueue->next = fs[0]->bgTaskQueue;
fs[0]->bgTaskQueue->prev = fs[0]->bgTaskQueue;
return 0;
}
static int32_t destroy_fs(STFileSystem **fs) {
if (fs[0] == NULL) return 0;
taosThreadMutexDestroy(fs[0]->mutex);
ASSERT(fs[0]->bgTaskNum == 0);
TARRAY2_DESTROY(fs[0]->fSetArr, NULL);
TARRAY2_DESTROY(fs[0]->fSetArrTmp, NULL);
tsem_destroy(&fs[0]->canEdit);
taosMemoryFree(fs[0]);
fs[0] = NULL;
return 0;
}
int32_t current_fname(STsdb *pTsdb, char *fname, EFCurrentT ftype) {
int32_t offset = 0;
vnodeGetPrimaryDir(pTsdb->path, pTsdb->pVnode->diskPrimary, pTsdb->pVnode->pTfs, fname, TSDB_FILENAME_LEN);
offset = strlen(fname);
snprintf(fname + offset, TSDB_FILENAME_LEN - offset - 1, "%s%s", TD_DIRSEP, gCurrentFname[ftype]);
return 0;
}
static int32_t save_json(const cJSON *json, const char *fname) {
int32_t code = 0;
char *data = cJSON_PrintUnformatted(json);
if (data == NULL) return TSDB_CODE_OUT_OF_MEMORY;
TdFilePtr fp = taosOpenFile(fname, TD_FILE_WRITE | TD_FILE_CREATE | TD_FILE_TRUNC);
if (fp == NULL) {
code = TAOS_SYSTEM_ERROR(code);
goto _exit;
}
if (taosWriteFile(fp, data, strlen(data)) < 0) {
code = TAOS_SYSTEM_ERROR(code);
goto _exit;
}
if (taosFsyncFile(fp) < 0) {
code = TAOS_SYSTEM_ERROR(code);
goto _exit;
}
taosCloseFile(&fp);
_exit:
taosMemoryFree(data);
return code;
}
static int32_t load_json(const char *fname, cJSON **json) {
int32_t code = 0;
char *data = NULL;
TdFilePtr fp = taosOpenFile(fname, TD_FILE_READ);
if (fp == NULL) return TAOS_SYSTEM_ERROR(code);
int64_t size;
if (taosFStatFile(fp, &size, NULL) < 0) {
code = TAOS_SYSTEM_ERROR(code);
goto _exit;
}
data = taosMemoryMalloc(size + 1);
if (data == NULL) {
code = TSDB_CODE_OUT_OF_MEMORY;
goto _exit;
}
if (taosReadFile(fp, data, size) < 0) {
code = TAOS_SYSTEM_ERROR(code);
goto _exit;
}
data[size] = '\0';
json[0] = cJSON_Parse(data);
if (json[0] == NULL) {
code = TSDB_CODE_FILE_CORRUPTED;
goto _exit;
}
_exit:
taosCloseFile(&fp);
if (data) taosMemoryFree(data);
if (code) json[0] = NULL;
return code;
}
int32_t save_fs(const TFileSetArray *arr, const char *fname) {
int32_t code = 0;
int32_t lino = 0;
cJSON *json = cJSON_CreateObject();
if (!json) return TSDB_CODE_OUT_OF_MEMORY;
// fmtv
if (cJSON_AddNumberToObject(json, "fmtv", 1) == NULL) {
code = TSDB_CODE_OUT_OF_MEMORY;
TSDB_CHECK_CODE(code, lino, _exit);
}
// fset
cJSON *ajson = cJSON_AddArrayToObject(json, "fset");
if (!ajson) TSDB_CHECK_CODE(code = TSDB_CODE_OUT_OF_MEMORY, lino, _exit);
const STFileSet *fset;
TARRAY2_FOREACH(arr, fset) {
cJSON *item = cJSON_CreateObject();
if (!item) TSDB_CHECK_CODE(code = TSDB_CODE_OUT_OF_MEMORY, lino, _exit);
cJSON_AddItemToArray(ajson, item);
code = tsdbTFileSetToJson(fset, item);
TSDB_CHECK_CODE(code, lino, _exit);
}
code = save_json(json, fname);
TSDB_CHECK_CODE(code, lino, _exit);
_exit:
if (code) {
tsdbError("%s failed at line %d since %s", __func__, lino, tstrerror(code));
}
cJSON_Delete(json);
return code;
}
static int32_t load_fs(STsdb *pTsdb, const char *fname, TFileSetArray *arr) {
int32_t code = 0;
int32_t lino = 0;
TARRAY2_CLEAR(arr, tsdbTFileSetClear);
// load json
cJSON *json = NULL;
code = load_json(fname, &json);
TSDB_CHECK_CODE(code, lino, _exit);
// parse json
const cJSON *item1;
/* fmtv */
item1 = cJSON_GetObjectItem(json, "fmtv");
if (cJSON_IsNumber(item1)) {
ASSERT(item1->valuedouble == 1);
} else {
TSDB_CHECK_CODE(code = TSDB_CODE_FILE_CORRUPTED, lino, _exit);
}
/* fset */
item1 = cJSON_GetObjectItem(json, "fset");
if (cJSON_IsArray(item1)) {
const cJSON *item2;
cJSON_ArrayForEach(item2, item1) {
STFileSet *fset;
code = tsdbJsonToTFileSet(pTsdb, item2, &fset);
TSDB_CHECK_CODE(code, lino, _exit);
code = TARRAY2_APPEND(arr, fset);
TSDB_CHECK_CODE(code, lino, _exit);
}
} else {
TSDB_CHECK_CODE(code = TSDB_CODE_FILE_CORRUPTED, lino, _exit);
}
_exit:
if (code) {
tsdbError("%s failed at line %d since %s, fname:%s", __func__, lino, tstrerror(code), fname);
}
if (json) cJSON_Delete(json);
return code;
}
static bool is_same_file(const STFile *f1, const STFile f2) {
if (f1->type != f2.type) return false;
if (f1->did.level != f2.did.level) return false;
if (f1->did.id != f2.did.id) return false;
if (f1->cid != f2.cid) return false;
return true;
}
static int32_t apply_commit(STFileSystem *fs) {
int32_t code = 0;
TFileSetArray *fsetArray1 = fs->fSetArr;
TFileSetArray *fsetArray2 = fs->fSetArrTmp;
int32_t i1 = 0, i2 = 0;
while (i1 < TARRAY2_SIZE(fsetArray1) || i2 < TARRAY2_SIZE(fsetArray2)) {
STFileSet *fset1 = i1 < TARRAY2_SIZE(fsetArray1) ? TARRAY2_GET(fsetArray1, i1) : NULL;
STFileSet *fset2 = i2 < TARRAY2_SIZE(fsetArray2) ? TARRAY2_GET(fsetArray2, i2) : NULL;
if (fset1 && fset2) {
if (fset1->fid < fset2->fid) {
// delete fset1
TARRAY2_REMOVE(fsetArray1, i1, tsdbTFileSetRemove);
} else if (fset1->fid > fset2->fid) {
// create new file set with fid of fset2->fid
code = tsdbTFileSetInitDup(fs->tsdb, fset2, &fset1);
if (code) return code;
code = TARRAY2_SORT_INSERT(fsetArray1, fset1, tsdbTFileSetCmprFn);
if (code) return code;
i1++;
i2++;
} else {
// edit
code = tsdbTFileSetApplyEdit(fs->tsdb, fset2, fset1);
if (code) return code;
i1++;
i2++;
}
} else if (fset1) {
// delete fset1
TARRAY2_REMOVE(fsetArray1, i1, tsdbTFileSetRemove);
} else {
// create new file set with fid of fset2->fid
code = tsdbTFileSetInitDup(fs->tsdb, fset2, &fset1);
if (code) return code;
code = TARRAY2_SORT_INSERT(fsetArray1, fset1, tsdbTFileSetCmprFn);
if (code) return code;
i1++;
i2++;
}
}
return 0;
}
static int32_t commit_edit(STFileSystem *fs) {
char current[TSDB_FILENAME_LEN];
char current_t[TSDB_FILENAME_LEN];
current_fname(fs->tsdb, current, TSDB_FCURRENT);
if (fs->etype == TSDB_FEDIT_COMMIT) {
current_fname(fs->tsdb, current_t, TSDB_FCURRENT_C);
} else if (fs->etype == TSDB_FEDIT_MERGE) {
current_fname(fs->tsdb, current_t, TSDB_FCURRENT_M);
} else {
ASSERT(0);
}
int32_t code;
int32_t lino;
if ((code = taosRenameFile(current_t, current))) {
TSDB_CHECK_CODE(code = TAOS_SYSTEM_ERROR(code), lino, _exit);
}
code = apply_commit(fs);
TSDB_CHECK_CODE(code, lino, _exit);
_exit:
if (code) {
tsdbError("vgId:%d %s failed at line %d since %s", TD_VID(fs->tsdb->pVnode), __func__, lino, tstrerror(code));
} else {
tsdbInfo("vgId:%d %s success, etype:%d", TD_VID(fs->tsdb->pVnode), __func__, fs->etype);
}
return code;
}
// static int32_t
static int32_t apply_abort(STFileSystem *fs) {
// TODO
return 0;
}
static int32_t abort_edit(STFileSystem *fs) {
char fname[TSDB_FILENAME_LEN];
if (fs->etype == TSDB_FEDIT_COMMIT) {
current_fname(fs->tsdb, fname, TSDB_FCURRENT_C);
} else if (fs->etype == TSDB_FEDIT_MERGE) {
current_fname(fs->tsdb, fname, TSDB_FCURRENT_M);
} else {
ASSERT(0);
}
int32_t code;
int32_t lino;
if ((code = taosRemoveFile(fname))) {
TSDB_CHECK_CODE(code = TAOS_SYSTEM_ERROR(code), lino, _exit);
}
code = apply_abort(fs);
TSDB_CHECK_CODE(code, lino, _exit);
_exit:
if (code) {
tsdbError("vgId:%d %s failed since %s", TD_VID(fs->tsdb->pVnode), __func__, tstrerror(code));
} else {
tsdbInfo("vgId:%d %s success, etype:%d", TD_VID(fs->tsdb->pVnode), __func__, fs->etype);
}
return code;
}
static int32_t tsdbFSScanAndFix(STFileSystem *fs) {
fs->neid = 0;
// get max commit id
const STFileSet *fset;
TARRAY2_FOREACH(fs->fSetArr, fset) { fs->neid = TMAX(fs->neid, tsdbTFileSetMaxCid(fset)); }
// TODO
return 0;
}
static int32_t tsdbFSDupState(STFileSystem *fs) {
int32_t code;
const TFileSetArray *src = fs->fSetArr;
TFileSetArray *dst = fs->fSetArrTmp;
TARRAY2_CLEAR(dst, tsdbTFileSetClear);
const STFileSet *fset1;
TARRAY2_FOREACH(src, fset1) {
STFileSet *fset2;
code = tsdbTFileSetInitDup(fs->tsdb, fset1, &fset2);
if (code) return code;
code = TARRAY2_APPEND(dst, fset2);
if (code) return code;
}
return 0;
}
static int32_t open_fs(STFileSystem *fs, int8_t rollback) {
int32_t code = 0;
int32_t lino = 0;
STsdb *pTsdb = fs->tsdb;
char fCurrent[TSDB_FILENAME_LEN];
char cCurrent[TSDB_FILENAME_LEN];
char mCurrent[TSDB_FILENAME_LEN];
current_fname(pTsdb, fCurrent, TSDB_FCURRENT);
current_fname(pTsdb, cCurrent, TSDB_FCURRENT_C);
current_fname(pTsdb, mCurrent, TSDB_FCURRENT_M);
if (taosCheckExistFile(fCurrent)) { // current.json exists
code = load_fs(pTsdb, fCurrent, fs->fSetArr);
TSDB_CHECK_CODE(code, lino, _exit);
if (taosCheckExistFile(cCurrent)) {
// current.c.json exists
fs->etype = TSDB_FEDIT_COMMIT;
if (rollback) {
code = abort_edit(fs);
TSDB_CHECK_CODE(code, lino, _exit);
} else {
code = load_fs(pTsdb, cCurrent, fs->fSetArrTmp);
TSDB_CHECK_CODE(code, lino, _exit);
code = commit_edit(fs);
TSDB_CHECK_CODE(code, lino, _exit);
}
} else if (taosCheckExistFile(mCurrent)) {
// current.m.json exists
fs->etype = TSDB_FEDIT_MERGE;
code = abort_edit(fs);
TSDB_CHECK_CODE(code, lino, _exit);
}
code = tsdbFSDupState(fs);
TSDB_CHECK_CODE(code, lino, _exit);
code = tsdbFSScanAndFix(fs);
TSDB_CHECK_CODE(code, lino, _exit);
} else {
code = save_fs(fs->fSetArr, fCurrent);
TSDB_CHECK_CODE(code, lino, _exit);
}
_exit:
if (code) {
tsdbError("vgId:%d %s failed at line %d since %s", TD_VID(pTsdb->pVnode), __func__, lino, tstrerror(code));
} else {
tsdbInfo("vgId:%d %s success", TD_VID(pTsdb->pVnode), __func__);
}
return 0;
}
static int32_t close_file_system(STFileSystem *fs) {
TARRAY2_CLEAR(fs->fSetArr, tsdbTFileSetClear);
TARRAY2_CLEAR(fs->fSetArrTmp, tsdbTFileSetClear);
// TODO
return 0;
}
static int32_t apply_edit(STFileSystem *pFS) {
int32_t code = 0;
ASSERTS(0, "TODO: Not implemented yet");
return code;
}
static int32_t fset_cmpr_fn(const struct STFileSet *pSet1, const struct STFileSet *pSet2) {
if (pSet1->fid < pSet2->fid) {
return -1;
} else if (pSet1->fid > pSet2->fid) {
return 1;
}
return 0;
}
static int32_t edit_fs(STFileSystem *fs, const TFileOpArray *opArray) {
int32_t code = 0;
int32_t lino = 0;
code = tsdbFSDupState(fs);
if (code) return code;
TFileSetArray *fsetArray = fs->fSetArrTmp;
STFileSet *fset = NULL;
const STFileOp *op;
TARRAY2_FOREACH_PTR(opArray, op) {
if (!fset || fset->fid != op->fid) {
STFileSet tfset = {.fid = op->fid};
fset = &tfset;
STFileSet **fsetPtr = TARRAY2_SEARCH(fsetArray, &fset, tsdbTFileSetCmprFn, TD_EQ);
fset = (fsetPtr == NULL) ? NULL : *fsetPtr;
if (!fset) {
code = tsdbTFileSetInit(op->fid, &fset);
TSDB_CHECK_CODE(code, lino, _exit);
code = TARRAY2_SORT_INSERT(fsetArray, fset, tsdbTFileSetCmprFn);
TSDB_CHECK_CODE(code, lino, _exit);
}
}
code = tsdbTFileSetEdit(fs->tsdb, fset, op);
TSDB_CHECK_CODE(code, lino, _exit);
}
// remove empty file set
int32_t i = 0;
while (i < TARRAY2_SIZE(fsetArray)) {
fset = TARRAY2_GET(fsetArray, i);
if (tsdbTFileSetIsEmpty(fset)) {
TARRAY2_REMOVE(fsetArray, i, tsdbTFileSetClear);
} else {
i++;
}
}
_exit:
if (code) {
TSDB_ERROR_LOG(TD_VID(fs->tsdb->pVnode), lino, code);
}
return code;
}
int32_t tsdbOpenFS(STsdb *pTsdb, STFileSystem **fs, int8_t rollback) {
int32_t code;
int32_t lino;
code = tsdbCheckAndUpgradeFileSystem(pTsdb, rollback);
TSDB_CHECK_CODE(code, lino, _exit);
code = create_fs(pTsdb, fs);
TSDB_CHECK_CODE(code, lino, _exit);
code = open_fs(fs[0], rollback);
TSDB_CHECK_CODE(code, lino, _exit);
_exit:
if (code) {
tsdbError("vgId:%d %s failed at line %d since %s", TD_VID(pTsdb->pVnode), __func__, lino, tstrerror(code));
destroy_fs(fs);
} else {
tsdbInfo("vgId:%d %s success", TD_VID(pTsdb->pVnode), __func__);
}
return 0;
}
static void tsdbDoWaitBgTask(STFileSystem *fs, STFSBgTask *task) {
task->numWait++;
taosThreadCondWait(task->done, fs->mutex);
task->numWait--;
if (task->numWait == 0) {
taosThreadCondDestroy(task->done);
taosMemoryFree(task);
}
}
static void tsdbDoDoneBgTask(STFileSystem *fs, STFSBgTask *task) {
if (task->numWait > 0) {
taosThreadCondBroadcast(task->done);
} else {
taosThreadCondDestroy(task->done);
taosMemoryFree(task);
}
}
int32_t tsdbCloseFS(STFileSystem **fs) {
if (fs[0] == NULL) return 0;
taosThreadMutexLock(fs[0]->mutex);
fs[0]->stop = true;
if (fs[0]->bgTaskRunning) {
tsdbDoWaitBgTask(fs[0], fs[0]->bgTaskRunning);
}
taosThreadMutexUnlock(fs[0]->mutex);
close_file_system(fs[0]);
destroy_fs(fs);
return 0;
}
int64_t tsdbFSAllocEid(STFileSystem *fs) {
taosThreadRwlockRdlock(&fs->tsdb->rwLock);
int64_t cid = ++fs->neid;
taosThreadRwlockUnlock(&fs->tsdb->rwLock);
return cid;
}
int32_t tsdbFSEditBegin(STFileSystem *fs, const TFileOpArray *opArray, EFEditT etype) {
int32_t code = 0;
int32_t lino;
char current_t[TSDB_FILENAME_LEN];
switch (etype) {
case TSDB_FEDIT_COMMIT:
current_fname(fs->tsdb, current_t, TSDB_FCURRENT_C);
break;
case TSDB_FEDIT_MERGE:
current_fname(fs->tsdb, current_t, TSDB_FCURRENT_M);
break;
default:
ASSERT(0);
}
tsem_wait(&fs->canEdit);
fs->etype = etype;
// edit
code = edit_fs(fs, opArray);
TSDB_CHECK_CODE(code, lino, _exit);
// save fs
code = save_fs(fs->fSetArrTmp, current_t);
TSDB_CHECK_CODE(code, lino, _exit);
_exit:
if (code) {
tsdbError("vgId:%d %s failed at line %d since %s, etype:%d", TD_VID(fs->tsdb->pVnode), __func__, lino,
tstrerror(code), etype);
} else {
tsdbInfo("vgId:%d %s done, etype:%d", TD_VID(fs->tsdb->pVnode), __func__, etype);
}
return code;
}
int32_t tsdbFSEditCommit(STFileSystem *fs) {
int32_t code = 0;
int32_t lino = 0;
// commit
code = commit_edit(fs);
TSDB_CHECK_CODE(code, lino, _exit);
// schedule merge
if (fs->tsdb->pVnode->config.sttTrigger != 1) {
STFileSet *fset;
TARRAY2_FOREACH_REVERSE(fs->fSetArr, fset) {
if (TARRAY2_SIZE(fset->lvlArr) == 0) continue;
SSttLvl *lvl = TARRAY2_FIRST(fset->lvlArr);
if (lvl->level != 0 || TARRAY2_SIZE(lvl->fobjArr) < fs->tsdb->pVnode->config.sttTrigger) continue;
code = tsdbFSScheduleBgTask(fs, TSDB_BG_TASK_MERGER, tsdbMerge, fs->tsdb, NULL);
TSDB_CHECK_CODE(code, lino, _exit);
break;
}
}
_exit:
if (code) {
TSDB_ERROR_LOG(TD_VID(fs->tsdb->pVnode), lino, code);
} else {
tsdbDebug("vgId:%d %s done, etype:%d", TD_VID(fs->tsdb->pVnode), __func__, fs->etype);
tsem_post(&fs->canEdit);
}
return code;
}
int32_t tsdbFSEditAbort(STFileSystem *fs) {
int32_t code = abort_edit(fs);
tsem_post(&fs->canEdit);
return code;
}
int32_t tsdbFSGetFSet(STFileSystem *fs, int32_t fid, STFileSet **fset) {
STFileSet tfset = {.fid = fid};
STFileSet *pset = &tfset;
STFileSet **fsetPtr = TARRAY2_SEARCH(fs->fSetArr, &pset, tsdbTFileSetCmprFn, TD_EQ);
fset[0] = (fsetPtr == NULL) ? NULL : fsetPtr[0];
return 0;
}
int32_t tsdbFSCreateCopySnapshot(STFileSystem *fs, TFileSetArray **fsetArr) {
int32_t code = 0;
STFileSet *fset;
STFileSet *fset1;
fsetArr[0] = taosMemoryMalloc(sizeof(TFileSetArray));
if (fsetArr == NULL) return TSDB_CODE_OUT_OF_MEMORY;
TARRAY2_INIT(fsetArr[0]);
taosThreadRwlockRdlock(&fs->tsdb->rwLock);
TARRAY2_FOREACH(fs->fSetArr, fset) {
code = tsdbTFileSetInitDup(fs->tsdb, fset, &fset1);
if (code) break;
code = TARRAY2_APPEND(fsetArr[0], fset1);
if (code) break;
}
taosThreadRwlockUnlock(&fs->tsdb->rwLock);
if (code) {
TARRAY2_DESTROY(fsetArr[0], tsdbTFileSetClear);
taosMemoryFree(fsetArr[0]);
fsetArr[0] = NULL;
}
return code;
}
int32_t tsdbFSDestroyCopySnapshot(TFileSetArray **fsetArr) {
if (fsetArr[0]) {
TARRAY2_DESTROY(fsetArr[0], tsdbTFileSetClear);
taosMemoryFree(fsetArr[0]);
fsetArr[0] = NULL;
}
return 0;
}
int32_t tsdbFSCreateRefSnapshot(STFileSystem *fs, TFileSetArray **fsetArr) {
int32_t code = 0;
STFileSet *fset, *fset1;
fsetArr[0] = taosMemoryCalloc(1, sizeof(*fsetArr[0]));
if (fsetArr[0] == NULL) return TSDB_CODE_OUT_OF_MEMORY;
taosThreadRwlockRdlock(&fs->tsdb->rwLock);
TARRAY2_FOREACH(fs->fSetArr, fset) {
code = tsdbTFileSetInitRef(fs->tsdb, fset, &fset1);
if (code) break;
code = TARRAY2_APPEND(fsetArr[0], fset1);
if (code) break;
}
taosThreadRwlockUnlock(&fs->tsdb->rwLock);
if (code) {
TARRAY2_DESTROY(fsetArr[0], tsdbTFileSetClear);
fsetArr[0] = NULL;
}
return code;
}
int32_t tsdbFSDestroyRefSnapshot(TFileSetArray **fsetArr) {
if (fsetArr[0]) {
TARRAY2_DESTROY(fsetArr[0], tsdbTFileSetClear);
taosMemoryFreeClear(fsetArr[0]);
fsetArr[0] = NULL;
}
return 0;
}
const char *gFSBgTaskName[] = {NULL, "MERGE", "RETENTION", "COMPACT"};
static int32_t tsdbFSRunBgTask(void *arg) {
STFileSystem *fs = (STFileSystem *)arg;
ASSERT(fs->bgTaskRunning != NULL);
fs->bgTaskRunning->launchTime = taosGetTimestampMs();
fs->bgTaskRunning->run(fs->bgTaskRunning->arg);
fs->bgTaskRunning->finishTime = taosGetTimestampMs();
tsdbDebug("vgId:%d bg task:%s task id:%" PRId64 " finished, schedule time:%" PRId64 " launch time:%" PRId64
" finish time:%" PRId64,
TD_VID(fs->tsdb->pVnode), gFSBgTaskName[fs->bgTaskRunning->type], fs->bgTaskRunning->taskid,
fs->bgTaskRunning->scheduleTime, fs->bgTaskRunning->launchTime, fs->bgTaskRunning->finishTime);
taosThreadMutexLock(fs->mutex);
// free last
tsdbDoDoneBgTask(fs, fs->bgTaskRunning);
fs->bgTaskRunning = NULL;
// schedule next
if (fs->bgTaskNum > 0) {
if (fs->stop) {
while (fs->bgTaskNum > 0) {
STFSBgTask *task = fs->bgTaskQueue->next;
task->prev->next = task->next;
task->next->prev = task->prev;
fs->bgTaskNum--;
tsdbDoDoneBgTask(fs, task);
}
} else {
// pop task from head
fs->bgTaskRunning = fs->bgTaskQueue->next;
fs->bgTaskRunning->prev->next = fs->bgTaskRunning->next;
fs->bgTaskRunning->next->prev = fs->bgTaskRunning->prev;
fs->bgTaskNum--;
vnodeScheduleTaskEx(1, tsdbFSRunBgTask, arg);
}
}
taosThreadMutexUnlock(fs->mutex);
return 0;
}
static int32_t tsdbFSScheduleBgTaskImpl(STFileSystem *fs, EFSBgTaskT type, int32_t (*run)(void *), void *arg,
int64_t *taskid) {
if (fs->stop) {
return 0; // TODO: use a better error code
}
// check if same task is on
// if (fs->bgTaskRunning && fs->bgTaskRunning->type == type) {
// return 0;
// }
for (STFSBgTask *task = fs->bgTaskQueue->next; task != fs->bgTaskQueue; task = task->next) {
if (task->type == type) {
return 0;
}
}
// do schedule task
STFSBgTask *task = taosMemoryCalloc(1, sizeof(STFSBgTask));
if (task == NULL) return TSDB_CODE_OUT_OF_MEMORY;
taosThreadCondInit(task->done, NULL);
task->type = type;
task->run = run;
task->arg = arg;
task->scheduleTime = taosGetTimestampMs();
task->taskid = ++fs->taskid;
if (fs->bgTaskRunning == NULL && fs->bgTaskNum == 0) {
// launch task directly
fs->bgTaskRunning = task;
vnodeScheduleTaskEx(1, tsdbFSRunBgTask, fs);
} else {
// add to the queue tail
fs->bgTaskNum++;
task->next = fs->bgTaskQueue;
task->prev = fs->bgTaskQueue->prev;
task->prev->next = task;
task->next->prev = task;
}
if (taskid) *taskid = task->taskid;
return 0;
}
int32_t tsdbFSScheduleBgTask(STFileSystem *fs, EFSBgTaskT type, int32_t (*run)(void *), void *arg, int64_t *taskid) {
taosThreadMutexLock(fs->mutex);
int32_t code = tsdbFSScheduleBgTaskImpl(fs, type, run, arg, taskid);
taosThreadMutexUnlock(fs->mutex);
return code;
}
int32_t tsdbFSWaitBgTask(STFileSystem *fs, int64_t taskid) {
STFSBgTask *task = NULL;
taosThreadMutexLock(fs->mutex);
if (fs->bgTaskRunning && fs->bgTaskRunning->taskid == taskid) {
task = fs->bgTaskRunning;
} else {
for (STFSBgTask *taskt = fs->bgTaskQueue->next; taskt != fs->bgTaskQueue; taskt = taskt->next) {
if (taskt->taskid == taskid) {
task = taskt;
break;
}
}
}
if (task) {
tsdbDoWaitBgTask(fs, task);
}
taosThreadMutexUnlock(fs->mutex);
return 0;
}
int32_t tsdbFSWaitAllBgTask(STFileSystem *fs) {
taosThreadMutexLock(fs->mutex);
while (fs->bgTaskRunning) {
taosThreadCondWait(fs->bgTaskRunning->done, fs->mutex);
}
taosThreadMutexUnlock(fs->mutex);
return 0;
}
static int32_t tsdbFSDoDisableBgTask(STFileSystem *fs) {
fs->stop = true;
if (fs->bgTaskRunning) {
tsdbDoWaitBgTask(fs, fs->bgTaskRunning);
}
return 0;
}
int32_t tsdbFSDisableBgTask(STFileSystem *fs) {
taosThreadMutexLock(fs->mutex);
int32_t code = tsdbFSDoDisableBgTask(fs);
taosThreadMutexUnlock(fs->mutex);
return code;
}
int32_t tsdbFSEnableBgTask(STFileSystem *fs) {
taosThreadMutexLock(fs->mutex);
fs->stop = false;
taosThreadMutexUnlock(fs->mutex);
return 0;
}

View File

@ -0,0 +1,110 @@
/*
* Copyright (c) 2019 TAOS Data, Inc. <jhtao@taosdata.com>
*
* This program is free software: you can use, redistribute, and/or modify
* it under the terms of the GNU Affero General Public License, version 3
* or later ("AGPL"), as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include "tsdbFSet2.h"
#ifndef _TSDB_FILE_SYSTEM_H
#define _TSDB_FILE_SYSTEM_H
#ifdef __cplusplus
extern "C" {
#endif
/* Exposed Handle */
typedef struct STFileSystem STFileSystem;
typedef struct STFSBgTask STFSBgTask;
// typedef TARRAY2(STFileSet *) TFileSetArray;
typedef enum {
TSDB_FEDIT_COMMIT = 1, //
TSDB_FEDIT_MERGE
} EFEditT;
typedef enum {
TSDB_BG_TASK_MERGER = 1,
TSDB_BG_TASK_RETENTION,
TSDB_BG_TASK_COMPACT,
} EFSBgTaskT;
typedef enum {
TSDB_FCURRENT = 1,
TSDB_FCURRENT_C, // for commit
TSDB_FCURRENT_M, // for merge
} EFCurrentT;
/* Exposed APIs */
// open/close
int32_t tsdbOpenFS(STsdb *pTsdb, STFileSystem **fs, int8_t rollback);
int32_t tsdbCloseFS(STFileSystem **fs);
// snapshot
int32_t tsdbFSCreateCopySnapshot(STFileSystem *fs, TFileSetArray **fsetArr);
int32_t tsdbFSDestroyCopySnapshot(TFileSetArray **fsetArr);
int32_t tsdbFSCreateRefSnapshot(STFileSystem *fs, TFileSetArray **fsetArr);
int32_t tsdbFSDestroyRefSnapshot(TFileSetArray **fsetArr);
// txn
int64_t tsdbFSAllocEid(STFileSystem *fs);
int32_t tsdbFSEditBegin(STFileSystem *fs, const TFileOpArray *opArray, EFEditT etype);
int32_t tsdbFSEditCommit(STFileSystem *fs);
int32_t tsdbFSEditAbort(STFileSystem *fs);
// background task
int32_t tsdbFSScheduleBgTask(STFileSystem *fs, EFSBgTaskT type, int32_t (*run)(void *), void *arg, int64_t *taskid);
int32_t tsdbFSWaitBgTask(STFileSystem *fs, int64_t taskid);
int32_t tsdbFSWaitAllBgTask(STFileSystem *fs);
int32_t tsdbFSDisableBgTask(STFileSystem *fs);
int32_t tsdbFSEnableBgTask(STFileSystem *fs);
// other
int32_t tsdbFSGetFSet(STFileSystem *fs, int32_t fid, STFileSet **fset);
struct STFSBgTask {
EFSBgTaskT type;
int32_t (*run)(void *arg);
void *arg;
TdThreadCond done[1];
int32_t numWait;
int64_t taskid;
int64_t scheduleTime;
int64_t launchTime;
int64_t finishTime;
struct STFSBgTask *prev;
struct STFSBgTask *next;
};
/* Exposed Structs */
struct STFileSystem {
STsdb *tsdb;
tsem_t canEdit;
int32_t state;
int64_t neid;
EFEditT etype;
TFileSetArray fSetArr[1];
TFileSetArray fSetArrTmp[1];
// background task queue
TdThreadMutex mutex[1];
bool stop;
int64_t taskid;
int32_t bgTaskNum;
STFSBgTask bgTaskQueue[1];
STFSBgTask *bgTaskRunning;
};
#ifdef __cplusplus
}
#endif
#endif /*_TSDB_FILE_SYSTEM_H*/

View File

@ -0,0 +1,542 @@
/*
* Copyright (c) 2019 TAOS Data, Inc. <jhtao@taosdata.com>
*
* This program is free software: you can use, redistribute, and/or modify
* it under the terms of the GNU Affero General Public License, version 3
* or later ("AGPL"), as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include "tsdbFSet2.h"
int32_t tsdbSttLvlInit(int32_t level, SSttLvl **lvl) {
if (!(lvl[0] = taosMemoryMalloc(sizeof(SSttLvl)))) return TSDB_CODE_OUT_OF_MEMORY;
lvl[0]->level = level;
TARRAY2_INIT(lvl[0]->fobjArr);
return 0;
}
static void tsdbSttLvlClearFObj(void *data) { tsdbTFileObjUnref(*(STFileObj **)data); }
int32_t tsdbSttLvlClear(SSttLvl **lvl) {
if (lvl[0] != NULL) {
TARRAY2_DESTROY(lvl[0]->fobjArr, tsdbSttLvlClearFObj);
taosMemoryFree(lvl[0]);
lvl[0] = NULL;
}
return 0;
}
static int32_t tsdbSttLvlInitEx(STsdb *pTsdb, const SSttLvl *lvl1, SSttLvl **lvl) {
int32_t code = tsdbSttLvlInit(lvl1->level, lvl);
if (code) return code;
const STFileObj *fobj1;
TARRAY2_FOREACH(lvl1->fobjArr, fobj1) {
STFileObj *fobj;
code = tsdbTFileObjInit(pTsdb, fobj1->f, &fobj);
if (code) {
tsdbSttLvlClear(lvl);
return code;
}
TARRAY2_APPEND(lvl[0]->fobjArr, fobj);
}
return 0;
}
static int32_t tsdbSttLvlInitRef(STsdb *pTsdb, const SSttLvl *lvl1, SSttLvl **lvl) {
int32_t code = tsdbSttLvlInit(lvl1->level, lvl);
if (code) return code;
STFileObj *fobj1;
TARRAY2_FOREACH(lvl1->fobjArr, fobj1) {
tsdbTFileObjRef(fobj1);
code = TARRAY2_APPEND(lvl[0]->fobjArr, fobj1);
if (code) return code;
}
return 0;
}
static void tsdbSttLvlRemoveFObj(void *data) { tsdbTFileObjRemove(*(STFileObj **)data); }
static void tsdbSttLvlRemove(SSttLvl **lvl) {
TARRAY2_DESTROY(lvl[0]->fobjArr, tsdbSttLvlRemoveFObj);
taosMemoryFree(lvl[0]);
lvl[0] = NULL;
}
static int32_t tsdbSttLvlApplyEdit(STsdb *pTsdb, const SSttLvl *lvl1, SSttLvl *lvl2) {
int32_t code = 0;
ASSERT(lvl1->level == lvl2->level);
int32_t i1 = 0, i2 = 0;
while (i1 < TARRAY2_SIZE(lvl1->fobjArr) || i2 < TARRAY2_SIZE(lvl2->fobjArr)) {
STFileObj *fobj1 = i1 < TARRAY2_SIZE(lvl1->fobjArr) ? TARRAY2_GET(lvl1->fobjArr, i1) : NULL;
STFileObj *fobj2 = i2 < TARRAY2_SIZE(lvl2->fobjArr) ? TARRAY2_GET(lvl2->fobjArr, i2) : NULL;
if (fobj1 && fobj2) {
if (fobj1->f->cid < fobj2->f->cid) {
// create a file obj
code = tsdbTFileObjInit(pTsdb, fobj1->f, &fobj2);
if (code) return code;
code = TARRAY2_APPEND(lvl2->fobjArr, fobj2);
if (code) return code;
i1++;
i2++;
} else if (fobj1->f->cid > fobj2->f->cid) {
// remove a file obj
TARRAY2_REMOVE(lvl2->fobjArr, i2, tsdbSttLvlRemoveFObj);
} else {
if (tsdbIsSameTFile(fobj1->f, fobj2->f)) {
if (tsdbIsTFileChanged(fobj1->f, fobj2->f)) {
fobj2->f[0] = fobj1->f[0];
}
} else {
TARRAY2_REMOVE(lvl2->fobjArr, i2, tsdbSttLvlRemoveFObj);
code = tsdbTFileObjInit(pTsdb, fobj1->f, &fobj2);
if (code) return code;
code = TARRAY2_SORT_INSERT(lvl2->fobjArr, fobj2, tsdbTFileObjCmpr);
if (code) return code;
}
i1++;
i2++;
}
} else if (fobj1) {
// create a file obj
code = tsdbTFileObjInit(pTsdb, fobj1->f, &fobj2);
if (code) return code;
code = TARRAY2_APPEND(lvl2->fobjArr, fobj2);
if (code) return code;
i1++;
i2++;
} else {
// remove a file obj
TARRAY2_REMOVE(lvl2->fobjArr, i2, tsdbSttLvlRemoveFObj);
}
}
return 0;
}
static int32_t tsdbSttLvlCmprFn(const SSttLvl **lvl1, const SSttLvl **lvl2) {
if (lvl1[0]->level < lvl2[0]->level) return -1;
if (lvl1[0]->level > lvl2[0]->level) return 1;
return 0;
}
static int32_t tsdbSttLvlToJson(const SSttLvl *lvl, cJSON *json) {
if (cJSON_AddNumberToObject(json, "level", lvl->level) == NULL) {
return TSDB_CODE_OUT_OF_MEMORY;
}
cJSON *ajson = cJSON_AddArrayToObject(json, "files");
if (ajson == NULL) return TSDB_CODE_OUT_OF_MEMORY;
const STFileObj *fobj;
TARRAY2_FOREACH(lvl->fobjArr, fobj) {
cJSON *item = cJSON_CreateObject();
if (item == NULL) return TSDB_CODE_OUT_OF_MEMORY;
cJSON_AddItemToArray(ajson, item);
int32_t code = tsdbTFileToJson(fobj->f, item);
if (code) return code;
}
return 0;
}
static int32_t tsdbJsonToSttLvl(STsdb *pTsdb, const cJSON *json, SSttLvl **lvl) {
const cJSON *item1, *item2;
int32_t level;
item1 = cJSON_GetObjectItem(json, "level");
if (cJSON_IsNumber(item1)) {
level = item1->valuedouble;
} else {
return TSDB_CODE_FILE_CORRUPTED;
}
int32_t code = tsdbSttLvlInit(level, lvl);
if (code) return code;
item1 = cJSON_GetObjectItem(json, "files");
if (!cJSON_IsArray(item1)) {
tsdbSttLvlClear(lvl);
return TSDB_CODE_FILE_CORRUPTED;
}
cJSON_ArrayForEach(item2, item1) {
STFile tf;
code = tsdbJsonToTFile(item2, TSDB_FTYPE_STT, &tf);
if (code) {
tsdbSttLvlClear(lvl);
return code;
}
STFileObj *fobj;
code = tsdbTFileObjInit(pTsdb, &tf, &fobj);
if (code) {
tsdbSttLvlClear(lvl);
return code;
}
TARRAY2_APPEND(lvl[0]->fobjArr, fobj);
}
return 0;
}
int32_t tsdbTFileSetToJson(const STFileSet *fset, cJSON *json) {
int32_t code = 0;
cJSON *item1, *item2;
// fid
if (cJSON_AddNumberToObject(json, "fid", fset->fid) == NULL) {
return TSDB_CODE_OUT_OF_MEMORY;
}
for (int32_t ftype = TSDB_FTYPE_MIN; ftype < TSDB_FTYPE_MAX; ++ftype) {
if (fset->farr[ftype] == NULL) continue;
code = tsdbTFileToJson(fset->farr[ftype]->f, json);
if (code) return code;
}
// each level
item1 = cJSON_AddArrayToObject(json, "stt lvl");
if (item1 == NULL) return TSDB_CODE_OUT_OF_MEMORY;
const SSttLvl *lvl;
TARRAY2_FOREACH(fset->lvlArr, lvl) {
item2 = cJSON_CreateObject();
if (!item2) return TSDB_CODE_OUT_OF_MEMORY;
cJSON_AddItemToArray(item1, item2);
code = tsdbSttLvlToJson(lvl, item2);
if (code) return code;
}
return 0;
}
int32_t tsdbJsonToTFileSet(STsdb *pTsdb, const cJSON *json, STFileSet **fset) {
int32_t code;
const cJSON *item1, *item2;
int32_t fid;
STFile tf;
// fid
item1 = cJSON_GetObjectItem(json, "fid");
if (cJSON_IsNumber(item1)) {
fid = item1->valuedouble;
} else {
return TSDB_CODE_FILE_CORRUPTED;
}
code = tsdbTFileSetInit(fid, fset);
if (code) return code;
for (tsdb_ftype_t ftype = TSDB_FTYPE_MIN; ftype < TSDB_FTYPE_MAX; ++ftype) {
code = tsdbJsonToTFile(json, ftype, &tf);
if (code == TSDB_CODE_NOT_FOUND) {
continue;
} else if (code) {
tsdbTFileSetClear(fset);
return code;
} else {
code = tsdbTFileObjInit(pTsdb, &tf, &(*fset)->farr[ftype]);
if (code) return code;
}
}
// each level
item1 = cJSON_GetObjectItem(json, "stt lvl");
if (cJSON_IsArray(item1)) {
cJSON_ArrayForEach(item2, item1) {
SSttLvl *lvl;
code = tsdbJsonToSttLvl(pTsdb, item2, &lvl);
if (code) {
tsdbTFileSetClear(fset);
return code;
}
TARRAY2_APPEND((*fset)->lvlArr, lvl);
}
} else {
return TSDB_CODE_FILE_CORRUPTED;
}
return 0;
}
// NOTE: the api does not remove file, only do memory operation
int32_t tsdbTFileSetEdit(STsdb *pTsdb, STFileSet *fset, const STFileOp *op) {
int32_t code = 0;
if (op->optype == TSDB_FOP_CREATE) {
// create a new file
STFileObj *fobj;
code = tsdbTFileObjInit(pTsdb, &op->nf, &fobj);
if (code) return code;
if (fobj->f->type == TSDB_FTYPE_STT) {
SSttLvl *lvl = tsdbTFileSetGetSttLvl(fset, fobj->f->stt->level);
if (!lvl) {
code = tsdbSttLvlInit(fobj->f->stt->level, &lvl);
if (code) return code;
code = TARRAY2_SORT_INSERT(fset->lvlArr, lvl, tsdbSttLvlCmprFn);
if (code) return code;
}
code = TARRAY2_SORT_INSERT(lvl->fobjArr, fobj, tsdbTFileObjCmpr);
if (code) return code;
} else {
ASSERT(fset->farr[fobj->f->type] == NULL);
fset->farr[fobj->f->type] = fobj;
}
} else if (op->optype == TSDB_FOP_REMOVE) {
// delete a file
if (op->of.type == TSDB_FTYPE_STT) {
SSttLvl *lvl = tsdbTFileSetGetSttLvl(fset, op->of.stt->level);
ASSERT(lvl);
STFileObj tfobj = {.f[0] = {.cid = op->of.cid}};
STFileObj *tfobjp = &tfobj;
int32_t idx = TARRAY2_SEARCH_IDX(lvl->fobjArr, &tfobjp, tsdbTFileObjCmpr, TD_EQ);
ASSERT(idx >= 0);
TARRAY2_REMOVE(lvl->fobjArr, idx, tsdbSttLvlClearFObj);
if (TARRAY2_SIZE(lvl->fobjArr) == 0) {
// TODO: remove the stt level if no file exists anymore
// TARRAY2_REMOVE(&fset->lvlArr, lvl - fset->lvlArr.data, tsdbSttLvlClear);
}
} else {
ASSERT(tsdbIsSameTFile(&op->of, fset->farr[op->of.type]->f));
tsdbTFileObjUnref(fset->farr[op->of.type]);
fset->farr[op->of.type] = NULL;
}
} else {
if (op->nf.type == TSDB_FTYPE_STT) {
SSttLvl *lvl = tsdbTFileSetGetSttLvl(fset, op->of.stt->level);
ASSERT(lvl);
STFileObj tfobj = {.f[0] = {.cid = op->of.cid}}, *tfobjp = &tfobj;
STFileObj **fobjPtr = TARRAY2_SEARCH(lvl->fobjArr, &tfobjp, tsdbTFileObjCmpr, TD_EQ);
tfobjp = (fobjPtr ? *fobjPtr : NULL);
ASSERT(tfobjp);
tfobjp->f[0] = op->nf;
} else {
fset->farr[op->nf.type]->f[0] = op->nf;
}
}
return 0;
}
int32_t tsdbTFileSetApplyEdit(STsdb *pTsdb, const STFileSet *fset1, STFileSet *fset2) {
int32_t code = 0;
ASSERT(fset1->fid == fset2->fid);
for (tsdb_ftype_t ftype = TSDB_FTYPE_MIN; ftype < TSDB_FTYPE_MAX; ++ftype) {
if (!fset1->farr[ftype] && !fset2->farr[ftype]) continue;
STFileObj *fobj1 = fset1->farr[ftype];
STFileObj *fobj2 = fset2->farr[ftype];
if (fobj1 && fobj2) {
if (tsdbIsSameTFile(fobj1->f, fobj2->f)) {
if (tsdbIsTFileChanged(fobj1->f, fobj2->f)) {
fobj2->f[0] = fobj1->f[0];
}
} else {
tsdbTFileObjRemove(fobj2);
code = tsdbTFileObjInit(pTsdb, fobj1->f, &fset2->farr[ftype]);
if (code) return code;
}
} else if (fobj1) {
// create a new file
code = tsdbTFileObjInit(pTsdb, fobj1->f, &fset2->farr[ftype]);
if (code) return code;
} else {
// remove the file
tsdbTFileObjRemove(fobj2);
fset2->farr[ftype] = NULL;
}
}
// stt part
int32_t i1 = 0, i2 = 0;
while (i1 < TARRAY2_SIZE(fset1->lvlArr) || i2 < TARRAY2_SIZE(fset2->lvlArr)) {
SSttLvl *lvl1 = i1 < TARRAY2_SIZE(fset1->lvlArr) ? TARRAY2_GET(fset1->lvlArr, i1) : NULL;
SSttLvl *lvl2 = i2 < TARRAY2_SIZE(fset2->lvlArr) ? TARRAY2_GET(fset2->lvlArr, i2) : NULL;
if (lvl1 && lvl2) {
if (lvl1->level < lvl2->level) {
// add a new stt level
code = tsdbSttLvlInitEx(pTsdb, lvl1, &lvl2);
if (code) return code;
code = TARRAY2_SORT_INSERT(fset2->lvlArr, lvl2, tsdbSttLvlCmprFn);
if (code) return code;
i1++;
i2++;
} else if (lvl1->level > lvl2->level) {
// remove the stt level
TARRAY2_REMOVE(fset2->lvlArr, i2, tsdbSttLvlRemove);
} else {
// apply edit on stt level
code = tsdbSttLvlApplyEdit(pTsdb, lvl1, lvl2);
if (code) return code;
i1++;
i2++;
}
} else if (lvl1) {
// add a new stt level
code = tsdbSttLvlInitEx(pTsdb, lvl1, &lvl2);
if (code) return code;
code = TARRAY2_SORT_INSERT(fset2->lvlArr, lvl2, tsdbSttLvlCmprFn);
if (code) return code;
i1++;
i2++;
} else {
// remove the stt level
TARRAY2_REMOVE(fset2->lvlArr, i2, tsdbSttLvlRemove);
}
}
return 0;
}
int32_t tsdbTFileSetInit(int32_t fid, STFileSet **fset) {
fset[0] = taosMemoryCalloc(1, sizeof(STFileSet));
if (fset[0] == NULL) return TSDB_CODE_OUT_OF_MEMORY;
fset[0]->fid = fid;
TARRAY2_INIT(fset[0]->lvlArr);
return 0;
}
int32_t tsdbTFileSetInitDup(STsdb *pTsdb, const STFileSet *fset1, STFileSet **fset) {
int32_t code = tsdbTFileSetInit(fset1->fid, fset);
if (code) return code;
for (int32_t ftype = TSDB_FTYPE_MIN; ftype < TSDB_FTYPE_MAX; ++ftype) {
if (fset1->farr[ftype] == NULL) continue;
code = tsdbTFileObjInit(pTsdb, fset1->farr[ftype]->f, &fset[0]->farr[ftype]);
if (code) {
tsdbTFileSetClear(fset);
return code;
}
}
const SSttLvl *lvl1;
TARRAY2_FOREACH(fset1->lvlArr, lvl1) {
SSttLvl *lvl;
code = tsdbSttLvlInitEx(pTsdb, lvl1, &lvl);
if (code) {
tsdbTFileSetClear(fset);
return code;
}
code = TARRAY2_APPEND(fset[0]->lvlArr, lvl);
if (code) return code;
}
return 0;
}
int32_t tsdbTFileSetInitRef(STsdb *pTsdb, const STFileSet *fset1, STFileSet **fset) {
int32_t code = tsdbTFileSetInit(fset1->fid, fset);
if (code) return code;
for (int32_t ftype = TSDB_FTYPE_MIN; ftype < TSDB_FTYPE_MAX; ++ftype) {
if (fset1->farr[ftype] == NULL) continue;
tsdbTFileObjRef(fset1->farr[ftype]);
fset[0]->farr[ftype] = fset1->farr[ftype];
}
const SSttLvl *lvl1;
TARRAY2_FOREACH(fset1->lvlArr, lvl1) {
SSttLvl *lvl;
code = tsdbSttLvlInitRef(pTsdb, lvl1, &lvl);
if (code) {
tsdbTFileSetClear(fset);
return code;
}
code = TARRAY2_APPEND(fset[0]->lvlArr, lvl);
if (code) return code;
}
return 0;
}
int32_t tsdbTFileSetClear(STFileSet **fset) {
if (!fset[0]) return 0;
for (tsdb_ftype_t ftype = TSDB_FTYPE_MIN; ftype < TSDB_FTYPE_MAX; ++ftype) {
if (fset[0]->farr[ftype] == NULL) continue;
tsdbTFileObjUnref(fset[0]->farr[ftype]);
}
TARRAY2_DESTROY(fset[0]->lvlArr, tsdbSttLvlClear);
taosMemoryFree(fset[0]);
fset[0] = NULL;
return 0;
}
int32_t tsdbTFileSetRemove(STFileSet **fset) {
for (tsdb_ftype_t ftype = TSDB_FTYPE_MIN; ftype < TSDB_FTYPE_MAX; ++ftype) {
if (fset[0]->farr[ftype] == NULL) continue;
tsdbTFileObjRemove(fset[0]->farr[ftype]);
}
TARRAY2_DESTROY(fset[0]->lvlArr, tsdbSttLvlRemove);
taosMemoryFree(fset[0]);
fset[0] = NULL;
return 0;
}
SSttLvl *tsdbTFileSetGetSttLvl(STFileSet *fset, int32_t level) {
SSttLvl sttLvl = {.level = level};
SSttLvl *lvl = &sttLvl;
SSttLvl **lvlPtr = TARRAY2_SEARCH(fset->lvlArr, &lvl, tsdbSttLvlCmprFn, TD_EQ);
return lvlPtr ? lvlPtr[0] : NULL;
}
int32_t tsdbTFileSetCmprFn(const STFileSet **fset1, const STFileSet **fset2) {
if (fset1[0]->fid < fset2[0]->fid) return -1;
if (fset1[0]->fid > fset2[0]->fid) return 1;
return 0;
}
int64_t tsdbTFileSetMaxCid(const STFileSet *fset) {
int64_t maxCid = 0;
for (tsdb_ftype_t ftype = TSDB_FTYPE_MIN; ftype < TSDB_FTYPE_MAX; ++ftype) {
if (fset->farr[ftype] == NULL) continue;
maxCid = TMAX(maxCid, fset->farr[ftype]->f->cid);
}
const SSttLvl *lvl;
const STFileObj *fobj;
TARRAY2_FOREACH(fset->lvlArr, lvl) {
TARRAY2_FOREACH(lvl->fobjArr, fobj) { maxCid = TMAX(maxCid, fobj->f->cid); }
}
return maxCid;
}
bool tsdbTFileSetIsEmpty(const STFileSet *fset) {
for (tsdb_ftype_t ftype = TSDB_FTYPE_MIN; ftype < TSDB_FTYPE_MAX; ++ftype) {
if (fset->farr[ftype] != NULL) return false;
}
return TARRAY2_SIZE(fset->lvlArr) == 0;
}

View File

@ -0,0 +1,85 @@
/*
* Copyright (c) 2019 TAOS Data, Inc. <jhtao@taosdata.com>
*
* This program is free software: you can use, redistribute, and/or modify
* it under the terms of the GNU Affero General Public License, version 3
* or later ("AGPL"), as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include "tsdbFile2.h"
#ifndef _TSDB_FILE_SET2_H
#define _TSDB_FILE_SET2_H
#ifdef __cplusplus
extern "C" {
#endif
typedef struct STFileSet STFileSet;
typedef struct STFileOp STFileOp;
typedef struct SSttLvl SSttLvl;
typedef TARRAY2(STFileObj *) TFileObjArray;
typedef TARRAY2(SSttLvl *) TSttLvlArray;
typedef TARRAY2(STFileOp) TFileOpArray;
typedef enum {
TSDB_FOP_NONE = 0,
TSDB_FOP_CREATE,
TSDB_FOP_REMOVE,
TSDB_FOP_MODIFY,
} tsdb_fop_t;
#define TFILE_SET(fid_) \
(STFileSet) { .fid = (fid_) }
// init/clear
int32_t tsdbTFileSetInit(int32_t fid, STFileSet **fset);
int32_t tsdbTFileSetInitDup(STsdb *pTsdb, const STFileSet *fset1, STFileSet **fset);
int32_t tsdbTFileSetInitRef(STsdb *pTsdb, const STFileSet *fset1, STFileSet **fset);
int32_t tsdbTFileSetClear(STFileSet **fset);
int32_t tsdbTFileSetRemove(STFileSet **fset);
// to/from json
int32_t tsdbTFileSetToJson(const STFileSet *fset, cJSON *json);
int32_t tsdbJsonToTFileSet(STsdb *pTsdb, const cJSON *json, STFileSet **fset);
// cmpr
int32_t tsdbTFileSetCmprFn(const STFileSet **fset1, const STFileSet **fset2);
// edit
int32_t tsdbTFileSetEdit(STsdb *pTsdb, STFileSet *fset, const STFileOp *op);
int32_t tsdbTFileSetApplyEdit(STsdb *pTsdb, const STFileSet *fset1, STFileSet *fset);
// max commit id
int64_t tsdbTFileSetMaxCid(const STFileSet *fset);
// get
SSttLvl *tsdbTFileSetGetSttLvl(STFileSet *fset, int32_t level);
// is empty
bool tsdbTFileSetIsEmpty(const STFileSet *fset);
struct STFileOp {
tsdb_fop_t optype;
int32_t fid;
STFile of; // old file state
STFile nf; // new file state
};
struct SSttLvl {
int32_t level;
TFileObjArray fobjArr[1];
};
struct STFileSet {
int32_t fid;
STFileObj *farr[TSDB_FTYPE_MAX]; // file array
TSttLvlArray lvlArr[1]; // level array
};
#ifdef __cplusplus
}
#endif
#endif /*_TSDB_FILE_SET2_H*/

View File

@ -0,0 +1,295 @@
/*
* Copyright (c) 2019 TAOS Data, Inc. <jhtao@taosdata.com>
*
* This program is free software: you can use, redistribute, and/or modify
* it under the terms of the GNU Affero General Public License, version 3
* or later ("AGPL"), as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include "tsdbFSetRW.h"
// SFSetWriter ==================================================
struct SFSetWriter {
SFSetWriterConfig config[1];
SSkmInfo skmTb[1];
SSkmInfo skmRow[1];
uint8_t *bufArr[10];
struct {
TABLEID tbid[1];
} ctx[1];
// writer
SBlockData blockData[2];
int32_t blockDataIdx;
SDataFileWriter *dataWriter;
SSttFileWriter *sttWriter;
};
static int32_t tsdbFSetWriteTableDataBegin(SFSetWriter *writer, const TABLEID *tbid) {
int32_t code = 0;
int32_t lino = 0;
writer->ctx->tbid->suid = tbid->suid;
writer->ctx->tbid->uid = tbid->uid;
code = tsdbUpdateSkmTb(writer->config->tsdb, writer->ctx->tbid, writer->skmTb);
TSDB_CHECK_CODE(code, lino, _exit);
writer->blockDataIdx = 0;
for (int32_t i = 0; i < ARRAY_SIZE(writer->blockData); i++) {
code = tBlockDataInit(&writer->blockData[i], writer->ctx->tbid, writer->skmTb->pTSchema, NULL, 0);
TSDB_CHECK_CODE(code, lino, _exit);
}
_exit:
if (code) {
TSDB_ERROR_LOG(TD_VID(writer->config->tsdb->pVnode), lino, code);
}
return code;
}
static int32_t tsdbFSetWriteTableDataEnd(SFSetWriter *writer) {
if (writer->ctx->tbid->uid == 0) return 0;
int32_t code = 0;
int32_t lino = 0;
int32_t cidx = writer->blockDataIdx;
int32_t pidx = ((cidx + 1) & 1);
int32_t numRow = ((writer->blockData[pidx].nRow + writer->blockData[cidx].nRow) >> 1);
if (writer->blockData[pidx].nRow > 0 && numRow >= writer->config->minRow) {
ASSERT(writer->blockData[pidx].nRow == writer->config->maxRow);
SRowInfo row = {
.suid = writer->ctx->tbid->suid,
.uid = writer->ctx->tbid->uid,
.row = tsdbRowFromBlockData(writer->blockData + pidx, 0),
};
for (int32_t i = 0; i < numRow; i++) {
row.row.iRow = i;
code = tsdbDataFileWriteRow(writer->dataWriter, &row);
TSDB_CHECK_CODE(code, lino, _exit);
}
code = tsdbDataFileFlush(writer->dataWriter);
TSDB_CHECK_CODE(code, lino, _exit);
for (int32_t i = numRow; i < writer->blockData[pidx].nRow; i++) {
row.row.iRow = i;
code = tsdbDataFileWriteRow(writer->dataWriter, &row);
TSDB_CHECK_CODE(code, lino, _exit);
}
row.row = tsdbRowFromBlockData(writer->blockData + cidx, 0);
for (int32_t i = 0; i < writer->blockData[cidx].nRow; i++) {
row.row.iRow = i;
code = tsdbDataFileWriteRow(writer->dataWriter, &row);
TSDB_CHECK_CODE(code, lino, _exit);
}
} else {
// pidx
if (writer->blockData[pidx].nRow > 0) {
code = tsdbDataFileWriteBlockData(writer->dataWriter, &writer->blockData[pidx]);
TSDB_CHECK_CODE(code, lino, _exit);
}
// cidx
if (writer->blockData[cidx].nRow < writer->config->minRow) {
code = tsdbSttFileWriteBlockData(writer->sttWriter, &writer->blockData[cidx]);
TSDB_CHECK_CODE(code, lino, _exit);
} else {
code = tsdbDataFileWriteBlockData(writer->dataWriter, &writer->blockData[cidx]);
TSDB_CHECK_CODE(code, lino, _exit);
}
}
for (int32_t i = 0; i < ARRAY_SIZE(writer->blockData); i++) {
tBlockDataReset(&writer->blockData[i]);
}
_exit:
if (code) {
TSDB_ERROR_LOG(TD_VID(writer->config->tsdb->pVnode), lino, code);
}
return code;
}
int32_t tsdbFSetWriterOpen(SFSetWriterConfig *config, SFSetWriter **writer) {
int32_t code = 0;
int32_t lino = 0;
writer[0] = taosMemoryCalloc(1, sizeof(*writer[0]));
if (writer[0] == NULL) return TSDB_CODE_OUT_OF_MEMORY;
writer[0]->config[0] = config[0];
// data writer
if (!config->toSttOnly) {
SDataFileWriterConfig dataWriterConfig = {
.tsdb = config->tsdb,
.cmprAlg = config->cmprAlg,
.maxRow = config->maxRow,
.szPage = config->szPage,
.fid = config->fid,
.cid = config->cid,
.did = config->did,
.compactVersion = config->compactVersion,
.skmTb = writer[0]->skmTb,
.skmRow = writer[0]->skmRow,
.bufArr = writer[0]->bufArr,
};
for (int32_t ftype = 0; ftype < TSDB_FTYPE_MAX; ++ftype) {
dataWriterConfig.files[ftype].exist = config->files[ftype].exist;
dataWriterConfig.files[ftype].file = config->files[ftype].file;
}
code = tsdbDataFileWriterOpen(&dataWriterConfig, &writer[0]->dataWriter);
TSDB_CHECK_CODE(code, lino, _exit);
}
// stt writer
SSttFileWriterConfig sttWriterConfig = {
.tsdb = config->tsdb,
.maxRow = config->maxRow,
.szPage = config->szPage,
.cmprAlg = config->cmprAlg,
.compactVersion = config->compactVersion,
.did = config->did,
.fid = config->fid,
.cid = config->cid,
.level = config->level,
.skmTb = writer[0]->skmTb,
.skmRow = writer[0]->skmRow,
.bufArr = writer[0]->bufArr,
};
code = tsdbSttFileWriterOpen(&sttWriterConfig, &writer[0]->sttWriter);
TSDB_CHECK_CODE(code, lino, _exit);
_exit:
if (code) {
TSDB_ERROR_LOG(TD_VID(config->tsdb->pVnode), lino, code);
}
return code;
}
int32_t tsdbFSetWriterClose(SFSetWriter **writer, bool abort, TFileOpArray *fopArr) {
if (writer[0] == NULL) return 0;
int32_t code = 0;
int32_t lino = 0;
STsdb *tsdb = writer[0]->config->tsdb;
// end
if (!writer[0]->config->toSttOnly) {
code = tsdbFSetWriteTableDataEnd(writer[0]);
TSDB_CHECK_CODE(code, lino, _exit);
code = tsdbDataFileWriterClose(&writer[0]->dataWriter, abort, fopArr);
TSDB_CHECK_CODE(code, lino, _exit);
}
code = tsdbSttFileWriterClose(&writer[0]->sttWriter, abort, fopArr);
TSDB_CHECK_CODE(code, lino, _exit);
// free
for (int32_t i = 0; i < ARRAY_SIZE(writer[0]->blockData); i++) {
tBlockDataDestroy(&writer[0]->blockData[i]);
}
for (int32_t i = 0; i < ARRAY_SIZE(writer[0]->bufArr); i++) {
tFree(writer[0]->bufArr[i]);
}
tDestroyTSchema(writer[0]->skmRow->pTSchema);
tDestroyTSchema(writer[0]->skmTb->pTSchema);
taosMemoryFree(writer[0]);
writer[0] = NULL;
_exit:
if (code) {
TSDB_ERROR_LOG(TD_VID(tsdb->pVnode), lino, code);
}
return code;
}
int32_t tsdbFSetWriteRow(SFSetWriter *writer, SRowInfo *row) {
int32_t code = 0;
int32_t lino = 0;
if (writer->config->toSttOnly) {
code = tsdbSttFileWriteRow(writer->sttWriter, row);
TSDB_CHECK_CODE(code, lino, _exit);
} else {
if (writer->ctx->tbid->uid != row->uid) {
code = tsdbFSetWriteTableDataEnd(writer);
TSDB_CHECK_CODE(code, lino, _exit);
code = tsdbFSetWriteTableDataBegin(writer, (TABLEID *)row);
TSDB_CHECK_CODE(code, lino, _exit);
}
if (row->row.type == TSDBROW_ROW_FMT) {
code = tsdbUpdateSkmRow(writer->config->tsdb, writer->ctx->tbid, TSDBROW_SVERSION(&row->row), writer->skmRow);
TSDB_CHECK_CODE(code, lino, _exit);
}
TSDBKEY key = TSDBROW_KEY(&row->row);
if (key.version <= writer->config->compactVersion //
&& writer->blockData[writer->blockDataIdx].nRow > 0 //
&& key.ts == writer->blockData[writer->blockDataIdx].aTSKEY[writer->blockData[writer->blockDataIdx].nRow - 1]) {
code = tBlockDataUpdateRow(&writer->blockData[writer->blockDataIdx], &row->row, writer->skmRow->pTSchema);
TSDB_CHECK_CODE(code, lino, _exit);
} else {
if (writer->blockData[writer->blockDataIdx].nRow >= writer->config->maxRow) {
int32_t idx = ((writer->blockDataIdx + 1) & 1);
if (writer->blockData[idx].nRow >= writer->config->maxRow) {
code = tsdbDataFileWriteBlockData(writer->dataWriter, &writer->blockData[idx]);
TSDB_CHECK_CODE(code, lino, _exit);
tBlockDataClear(&writer->blockData[idx]);
}
writer->blockDataIdx = idx;
}
code =
tBlockDataAppendRow(&writer->blockData[writer->blockDataIdx], &row->row, writer->skmRow->pTSchema, row->uid);
TSDB_CHECK_CODE(code, lino, _exit);
}
}
_exit:
if (code) {
TSDB_ERROR_LOG(TD_VID(writer->config->tsdb->pVnode), lino, code);
}
return code;
}
int32_t tsdbFSetWriteTombRecord(SFSetWriter *writer, const STombRecord *tombRecord) {
int32_t code = 0;
int32_t lino = 0;
if (writer->config->toSttOnly || tsdbSttFileWriterIsOpened(writer->sttWriter)) {
code = tsdbSttFileWriteTombRecord(writer->sttWriter, tombRecord);
TSDB_CHECK_CODE(code, lino, _exit);
} else {
code = tsdbDataFileWriteTombRecord(writer->dataWriter, tombRecord);
TSDB_CHECK_CODE(code, lino, _exit);
}
_exit:
if (code) {
TSDB_ERROR_LOG(TD_VID(writer->config->tsdb->pVnode), lino, code);
}
return code;
}

View File

@ -0,0 +1,55 @@
/*
* Copyright (c) 2019 TAOS Data, Inc. <jhtao@taosdata.com>
*
* This program is free software: you can use, redistribute, and/or modify
* it under the terms of the GNU Affero General Public License, version 3
* or later ("AGPL"), as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include "tsdbDataFileRW.h"
#include "tsdbSttFileRW.h"
#ifndef _TSDB_FSET_RW_H
#define _TSDB_FSET_RW_H
#ifdef __cplusplus
extern "C" {
#endif
//
typedef struct SFSetWriter SFSetWriter;
typedef struct {
STsdb *tsdb;
bool toSttOnly;
int64_t compactVersion;
int32_t minRow;
int32_t maxRow;
int32_t szPage;
int8_t cmprAlg;
int32_t fid;
int64_t cid;
SDiskID did;
int32_t level;
struct {
bool exist;
STFile file;
} files[TSDB_FTYPE_MAX];
} SFSetWriterConfig;
int32_t tsdbFSetWriterOpen(SFSetWriterConfig *config, SFSetWriter **writer);
int32_t tsdbFSetWriterClose(SFSetWriter **writer, bool abort, TFileOpArray *fopArr);
int32_t tsdbFSetWriteRow(SFSetWriter *writer, SRowInfo *row);
int32_t tsdbFSetWriteTombRecord(SFSetWriter *writer, const STombRecord *tombRecord);
#ifdef __cplusplus
}
#endif
#endif /*_TSDB_FSET_RW_H*/

View File

@ -0,0 +1,295 @@
/*
* Copyright (c) 2019 TAOS Data, Inc. <jhtao@taosdata.com>
*
* This program is free software: you can use, redistribute, and/or modify
* it under the terms of the GNU Affero General Public License, version 3
* or later ("AGPL"), as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include "tsdbFile2.h"
// to_json
static int32_t head_to_json(const STFile *file, cJSON *json);
static int32_t data_to_json(const STFile *file, cJSON *json);
static int32_t sma_to_json(const STFile *file, cJSON *json);
static int32_t tomb_to_json(const STFile *file, cJSON *json);
static int32_t stt_to_json(const STFile *file, cJSON *json);
// from_json
static int32_t head_from_json(const cJSON *json, STFile *file);
static int32_t data_from_json(const cJSON *json, STFile *file);
static int32_t sma_from_json(const cJSON *json, STFile *file);
static int32_t tomb_from_json(const cJSON *json, STFile *file);
static int32_t stt_from_json(const cJSON *json, STFile *file);
static const struct {
const char *suffix;
int32_t (*to_json)(const STFile *file, cJSON *json);
int32_t (*from_json)(const cJSON *json, STFile *file);
} g_tfile_info[] = {
[TSDB_FTYPE_HEAD] = {"head", head_to_json, head_from_json},
[TSDB_FTYPE_DATA] = {"data", data_to_json, data_from_json},
[TSDB_FTYPE_SMA] = {"sma", sma_to_json, sma_from_json},
[TSDB_FTYPE_TOMB] = {"tomb", tomb_to_json, tomb_from_json},
[TSDB_FTYPE_STT] = {"stt", stt_to_json, stt_from_json},
};
static void remove_file(const char *fname) {
taosRemoveFile(fname);
tsdbInfo("file:%s is removed", fname);
}
static int32_t tfile_to_json(const STFile *file, cJSON *json) {
/* did.level */
if (cJSON_AddNumberToObject(json, "did.level", file->did.level) == NULL) {
return TSDB_CODE_OUT_OF_MEMORY;
}
/* did.id */
if (cJSON_AddNumberToObject(json, "did.id", file->did.id) == NULL) {
return TSDB_CODE_OUT_OF_MEMORY;
}
/* fid */
if (cJSON_AddNumberToObject(json, "fid", file->fid) == NULL) {
return TSDB_CODE_OUT_OF_MEMORY;
}
/* cid */
if (cJSON_AddNumberToObject(json, "cid", file->cid) == NULL) {
return TSDB_CODE_OUT_OF_MEMORY;
}
/* size */
if (cJSON_AddNumberToObject(json, "size", file->size) == NULL) {
return TSDB_CODE_OUT_OF_MEMORY;
}
return 0;
}
static int32_t tfile_from_json(const cJSON *json, STFile *file) {
const cJSON *item;
/* did.level */
item = cJSON_GetObjectItem(json, "did.level");
if (cJSON_IsNumber(item)) {
file->did.level = item->valuedouble;
} else {
return TSDB_CODE_FILE_CORRUPTED;
}
/* did.id */
item = cJSON_GetObjectItem(json, "did.id");
if (cJSON_IsNumber(item)) {
file->did.id = item->valuedouble;
} else {
return TSDB_CODE_FILE_CORRUPTED;
}
/* fid */
item = cJSON_GetObjectItem(json, "fid");
if (cJSON_IsNumber(item)) {
file->fid = item->valuedouble;
} else {
return TSDB_CODE_FILE_CORRUPTED;
}
/* cid */
item = cJSON_GetObjectItem(json, "cid");
if (cJSON_IsNumber(item)) {
file->cid = item->valuedouble;
} else {
return TSDB_CODE_FILE_CORRUPTED;
}
/* size */
item = cJSON_GetObjectItem(json, "size");
if (cJSON_IsNumber(item)) {
file->size = item->valuedouble;
} else {
return TSDB_CODE_FILE_CORRUPTED;
}
return 0;
}
static int32_t head_to_json(const STFile *file, cJSON *json) { return tfile_to_json(file, json); }
static int32_t data_to_json(const STFile *file, cJSON *json) { return tfile_to_json(file, json); }
static int32_t sma_to_json(const STFile *file, cJSON *json) { return tfile_to_json(file, json); }
static int32_t tomb_to_json(const STFile *file, cJSON *json) { return tfile_to_json(file, json); }
static int32_t stt_to_json(const STFile *file, cJSON *json) {
int32_t code = tfile_to_json(file, json);
if (code) return code;
/* lvl */
if (cJSON_AddNumberToObject(json, "level", file->stt->level) == NULL) {
return TSDB_CODE_OUT_OF_MEMORY;
}
return 0;
}
static int32_t head_from_json(const cJSON *json, STFile *file) { return tfile_from_json(json, file); }
static int32_t data_from_json(const cJSON *json, STFile *file) { return tfile_from_json(json, file); }
static int32_t sma_from_json(const cJSON *json, STFile *file) { return tfile_from_json(json, file); }
static int32_t tomb_from_json(const cJSON *json, STFile *file) { return tfile_from_json(json, file); }
static int32_t stt_from_json(const cJSON *json, STFile *file) {
int32_t code = tfile_from_json(json, file);
if (code) return code;
const cJSON *item;
/* lvl */
item = cJSON_GetObjectItem(json, "level");
if (cJSON_IsNumber(item)) {
file->stt->level = item->valuedouble;
} else {
return TSDB_CODE_FILE_CORRUPTED;
}
return 0;
}
int32_t tsdbTFileToJson(const STFile *file, cJSON *json) {
if (file->type == TSDB_FTYPE_STT) {
return g_tfile_info[file->type].to_json(file, json);
} else {
cJSON *item = cJSON_AddObjectToObject(json, g_tfile_info[file->type].suffix);
if (item == NULL) return TSDB_CODE_OUT_OF_MEMORY;
return g_tfile_info[file->type].to_json(file, item);
}
}
int32_t tsdbJsonToTFile(const cJSON *json, tsdb_ftype_t ftype, STFile *f) {
f[0] = (STFile){.type = ftype};
if (ftype == TSDB_FTYPE_STT) {
int32_t code = g_tfile_info[ftype].from_json(json, f);
if (code) return code;
} else {
const cJSON *item = cJSON_GetObjectItem(json, g_tfile_info[ftype].suffix);
if (cJSON_IsObject(item)) {
int32_t code = g_tfile_info[ftype].from_json(item, f);
if (code) return code;
} else {
return TSDB_CODE_NOT_FOUND;
}
}
return 0;
}
int32_t tsdbTFileObjInit(STsdb *pTsdb, const STFile *f, STFileObj **fobj) {
fobj[0] = taosMemoryMalloc(sizeof(*fobj[0]));
if (!fobj[0]) return TSDB_CODE_OUT_OF_MEMORY;
taosThreadMutexInit(&fobj[0]->mutex, NULL);
fobj[0]->f[0] = f[0];
fobj[0]->state = TSDB_FSTATE_LIVE;
fobj[0]->ref = 1;
tsdbTFileName(pTsdb, f, fobj[0]->fname);
return 0;
}
int32_t tsdbTFileObjRef(STFileObj *fobj) {
int32_t nRef;
taosThreadMutexLock(&fobj->mutex);
ASSERT(fobj->ref > 0 && fobj->state == TSDB_FSTATE_LIVE);
nRef = ++fobj->ref;
taosThreadMutexUnlock(&fobj->mutex);
tsdbTrace("ref file %s, fobj:%p ref %d", fobj->fname, fobj, nRef);
return 0;
}
int32_t tsdbTFileObjUnref(STFileObj *fobj) {
taosThreadMutexLock(&fobj->mutex);
int32_t nRef = --fobj->ref;
taosThreadMutexUnlock(&fobj->mutex);
ASSERT(nRef >= 0);
tsdbTrace("unref file %s, fobj:%p ref %d", fobj->fname, fobj, nRef);
if (nRef == 0) {
if (fobj->state == TSDB_FSTATE_DEAD) {
remove_file(fobj->fname);
}
taosMemoryFree(fobj);
}
return 0;
}
int32_t tsdbTFileObjRemove(STFileObj *fobj) {
taosThreadMutexLock(&fobj->mutex);
ASSERT(fobj->state == TSDB_FSTATE_LIVE && fobj->ref > 0);
fobj->state = TSDB_FSTATE_DEAD;
int32_t nRef = --fobj->ref;
taosThreadMutexUnlock(&fobj->mutex);
tsdbTrace("remove unref file %s, fobj:%p ref %d", fobj->fname, fobj, nRef);
if (nRef == 0) {
remove_file(fobj->fname);
taosMemoryFree(fobj);
}
return 0;
}
int32_t tsdbTFileName(STsdb *pTsdb, const STFile *f, char fname[]) {
SVnode *pVnode = pTsdb->pVnode;
STfs *pTfs = pVnode->pTfs;
if (pTfs) {
snprintf(fname, //
TSDB_FILENAME_LEN, //
"%s%s%s%sv%df%dver%" PRId64 ".%s", //
tfsGetDiskPath(pTfs, f->did), //
TD_DIRSEP, //
pTsdb->path, //
TD_DIRSEP, //
TD_VID(pVnode), //
f->fid, //
f->cid, //
g_tfile_info[f->type].suffix);
} else {
snprintf(fname, //
TSDB_FILENAME_LEN, //
"%s%sv%df%dver%" PRId64 ".%s", //
pTsdb->path, //
TD_DIRSEP, //
TD_VID(pVnode), //
f->fid, //
f->cid, //
g_tfile_info[f->type].suffix);
}
return 0;
}
bool tsdbIsSameTFile(const STFile *f1, const STFile *f2) {
if (f1->type != f2->type) return false;
if (f1->did.level != f2->did.level) return false;
if (f1->did.id != f2->did.id) return false;
if (f1->fid != f2->fid) return false;
if (f1->cid != f2->cid) return false;
return true;
}
bool tsdbIsTFileChanged(const STFile *f1, const STFile *f2) {
if (f1->size != f2->size) return true;
// if (f1->type == TSDB_FTYPE_STT && f1->stt->nseg != f2->stt->nseg) return true;
return false;
}
int32_t tsdbTFileObjCmpr(const STFileObj **fobj1, const STFileObj **fobj2) {
if (fobj1[0]->f->cid < fobj2[0]->f->cid) {
return -1;
} else if (fobj1[0]->f->cid > fobj2[0]->f->cid) {
return 1;
} else {
return 0;
}
}

View File

@ -0,0 +1,83 @@
/*
* Copyright (c) 2019 TAOS Data, Inc. <jhtao@taosdata.com>
*
* This program is free software: you can use, redistribute, and/or modify
* it under the terms of the GNU Affero General Public License, version 3
* or later ("AGPL"), as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include "tsdbDef.h"
#ifndef _TSDB_FILE_H
#define _TSDB_FILE_H
#ifdef __cplusplus
extern "C" {
#endif
typedef struct STFile STFile;
typedef struct STFileObj STFileObj;
typedef enum {
TSDB_FTYPE_HEAD = 0, // .head
TSDB_FTYPE_DATA, // .data
TSDB_FTYPE_SMA, // .sma
TSDB_FTYPE_TOMB, // .tomb
TSDB_FTYPE_STT = TSDB_FTYPE_TOMB + 2, // .stt
} tsdb_ftype_t;
enum {
TSDB_FSTATE_LIVE = 1,
TSDB_FSTATE_DEAD,
};
#define TSDB_FTYPE_MIN TSDB_FTYPE_HEAD
#define TSDB_FTYPE_MAX (TSDB_FTYPE_TOMB + 1)
// STFile
int32_t tsdbTFileToJson(const STFile *f, cJSON *json);
int32_t tsdbJsonToTFile(const cJSON *json, tsdb_ftype_t ftype, STFile *f);
int32_t tsdbTFileName(STsdb *pTsdb, const STFile *f, char fname[]);
bool tsdbIsSameTFile(const STFile *f1, const STFile *f2);
bool tsdbIsTFileChanged(const STFile *f1, const STFile *f2);
// STFileObj
int32_t tsdbTFileObjInit(STsdb *pTsdb, const STFile *f, STFileObj **fobj);
int32_t tsdbTFileObjRef(STFileObj *fobj);
int32_t tsdbTFileObjUnref(STFileObj *fobj);
int32_t tsdbTFileObjRemove(STFileObj *fobj);
int32_t tsdbTFileObjCmpr(const STFileObj **fobj1, const STFileObj **fobj2);
struct STFile {
tsdb_ftype_t type;
SDiskID did; // disk id
int32_t fid; // file id
int64_t cid; // commit id
int64_t size;
union {
struct {
int32_t level;
} stt[1];
};
};
struct STFileObj {
TdThreadMutex mutex;
STFile f[1];
int32_t state;
int32_t ref;
char fname[TSDB_FILENAME_LEN];
};
#ifdef __cplusplus
}
#endif
#endif /*_TSDB_FILE_H*/

View File

@ -0,0 +1,780 @@
/*
* Copyright (c) 2019 TAOS Data, Inc. <jhtao@taosdata.com>
*
* This program is free software: you can use, redistribute, and/or modify
* it under the terms of the GNU Affero General Public License, version 3
* or later ("AGPL"), as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include "tsdbIter.h"
// STsdbIter ================
struct STsdbIter {
EIterType type;
bool noMoreData;
bool filterByVersion;
int64_t range[2];
union {
SRowInfo row[1];
STombRecord record[1];
};
SRBTreeNode node[1];
union {
struct {
SSttFileReader *reader;
const TSttBlkArray *sttBlkArray;
int32_t sttBlkArrayIdx;
SBlockData blockData[1];
int32_t blockDataIdx;
} sttData[1];
struct {
SDataFileReader *reader;
const TBrinBlkArray *brinBlkArray;
int32_t brinBlkArrayIdx;
SBrinBlock brinBlock[1];
int32_t brinBlockIdx;
SBlockData blockData[1];
int32_t blockDataIdx;
} dataData[1];
struct {
SMemTable *memt;
TSDBKEY from[1];
SRBTreeIter iter[1];
STbData *tbData;
STbDataIter tbIter[1];
} memtData[1];
struct {
SSttFileReader *reader;
const TTombBlkArray *tombBlkArray;
int32_t tombBlkArrayIdx;
STombBlock tombBlock[1];
int32_t tombBlockIdx;
} sttTomb[1];
struct {
SDataFileReader *reader;
const TTombBlkArray *tombBlkArray;
int32_t tombBlkArrayIdx;
STombBlock tombBlock[1];
int32_t tombBlockIdx;
} dataTomb[1];
struct {
SMemTable *memt;
SRBTreeIter rbtIter[1];
STbData *tbData;
SDelData *delData;
} memtTomb[1];
};
};
static int32_t tsdbSttIterNext(STsdbIter *iter, const TABLEID *tbid) {
while (!iter->noMoreData) {
for (; iter->sttData->blockDataIdx < iter->sttData->blockData->nRow; iter->sttData->blockDataIdx++) {
int64_t version = iter->sttData->blockData->aVersion[iter->sttData->blockDataIdx];
if (iter->filterByVersion && (version < iter->range[0] || version > iter->range[1])) {
continue;
}
iter->row->suid = iter->sttData->blockData->suid;
iter->row->uid = iter->sttData->blockData->uid ? iter->sttData->blockData->uid
: iter->sttData->blockData->aUid[iter->sttData->blockDataIdx];
if (tbid && iter->row->suid == tbid->suid && iter->row->uid == tbid->uid) {
continue;
}
iter->row->row = tsdbRowFromBlockData(iter->sttData->blockData, iter->sttData->blockDataIdx);
iter->sttData->blockDataIdx++;
goto _exit;
}
if (iter->sttData->sttBlkArrayIdx >= TARRAY2_SIZE(iter->sttData->sttBlkArray)) {
iter->noMoreData = true;
break;
}
for (; iter->sttData->sttBlkArrayIdx < TARRAY2_SIZE(iter->sttData->sttBlkArray); iter->sttData->sttBlkArrayIdx++) {
const SSttBlk *sttBlk = TARRAY2_GET_PTR(iter->sttData->sttBlkArray, iter->sttData->sttBlkArrayIdx);
if (iter->filterByVersion && (sttBlk->maxVer < iter->range[0] || sttBlk->minVer > iter->range[1])) {
continue;
}
if (tbid && tbid->suid == sttBlk->suid && tbid->uid == sttBlk->minUid && tbid->uid == sttBlk->maxUid) {
continue;
}
int32_t code = tsdbSttFileReadBlockData(iter->sttData->reader, sttBlk, iter->sttData->blockData);
if (code) return code;
iter->sttData->blockDataIdx = 0;
iter->sttData->sttBlkArrayIdx++;
break;
}
}
_exit:
return 0;
}
static int32_t tsdbDataIterNext(STsdbIter *iter, const TABLEID *tbid) {
int32_t code;
while (!iter->noMoreData) {
for (;;) {
// SBlockData
for (; iter->dataData->blockDataIdx < iter->dataData->blockData->nRow; iter->dataData->blockDataIdx++) {
int64_t version = iter->dataData->blockData->aVersion[iter->dataData->blockDataIdx];
if (iter->filterByVersion && (version < iter->range[0] || version > iter->range[1])) {
continue;
}
if (tbid && tbid->suid == iter->dataData->blockData->suid && tbid->uid == iter->dataData->blockData->uid) {
iter->dataData->blockDataIdx = iter->dataData->blockData->nRow;
break;
}
iter->row->row = tsdbRowFromBlockData(iter->dataData->blockData, iter->dataData->blockDataIdx);
iter->dataData->blockDataIdx++;
goto _exit;
}
// SBrinBlock
if (iter->dataData->brinBlockIdx >= BRIN_BLOCK_SIZE(iter->dataData->brinBlock)) {
break;
}
for (; iter->dataData->brinBlockIdx < BRIN_BLOCK_SIZE(iter->dataData->brinBlock);
iter->dataData->brinBlockIdx++) {
SBrinRecord record[1];
tBrinBlockGet(iter->dataData->brinBlock, iter->dataData->brinBlockIdx, record);
if (iter->filterByVersion && (record->maxVer < iter->range[0] || record->minVer > iter->range[1])) {
continue;
}
if (tbid && tbid->suid == record->suid && tbid->uid == record->uid) {
continue;
}
iter->row->suid = record->suid;
iter->row->uid = record->uid;
code = tsdbDataFileReadBlockData(iter->dataData->reader, record, iter->dataData->blockData);
if (code) return code;
iter->dataData->blockDataIdx = 0;
iter->dataData->brinBlockIdx++;
break;
}
}
if (iter->dataData->brinBlkArrayIdx >= TARRAY2_SIZE(iter->dataData->brinBlkArray)) {
iter->noMoreData = true;
break;
}
for (; iter->dataData->brinBlkArrayIdx < TARRAY2_SIZE(iter->dataData->brinBlkArray);
iter->dataData->brinBlkArrayIdx++) {
const SBrinBlk *brinBlk = TARRAY2_GET_PTR(iter->dataData->brinBlkArray, iter->dataData->brinBlkArrayIdx);
if (iter->filterByVersion && (brinBlk->maxVer < iter->range[0] || brinBlk->minVer > iter->range[1])) {
continue;
}
if (tbid && tbid->uid == brinBlk->minTbid.uid && tbid->uid == brinBlk->maxTbid.uid) {
continue;
}
code = tsdbDataFileReadBrinBlock(iter->dataData->reader, brinBlk, iter->dataData->brinBlock);
if (code) return code;
iter->dataData->brinBlockIdx = 0;
iter->dataData->brinBlkArrayIdx++;
break;
}
}
_exit:
return 0;
}
static int32_t tsdbMemTableIterNext(STsdbIter *iter, const TABLEID *tbid) {
SRBTreeNode *node;
while (!iter->noMoreData) {
for (TSDBROW *row; iter->memtData->tbData && (row = tsdbTbDataIterGet(iter->memtData->tbIter));) {
if (tbid && tbid->suid == iter->memtData->tbData->suid && tbid->uid == iter->memtData->tbData->uid) {
iter->memtData->tbData = NULL;
break;
}
if (iter->filterByVersion) {
int64_t version = TSDBROW_VERSION(row);
if (version < iter->range[0] || version > iter->range[1]) {
continue;
}
}
iter->row->row = row[0];
tsdbTbDataIterNext(iter->memtData->tbIter);
goto _exit;
}
for (;;) {
node = tRBTreeIterNext(iter->memtData->iter);
if (!node) {
iter->noMoreData = true;
goto _exit;
}
iter->memtData->tbData = TCONTAINER_OF(node, STbData, rbtn);
if (tbid && tbid->suid == iter->memtData->tbData->suid && tbid->uid == iter->memtData->tbData->uid) {
continue;
} else {
iter->row->suid = iter->memtData->tbData->suid;
iter->row->uid = iter->memtData->tbData->uid;
tsdbTbDataIterOpen(iter->memtData->tbData, iter->memtData->from, 0, iter->memtData->tbIter);
break;
}
}
}
_exit:
return 0;
}
static int32_t tsdbDataTombIterNext(STsdbIter *iter, const TABLEID *tbid) {
while (!iter->noMoreData) {
for (; iter->dataTomb->tombBlockIdx < TOMB_BLOCK_SIZE(iter->dataTomb->tombBlock); iter->dataTomb->tombBlockIdx++) {
iter->record->suid = TARRAY2_GET(iter->dataTomb->tombBlock->suid, iter->dataTomb->tombBlockIdx);
iter->record->uid = TARRAY2_GET(iter->dataTomb->tombBlock->uid, iter->dataTomb->tombBlockIdx);
iter->record->version = TARRAY2_GET(iter->dataTomb->tombBlock->version, iter->dataTomb->tombBlockIdx);
if (iter->filterByVersion && (iter->record->version < iter->range[0] || iter->record->version > iter->range[1])) {
continue;
}
if (tbid && iter->record->suid == tbid->suid && iter->record->uid == tbid->uid) {
continue;
}
iter->record->skey = TARRAY2_GET(iter->dataTomb->tombBlock->skey, iter->dataTomb->tombBlockIdx);
iter->record->ekey = TARRAY2_GET(iter->dataTomb->tombBlock->ekey, iter->dataTomb->tombBlockIdx);
iter->dataTomb->tombBlockIdx++;
goto _exit;
}
if (iter->dataTomb->tombBlkArrayIdx >= TARRAY2_SIZE(iter->dataTomb->tombBlkArray)) {
iter->noMoreData = true;
goto _exit;
}
for (; iter->dataTomb->tombBlkArrayIdx < TARRAY2_SIZE(iter->dataTomb->tombBlkArray);
iter->dataTomb->tombBlkArrayIdx++) {
const STombBlk *tombBlk = TARRAY2_GET_PTR(iter->dataTomb->tombBlkArray, iter->dataTomb->tombBlkArrayIdx);
if (tbid && tbid->suid == tombBlk->minTbid.suid && tbid->uid == tombBlk->minTbid.uid &&
tbid->suid == tombBlk->maxTbid.suid && tbid->uid == tombBlk->maxTbid.uid) {
continue;
}
int32_t code = tsdbDataFileReadTombBlock(iter->dataTomb->reader, tombBlk, iter->dataTomb->tombBlock);
if (code) return code;
iter->dataTomb->tombBlockIdx = 0;
iter->dataTomb->tombBlkArrayIdx++;
break;
}
}
_exit:
return 0;
}
static int32_t tsdbMemTombIterNext(STsdbIter *iter, const TABLEID *tbid) {
while (!iter->noMoreData) {
for (; iter->memtTomb->delData;) {
if (tbid && tbid->uid == iter->memtTomb->tbData->uid) {
iter->memtTomb->delData = NULL;
break;
}
if (iter->filterByVersion &&
(iter->memtTomb->delData->version < iter->range[0] || iter->memtTomb->delData->version > iter->range[1])) {
continue;
}
iter->record->suid = iter->memtTomb->tbData->suid;
iter->record->uid = iter->memtTomb->tbData->uid;
iter->record->version = iter->memtTomb->delData->version;
iter->record->skey = iter->memtTomb->delData->sKey;
iter->record->ekey = iter->memtTomb->delData->eKey;
iter->memtTomb->delData = iter->memtTomb->delData->pNext;
goto _exit;
}
for (;;) {
SRBTreeNode *node = tRBTreeIterNext(iter->memtTomb->rbtIter);
if (node == NULL) {
iter->noMoreData = true;
goto _exit;
}
iter->memtTomb->tbData = TCONTAINER_OF(node, STbData, rbtn);
if (tbid && tbid->uid == iter->memtTomb->tbData->uid) {
continue;
} else {
iter->memtTomb->delData = iter->memtTomb->tbData->pHead;
break;
}
}
}
_exit:
return 0;
}
static int32_t tsdbSttIterOpen(STsdbIter *iter) {
int32_t code;
code = tsdbSttFileReadSttBlk(iter->sttData->reader, &iter->sttData->sttBlkArray);
if (code) return code;
if (TARRAY2_SIZE(iter->sttData->sttBlkArray) == 0) {
iter->noMoreData = true;
return 0;
}
iter->sttData->sttBlkArrayIdx = 0;
tBlockDataCreate(iter->sttData->blockData);
iter->sttData->blockDataIdx = 0;
return tsdbSttIterNext(iter, NULL);
}
static int32_t tsdbDataIterOpen(STsdbIter *iter) {
int32_t code;
// SBrinBlk
code = tsdbDataFileReadBrinBlk(iter->dataData->reader, &iter->dataData->brinBlkArray);
if (code) return code;
if (TARRAY2_SIZE(iter->dataData->brinBlkArray) == 0) {
iter->noMoreData = true;
return 0;
}
iter->dataData->brinBlkArrayIdx = 0;
// SBrinBlock
tBrinBlockInit(iter->dataData->brinBlock);
iter->dataData->brinBlockIdx = 0;
// SBlockData
tBlockDataCreate(iter->dataData->blockData);
iter->dataData->blockDataIdx = 0;
return tsdbDataIterNext(iter, NULL);
}
static int32_t tsdbMemTableIterOpen(STsdbIter *iter) {
if (iter->memtData->memt->nRow == 0) {
iter->noMoreData = true;
return 0;
}
iter->memtData->iter[0] = tRBTreeIterCreate(iter->memtData->memt->tbDataTree, 1);
return tsdbMemTableIterNext(iter, NULL);
}
static int32_t tsdbSttIterClose(STsdbIter *iter) {
tBlockDataDestroy(iter->sttData->blockData);
return 0;
}
static int32_t tsdbDataTombIterOpen(STsdbIter *iter) {
int32_t code;
code = tsdbDataFileReadTombBlk(iter->dataTomb->reader, &iter->dataTomb->tombBlkArray);
if (code) return code;
if (TARRAY2_SIZE(iter->dataTomb->tombBlkArray) == 0) {
iter->noMoreData = true;
return 0;
}
iter->dataTomb->tombBlkArrayIdx = 0;
tTombBlockInit(iter->dataTomb->tombBlock);
iter->dataTomb->tombBlockIdx = 0;
return tsdbDataTombIterNext(iter, NULL);
}
static int32_t tsdbMemTombIterOpen(STsdbIter *iter) {
int32_t code;
if (iter->memtTomb->memt->nDel == 0) {
iter->noMoreData = true;
return 0;
}
iter->memtTomb->rbtIter[0] = tRBTreeIterCreate(iter->memtTomb->memt->tbDataTree, 1);
return tsdbMemTombIterNext(iter, NULL);
}
static int32_t tsdbDataIterClose(STsdbIter *iter) {
tBrinBlockDestroy(iter->dataData->brinBlock);
tBlockDataDestroy(iter->dataData->blockData);
return 0;
}
static int32_t tsdbMemTableIterClose(STsdbIter *iter) { return 0; }
static int32_t tsdbSttTombIterNext(STsdbIter *iter, const TABLEID *tbid) {
while (!iter->noMoreData) {
for (; iter->sttTomb->tombBlockIdx < TOMB_BLOCK_SIZE(iter->sttTomb->tombBlock); iter->sttTomb->tombBlockIdx++) {
iter->record->suid = TARRAY2_GET(iter->sttTomb->tombBlock->suid, iter->sttTomb->tombBlockIdx);
iter->record->uid = TARRAY2_GET(iter->sttTomb->tombBlock->uid, iter->sttTomb->tombBlockIdx);
iter->record->version = TARRAY2_GET(iter->sttTomb->tombBlock->version, iter->sttTomb->tombBlockIdx);
if (iter->filterByVersion && (iter->record->version < iter->range[0] || iter->record->version > iter->range[1])) {
continue;
}
if (tbid && iter->record->suid == tbid->suid && iter->record->uid == tbid->uid) {
continue;
}
iter->record->skey = TARRAY2_GET(iter->sttTomb->tombBlock->skey, iter->sttTomb->tombBlockIdx);
iter->record->ekey = TARRAY2_GET(iter->sttTomb->tombBlock->ekey, iter->sttTomb->tombBlockIdx);
iter->sttTomb->tombBlockIdx++;
goto _exit;
}
if (iter->sttTomb->tombBlkArrayIdx >= TARRAY2_SIZE(iter->sttTomb->tombBlkArray)) {
iter->noMoreData = true;
goto _exit;
}
for (; iter->sttTomb->tombBlkArrayIdx < TARRAY2_SIZE(iter->sttTomb->tombBlkArray);
iter->sttTomb->tombBlkArrayIdx++) {
const STombBlk *tombBlk = TARRAY2_GET_PTR(iter->sttTomb->tombBlkArray, iter->sttTomb->tombBlkArrayIdx);
if (iter->filterByVersion && (tombBlk->maxVer < iter->range[0] || tombBlk->minVer > iter->range[1])) {
continue;
}
if (tbid && tbid->suid == tombBlk->minTbid.suid && tbid->uid == tombBlk->minTbid.uid &&
tbid->suid == tombBlk->maxTbid.suid && tbid->uid == tombBlk->maxTbid.uid) {
continue;
}
int32_t code = tsdbSttFileReadTombBlock(iter->sttTomb->reader, tombBlk, iter->sttTomb->tombBlock);
if (code) return code;
iter->sttTomb->tombBlockIdx = 0;
iter->sttTomb->tombBlkArrayIdx++;
break;
}
}
_exit:
return 0;
}
static int32_t tsdbSttTombIterOpen(STsdbIter *iter) {
int32_t code;
code = tsdbSttFileReadTombBlk(iter->sttTomb->reader, &iter->sttTomb->tombBlkArray);
if (code) return code;
if (TARRAY2_SIZE(iter->sttTomb->tombBlkArray) == 0) {
iter->noMoreData = true;
return 0;
}
iter->sttTomb->tombBlkArrayIdx = 0;
tTombBlockInit(iter->sttTomb->tombBlock);
iter->sttTomb->tombBlockIdx = 0;
return tsdbSttTombIterNext(iter, NULL);
}
int32_t tsdbIterOpen(const STsdbIterConfig *config, STsdbIter **iter) {
int32_t code;
iter[0] = taosMemoryCalloc(1, sizeof(*iter[0]));
if (iter[0] == NULL) {
return TSDB_CODE_OUT_OF_MEMORY;
}
iter[0]->type = config->type;
iter[0]->noMoreData = false;
iter[0]->filterByVersion = config->filterByVersion;
if (iter[0]->filterByVersion) {
iter[0]->range[0] = config->verRange[0];
iter[0]->range[1] = config->verRange[1];
}
switch (config->type) {
case TSDB_ITER_TYPE_STT:
iter[0]->sttData->reader = config->sttReader;
code = tsdbSttIterOpen(iter[0]);
break;
case TSDB_ITER_TYPE_DATA:
iter[0]->dataData->reader = config->dataReader;
code = tsdbDataIterOpen(iter[0]);
break;
case TSDB_ITER_TYPE_MEMT:
iter[0]->memtData->memt = config->memt;
iter[0]->memtData->from[0] = config->from[0];
code = tsdbMemTableIterOpen(iter[0]);
break;
case TSDB_ITER_TYPE_STT_TOMB:
iter[0]->sttTomb->reader = config->sttReader;
code = tsdbSttTombIterOpen(iter[0]);
break;
case TSDB_ITER_TYPE_DATA_TOMB:
iter[0]->dataTomb->reader = config->dataReader;
code = tsdbDataTombIterOpen(iter[0]);
break;
case TSDB_ITER_TYPE_MEMT_TOMB:
iter[0]->memtTomb->memt = config->memt;
code = tsdbMemTombIterOpen(iter[0]);
break;
default:
code = TSDB_CODE_INVALID_PARA;
ASSERTS(false, "Not implemented");
}
if (code) {
taosMemoryFree(iter[0]);
iter[0] = NULL;
}
return code;
}
static int32_t tsdbSttTombIterClose(STsdbIter *iter) {
tTombBlockDestroy(iter->sttTomb->tombBlock);
return 0;
}
static int32_t tsdbDataTombIterClose(STsdbIter *iter) {
tTombBlockDestroy(iter->dataTomb->tombBlock);
return 0;
}
int32_t tsdbIterClose(STsdbIter **iter) {
switch (iter[0]->type) {
case TSDB_ITER_TYPE_STT:
tsdbSttIterClose(iter[0]);
break;
case TSDB_ITER_TYPE_DATA:
tsdbDataIterClose(iter[0]);
break;
case TSDB_ITER_TYPE_MEMT:
tsdbMemTableIterClose(iter[0]);
break;
case TSDB_ITER_TYPE_STT_TOMB:
tsdbSttTombIterClose(iter[0]);
break;
case TSDB_ITER_TYPE_DATA_TOMB:
tsdbDataTombIterClose(iter[0]);
break;
case TSDB_ITER_TYPE_MEMT_TOMB:
break;
default:
ASSERT(false);
}
taosMemoryFree(iter[0]);
iter[0] = NULL;
return 0;
}
int32_t tsdbIterNext(STsdbIter *iter) {
switch (iter->type) {
case TSDB_ITER_TYPE_STT:
return tsdbSttIterNext(iter, NULL);
case TSDB_ITER_TYPE_DATA:
return tsdbDataIterNext(iter, NULL);
case TSDB_ITER_TYPE_MEMT:
return tsdbMemTableIterNext(iter, NULL);
case TSDB_ITER_TYPE_STT_TOMB:
return tsdbSttTombIterNext(iter, NULL);
case TSDB_ITER_TYPE_DATA_TOMB:
return tsdbDataTombIterNext(iter, NULL);
case TSDB_ITER_TYPE_MEMT_TOMB:
return tsdbMemTombIterNext(iter, NULL);
default:
ASSERT(false);
}
return 0;
}
static int32_t tsdbIterSkipTableData(STsdbIter *iter, const TABLEID *tbid) {
switch (iter->type) {
case TSDB_ITER_TYPE_STT:
return tsdbSttIterNext(iter, tbid);
case TSDB_ITER_TYPE_DATA:
return tsdbDataIterNext(iter, tbid);
case TSDB_ITER_TYPE_MEMT:
return tsdbMemTableIterNext(iter, tbid);
case TSDB_ITER_TYPE_STT_TOMB:
return tsdbSttTombIterNext(iter, tbid);
case TSDB_ITER_TYPE_DATA_TOMB:
return tsdbDataTombIterNext(iter, tbid);
case TSDB_ITER_TYPE_MEMT_TOMB:
return tsdbMemTombIterNext(iter, tbid);
default:
ASSERT(false);
}
return 0;
}
static int32_t tsdbIterCmprFn(const SRBTreeNode *n1, const SRBTreeNode *n2) {
STsdbIter *iter1 = TCONTAINER_OF(n1, STsdbIter, node);
STsdbIter *iter2 = TCONTAINER_OF(n2, STsdbIter, node);
return tRowInfoCmprFn(&iter1->row, &iter2->row);
}
static int32_t tsdbTombIterCmprFn(const SRBTreeNode *n1, const SRBTreeNode *n2) {
STsdbIter *iter1 = TCONTAINER_OF(n1, STsdbIter, node);
STsdbIter *iter2 = TCONTAINER_OF(n2, STsdbIter, node);
if (iter1->record->suid < iter2->record->suid) {
return -1;
} else if (iter1->record->suid > iter2->record->suid) {
return 1;
}
if (iter1->record->uid < iter2->record->uid) {
return -1;
} else if (iter1->record->uid > iter2->record->uid) {
return 1;
}
if (iter1->record->version < iter2->record->version) {
return -1;
} else if (iter1->record->version > iter2->record->version) {
return 1;
}
return 0;
}
// SIterMerger ================
struct SIterMerger {
bool isTomb;
STsdbIter *iter;
SRBTree iterTree[1];
};
int32_t tsdbIterMergerOpen(const TTsdbIterArray *iterArray, SIterMerger **merger, bool isTomb) {
STsdbIter *iter;
SRBTreeNode *node;
merger[0] = taosMemoryCalloc(1, sizeof(*merger[0]));
if (merger[0] == NULL) {
return TSDB_CODE_OUT_OF_MEMORY;
}
merger[0]->isTomb = isTomb;
if (isTomb) {
tRBTreeCreate(merger[0]->iterTree, tsdbTombIterCmprFn);
} else {
tRBTreeCreate(merger[0]->iterTree, tsdbIterCmprFn);
}
TARRAY2_FOREACH(iterArray, iter) {
if (iter->noMoreData) continue;
node = tRBTreePut(merger[0]->iterTree, iter->node);
ASSERT(node);
}
return tsdbIterMergerNext(merger[0]);
}
int32_t tsdbIterMergerClose(SIterMerger **merger) {
if (merger[0]) {
taosMemoryFree(merger[0]);
merger[0] = NULL;
}
return 0;
}
int32_t tsdbIterMergerNext(SIterMerger *merger) {
int32_t code;
int32_t c;
SRBTreeNode *node;
if (merger->iter) {
code = tsdbIterNext(merger->iter);
if (code) return code;
if (merger->iter->noMoreData) {
merger->iter = NULL;
} else if ((node = tRBTreeMin(merger->iterTree))) {
c = merger->iterTree->cmprFn(merger->iter->node, node);
ASSERT(c);
if (c > 0) {
node = tRBTreePut(merger->iterTree, merger->iter->node);
ASSERT(node);
merger->iter = NULL;
}
}
}
if (merger->iter == NULL && (node = tRBTreeDropMin(merger->iterTree))) {
merger->iter = TCONTAINER_OF(node, STsdbIter, node);
}
return 0;
}
SRowInfo *tsdbIterMergerGetData(SIterMerger *merger) {
ASSERT(!merger->isTomb);
return merger->iter ? merger->iter->row : NULL;
}
STombRecord *tsdbIterMergerGetTombRecord(SIterMerger *merger) {
ASSERT(merger->isTomb);
return merger->iter ? merger->iter->record : NULL;
}
int32_t tsdbIterMergerSkipTableData(SIterMerger *merger, const TABLEID *tbid) {
int32_t code;
int32_t c;
SRBTreeNode *node;
while (merger->iter && tbid->suid == merger->iter->row->suid && tbid->uid == merger->iter->row->uid) {
int32_t code = tsdbIterSkipTableData(merger->iter, tbid);
if (code) return code;
if (merger->iter->noMoreData) {
merger->iter = NULL;
} else if ((node = tRBTreeMin(merger->iterTree))) {
c = merger->iterTree->cmprFn(merger->iter->node, node);
ASSERT(c);
if (c > 0) {
node = tRBTreePut(merger->iterTree, merger->iter->node);
ASSERT(node);
merger->iter = NULL;
}
}
if (!merger->iter && (node = tRBTreeDropMin(merger->iterTree))) {
merger->iter = TCONTAINER_OF(node, STsdbIter, node);
}
}
return 0;
}

View File

@ -0,0 +1,73 @@
/*
* Copyright (c) 2019 TAOS Data, Inc. <jhtao@taosdata.com>
*
* This program is free software: you can use, redistribute, and/or modify
* it under the terms of the GNU Affero General Public License, version 3
* or later ("AGPL"), as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include "trbtree.h"
#include "tsdbDataFileRW.h"
#include "tsdbDef.h"
#include "tsdbSttFileRW.h"
#ifndef _TSDB_ITER_H_
#define _TSDB_ITER_H_
#ifdef __cplusplus
extern "C" {
#endif
typedef struct SIterMerger SIterMerger;
typedef struct STsdbIter STsdbIter;
typedef TARRAY2(STsdbIter *) TTsdbIterArray;
typedef enum {
TSDB_ITER_TYPE_STT = 1,
TSDB_ITER_TYPE_DATA,
TSDB_ITER_TYPE_MEMT,
TSDB_ITER_TYPE_STT_TOMB,
TSDB_ITER_TYPE_DATA_TOMB,
TSDB_ITER_TYPE_MEMT_TOMB,
} EIterType;
typedef struct {
EIterType type;
union {
SSttFileReader *sttReader; // TSDB_ITER_TYPE_STT || TSDB_ITER_TYPE_STT_TOMB
SDataFileReader *dataReader; // TSDB_ITER_TYPE_DATA || TSDB_ITER_TYPE_DATA_TOMB
struct {
SMemTable *memt; // TSDB_ITER_TYPE_MEMT_TOMB
TSDBKEY from[1];
}; // TSDB_ITER_TYPE_MEMT
};
bool filterByVersion;
int64_t verRange[2];
} STsdbIterConfig;
// STsdbIter ===============
int32_t tsdbIterOpen(const STsdbIterConfig *config, STsdbIter **iter);
int32_t tsdbIterClose(STsdbIter **iter);
int32_t tsdbIterNext(STsdbIter *iter);
// SIterMerger ===============
int32_t tsdbIterMergerOpen(const TTsdbIterArray *iterArray, SIterMerger **merger, bool isTomb);
int32_t tsdbIterMergerClose(SIterMerger **merger);
int32_t tsdbIterMergerNext(SIterMerger *merger);
int32_t tsdbIterMergerSkipTableData(SIterMerger *merger, const TABLEID *tbid);
SRowInfo *tsdbIterMergerGetData(SIterMerger *merger);
STombRecord *tsdbIterMergerGetTombRecord(SIterMerger *merger);
#ifdef __cplusplus
}
#endif
#endif /*_TSDB_ITER_H_*/

View File

@ -38,6 +38,16 @@ static int32_t tsdbInsertRowDataToTable(SMemTable *pMemTable, STbData *pTbData,
static int32_t tsdbInsertColDataToTable(SMemTable *pMemTable, STbData *pTbData, int64_t version,
SSubmitTbData *pSubmitTbData, int32_t *affectedRows);
static int32_t tTbDataCmprFn(const SRBTreeNode *n1, const SRBTreeNode *n2) {
STbData *tbData1 = TCONTAINER_OF(n1, STbData, rbtn);
STbData *tbData2 = TCONTAINER_OF(n2, STbData, rbtn);
if (tbData1->suid < tbData2->suid) return -1;
if (tbData1->suid > tbData2->suid) return 1;
if (tbData1->uid < tbData2->uid) return -1;
if (tbData1->uid > tbData2->uid) return 1;
return 0;
}
int32_t tsdbMemTableCreate(STsdb *pTsdb, SMemTable **ppMemTable) {
int32_t code = 0;
SMemTable *pMemTable = NULL;
@ -66,6 +76,7 @@ int32_t tsdbMemTableCreate(STsdb *pTsdb, SMemTable **ppMemTable) {
goto _err;
}
vnodeBufPoolRef(pMemTable->pPool);
tRBTreeCreate(pMemTable->tbDataTree, tTbDataCmprFn);
*ppMemTable = pMemTable;
return code;
@ -406,6 +417,8 @@ static int32_t tsdbGetOrCreateTbData(SMemTable *pMemTable, tb_uid_t suid, tb_uid
pMemTable->aBucket[idx] = pTbData;
pMemTable->nTbData++;
tRBTreePut(pMemTable->tbDataTree, pTbData->rbtn);
taosWUnLockLatch(&pMemTable->latch);
_exit:

View File

@ -0,0 +1,454 @@
/*
* Copyright (c) 2019 TAOS Data, Inc. <jhtao@taosdata.com>
*
* This program is free software: you can use, redistribute, and/or modify
* it under the terms of the GNU Affero General Public License, version 3
* or later ("AGPL"), as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include "tsdbMerge.h"
typedef struct {
STsdb *tsdb;
TFileSetArray *fsetArr;
int32_t sttTrigger;
int32_t maxRow;
int32_t minRow;
int32_t szPage;
int8_t cmprAlg;
int64_t compactVersion;
int64_t cid;
// context
struct {
bool opened;
int64_t now;
STFileSet *fset;
bool toData;
int32_t level;
SSttLvl *lvl;
TABLEID tbid[1];
} ctx[1];
TFileOpArray fopArr[1];
// reader
TSttFileReaderArray sttReaderArr[1];
// iter
TTsdbIterArray dataIterArr[1];
SIterMerger *dataIterMerger;
TTsdbIterArray tombIterArr[1];
SIterMerger *tombIterMerger;
// writer
SFSetWriter *writer;
} SMerger;
static int32_t tsdbMergerOpen(SMerger *merger) {
merger->ctx->now = taosGetTimestampSec();
merger->maxRow = merger->tsdb->pVnode->config.tsdbCfg.maxRows;
merger->minRow = merger->tsdb->pVnode->config.tsdbCfg.minRows;
merger->szPage = merger->tsdb->pVnode->config.tsdbPageSize;
merger->cmprAlg = merger->tsdb->pVnode->config.tsdbCfg.compression;
merger->compactVersion = INT64_MAX;
merger->cid = tsdbFSAllocEid(merger->tsdb->pFS);
merger->ctx->opened = true;
return 0;
}
static int32_t tsdbMergerClose(SMerger *merger) {
int32_t code = 0;
int32_t lino = 0;
SVnode *pVnode = merger->tsdb->pVnode;
// edit file system
code = tsdbFSEditBegin(merger->tsdb->pFS, merger->fopArr, TSDB_FEDIT_MERGE);
TSDB_CHECK_CODE(code, lino, _exit);
taosThreadRwlockWrlock(&merger->tsdb->rwLock);
code = tsdbFSEditCommit(merger->tsdb->pFS);
if (code) {
taosThreadRwlockUnlock(&merger->tsdb->rwLock);
TSDB_CHECK_CODE(code, lino, _exit);
}
taosThreadRwlockUnlock(&merger->tsdb->rwLock);
ASSERT(merger->writer == NULL);
ASSERT(merger->dataIterMerger == NULL);
ASSERT(merger->tombIterMerger == NULL);
ASSERT(TARRAY2_SIZE(merger->dataIterArr) == 0);
ASSERT(TARRAY2_SIZE(merger->tombIterArr) == 0);
ASSERT(TARRAY2_SIZE(merger->sttReaderArr) == 0);
// clear the merge
TARRAY2_DESTROY(merger->tombIterArr, NULL);
TARRAY2_DESTROY(merger->dataIterArr, NULL);
TARRAY2_DESTROY(merger->sttReaderArr, NULL);
TARRAY2_DESTROY(merger->fopArr, NULL);
_exit:
if (code) {
TSDB_ERROR_LOG(TD_VID(pVnode), lino, code);
}
return code;
}
static int32_t tsdbMergeFileSetBeginOpenReader(SMerger *merger) {
int32_t code = 0;
int32_t lino = 0;
merger->ctx->toData = true;
merger->ctx->level = 0;
// TODO: optimize merge strategy
for (int32_t i = 0;; ++i) {
if (i >= TARRAY2_SIZE(merger->ctx->fset->lvlArr)) {
merger->ctx->lvl = NULL;
break;
}
merger->ctx->lvl = TARRAY2_GET(merger->ctx->fset->lvlArr, i);
if (merger->ctx->lvl->level != merger->ctx->level ||
TARRAY2_SIZE(merger->ctx->lvl->fobjArr) + 1 < merger->sttTrigger) {
merger->ctx->toData = false;
merger->ctx->lvl = NULL;
break;
}
merger->ctx->level++;
STFileObj *fobj;
int32_t numFile = 0;
TARRAY2_FOREACH(merger->ctx->lvl->fobjArr, fobj) {
if (numFile == merger->sttTrigger) {
break;
}
STFileOp op = {
.optype = TSDB_FOP_REMOVE,
.fid = merger->ctx->fset->fid,
.of = fobj->f[0],
};
code = TARRAY2_APPEND(merger->fopArr, op);
TSDB_CHECK_CODE(code, lino, _exit);
SSttFileReader *reader;
SSttFileReaderConfig config = {
.tsdb = merger->tsdb,
.szPage = merger->szPage,
.file[0] = fobj->f[0],
};
code = tsdbSttFileReaderOpen(fobj->fname, &config, &reader);
TSDB_CHECK_CODE(code, lino, _exit);
code = TARRAY2_APPEND(merger->sttReaderArr, reader);
TSDB_CHECK_CODE(code, lino, _exit);
numFile++;
}
}
_exit:
if (code) {
TSDB_ERROR_LOG(TD_VID(merger->tsdb->pVnode), lino, code);
}
return code;
}
static int32_t tsdbMergeFileSetBeginOpenIter(SMerger *merger) {
int32_t code = 0;
int32_t lino = 0;
int32_t vid = TD_VID(merger->tsdb->pVnode);
SSttFileReader *sttReader;
TARRAY2_FOREACH(merger->sttReaderArr, sttReader) {
STsdbIter *iter;
STsdbIterConfig config = {0};
// data iter
config.type = TSDB_ITER_TYPE_STT;
config.sttReader = sttReader;
code = tsdbIterOpen(&config, &iter);
TSDB_CHECK_CODE(code, lino, _exit);
code = TARRAY2_APPEND(merger->dataIterArr, iter);
TSDB_CHECK_CODE(code, lino, _exit);
// tomb iter
config.type = TSDB_ITER_TYPE_STT_TOMB;
config.sttReader = sttReader;
code = tsdbIterOpen(&config, &iter);
TSDB_CHECK_CODE(code, lino, _exit);
code = TARRAY2_APPEND(merger->tombIterArr, iter);
TSDB_CHECK_CODE(code, lino, _exit);
}
code = tsdbIterMergerOpen(merger->dataIterArr, &merger->dataIterMerger, false);
TSDB_CHECK_CODE(code, lino, _exit);
code = tsdbIterMergerOpen(merger->tombIterArr, &merger->tombIterMerger, true);
TSDB_CHECK_CODE(code, lino, _exit);
_exit:
if (code) {
TSDB_ERROR_LOG(vid, lino, code);
}
return code;
}
static int32_t tsdbMergeFileSetBeginOpenWriter(SMerger *merger) {
int32_t code = 0;
int32_t lino = 0;
int32_t vid = TD_VID(merger->tsdb->pVnode);
SDiskID did;
int32_t level = tsdbFidLevel(merger->ctx->fset->fid, &merger->tsdb->keepCfg, merger->ctx->now);
if (tfsAllocDisk(merger->tsdb->pVnode->pTfs, level, &did) < 0) {
code = TSDB_CODE_FS_NO_VALID_DISK;
TSDB_CHECK_CODE(code, lino, _exit);
}
tfsMkdirRecurAt(merger->tsdb->pVnode->pTfs, merger->tsdb->path, did);
SFSetWriterConfig config = {
.tsdb = merger->tsdb,
.toSttOnly = true,
.compactVersion = merger->compactVersion,
.minRow = merger->minRow,
.maxRow = merger->maxRow,
.szPage = merger->szPage,
.cmprAlg = merger->cmprAlg,
.fid = merger->ctx->fset->fid,
.cid = merger->cid,
.did = did,
.level = merger->ctx->level,
};
if (merger->ctx->toData) {
config.toSttOnly = false;
for (int32_t ftype = 0; ftype < TSDB_FTYPE_MAX; ++ftype) {
if (merger->ctx->fset->farr[ftype]) {
config.files[ftype].exist = true;
config.files[ftype].file = merger->ctx->fset->farr[ftype]->f[0];
} else {
config.files[ftype].exist = false;
}
}
}
code = tsdbFSetWriterOpen(&config, &merger->writer);
TSDB_CHECK_CODE(code, lino, _exit);
_exit:
if (code) {
TSDB_ERROR_LOG(vid, lino, code);
}
return code;
}
static int32_t tsdbMergeFileSetBegin(SMerger *merger) {
int32_t code = 0;
int32_t lino = 0;
ASSERT(TARRAY2_SIZE(merger->sttReaderArr) == 0);
ASSERT(TARRAY2_SIZE(merger->dataIterArr) == 0);
ASSERT(merger->dataIterMerger == NULL);
ASSERT(merger->writer == NULL);
merger->ctx->tbid->suid = 0;
merger->ctx->tbid->uid = 0;
// open reader
code = tsdbMergeFileSetBeginOpenReader(merger);
TSDB_CHECK_CODE(code, lino, _exit);
// open iterator
code = tsdbMergeFileSetBeginOpenIter(merger);
TSDB_CHECK_CODE(code, lino, _exit);
// open writer
code = tsdbMergeFileSetBeginOpenWriter(merger);
TSDB_CHECK_CODE(code, lino, _exit);
_exit:
if (code) {
TSDB_ERROR_LOG(TD_VID(merger->tsdb->pVnode), lino, code);
}
return code;
}
static int32_t tsdbMergeFileSetEndCloseWriter(SMerger *merger) {
return tsdbFSetWriterClose(&merger->writer, 0, merger->fopArr);
}
static int32_t tsdbMergeFileSetEndCloseIter(SMerger *merger) {
tsdbIterMergerClose(&merger->tombIterMerger);
TARRAY2_CLEAR(merger->tombIterArr, tsdbIterClose);
tsdbIterMergerClose(&merger->dataIterMerger);
TARRAY2_CLEAR(merger->dataIterArr, tsdbIterClose);
return 0;
}
static int32_t tsdbMergeFileSetEndCloseReader(SMerger *merger) {
TARRAY2_CLEAR(merger->sttReaderArr, tsdbSttFileReaderClose);
return 0;
}
static int32_t tsdbMergeFileSetEnd(SMerger *merger) {
int32_t code = 0;
int32_t lino = 0;
code = tsdbMergeFileSetEndCloseWriter(merger);
TSDB_CHECK_CODE(code, lino, _exit);
code = tsdbMergeFileSetEndCloseIter(merger);
TSDB_CHECK_CODE(code, lino, _exit);
code = tsdbMergeFileSetEndCloseReader(merger);
TSDB_CHECK_CODE(code, lino, _exit);
_exit:
if (code) {
TSDB_ERROR_LOG(TD_VID(merger->tsdb->pVnode), lino, code);
}
return code;
}
static int32_t tsdbMergeFileSet(SMerger *merger, STFileSet *fset) {
int32_t code = 0;
int32_t lino = 0;
merger->ctx->fset = fset;
code = tsdbMergeFileSetBegin(merger);
TSDB_CHECK_CODE(code, lino, _exit);
// data
SMetaInfo info;
SRowInfo *row;
merger->ctx->tbid->suid = 0;
merger->ctx->tbid->uid = 0;
while ((row = tsdbIterMergerGetData(merger->dataIterMerger)) != NULL) {
if (row->uid != merger->ctx->tbid->uid) {
merger->ctx->tbid->uid = row->uid;
merger->ctx->tbid->suid = row->suid;
if (metaGetInfo(merger->tsdb->pVnode->pMeta, row->uid, &info, NULL) != 0) {
code = tsdbIterMergerSkipTableData(merger->dataIterMerger, merger->ctx->tbid);
TSDB_CHECK_CODE(code, lino, _exit);
continue;
}
}
code = tsdbFSetWriteRow(merger->writer, row);
TSDB_CHECK_CODE(code, lino, _exit);
code = tsdbIterMergerNext(merger->dataIterMerger);
TSDB_CHECK_CODE(code, lino, _exit);
}
// tomb
merger->ctx->tbid->suid = 0;
merger->ctx->tbid->uid = 0;
for (STombRecord *record; (record = tsdbIterMergerGetTombRecord(merger->tombIterMerger)) != NULL;) {
if (record->uid != merger->ctx->tbid->uid) {
merger->ctx->tbid->uid = record->uid;
merger->ctx->tbid->suid = record->suid;
if (metaGetInfo(merger->tsdb->pVnode->pMeta, record->uid, &info, NULL) != 0) {
code = tsdbIterMergerSkipTableData(merger->tombIterMerger, merger->ctx->tbid);
TSDB_CHECK_CODE(code, lino, _exit);
continue;
}
}
code = tsdbFSetWriteTombRecord(merger->writer, record);
TSDB_CHECK_CODE(code, lino, _exit);
code = tsdbIterMergerNext(merger->tombIterMerger);
TSDB_CHECK_CODE(code, lino, _exit);
}
code = tsdbMergeFileSetEnd(merger);
TSDB_CHECK_CODE(code, lino, _exit);
_exit:
if (code) {
tsdbError("vgId:%d %s failed at line %d since %s", TD_VID(merger->tsdb->pVnode), __func__, lino, tstrerror(code));
} else {
tsdbDebug("vgId:%d %s done, fid:%d", TD_VID(merger->tsdb->pVnode), __func__, fset->fid);
}
return code;
}
static int32_t tsdbDoMerge(SMerger *merger) {
int32_t code = 0;
int32_t lino = 0;
STFileSet *fset;
TARRAY2_FOREACH(merger->fsetArr, fset) {
if (TARRAY2_SIZE(fset->lvlArr) == 0) continue;
SSttLvl *lvl = TARRAY2_FIRST(fset->lvlArr);
if (lvl->level != 0 || TARRAY2_SIZE(lvl->fobjArr) < merger->sttTrigger) continue;
if (!merger->ctx->opened) {
code = tsdbMergerOpen(merger);
TSDB_CHECK_CODE(code, lino, _exit);
}
code = tsdbMergeFileSet(merger, fset);
TSDB_CHECK_CODE(code, lino, _exit);
}
if (merger->ctx->opened) {
code = tsdbMergerClose(merger);
TSDB_CHECK_CODE(code, lino, _exit);
}
_exit:
if (code) {
TSDB_ERROR_LOG(TD_VID(merger->tsdb->pVnode), lino, code);
} else {
tsdbDebug("vgId:%d %s done", TD_VID(merger->tsdb->pVnode), __func__);
}
return code;
}
int32_t tsdbMerge(void *arg) {
int32_t code = 0;
int32_t lino = 0;
STsdb *tsdb = (STsdb *)arg;
SMerger merger[1] = {{
.tsdb = tsdb,
.sttTrigger = tsdb->pVnode->config.sttTrigger,
}};
ASSERT(merger->sttTrigger > 1);
code = tsdbFSCreateCopySnapshot(tsdb->pFS, &merger->fsetArr);
TSDB_CHECK_CODE(code, lino, _exit);
code = tsdbDoMerge(merger);
TSDB_CHECK_CODE(code, lino, _exit);
tsdbFSDestroyCopySnapshot(&merger->fsetArr);
_exit:
if (code) {
TSDB_ERROR_LOG(TD_VID(tsdb->pVnode), lino, code);
} else if (merger->ctx->opened) {
tsdbDebug("vgId:%d %s done", TD_VID(tsdb->pVnode), __func__);
}
return code;
}

View File

@ -0,0 +1,40 @@
/*
* Copyright (c) 2019 TAOS Data, Inc. <jhtao@taosdata.com>
*
* This program is free software: you can use, redistribute, and/or modify
* it under the terms of the GNU Affero General Public License, version 3
* or later ("AGPL"), as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include "tsdbDataFileRW.h"
#include "tsdbFS2.h"
#include "tsdbFSetRW.h"
#include "tsdbIter.h"
#include "tsdbSttFileRW.h"
#include "tsdbUtil2.h"
#ifndef _TD_TSDB_MERGE_H_
#define _TD_TSDB_MERGE_H_
#ifdef __cplusplus
extern "C" {
#endif
/* Exposed Handle */
/* Exposed APIs */
/* Exposed Structs */
#ifdef __cplusplus
}
#endif
#endif /*_TD_TSDB_MERGE_H_*/

View File

@ -14,6 +14,11 @@
*/
#include "tsdb.h"
#include "tsdbFSet2.h"
#include "tsdbReadUtil.h"
#include "tsdbSttFileRW.h"
static void tLDataIterClose2(SLDataIter *pIter);
// SLDataIter =================================================
SSttBlockLoadInfo *tCreateLastBlockLoadInfo(STSchema *pSchema, int16_t *colList, int32_t numOfCols,
@ -24,8 +29,6 @@ SSttBlockLoadInfo *tCreateLastBlockLoadInfo(STSchema *pSchema, int16_t *colList,
return NULL;
}
pLoadInfo->numOfStt = numOfSttTrigger;
for (int32_t i = 0; i < numOfSttTrigger; ++i) {
pLoadInfo[i].blockIndex[0] = -1;
pLoadInfo[i].blockIndex[1] = -1;
@ -50,8 +53,37 @@ SSttBlockLoadInfo *tCreateLastBlockLoadInfo(STSchema *pSchema, int16_t *colList,
return pLoadInfo;
}
SSttBlockLoadInfo *tCreateOneLastBlockLoadInfo(STSchema *pSchema, int16_t *colList, int32_t numOfCols) {
SSttBlockLoadInfo *pLoadInfo = taosMemoryCalloc(1, sizeof(SSttBlockLoadInfo));
if (pLoadInfo == NULL) {
terrno = TSDB_CODE_OUT_OF_MEMORY;
return NULL;
}
pLoadInfo->blockIndex[0] = -1;
pLoadInfo->blockIndex[1] = -1;
pLoadInfo->currentLoadBlockIndex = 1;
int32_t code = tBlockDataCreate(&pLoadInfo->blockData[0]);
if (code) {
terrno = code;
}
code = tBlockDataCreate(&pLoadInfo->blockData[1]);
if (code) {
terrno = code;
}
pLoadInfo->aSttBlk = taosArrayInit(4, sizeof(SSttBlk));
pLoadInfo->pSchema = pSchema;
pLoadInfo->colIds = colList;
pLoadInfo->numOfCols = numOfCols;
return pLoadInfo;
}
void resetLastBlockLoadInfo(SSttBlockLoadInfo *pLoadInfo) {
for (int32_t i = 0; i < pLoadInfo->numOfStt; ++i) {
for (int32_t i = 0; i < 1; ++i) {
pLoadInfo[i].currentLoadBlockIndex = 1;
pLoadInfo[i].blockIndex[0] = -1;
pLoadInfo[i].blockIndex[1] = -1;
@ -65,18 +97,24 @@ void resetLastBlockLoadInfo(SSttBlockLoadInfo *pLoadInfo) {
}
void getLastBlockLoadInfo(SSttBlockLoadInfo *pLoadInfo, int64_t *blocks, double *el) {
for (int32_t i = 0; i < pLoadInfo->numOfStt; ++i) {
for (int32_t i = 0; i < 1; ++i) {
*el += pLoadInfo[i].elapsedTime;
*blocks += pLoadInfo[i].loadBlocks;
}
}
static void freeTombBlock(void *param) {
STombBlock **pTombBlock = (STombBlock **)param;
tTombBlockDestroy(*pTombBlock);
taosMemoryFree(*pTombBlock);
}
void *destroyLastBlockLoadInfo(SSttBlockLoadInfo *pLoadInfo) {
if (pLoadInfo == NULL) {
return NULL;
}
for (int32_t i = 0; i < pLoadInfo->numOfStt; ++i) {
for (int32_t i = 0; i < 1; ++i) {
pLoadInfo[i].currentLoadBlockIndex = 1;
pLoadInfo[i].blockIndex[0] = -1;
pLoadInfo[i].blockIndex[1] = -1;
@ -91,6 +129,33 @@ void *destroyLastBlockLoadInfo(SSttBlockLoadInfo *pLoadInfo) {
return NULL;
}
static void destroyLDataIter(SLDataIter *pIter) {
tLDataIterClose2(pIter);
destroyLastBlockLoadInfo(pIter->pBlockLoadInfo);
taosMemoryFree(pIter);
}
void *destroySttBlockReader(SArray *pLDataIterArray, int64_t *blocks, double *el) {
if (pLDataIterArray == NULL) {
return NULL;
}
int32_t numOfLevel = taosArrayGetSize(pLDataIterArray);
for (int32_t i = 0; i < numOfLevel; ++i) {
SArray *pList = taosArrayGetP(pLDataIterArray, i);
for (int32_t j = 0; j < taosArrayGetSize(pList); ++j) {
SLDataIter *pIter = taosArrayGetP(pList, j);
*el += pIter->pBlockLoadInfo->elapsedTime;
*blocks += pIter->pBlockLoadInfo->loadBlocks;
destroyLDataIter(pIter);
}
taosArrayDestroy(pList);
}
taosArrayDestroy(pLDataIterArray);
return NULL;
}
static SBlockData *loadLastBlock(SLDataIter *pIter, const char *idStr) {
int32_t code = 0;
@ -122,20 +187,8 @@ static SBlockData *loadLastBlock(SLDataIter *pIter, const char *idStr) {
int64_t st = taosGetTimestampUs();
SBlockData *pBlock = &pInfo->blockData[pInfo->currentLoadBlockIndex];
TABLEID id = {0};
if (pIter->pSttBlk->suid != 0) {
id.suid = pIter->pSttBlk->suid;
} else {
id.uid = pIter->uid;
}
code = tBlockDataInit(pBlock, &id, pInfo->pSchema, pInfo->colIds, pInfo->numOfCols);
if (code != TSDB_CODE_SUCCESS) {
goto _exit;
}
code = tsdbReadSttBlock(pIter->pReader, pIter->iStt, pIter->pSttBlk, pBlock);
code = tsdbSttFileReadBlockDataByColumn(pIter->pReader, pIter->pSttBlk, pBlock, pInfo->pSchema, &pInfo->colIds[1],
pInfo->numOfCols - 1);
if (code != TSDB_CODE_SUCCESS) {
goto _exit;
}
@ -255,74 +308,153 @@ static int32_t binarySearchForStartRowIndex(uint64_t *uidList, int32_t num, uint
int32_t tLDataIterOpen(struct SLDataIter *pIter, SDataFReader *pReader, int32_t iStt, int8_t backward, uint64_t suid,
uint64_t uid, STimeWindow *pTimeWindow, SVersionRange *pRange, SSttBlockLoadInfo *pBlockLoadInfo,
const char *idStr, bool strictTimeRange) {
return 0;
}
static int32_t extractSttBlockInfo(SLDataIter *pIter, const TSttBlkArray *pArray, SSttBlockLoadInfo *pBlockLoadInfo,
uint64_t suid) {
if (TARRAY2_SIZE(pArray) <= 0) {
return TSDB_CODE_SUCCESS;
}
SSttBlk *pStart = &pArray->data[0];
SSttBlk *pEnd = &pArray->data[TARRAY2_SIZE(pArray) - 1];
// all identical
if (pStart->suid == pEnd->suid) {
if (pStart->suid != suid) { // no qualified stt block existed
taosArrayClear(pBlockLoadInfo->aSttBlk);
pIter->iSttBlk = -1;
return TSDB_CODE_SUCCESS;
} else { // all blocks are qualified
taosArrayClear(pBlockLoadInfo->aSttBlk);
taosArrayAddBatch(pBlockLoadInfo->aSttBlk, pArray->data, pArray->size);
}
} else {
SArray *pTmp = taosArrayInit(TARRAY2_SIZE(pArray), sizeof(SSttBlk));
for (int32_t i = 0; i < TARRAY2_SIZE(pArray); ++i) {
SSttBlk *p = &pArray->data[i];
if (p->suid < suid) {
continue;
}
if (p->suid == suid) {
taosArrayPush(pTmp, p);
} else if (p->suid > suid) {
break;
}
}
taosArrayDestroy(pBlockLoadInfo->aSttBlk);
pBlockLoadInfo->aSttBlk = pTmp;
}
return TSDB_CODE_SUCCESS;
}
static int32_t uidComparFn(const void* p1, const void* p2) {
const uint64_t* uid1 = p1;
const uint64_t* uid2 = p2;
return (*uid1) - (*uid2);
}
static bool existsFromSttBlkStatis(const TStatisBlkArray *pStatisBlkArray, uint64_t suid, uint64_t uid,
SSttFileReader *pReader) {
if (TARRAY2_SIZE(pStatisBlkArray) <= 0) {
return true;
}
int32_t i = 0;
for (i = 0; i < TARRAY2_SIZE(pStatisBlkArray); ++i) {
SStatisBlk *p = &pStatisBlkArray->data[i];
if (p->minTbid.suid <= suid && p->maxTbid.suid >= suid) {
break;
}
}
// for (; i < TARRAY2_SIZE(pStatisBlkArray); ++i) {
// SStatisBlk *p = &pStatisBlkArray->data[i];
// if (p->minTbid.uid <= uid && p->maxTbid.uid >= uid) {
// break;
// }
//
// if (p->maxTbid.uid < uid) {
// break;
// }
// }
if (i >= TARRAY2_SIZE(pStatisBlkArray)) {
return false;
}
SStatisBlk *p = &pStatisBlkArray->data[i];
STbStatisBlock block = {0};
tsdbSttFileReadStatisBlock(pReader, p, &block);
int32_t index = tarray2SearchIdx(block.uid, &uid, sizeof(int64_t), uidComparFn, TD_EQ);
tStatisBlockDestroy(&block);
return (index != -1);
}
int32_t tLDataIterOpen2(struct SLDataIter *pIter, SSttFileReader *pSttFileReader, int32_t iStt, int8_t backward,
uint64_t suid, uint64_t uid, STimeWindow *pTimeWindow, SVersionRange *pRange,
SSttBlockLoadInfo *pBlockLoadInfo, const char *idStr, bool strictTimeRange,
_load_tomb_fn loadTombFn, void *pReader1) {
int32_t code = TSDB_CODE_SUCCESS;
pIter->uid = uid;
pIter->pReader = pReader;
pIter->iStt = iStt;
pIter->backward = backward;
pIter->verRange.minVer = pRange->minVer;
pIter->verRange.maxVer = pRange->maxVer;
pIter->timeWindow.skey = pTimeWindow->skey;
pIter->timeWindow.ekey = pTimeWindow->ekey;
pIter->pReader = pSttFileReader;
pIter->pBlockLoadInfo = pBlockLoadInfo;
if (!pBlockLoadInfo->sttBlockLoaded) {
int64_t st = taosGetTimestampUs();
const TSttBlkArray*pSttBlkArray = NULL;
pBlockLoadInfo->sttBlockLoaded = true;
code = tsdbReadSttBlk(pReader, iStt, pBlockLoadInfo->aSttBlk);
if (code) {
// load the stt block info for each stt-block
code = tsdbSttFileReadSttBlk(pIter->pReader, &pSttBlkArray);
if (code != TSDB_CODE_SUCCESS) {
tsdbError("load stt blk failed, code:%s, %s", tstrerror(code), idStr);
return code;
}
// only apply to the child tables, ordinary tables will not incur this filter procedure.
size_t size = taosArrayGetSize(pBlockLoadInfo->aSttBlk);
if (size >= 1) {
SSttBlk *pStart = taosArrayGet(pBlockLoadInfo->aSttBlk, 0);
SSttBlk *pEnd = taosArrayGet(pBlockLoadInfo->aSttBlk, size - 1);
// all identical
if (pStart->suid == pEnd->suid) {
if (pStart->suid != suid) {
// no qualified stt block existed
taosArrayClear(pBlockLoadInfo->aSttBlk);
pIter->iSttBlk = -1;
double el = (taosGetTimestampUs() - st) / 1000.0;
tsdbDebug("load the last file info completed, elapsed time:%.2fms, %s", el, idStr);
return code;
}
} else {
SArray *pTmp = taosArrayInit(size, sizeof(SSttBlk));
for (int32_t i = 0; i < size; ++i) {
SSttBlk *p = taosArrayGet(pBlockLoadInfo->aSttBlk, i);
uint64_t s = p->suid;
if (s < suid) {
continue;
}
if (s == suid) {
taosArrayPush(pTmp, p);
} else if (s > suid) {
break;
}
}
taosArrayDestroy(pBlockLoadInfo->aSttBlk);
pBlockLoadInfo->aSttBlk = pTmp;
}
code = extractSttBlockInfo(pIter, pSttBlkArray, pBlockLoadInfo, suid);
if (code != TSDB_CODE_SUCCESS) {
tsdbError("load stt block info failed, code:%s, %s", tstrerror(code), idStr);
return code;
}
// load stt blocks statis for all stt-blocks, to decide if the data of queried table exists in current stt file
code = tsdbSttFileReadStatisBlk(pIter->pReader, (const TStatisBlkArray **)&pBlockLoadInfo->pSttStatisBlkArray);
if (code != TSDB_CODE_SUCCESS) {
tsdbError("failed to load stt block statistics, code:%s, %s", tstrerror(code), idStr);
return code;
}
code = loadTombFn(pReader1, pIter->pReader, pIter->pBlockLoadInfo);
double el = (taosGetTimestampUs() - st) / 1000.0;
tsdbDebug("load the last file info completed, elapsed time:%.2fms, %s", el, idStr);
tsdbDebug("load the stt file info completed, elapsed time:%.2fms, %s", el, idStr);
}
size_t size = taosArrayGetSize(pBlockLoadInfo->aSttBlk);
// bool exists = existsFromSttBlkStatis(pBlockLoadInfo->pSttStatisBlkArray, suid, uid, pIter->pReader);
// if (!exists) {
// pIter->iSttBlk = -1;
// pIter->pSttBlk = NULL;
// return TSDB_CODE_SUCCESS;
// }
// find the start block
// find the start block, actually we could load the position to avoid repeatly searching for the start position when
// the skey is updated.
size_t size = taosArrayGetSize(pBlockLoadInfo->aSttBlk);
pIter->iSttBlk = binarySearchForStartBlock(pBlockLoadInfo->aSttBlk->pData, size, uid, backward);
if (pIter->iSttBlk != -1) {
pIter->pSttBlk = taosArrayGet(pBlockLoadInfo->aSttBlk, pIter->iSttBlk);
@ -343,7 +475,10 @@ int32_t tLDataIterOpen(struct SLDataIter *pIter, SDataFReader *pReader, int32_t
return code;
}
void tLDataIterClose(SLDataIter *pIter) { /*taosMemoryFree(pIter); */}
void tLDataIterClose2(SLDataIter *pIter) {
tsdbSttFileReaderClose(&pIter->pReader);
pIter->pReader = NULL;
}
void tLDataIterNextBlock(SLDataIter *pIter, const char *idStr) {
int32_t step = pIter->backward ? -1 : 1;
@ -395,25 +530,23 @@ void tLDataIterNextBlock(SLDataIter *pIter, const char *idStr) {
if (index != -1) {
pIter->iSttBlk = index;
pIter->pSttBlk = (SSttBlk *)taosArrayGet(pIter->pBlockLoadInfo->aSttBlk, pIter->iSttBlk);
tsdbDebug("try next last file block:%d from %d, trigger by uid:%" PRIu64 ", file index:%d, %s", pIter->iSttBlk,
oldIndex, pIter->uid, pIter->iStt, idStr);
tsdbDebug("try next last file block:%d from stt fileIdx:%d, trigger by uid:%" PRIu64 ", file index:%d, %s",
pIter->iSttBlk, oldIndex, pIter->uid, pIter->iStt, idStr);
} else {
tsdbDebug("no more last block qualified, uid:%" PRIu64 ", file index:%d, %s", pIter->uid, oldIndex, idStr);
}
}
static void findNextValidRow(SLDataIter *pIter, const char *idStr) {
int32_t step = pIter->backward ? -1 : 1;
bool hasVal = false;
int32_t step = pIter->backward ? -1 : 1;
int32_t i = pIter->iRow;
SBlockData *pBlockData = loadLastBlock(pIter, idStr);
SBlockData *pData = loadLastBlock(pIter, idStr);
// mostly we only need to find the start position for a given table
if ((((i == 0) && (!pIter->backward)) || (i == pBlockData->nRow - 1 && pIter->backward)) &&
pBlockData->aUid != NULL) {
i = binarySearchForStartRowIndex((uint64_t *)pBlockData->aUid, pBlockData->nRow, pIter->uid, pIter->backward);
if ((((i == 0) && (!pIter->backward)) || (i == pData->nRow - 1 && pIter->backward)) && pData->aUid != NULL) {
i = binarySearchForStartRowIndex((uint64_t *)pData->aUid, pData->nRow, pIter->uid, pIter->backward);
if (i == -1) {
tsdbDebug("failed to find the data in pBlockData, uid:%" PRIu64 " , %s", pIter->uid, idStr);
pIter->iRow = -1;
@ -421,20 +554,20 @@ static void findNextValidRow(SLDataIter *pIter, const char *idStr) {
}
}
for (; i < pBlockData->nRow && i >= 0; i += step) {
if (pBlockData->aUid != NULL) {
for (; i < pData->nRow && i >= 0; i += step) {
if (pData->aUid != NULL) {
if (!pIter->backward) {
if (pBlockData->aUid[i] > pIter->uid) {
if (pData->aUid[i] > pIter->uid) {
break;
}
} else {
if (pBlockData->aUid[i] < pIter->uid) {
if (pData->aUid[i] < pIter->uid) {
break;
}
}
}
int64_t ts = pBlockData->aTSKEY[i];
int64_t ts = pData->aTSKEY[i];
if (!pIter->backward) { // asc
if (ts > pIter->timeWindow.ekey) { // no more data
break;
@ -449,7 +582,7 @@ static void findNextValidRow(SLDataIter *pIter, const char *idStr) {
}
}
int64_t ver = pBlockData->aVersion[i];
int64_t ver = pData->aVersion[i];
if (ver < pIter->verRange.minVer) {
continue;
}
@ -485,7 +618,6 @@ bool tLDataIterNextRow(SLDataIter *pIter, const char *idStr) {
while (1) {
bool skipBlock = false;
findNextValidRow(pIter, idStr);
if (pIter->pBlockLoadInfo->checkRemainingRow) {
@ -570,7 +702,7 @@ static FORCE_INLINE int32_t tLDataIterDescCmprFn(const SRBTreeNode *p1, const SR
int32_t tMergeTreeOpen(SMergeTree *pMTree, int8_t backward, SDataFReader *pFReader, uint64_t suid, uint64_t uid,
STimeWindow *pTimeWindow, SVersionRange *pVerRange, SSttBlockLoadInfo *pBlockLoadInfo,
bool destroyLoadInfo, const char *idStr, bool strictTimeRange, SLDataIter* pLDataIter) {
bool destroyLoadInfo, const char *idStr, bool strictTimeRange, SLDataIter *pLDataIter) {
int32_t code = TSDB_CODE_SUCCESS;
pMTree->backward = backward;
@ -612,6 +744,95 @@ _end:
return code;
}
int32_t tMergeTreeOpen2(SMergeTree *pMTree, SMergeTreeConf *pConf) {
int32_t code = TSDB_CODE_SUCCESS;
pMTree->pIter = NULL;
pMTree->backward = pConf->backward;
pMTree->idStr = pConf->idstr;
if (!pMTree->backward) { // asc
tRBTreeCreate(&pMTree->rbt, tLDataIterCmprFn);
} else { // desc
tRBTreeCreate(&pMTree->rbt, tLDataIterDescCmprFn);
}
pMTree->ignoreEarlierTs = false;
// todo handle other level of stt files, here only deal with the first level stt
int32_t size = ((STFileSet *)pConf->pCurrentFileset)->lvlArr->size;
if (size == 0) {
goto _end;
}
// add the list/iter placeholder
while (taosArrayGetSize(pConf->pSttFileBlockIterArray) < size) {
SArray *pList = taosArrayInit(4, POINTER_BYTES);
taosArrayPush(pConf->pSttFileBlockIterArray, &pList);
}
for (int32_t j = 0; j < size; ++j) {
SSttLvl *pSttLevel = ((STFileSet *)pConf->pCurrentFileset)->lvlArr->data[j];
ASSERT(pSttLevel->level == j);
SArray *pList = taosArrayGetP(pConf->pSttFileBlockIterArray, j);
int32_t numOfIter = taosArrayGetSize(pList);
if (numOfIter < TARRAY2_SIZE(pSttLevel->fobjArr)) {
int32_t inc = TARRAY2_SIZE(pSttLevel->fobjArr) - numOfIter;
for (int32_t k = 0; k < inc; ++k) {
SLDataIter *pIter = taosMemoryCalloc(1, sizeof(SLDataIter));
taosArrayPush(pList, &pIter);
}
}
for (int32_t i = 0; i < TARRAY2_SIZE(pSttLevel->fobjArr); ++i) { // open all last file
SLDataIter *pIter = taosArrayGetP(pList, i);
SSttFileReader *pSttFileReader = pIter->pReader;
SSttBlockLoadInfo *pLoadInfo = pIter->pBlockLoadInfo;
// open stt file reader if not
if (pSttFileReader == NULL) {
SSttFileReaderConfig conf = {.tsdb = pConf->pTsdb, .szPage = pConf->pTsdb->pVnode->config.szPage};
conf.file[0] = *pSttLevel->fobjArr->data[i]->f;
code = tsdbSttFileReaderOpen(pSttLevel->fobjArr->data[i]->fname, &conf, &pSttFileReader);
if (code != TSDB_CODE_SUCCESS) {
return code;
}
}
if (pLoadInfo == NULL) {
pLoadInfo = tCreateOneLastBlockLoadInfo(pConf->pSchema, pConf->pCols, pConf->numOfCols);
}
memset(pIter, 0, sizeof(SLDataIter));
code = tLDataIterOpen2(pIter, pSttFileReader, i, pMTree->backward, pConf->suid, pConf->uid, &pConf->timewindow,
&pConf->verRange, pLoadInfo, pMTree->idStr, pConf->strictTimeRange, pConf->loadTombFn,
pConf->pReader);
if (code != TSDB_CODE_SUCCESS) {
goto _end;
}
bool hasVal = tLDataIterNextRow(pIter, pMTree->idStr);
if (hasVal) {
tMergeTreeAddIter(pMTree, pIter);
} else {
if (!pMTree->ignoreEarlierTs) {
pMTree->ignoreEarlierTs = pIter->ignoreEarlierTs;
}
}
}
}
return code;
_end:
tMergeTreeClose(pMTree);
return code;
}
void tMergeTreeAddIter(SMergeTree *pMTree, SLDataIter *pIter) { tRBTreePut(&pMTree->rbt, (SRBTreeNode *)pIter); }
bool tMergeTreeIgnoreEarlierTs(SMergeTree *pMTree) { return pMTree->ignoreEarlierTs; }

View File

@ -14,6 +14,7 @@
*/
#include "tsdb.h"
#include "tsdbFS2.h"
int32_t tsdbSetKeepCfg(STsdb *pTsdb, STsdbCfg *pCfg) {
STsdbKeepCfg *pKeepCfg = &pTsdb->keepCfg;
@ -66,7 +67,7 @@ int tsdbOpen(SVnode *pVnode, STsdb **ppTsdb, const char *dir, STsdbKeepCfg *pKee
}
// open tsdb
if (tsdbFSOpen(pTsdb, rollback) < 0) {
if (tsdbOpenFS(pTsdb, &pTsdb->pFS, rollback) < 0) {
goto _err;
}
@ -94,7 +95,7 @@ int tsdbClose(STsdb **pTsdb) {
taosThreadRwlockDestroy(&(*pTsdb)->rwLock);
tsdbFSClose(*pTsdb);
tsdbCloseFS(&(*pTsdb)->pFS);
tsdbCloseCache(*pTsdb);
taosMemoryFreeClear(*pTsdb);
}

View File

@ -17,7 +17,7 @@
#include "tsdb.h"
#include "tsimplehash.h"
#define ASCENDING_TRAVERSE(o) (o == TSDB_ORDER_ASC)
#define ASCENDING_TRAVERSE(o) (o == TSDB_ORDER_ASC)
#define getCurrentKeyInLastBlock(_r) ((_r)->currentKey)
typedef enum {
@ -30,12 +30,12 @@ typedef enum {
EXTERNAL_ROWS_MAIN = 0x2,
EXTERNAL_ROWS_NEXT = 0x3,
} EContentData;
/*
typedef enum {
READ_MODE_COUNT_ONLY = 0x1,
READ_MODE_ALL,
} EReadMode;
*/
typedef struct {
STbDataIter* iter;
int32_t index;
@ -166,7 +166,7 @@ typedef struct SReaderStatus {
SDataBlockIter blockIter;
SLDataIter* pLDataIter;
SRowMerger merger;
SColumnInfoData* pPrimaryTsCol; // primary time stamp output col info data
SColumnInfoData* pPrimaryTsCol; // primary time stamp output col info data
} SReaderStatus;
typedef struct SBlockInfoBuf {
@ -410,7 +410,7 @@ static int32_t uidComparFunc(const void* p1, const void* p2) {
// NOTE: speedup the whole processing by preparing the buffer for STableBlockScanInfo in batch model
static SSHashObj* createDataBlockScanInfo(STsdbReader* pTsdbReader, SBlockInfoBuf* pBuf, const STableKeyInfo* idList,
STableUidList* pUidList, int32_t numOfTables) {
STableUidList* pUidList, int32_t numOfTables) {
// allocate buffer in order to load data blocks from file
// todo use simple hash instead, optimize the memory consumption
SSHashObj* pTableMap = tSimpleHashInit(numOfTables, taosGetDefaultHashFunction(TSDB_DATA_TYPE_BIGINT));
@ -461,7 +461,7 @@ static SSHashObj* createDataBlockScanInfo(STsdbReader* pTsdbReader, SBlockInfoBu
}
static void resetAllDataBlockScanInfo(SSHashObj* pTableMap, int64_t ts, int32_t step) {
void *p = NULL;
void* p = NULL;
int32_t iter = 0;
while ((p = tSimpleHashIterate(pTableMap, p, &iter)) != NULL) {
@ -505,7 +505,7 @@ static void clearBlockScanInfo(STableBlockScanInfo* p) {
}
static void destroyAllBlockScanInfo(SSHashObj* pTableMap) {
void* p = NULL;
void* p = NULL;
int32_t iter = 0;
while ((p = tSimpleHashIterate(pTableMap, p, &iter)) != NULL) {
@ -743,7 +743,8 @@ void tsdbReleaseDataBlock(STsdbReader* pReader) {
}
}
static int32_t initResBlockInfo(SResultBlockInfo* pResBlockInfo, int64_t capacity, SSDataBlock* pResBlock, SQueryTableDataCond* pCond) {
static int32_t initResBlockInfo(SResultBlockInfo* pResBlockInfo, int64_t capacity, SSDataBlock* pResBlock,
SQueryTableDataCond* pCond) {
pResBlockInfo->capacity = capacity;
pResBlockInfo->pResBlock = pResBlock;
terrno = 0;
@ -921,9 +922,9 @@ static void cleanupTableScanInfo(SReaderStatus* pStatus) {
return;
}
SSHashObj* pTableMap = pStatus->pTableMap;
SSHashObj* pTableMap = pStatus->pTableMap;
STableBlockScanInfo** px = NULL;
int32_t iter = 0;
int32_t iter = 0;
while (1) {
px = tSimpleHashIterate(pTableMap, px, &iter);
@ -937,9 +938,10 @@ static void cleanupTableScanInfo(SReaderStatus* pStatus) {
pStatus->mapDataCleaned = true;
}
static int32_t doLoadFileBlock(STsdbReader* pReader, SArray* pIndexList, SBlockNumber* pBlockNum, SArray* pTableScanInfoList) {
size_t sizeInDisk = 0;
size_t numOfTables = taosArrayGetSize(pIndexList);
static int32_t doLoadFileBlock(STsdbReader* pReader, SArray* pIndexList, SBlockNumber* pBlockNum,
SArray* pTableScanInfoList) {
size_t sizeInDisk = 0;
size_t numOfTables = taosArrayGetSize(pIndexList);
int64_t st = taosGetTimestampUs();
cleanupTableScanInfo(&pReader->status);
@ -1125,18 +1127,18 @@ static int32_t getEndPosInDataBlock(STsdbReader* pReader, SBlockData* pBlockData
endPos = doBinarySearchKey(pBlockData->aTSKEY, pBlock->nRow, pos, key, pReader->order);
}
if ((pReader->verRange.maxVer >= pBlock->minVer && pReader->verRange.maxVer < pBlock->maxVer)||
if ((pReader->verRange.maxVer >= pBlock->minVer && pReader->verRange.maxVer < pBlock->maxVer) ||
(pReader->verRange.minVer <= pBlock->maxVer && pReader->verRange.minVer > pBlock->minVer)) {
int32_t i = endPos;
if (asc) {
for(; i >= 0; --i) {
for (; i >= 0; --i) {
if (pBlockData->aVersion[i] <= pReader->verRange.maxVer) {
break;
}
}
} else {
for(; i < pBlock->nRow; ++i) {
for (; i < pBlock->nRow; ++i) {
if (pBlockData->aVersion[i] >= pReader->verRange.minVer) {
break;
}
@ -1309,17 +1311,17 @@ static int32_t copyBlockDataToSDataBlock(STsdbReader* pReader) {
ASSERT(pReader->verRange.minVer <= pBlock->maxVer && pReader->verRange.maxVer >= pBlock->minVer);
// find the appropriate start position that satisfies the version requirement.
if ((pReader->verRange.maxVer >= pBlock->minVer && pReader->verRange.maxVer < pBlock->maxVer)||
if ((pReader->verRange.maxVer >= pBlock->minVer && pReader->verRange.maxVer < pBlock->maxVer) ||
(pReader->verRange.minVer <= pBlock->maxVer && pReader->verRange.minVer > pBlock->minVer)) {
int32_t i = pDumpInfo->rowIndex;
if (asc) {
for(; i < pBlock->nRow; ++i) {
for (; i < pBlock->nRow; ++i) {
if (pBlockData->aVersion[i] >= pReader->verRange.minVer) {
break;
}
}
} else {
for(; i >= 0; --i) {
for (; i >= 0; --i) {
if (pBlockData->aVersion[i] <= pReader->verRange.maxVer) {
break;
}
@ -1562,7 +1564,8 @@ static int32_t doSetCurrentBlock(SDataBlockIter* pBlockIter, const char* idStr)
return TSDB_CODE_SUCCESS;
}
static int32_t initBlockIterator(STsdbReader* pReader, SDataBlockIter* pBlockIter, int32_t numOfBlocks, SArray* pTableList) {
static int32_t initBlockIterator(STsdbReader* pReader, SDataBlockIter* pBlockIter, int32_t numOfBlocks,
SArray* pTableList) {
bool asc = ASCENDING_TRAVERSE(pReader->order);
SBlockOrderSupporter sup = {0};
@ -1967,13 +1970,14 @@ static bool nextRowFromLastBlocks(SLastBlockReader* pLastBlockReader, STableBloc
}
TSDBROW* pRow = tMergeTreeGetRow(&pLastBlockReader->mergeTree);
int64_t key = pRow->pBlockData->aTSKEY[pRow->iRow];
int64_t ver = pRow->pBlockData->aVersion[pRow->iRow];
int64_t key = pRow->pBlockData->aTSKEY[pRow->iRow];
int64_t ver = pRow->pBlockData->aVersion[pRow->iRow];
pLastBlockReader->currentKey = key;
pScanInfo->lastKeyInStt = key;
if (!hasBeenDropped(pScanInfo->delSkyline, &pScanInfo->lastBlockDelIndex, key, ver, pLastBlockReader->order, pVerRange)) {
if (!hasBeenDropped(pScanInfo->delSkyline, &pScanInfo->lastBlockDelIndex, key, ver, pLastBlockReader->order,
pVerRange)) {
return true;
}
}
@ -2030,7 +2034,7 @@ static FORCE_INLINE STSchema* doGetSchemaForTSRow(int32_t sversion, STsdbReader*
}
STSchema* ptr = NULL;
int32_t code = metaGetTbTSchemaEx(pReader->pTsdb->pVnode->pMeta, pReader->suid, uid, sversion, &ptr);
int32_t code = metaGetTbTSchemaEx(pReader->pTsdb->pVnode->pMeta, pReader->suid, uid, sversion, &ptr);
if (code != TSDB_CODE_SUCCESS) {
terrno = code;
return NULL;
@ -2153,7 +2157,7 @@ static int32_t doMergeBufAndFileRows(STsdbReader* pReader, STableBlockScanInfo*
return terrno;
}
int32_t code = tsdbRowMergerAdd(pMerger, pRow, pSchema);
int32_t code = tsdbRowMergerAdd(pMerger, pRow, pSchema);
if (code != TSDB_CODE_SUCCESS) {
return code;
}
@ -2208,7 +2212,7 @@ static int32_t doMergeBufAndFileRows(STsdbReader* pReader, STableBlockScanInfo*
static int32_t doMergeFileBlockAndLastBlock(SLastBlockReader* pLastBlockReader, STsdbReader* pReader,
STableBlockScanInfo* pBlockScanInfo, SBlockData* pBlockData,
bool mergeBlockData) {
SRowMerger* pMerger = &pReader->status.merger;
SRowMerger* pMerger = &pReader->status.merger;
SFileBlockDumpInfo* pDumpInfo = &pReader->status.fBlockDumpInfo;
int64_t tsLastBlock = getCurrentKeyInLastBlock(pLastBlockReader);
@ -2218,9 +2222,10 @@ static int32_t doMergeFileBlockAndLastBlock(SLastBlockReader* pLastBlockReader,
TSDBROW* pRow = tMergeTreeGetRow(&pLastBlockReader->mergeTree);
// create local variable to hold the row value
TSDBROW fRow = {.iRow = pRow->iRow, .type = TSDBROW_COL_FMT, .pBlockData = pRow->pBlockData};
TSDBROW fRow = {.iRow = pRow->iRow, .type = TSDBROW_COL_FMT, .pBlockData = pRow->pBlockData};
tsdbTrace("fRow ptr:%p, %d, uid:%" PRIu64 ", %s", pRow->pBlockData, pRow->iRow, pLastBlockReader->uid, pReader->idStr);
tsdbTrace("fRow ptr:%p, %d, uid:%" PRIu64 ", %s", pRow->pBlockData, pRow->iRow, pLastBlockReader->uid,
pReader->idStr);
// only last block exists
if ((!mergeBlockData) || (tsLastBlock != pBlockData->aTSKEY[pDumpInfo->rowIndex])) {
@ -2240,7 +2245,8 @@ static int32_t doMergeFileBlockAndLastBlock(SLastBlockReader* pLastBlockReader,
TSDBROW* pRow1 = tMergeTreeGetRow(&pLastBlockReader->mergeTree);
tsdbRowMergerAdd(pMerger, pRow1, NULL);
doMergeRowsInLastBlock(pLastBlockReader, pBlockScanInfo, tsLastBlock, pMerger, &pReader->verRange, pReader->idStr);
doMergeRowsInLastBlock(pLastBlockReader, pBlockScanInfo, tsLastBlock, pMerger, &pReader->verRange,
pReader->idStr);
code = tsdbRowMergerGetRow(pMerger, &pTSRow);
if (code != TSDB_CODE_SUCCESS) {
@ -2290,7 +2296,7 @@ static int32_t doMergeFileBlockAndLastBlock(SLastBlockReader* pLastBlockReader,
static int32_t mergeFileBlockAndLastBlock(STsdbReader* pReader, SLastBlockReader* pLastBlockReader, int64_t key,
STableBlockScanInfo* pBlockScanInfo, SBlockData* pBlockData) {
SFileBlockDumpInfo* pDumpInfo = &pReader->status.fBlockDumpInfo;
SRowMerger* pMerger = &pReader->status.merger;
SRowMerger* pMerger = &pReader->status.merger;
// merge is not initialized yet, due to the fact that the pReader->pSchema is not initialized
if (pMerger->pArray == NULL) {
@ -2316,7 +2322,7 @@ static int32_t mergeFileBlockAndLastBlock(STsdbReader* pReader, SLastBlockReader
if (key < ts) { // imem, mem are all empty, file blocks (data blocks and last block) exist
return mergeRowsInFileBlocks(pBlockData, pBlockScanInfo, key, pReader);
} else if (key == ts) {
SRow* pTSRow = NULL;
SRow* pTSRow = NULL;
int32_t code = tsdbRowMergerAdd(pMerger, &fRow, pReader->pSchema);
if (code != TSDB_CODE_SUCCESS) {
return code;
@ -2723,7 +2729,7 @@ int32_t mergeRowsInFileBlocks(SBlockData* pBlockData, STableBlockScanInfo* pBloc
} else {
TSDBROW fRow = tsdbRowFromBlockData(pBlockData, pDumpInfo->rowIndex);
SRow* pTSRow = NULL;
SRow* pTSRow = NULL;
code = tsdbRowMergerAdd(pMerger, &fRow, pReader->pSchema);
if (code != TSDB_CODE_SUCCESS) {
return code;
@ -2837,11 +2843,11 @@ static int32_t buildComposedDataBlock(STsdbReader* pReader) {
SFileDataBlockInfo* pBlockInfo = getCurrentBlockInfo(&pReader->status.blockIter);
SLastBlockReader* pLastBlockReader = pReader->status.fileIter.pLastBlockReader;
bool asc = ASCENDING_TRAVERSE(pReader->order);
int64_t st = taosGetTimestampUs();
int32_t step = asc ? 1 : -1;
double el = 0;
SDataBlk* pBlock = getCurrentBlock(&pReader->status.blockIter);
bool asc = ASCENDING_TRAVERSE(pReader->order);
int64_t st = taosGetTimestampUs();
int32_t step = asc ? 1 : -1;
double el = 0;
SDataBlk* pBlock = getCurrentBlock(&pReader->status.blockIter);
SFileBlockDumpInfo* pDumpInfo = &pReader->status.fBlockDumpInfo;
STableBlockScanInfo* pBlockScanInfo = NULL;
@ -2874,7 +2880,8 @@ static int32_t buildComposedDataBlock(STsdbReader* pReader) {
}
} else { // file blocks not exist
pBlockScanInfo = *pReader->status.pTableIter;
if (pReader->pIgnoreTables && taosHashGet(*pReader->pIgnoreTables, &pBlockScanInfo->uid, sizeof(pBlockScanInfo->uid))) {
if (pReader->pIgnoreTables &&
taosHashGet(*pReader->pIgnoreTables, &pBlockScanInfo->uid, sizeof(pBlockScanInfo->uid))) {
setBlockAllDumped(pDumpInfo, pBlock->maxKey.ts, pReader->order);
return code;
}
@ -3238,7 +3245,7 @@ static int32_t doLoadLastBlockSequentially(STsdbReader* pReader) {
}
static int32_t doBuildDataBlock(STsdbReader* pReader) {
int32_t code = TSDB_CODE_SUCCESS;
int32_t code = TSDB_CODE_SUCCESS;
SReaderStatus* pStatus = &pReader->status;
SDataBlockIter* pBlockIter = &pStatus->blockIter;
@ -3261,7 +3268,6 @@ static int32_t doBuildDataBlock(STsdbReader* pReader) {
return terrno;
}
initLastBlockReader(pLastBlockReader, pScanInfo, pReader);
TSDBKEY keyInBuf = getCurrentKeyInBuf(pScanInfo, pReader);
@ -3338,7 +3344,7 @@ static int32_t doBuildDataBlock(STsdbReader* pReader) {
}
}
return (pReader->code != TSDB_CODE_SUCCESS)? pReader->code:code;
return (pReader->code != TSDB_CODE_SUCCESS) ? pReader->code : code;
}
static int32_t doSumFileBlockRows(STsdbReader* pReader, SDataFReader* pFileReader) {
@ -3493,7 +3499,8 @@ static int32_t buildBlockFromBufferSequentially(STsdbReader* pReader) {
}
STableBlockScanInfo** pBlockScanInfo = pStatus->pTableIter;
if (pReader->pIgnoreTables && taosHashGet(*pReader->pIgnoreTables, &(*pBlockScanInfo)->uid, sizeof((*pBlockScanInfo)->uid))) {
if (pReader->pIgnoreTables &&
taosHashGet(*pReader->pIgnoreTables, &(*pBlockScanInfo)->uid, sizeof((*pBlockScanInfo)->uid))) {
bool hasNexTable = moveToNextTable(pUidList, pStatus);
if (!hasNexTable) {
return TSDB_CODE_SUCCESS;
@ -3544,7 +3551,7 @@ static void initBlockDumpInfo(STsdbReader* pReader, SDataBlockIter* pBlockIter)
static int32_t initForFirstBlockInFile(STsdbReader* pReader, SDataBlockIter* pBlockIter) {
SBlockNumber num = {0};
SArray* pTableList = taosArrayInit(40, POINTER_BYTES);
SArray* pTableList = taosArrayInit(40, POINTER_BYTES);
int32_t code = moveToNextFile(pReader, &num, pTableList);
if (code != TSDB_CODE_SUCCESS) {
@ -3589,7 +3596,7 @@ static ERetrieveType doReadDataFromLastFiles(STsdbReader* pReader) {
SSDataBlock* pResBlock = pReader->resBlockInfo.pResBlock;
SDataBlockIter* pBlockIter = &pReader->status.blockIter;
while(1) {
while (1) {
terrno = 0;
code = doLoadLastBlockSequentially(pReader);
@ -3612,7 +3619,7 @@ static ERetrieveType doReadDataFromLastFiles(STsdbReader* pReader) {
return TSDB_READ_RETURN;
}
if (pBlockIter->numOfBlocks > 0) { // there are data blocks existed.
if (pBlockIter->numOfBlocks > 0) { // there are data blocks existed.
return TSDB_READ_CONTINUE;
} else { // all blocks in data file are checked, let's check the data in last files
resetTableListIndex(&pReader->status);
@ -3625,7 +3632,7 @@ static int32_t buildBlockFromFiles(STsdbReader* pReader) {
bool asc = ASCENDING_TRAVERSE(pReader->order);
SDataBlockIter* pBlockIter = &pReader->status.blockIter;
SSDataBlock* pResBlock = pReader->resBlockInfo.pResBlock;
SSDataBlock* pResBlock = pReader->resBlockInfo.pResBlock;
if (pBlockIter->numOfBlocks == 0) {
// let's try to extract data from stt files.
@ -3737,13 +3744,14 @@ SVersionRange getQueryVerRange(SVnode* pVnode, SQueryTableDataCond* pCond, const
endVer = (pCond->endVersion > pVnode->state.applied) ? pVnode->state.applied : pCond->endVersion;
}
tsdbDebug("queried verRange:%"PRId64"-%"PRId64", revised query verRange:%"PRId64"-%"PRId64", %s", pCond->startVersion,
pCond->endVersion, startVer, endVer, id);
tsdbDebug("queried verRange:%" PRId64 "-%" PRId64 ", revised query verRange:%" PRId64 "-%" PRId64 ", %s",
pCond->startVersion, pCond->endVersion, startVer, endVer, id);
return (SVersionRange){.minVer = startVer, .maxVer = endVer};
}
bool hasBeenDropped(const SArray* pDelList, int32_t* index, int64_t key, int64_t ver, int32_t order, SVersionRange* pVerRange) {
bool hasBeenDropped(const SArray* pDelList, int32_t* index, int64_t key, int64_t ver, int32_t order,
SVersionRange* pVerRange) {
if (pDelList == NULL) {
return false;
}
@ -3761,8 +3769,7 @@ bool hasBeenDropped(const SArray* pDelList, int32_t* index, int64_t key, int64_t
return false;
} else if (key == last->ts) {
TSDBKEY* prev = taosArrayGet(pDelList, num - 2);
return (prev->version >= ver && prev->version <= pVerRange->maxVer &&
prev->version >= pVerRange->minVer);
return (prev->version >= ver && prev->version <= pVerRange->maxVer && prev->version >= pVerRange->minVer);
}
} else {
TSDBKEY* pCurrent = taosArrayGet(pDelList, *index);
@ -3971,9 +3978,9 @@ int32_t doMergeRowsInFileBlocks(SBlockData* pBlockData, STableBlockScanInfo* pSc
SFileBlockDumpInfo* pDumpInfo = &pReader->status.fBlockDumpInfo;
SRowMerger* pMerger = &pReader->status.merger;
bool asc = ASCENDING_TRAVERSE(pReader->order);
int64_t key = pBlockData->aTSKEY[pDumpInfo->rowIndex];
int32_t step = asc ? 1 : -1;
bool asc = ASCENDING_TRAVERSE(pReader->order);
int64_t key = pBlockData->aTSKEY[pDumpInfo->rowIndex];
int32_t step = asc ? 1 : -1;
pDumpInfo->rowIndex += step;
if ((pDumpInfo->rowIndex <= pBlockData->nRow - 1 && asc) || (pDumpInfo->rowIndex >= 0 && !asc)) {
@ -4070,14 +4077,14 @@ int32_t doMergeMemTableMultiRows(TSDBROW* pRow, uint64_t uid, SIterInfo* pIter,
return terrno;
}
tsdbRowMergerAdd(&pReader->status.merger,pNextRow, pTSchema1);
tsdbRowMergerAdd(&pReader->status.merger, pNextRow, pTSchema1);
} else { // let's merge rows in file block
code = tsdbRowMergerAdd(&pReader->status.merger, &current, pReader->pSchema);
if (code != TSDB_CODE_SUCCESS) {
return code;
}
tsdbRowMergerAdd(&pReader->status.merger,pNextRow, NULL);
tsdbRowMergerAdd(&pReader->status.merger, pNextRow, NULL);
}
code = doMergeRowsInBuf(pIter, uid, TSDBROW_TS(&current), pDelList, pReader);
@ -4124,9 +4131,8 @@ int32_t doMergeMemIMemRows(TSDBROW* pRow, TSDBROW* piRow, STableBlockScanInfo* p
return code;
}
tsdbRowMergerAdd(&pReader->status.merger,pRow, pSchema);
code =
doMergeRowsInBuf(&pBlockScanInfo->iter, pBlockScanInfo->uid, k.ts, pBlockScanInfo->delSkyline, pReader);
tsdbRowMergerAdd(&pReader->status.merger, pRow, pSchema);
code = doMergeRowsInBuf(&pBlockScanInfo->iter, pBlockScanInfo->uid, k.ts, pBlockScanInfo->delSkyline, pReader);
if (code != TSDB_CODE_SUCCESS) {
return code;
}
@ -4365,7 +4371,7 @@ int32_t tsdbSetTableList(STsdbReader* pReader, const void* pTableList, int32_t n
int32_t size = tSimpleHashGetSize(pReader->status.pTableMap);
STableBlockScanInfo** p = NULL;
int32_t iter = 0;
int32_t iter = 0;
while ((p = tSimpleHashIterate(pReader->status.pTableMap, p, &iter)) != NULL) {
clearBlockScanInfo(*p);
@ -4452,15 +4458,16 @@ static int32_t doOpenReaderImpl(STsdbReader* pReader) {
}
static void freeSchemaFunc(void* param) {
void **p = (void **)param;
void** p = (void**)param;
taosMemoryFreeClear(*p);
}
// ====================================== EXPOSED APIs ======================================
int32_t tsdbReaderOpen(void* pVnode, SQueryTableDataCond* pCond, void* pTableList, int32_t numOfTables,
SSDataBlock* pResBlock, void** ppReader, const char* idstr, bool countOnly, SHashObj** pIgnoreTables) {
SSDataBlock* pResBlock, void** ppReader, const char* idstr, bool countOnly,
SHashObj** pIgnoreTables) {
STimeWindow window = pCond->twindows;
SVnodeCfg* pConf = &(((SVnode*)pVnode)->config);
SVnodeCfg* pConf = &(((SVnode*)pVnode)->config);
int32_t capacity = pConf->tsdbCfg.maxRows;
if (pResBlock != NULL) {
@ -4729,7 +4736,7 @@ int32_t tsdbReaderSuspend(STsdbReader* pReader) {
// resetDataBlockScanInfo excluding lastKey
STableBlockScanInfo** p = NULL;
int32_t iter = 0;
int32_t iter = 0;
while ((p = tSimpleHashIterate(pStatus->pTableMap, p, &iter)) != NULL) {
STableBlockScanInfo* pInfo = *(STableBlockScanInfo**)p;
@ -4751,7 +4758,7 @@ int32_t tsdbReaderSuspend(STsdbReader* pReader) {
} else {
// resetDataBlockScanInfo excluding lastKey
STableBlockScanInfo** p = NULL;
int32_t iter = 0;
int32_t iter = 0;
while ((p = tSimpleHashIterate(pStatus->pTableMap, p, &iter)) != NULL) {
STableBlockScanInfo* pInfo = *(STableBlockScanInfo**)p;
@ -4950,8 +4957,9 @@ int32_t tsdbNextDataBlock(STsdbReader* pReader, bool* hasNext) {
*hasNext = false;
if (isEmptyQueryTimeWindow(&pReader->window) || pReader->step == EXTERNAL_ROWS_NEXT || pReader->code != TSDB_CODE_SUCCESS) {
return (pReader->code != TSDB_CODE_SUCCESS)? pReader->code:code;
if (isEmptyQueryTimeWindow(&pReader->window) || pReader->step == EXTERNAL_ROWS_NEXT ||
pReader->code != TSDB_CODE_SUCCESS) {
return (pReader->code != TSDB_CODE_SUCCESS) ? pReader->code : code;
}
SReaderStatus* pStatus = &pReader->status;
@ -5087,7 +5095,7 @@ static bool doFillNullColSMA(SBlockLoadSuppInfo* pSup, int32_t numOfRows, int32_
return hasNullSMA;
}
int32_t tsdbRetrieveDatablockSMA(STsdbReader* pReader, SSDataBlock* pDataBlock, bool* allHave, bool *hasNullSMA) {
int32_t tsdbRetrieveDatablockSMA(STsdbReader* pReader, SSDataBlock* pDataBlock, bool* allHave, bool* hasNullSMA) {
SColumnDataAgg*** pBlockSMA = &pDataBlock->pBlockAgg;
int32_t code = 0;
@ -5196,9 +5204,9 @@ STableBlockScanInfo* getTableBlockScanInfo(SSHashObj* pTableMap, uint64_t uid, c
}
static SSDataBlock* doRetrieveDataBlock(STsdbReader* pReader) {
SReaderStatus* pStatus = &pReader->status;
int32_t code = TSDB_CODE_SUCCESS;
SFileDataBlockInfo* pBlockInfo = getCurrentBlockInfo(&pStatus->blockIter);
SReaderStatus* pStatus = &pReader->status;
int32_t code = TSDB_CODE_SUCCESS;
SFileDataBlockInfo* pBlockInfo = getCurrentBlockInfo(&pStatus->blockIter);
if (pReader->code != TSDB_CODE_SUCCESS) {
return NULL;

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,630 @@
/*
* Copyright (c) 2019 TAOS Data, Inc. <jhtao@taosdata.com>
*
* This program is free software: you can use, redistribute, and/or modify
* it under the terms of the GNU Affero General Public License, version 3
* or later ("AGPL"), as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include "tsdbReadUtil.h"
#include "osDef.h"
#include "tsdb.h"
#include "tsdbDataFileRW.h"
#include "tsdbFS2.h"
#include "tsdbMerge.h"
#include "tsdbUtil2.h"
#include "tsimplehash.h"
static int32_t uidComparFunc(const void* p1, const void* p2) {
uint64_t pu1 = *(uint64_t*)p1;
uint64_t pu2 = *(uint64_t*)p2;
if (pu1 == pu2) {
return 0;
} else {
return (pu1 < pu2) ? -1 : 1;
}
}
static int32_t initBlockScanInfoBuf(SBlockInfoBuf* pBuf, int32_t numOfTables) {
int32_t num = numOfTables / pBuf->numPerBucket;
int32_t remainder = numOfTables % pBuf->numPerBucket;
if (pBuf->pData == NULL) {
pBuf->pData = taosArrayInit(num + 1, POINTER_BYTES);
}
for (int32_t i = 0; i < num; ++i) {
char* p = taosMemoryCalloc(pBuf->numPerBucket, sizeof(STableBlockScanInfo));
if (p == NULL) {
return TSDB_CODE_OUT_OF_MEMORY;
}
taosArrayPush(pBuf->pData, &p);
}
if (remainder > 0) {
char* p = taosMemoryCalloc(remainder, sizeof(STableBlockScanInfo));
if (p == NULL) {
return TSDB_CODE_OUT_OF_MEMORY;
}
taosArrayPush(pBuf->pData, &p);
}
pBuf->numOfTables = numOfTables;
return TSDB_CODE_SUCCESS;
}
int32_t ensureBlockScanInfoBuf(SBlockInfoBuf* pBuf, int32_t numOfTables) {
if (numOfTables <= pBuf->numOfTables) {
return TSDB_CODE_SUCCESS;
}
if (pBuf->numOfTables > 0) {
STableBlockScanInfo** p = (STableBlockScanInfo**)taosArrayPop(pBuf->pData);
taosMemoryFree(*p);
pBuf->numOfTables /= pBuf->numPerBucket;
}
int32_t num = (numOfTables - pBuf->numOfTables) / pBuf->numPerBucket;
int32_t remainder = (numOfTables - pBuf->numOfTables) % pBuf->numPerBucket;
if (pBuf->pData == NULL) {
pBuf->pData = taosArrayInit(num + 1, POINTER_BYTES);
}
for (int32_t i = 0; i < num; ++i) {
char* p = taosMemoryCalloc(pBuf->numPerBucket, sizeof(STableBlockScanInfo));
if (p == NULL) {
return TSDB_CODE_OUT_OF_MEMORY;
}
taosArrayPush(pBuf->pData, &p);
}
if (remainder > 0) {
char* p = taosMemoryCalloc(remainder, sizeof(STableBlockScanInfo));
if (p == NULL) {
return TSDB_CODE_OUT_OF_MEMORY;
}
taosArrayPush(pBuf->pData, &p);
}
pBuf->numOfTables = numOfTables;
return TSDB_CODE_SUCCESS;
}
void clearBlockScanInfoBuf(SBlockInfoBuf* pBuf) {
size_t num = taosArrayGetSize(pBuf->pData);
for (int32_t i = 0; i < num; ++i) {
char** p = taosArrayGet(pBuf->pData, i);
taosMemoryFree(*p);
}
taosArrayDestroy(pBuf->pData);
}
void* getPosInBlockInfoBuf(SBlockInfoBuf* pBuf, int32_t index) {
int32_t bucketIndex = index / pBuf->numPerBucket;
char** pBucket = taosArrayGet(pBuf->pData, bucketIndex);
return (*pBucket) + (index % pBuf->numPerBucket) * sizeof(STableBlockScanInfo);
}
STableBlockScanInfo* getTableBlockScanInfo(SSHashObj* pTableMap, uint64_t uid, const char* id) {
STableBlockScanInfo** p = tSimpleHashGet(pTableMap, &uid, sizeof(uid));
if (p == NULL || *p == NULL) {
terrno = TSDB_CODE_INVALID_PARA;
int32_t size = tSimpleHashGetSize(pTableMap);
tsdbError("failed to locate the uid:%" PRIu64 " in query table uid list, total tables:%d, %s", uid, size, id);
return NULL;
}
return *p;
}
// NOTE: speedup the whole processing by preparing the buffer for STableBlockScanInfo in batch model
SSHashObj* createDataBlockScanInfo(STsdbReader* pTsdbReader, SBlockInfoBuf* pBuf, const STableKeyInfo* idList,
STableUidList* pUidList, int32_t numOfTables) {
// allocate buffer in order to load data blocks from file
// todo use simple hash instead, optimize the memory consumption
SSHashObj* pTableMap = tSimpleHashInit(numOfTables, taosGetDefaultHashFunction(TSDB_DATA_TYPE_BIGINT));
if (pTableMap == NULL) {
return NULL;
}
int64_t st = taosGetTimestampUs();
initBlockScanInfoBuf(pBuf, numOfTables);
pUidList->tableUidList = taosMemoryMalloc(numOfTables * sizeof(uint64_t));
if (pUidList->tableUidList == NULL) {
tSimpleHashCleanup(pTableMap);
return NULL;
}
pUidList->currentIndex = 0;
for (int32_t j = 0; j < numOfTables; ++j) {
STableBlockScanInfo* pScanInfo = getPosInBlockInfoBuf(pBuf, j);
pScanInfo->uid = idList[j].uid;
pUidList->tableUidList[j] = idList[j].uid;
if (ASCENDING_TRAVERSE(pTsdbReader->info.order)) {
int64_t skey = pTsdbReader->info.window.skey;
pScanInfo->lastKey = (skey > INT64_MIN) ? (skey - 1) : skey;
pScanInfo->lastKeyInStt = skey;
} else {
int64_t ekey = pTsdbReader->info.window.ekey;
pScanInfo->lastKey = (ekey < INT64_MAX) ? (ekey + 1) : ekey;
pScanInfo->lastKeyInStt = ekey;
}
tSimpleHashPut(pTableMap, &pScanInfo->uid, sizeof(uint64_t), &pScanInfo, POINTER_BYTES);
tsdbTrace("%p check table uid:%" PRId64 " from lastKey:%" PRId64 " %s", pTsdbReader, pScanInfo->uid,
pScanInfo->lastKey, pTsdbReader->idStr);
}
taosSort(pUidList->tableUidList, numOfTables, sizeof(uint64_t), uidComparFunc);
pTsdbReader->cost.createScanInfoList = (taosGetTimestampUs() - st) / 1000.0;
tsdbDebug("%p create %d tables scan-info, size:%.2f Kb, elapsed time:%.2f ms, %s", pTsdbReader, numOfTables,
(sizeof(STableBlockScanInfo) * numOfTables) / 1024.0, pTsdbReader->cost.createScanInfoList,
pTsdbReader->idStr);
return pTableMap;
}
void resetAllDataBlockScanInfo(SSHashObj* pTableMap, int64_t ts, int32_t step) {
void* p = NULL;
int32_t iter = 0;
while ((p = tSimpleHashIterate(pTableMap, p, &iter)) != NULL) {
STableBlockScanInfo* pInfo = *(STableBlockScanInfo**)p;
pInfo->iterInit = false;
pInfo->iter.hasVal = false;
pInfo->iiter.hasVal = false;
if (pInfo->iter.iter != NULL) {
pInfo->iter.iter = tsdbTbDataIterDestroy(pInfo->iter.iter);
}
if (pInfo->iiter.iter != NULL) {
pInfo->iiter.iter = tsdbTbDataIterDestroy(pInfo->iiter.iter);
}
pInfo->delSkyline = taosArrayDestroy(pInfo->delSkyline);
pInfo->lastKey = ts;
pInfo->lastKeyInStt = ts + step;
}
}
void clearBlockScanInfo(STableBlockScanInfo* p) {
p->iterInit = false;
p->iter.hasVal = false;
p->iiter.hasVal = false;
if (p->iter.iter != NULL) {
p->iter.iter = tsdbTbDataIterDestroy(p->iter.iter);
}
if (p->iiter.iter != NULL) {
p->iiter.iter = tsdbTbDataIterDestroy(p->iiter.iter);
}
p->delSkyline = taosArrayDestroy(p->delSkyline);
p->pBlockList = taosArrayDestroy(p->pBlockList);
p->pMemDelData = taosArrayDestroy(p->pMemDelData);
p->pfileDelData = taosArrayDestroy(p->pfileDelData);
}
void destroyAllBlockScanInfo(SSHashObj* pTableMap) {
void* p = NULL;
int32_t iter = 0;
while ((p = tSimpleHashIterate(pTableMap, p, &iter)) != NULL) {
clearBlockScanInfo(*(STableBlockScanInfo**)p);
}
tSimpleHashCleanup(pTableMap);
}
static void doCleanupInfoForNextFileset(STableBlockScanInfo* pScanInfo) {
// reset the index in last block when handing a new file
taosArrayClear(pScanInfo->pBlockList);
taosArrayClear(pScanInfo->pfileDelData); // del data from each file set
}
void cleanupInfoFoxNextFileset(SSHashObj* pTableMap) {
STableBlockScanInfo** p = NULL;
int32_t iter = 0;
while ((p = tSimpleHashIterate(pTableMap, p, &iter)) != NULL) {
doCleanupInfoForNextFileset(*p);
}
}
// brin records iterator
void initBrinRecordIter(SBrinRecordIter* pIter, SDataFileReader* pReader, SArray* pList) {
memset(&pIter->block, 0, sizeof(SBrinBlock));
memset(&pIter->record, 0, sizeof(SBrinRecord));
pIter->blockIndex = -1;
pIter->recordIndex = -1;
pIter->pReader = pReader;
pIter->pBrinBlockList = pList;
}
SBrinRecord* getNextBrinRecord(SBrinRecordIter* pIter) {
if (pIter->blockIndex == -1 || (pIter->recordIndex + 1) >= TARRAY2_SIZE(pIter->block.numRow)) {
pIter->blockIndex += 1;
if (pIter->blockIndex >= taosArrayGetSize(pIter->pBrinBlockList)) {
return NULL;
}
pIter->pCurrentBlk = taosArrayGet(pIter->pBrinBlockList, pIter->blockIndex);
tBrinBlockClear(&pIter->block);
tsdbDataFileReadBrinBlock(pIter->pReader, pIter->pCurrentBlk, &pIter->block);
pIter->recordIndex = -1;
}
pIter->recordIndex += 1;
tBrinBlockGet(&pIter->block, pIter->recordIndex, &pIter->record);
return &pIter->record;
}
void clearBrinBlockIter(SBrinRecordIter* pIter) { tBrinBlockDestroy(&pIter->block); }
// initialize the file block access order
// sort the file blocks according to the offset of each data block in the files
static void cleanupBlockOrderSupporter(SBlockOrderSupporter* pSup) {
taosMemoryFreeClear(pSup->numOfBlocksPerTable);
taosMemoryFreeClear(pSup->indexPerTable);
for (int32_t i = 0; i < pSup->numOfTables; ++i) {
SBlockOrderWrapper* pBlockInfo = pSup->pDataBlockInfo[i];
taosMemoryFreeClear(pBlockInfo);
}
taosMemoryFreeClear(pSup->pDataBlockInfo);
}
static int32_t initBlockOrderSupporter(SBlockOrderSupporter* pSup, int32_t numOfTables) {
pSup->numOfBlocksPerTable = taosMemoryCalloc(1, sizeof(int32_t) * numOfTables);
pSup->indexPerTable = taosMemoryCalloc(1, sizeof(int32_t) * numOfTables);
pSup->pDataBlockInfo = taosMemoryCalloc(1, POINTER_BYTES * numOfTables);
if (pSup->numOfBlocksPerTable == NULL || pSup->indexPerTable == NULL || pSup->pDataBlockInfo == NULL) {
cleanupBlockOrderSupporter(pSup);
return TSDB_CODE_OUT_OF_MEMORY;
}
return TSDB_CODE_SUCCESS;
}
static int32_t fileDataBlockOrderCompar(const void* pLeft, const void* pRight, void* param) {
int32_t leftIndex = *(int32_t*)pLeft;
int32_t rightIndex = *(int32_t*)pRight;
SBlockOrderSupporter* pSupporter = (SBlockOrderSupporter*)param;
int32_t leftTableBlockIndex = pSupporter->indexPerTable[leftIndex];
int32_t rightTableBlockIndex = pSupporter->indexPerTable[rightIndex];
if (leftTableBlockIndex > pSupporter->numOfBlocksPerTable[leftIndex]) {
/* left block is empty */
return 1;
} else if (rightTableBlockIndex > pSupporter->numOfBlocksPerTable[rightIndex]) {
/* right block is empty */
return -1;
}
SBlockOrderWrapper* pLeftBlock = &pSupporter->pDataBlockInfo[leftIndex][leftTableBlockIndex];
SBlockOrderWrapper* pRightBlock = &pSupporter->pDataBlockInfo[rightIndex][rightTableBlockIndex];
return pLeftBlock->offset > pRightBlock->offset ? 1 : -1;
}
int32_t initBlockIterator(STsdbReader* pReader, SDataBlockIter* pBlockIter, int32_t numOfBlocks, SArray* pTableList) {
bool asc = ASCENDING_TRAVERSE(pReader->info.order);
SBlockOrderSupporter sup = {0};
pBlockIter->numOfBlocks = numOfBlocks;
taosArrayClear(pBlockIter->blockList);
pBlockIter->pTableMap = pReader->status.pTableMap;
// access data blocks according to the offset of each block in asc/desc order.
int32_t numOfTables = taosArrayGetSize(pTableList);
int64_t st = taosGetTimestampUs();
int32_t code = initBlockOrderSupporter(&sup, numOfTables);
if (code != TSDB_CODE_SUCCESS) {
return code;
}
int32_t cnt = 0;
for (int32_t i = 0; i < numOfTables; ++i) {
STableBlockScanInfo* pTableScanInfo = taosArrayGetP(pTableList, i);
// ASSERT(pTableScanInfo->pBlockList != NULL && taosArrayGetSize(pTableScanInfo->pBlockList) > 0);
size_t num = taosArrayGetSize(pTableScanInfo->pBlockList);
sup.numOfBlocksPerTable[sup.numOfTables] = num;
char* buf = taosMemoryMalloc(sizeof(SBlockOrderWrapper) * num);
if (buf == NULL) {
cleanupBlockOrderSupporter(&sup);
return TSDB_CODE_OUT_OF_MEMORY;
}
sup.pDataBlockInfo[sup.numOfTables] = (SBlockOrderWrapper*)buf;
for (int32_t k = 0; k < num; ++k) {
SBrinRecord* pRecord = taosArrayGet(pTableScanInfo->pBlockList, k);
sup.pDataBlockInfo[sup.numOfTables][k] =
(SBlockOrderWrapper){.uid = pTableScanInfo->uid, .offset = pRecord->blockOffset, .pInfo = pTableScanInfo};
cnt++;
}
sup.numOfTables += 1;
}
if (numOfBlocks != cnt && sup.numOfTables != numOfTables) {
cleanupBlockOrderSupporter(&sup);
return TSDB_CODE_INVALID_PARA;
}
// since there is only one table qualified, blocks are not sorted
if (sup.numOfTables == 1) {
for (int32_t i = 0; i < numOfBlocks; ++i) {
SFileDataBlockInfo blockInfo = {.uid = sup.pDataBlockInfo[0][i].uid, .tbBlockIdx = i};
blockInfo.record = *(SBrinRecord*)taosArrayGet(sup.pDataBlockInfo[0][i].pInfo->pBlockList, i);
taosArrayPush(pBlockIter->blockList, &blockInfo);
}
int64_t et = taosGetTimestampUs();
tsdbDebug("%p create blocks info struct completed for one table, %d blocks not sorted, elapsed time:%.2f ms %s",
pReader, numOfBlocks, (et - st) / 1000.0, pReader->idStr);
pBlockIter->index = asc ? 0 : (numOfBlocks - 1);
cleanupBlockOrderSupporter(&sup);
return TSDB_CODE_SUCCESS;
}
tsdbDebug("%p create data blocks info struct completed, %d blocks in %d tables %s", pReader, cnt, sup.numOfTables,
pReader->idStr);
SMultiwayMergeTreeInfo* pTree = NULL;
uint8_t ret = tMergeTreeCreate(&pTree, sup.numOfTables, &sup, fileDataBlockOrderCompar);
if (ret != TSDB_CODE_SUCCESS) {
cleanupBlockOrderSupporter(&sup);
return TSDB_CODE_OUT_OF_MEMORY;
}
int32_t numOfTotal = 0;
while (numOfTotal < cnt) {
int32_t pos = tMergeTreeGetChosenIndex(pTree);
int32_t index = sup.indexPerTable[pos]++;
SFileDataBlockInfo blockInfo = {.uid = sup.pDataBlockInfo[pos][index].uid, .tbBlockIdx = index};
blockInfo.record = *(SBrinRecord*)taosArrayGet(sup.pDataBlockInfo[pos][index].pInfo->pBlockList, index);
taosArrayPush(pBlockIter->blockList, &blockInfo);
// set data block index overflow, in order to disable the offset comparator
if (sup.indexPerTable[pos] >= sup.numOfBlocksPerTable[pos]) {
sup.indexPerTable[pos] = sup.numOfBlocksPerTable[pos] + 1;
}
numOfTotal += 1;
tMergeTreeAdjust(pTree, tMergeTreeGetAdjustIndex(pTree));
}
int64_t et = taosGetTimestampUs();
tsdbDebug("%p %d data blocks access order completed, elapsed time:%.2f ms %s", pReader, numOfBlocks,
(et - st) / 1000.0, pReader->idStr);
cleanupBlockOrderSupporter(&sup);
taosMemoryFree(pTree);
pBlockIter->index = asc ? 0 : (numOfBlocks - 1);
return TSDB_CODE_SUCCESS;
}
bool blockIteratorNext(SDataBlockIter* pBlockIter, const char* idStr) {
bool asc = ASCENDING_TRAVERSE(pBlockIter->order);
int32_t step = asc ? 1 : -1;
if ((pBlockIter->index >= pBlockIter->numOfBlocks - 1 && asc) || (pBlockIter->index <= 0 && (!asc))) {
return false;
}
pBlockIter->index += step;
return true;
}
typedef enum {
BLK_CHECK_CONTINUE = 0x1,
BLK_CHECK_QUIT = 0x2,
} ETombBlkCheckEnum;
static int32_t doCheckTombBlock(STombBlock* pBlock, STsdbReader* pReader, int32_t numOfTables, int32_t* j,
STableBlockScanInfo** pScanInfo, ETombBlkCheckEnum* pRet) {
int32_t code = 0;
STombRecord record = {0};
uint64_t uid = pReader->status.uidList.tableUidList[*j];
for (int32_t k = 0; k < TARRAY2_SIZE(pBlock->suid); ++k) {
code = tTombBlockGet(pBlock, k, &record);
if (code != TSDB_CODE_SUCCESS) {
*pRet = BLK_CHECK_QUIT;
return code;
}
if (record.suid < pReader->info.suid) {
continue;
}
if (record.suid > pReader->info.suid) {
*pRet = BLK_CHECK_QUIT;
return TSDB_CODE_SUCCESS;
}
bool newTable = false;
if (uid < record.uid) {
while ((*j) < numOfTables && pReader->status.uidList.tableUidList[*j] < record.uid) {
(*j) += 1;
newTable = true;
}
if ((*j) >= numOfTables) {
*pRet = BLK_CHECK_QUIT;
return TSDB_CODE_SUCCESS;
}
uid = pReader->status.uidList.tableUidList[*j];
}
if (record.uid < uid) {
continue;
}
ASSERT(record.suid == pReader->info.suid && uid == record.uid);
if (newTable) {
(*pScanInfo) = getTableBlockScanInfo(pReader->status.pTableMap, uid, pReader->idStr);
if ((*pScanInfo)->pfileDelData == NULL) {
(*pScanInfo)->pfileDelData = taosArrayInit(4, sizeof(SDelData));
}
}
if (record.version <= pReader->info.verRange.maxVer) {
SDelData delData = {.version = record.version, .sKey = record.skey, .eKey = record.ekey};
taosArrayPush((*pScanInfo)->pfileDelData, &delData);
}
}
*pRet = BLK_CHECK_CONTINUE;
return TSDB_CODE_SUCCESS;
}
// load tomb data API
static int32_t doLoadTombDataFromTombBlk(const TTombBlkArray* pTombBlkArray, STsdbReader* pReader, void* pFileReader,
bool isFile) {
int32_t code = 0;
STableUidList* pList = &pReader->status.uidList;
int32_t numOfTables = tSimpleHashGetSize(pReader->status.pTableMap);
int32_t i = 0, j = 0;
while (i < pTombBlkArray->size && j < numOfTables) {
STombBlk* pTombBlk = &pTombBlkArray->data[i];
if (pTombBlk->maxTbid.suid < pReader->info.suid) {
i += 1;
continue;
}
if (pTombBlk->minTbid.suid > pReader->info.suid) {
break;
}
ASSERT(pTombBlk->minTbid.suid <= pReader->info.suid && pTombBlk->maxTbid.suid >= pReader->info.suid);
if (pTombBlk->maxTbid.suid == pReader->info.suid && pTombBlk->maxTbid.uid < pList->tableUidList[0]) {
i += 1;
continue;
}
if (pTombBlk->minTbid.suid == pReader->info.suid && pTombBlk->minTbid.uid > pList->tableUidList[numOfTables - 1]) {
break;
}
STombBlock block = {0};
code = isFile ? tsdbDataFileReadTombBlock(pFileReader, &pTombBlkArray->data[i], &block)
: tsdbSttFileReadTombBlock(pFileReader, &pTombBlkArray->data[i], &block);
if (code != TSDB_CODE_SUCCESS) {
return code;
}
uint64_t uid = pReader->status.uidList.tableUidList[j];
STableBlockScanInfo* pScanInfo = getTableBlockScanInfo(pReader->status.pTableMap, uid, pReader->idStr);
if (pScanInfo->pfileDelData == NULL) {
pScanInfo->pfileDelData = taosArrayInit(4, sizeof(SDelData));
}
ETombBlkCheckEnum ret = 0;
code = doCheckTombBlock(&block, pReader, numOfTables, &j, &pScanInfo, &ret);
tTombBlockDestroy(&block);
if (code != TSDB_CODE_SUCCESS || ret == BLK_CHECK_QUIT) {
return code;
}
i += 1;
}
return TSDB_CODE_SUCCESS;
}
int32_t loadDataFileTombDataForAll(STsdbReader* pReader) {
if (pReader->status.pCurrentFileset == NULL || pReader->status.pCurrentFileset->farr[3] == NULL) {
return TSDB_CODE_SUCCESS;
}
const TTombBlkArray* pBlkArray = NULL;
int32_t code = tsdbDataFileReadTombBlk(pReader->pFileReader, &pBlkArray);
if (code != TSDB_CODE_SUCCESS) {
return code;
}
return doLoadTombDataFromTombBlk(pBlkArray, pReader, pReader->pFileReader, true);
}
int32_t loadSttTombDataForAll(STsdbReader* pReader, SSttFileReader* pSttFileReader, SSttBlockLoadInfo* pLoadInfo) {
const TTombBlkArray* pBlkArray = NULL;
int32_t code = tsdbSttFileReadTombBlk(pSttFileReader, &pBlkArray);
if (code != TSDB_CODE_SUCCESS) {
return code;
}
return doLoadTombDataFromTombBlk(pBlkArray, pReader, pSttFileReader, false);
}
void loadMemTombData(SArray** ppMemDelData, STbData* pMemTbData, STbData* piMemTbData, int64_t ver) {
if (*ppMemDelData == NULL) {
*ppMemDelData = taosArrayInit(4, sizeof(SDelData));
}
SArray* pMemDelData = *ppMemDelData;
SDelData* p = NULL;
if (pMemTbData != NULL) {
p = pMemTbData->pHead;
while (p) {
if (p->version <= ver) {
taosArrayPush(pMemDelData, p);
}
p = p->pNext;
}
}
if (piMemTbData != NULL) {
p = piMemTbData->pHead;
while (p) {
if (p->version <= ver) {
taosArrayPush(pMemDelData, p);
}
p = p->pNext;
}
}
}

View File

@ -0,0 +1,248 @@
/*
* Copyright (c) 2019 TAOS Data, Inc. <jhtao@taosdata.com>
*
* This program is free software: you can use, redistribute, and/or modify
* it under the terms of the GNU Affero General Public License, version 3
* or later ("AGPL"), as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef TDENGINE_TSDBREADUTIL_H
#define TDENGINE_TSDBREADUTIL_H
#ifdef __cplusplus
extern "C" {
#endif
#include "tsdbDataFileRW.h"
#include "tsdbUtil2.h"
#define ASCENDING_TRAVERSE(o) (o == TSDB_ORDER_ASC)
typedef enum {
READER_STATUS_SUSPEND = 0x1,
READER_STATUS_NORMAL = 0x2,
} EReaderStatus;
typedef enum {
EXTERNAL_ROWS_PREV = 0x1,
EXTERNAL_ROWS_MAIN = 0x2,
EXTERNAL_ROWS_NEXT = 0x3,
} EContentData;
typedef struct SBlockInfoBuf {
int32_t currentIndex;
SArray* pData;
int32_t numPerBucket;
int32_t numOfTables;
} SBlockInfoBuf;
typedef struct {
STbDataIter* iter;
int32_t index;
bool hasVal;
} SIterInfo;
typedef struct STableBlockScanInfo {
uint64_t uid;
TSKEY lastKey;
TSKEY lastKeyInStt; // last accessed key in stt
SArray* pBlockList; // block data index list, SArray<SBrinRecord>
SArray* pMemDelData; // SArray<SDelData>
SArray* pfileDelData; // SArray<SDelData> from each file set
SIterInfo iter; // mem buffer skip list iterator
SIterInfo iiter; // imem buffer skip list iterator
SArray* delSkyline; // delete info for this table
int32_t fileDelIndex; // file block delete index
int32_t lastBlockDelIndex; // delete index for last block
bool iterInit; // whether to initialize the in-memory skip list iterator or not
} STableBlockScanInfo;
typedef struct SResultBlockInfo {
SSDataBlock* pResBlock;
bool freeBlock;
int64_t capacity;
} SResultBlockInfo;
typedef struct SCostSummary {
int64_t numOfBlocks;
double blockLoadTime;
double buildmemBlock;
int64_t headFileLoad;
double headFileLoadTime;
int64_t smaDataLoad;
double smaLoadTime;
int64_t lastBlockLoad;
double lastBlockLoadTime;
int64_t composedBlocks;
double buildComposedBlockTime;
double createScanInfoList;
double createSkylineIterTime;
double initLastBlockReader;
} SCostSummary;
typedef struct STableUidList {
uint64_t* tableUidList; // access table uid list in uid ascending order list
int32_t currentIndex; // index in table uid list
} STableUidList;
typedef struct {
int32_t numOfBlocks;
int32_t numOfLastFiles;
} SBlockNumber;
typedef struct SBlockIndex {
int32_t ordinalIndex;
int64_t inFileOffset;
STimeWindow window; // todo replace it with overlap flag.
} SBlockIndex;
typedef struct SBlockOrderWrapper {
int64_t uid;
int64_t offset;
STableBlockScanInfo* pInfo;
} SBlockOrderWrapper;
typedef struct SBlockOrderSupporter {
SBlockOrderWrapper** pDataBlockInfo;
int32_t* indexPerTable;
int32_t* numOfBlocksPerTable;
int32_t numOfTables;
} SBlockOrderSupporter;
typedef struct SBlockLoadSuppInfo {
TColumnDataAggArray colAggArray;
SColumnDataAgg tsColAgg;
int16_t* colId;
int16_t* slotId;
int32_t numOfCols;
char** buildBuf; // build string tmp buffer, todo remove it later after all string format being updated.
bool smaValid; // the sma on all queried columns are activated
} SBlockLoadSuppInfo;
typedef struct SLastBlockReader {
STimeWindow window;
SVersionRange verRange;
int32_t order;
uint64_t uid;
SMergeTree mergeTree;
SSttBlockLoadInfo* pInfo;
int64_t currentKey;
} SLastBlockReader;
typedef struct SFilesetIter {
int32_t numOfFiles; // number of total files
int32_t index; // current accessed index in the list
TFileSetArray* pFilesetList; // data file set list
int32_t order;
SLastBlockReader* pLastBlockReader; // last file block reader
} SFilesetIter;
typedef struct SFileDataBlockInfo {
// index position in STableBlockScanInfo in order to check whether neighbor block overlaps with it
uint64_t uid;
int32_t tbBlockIdx;
SBrinRecord record;
} SFileDataBlockInfo;
typedef struct SDataBlockIter {
int32_t numOfBlocks;
int32_t index;
SArray* blockList; // SArray<SFileDataBlockInfo>
int32_t order;
SDataBlk block; // current SDataBlk data
SSHashObj* pTableMap;
} SDataBlockIter;
typedef struct SFileBlockDumpInfo {
int32_t totalRows;
int32_t rowIndex;
int64_t lastKey;
bool allDumped;
} SFileBlockDumpInfo;
typedef struct SReaderStatus {
bool loadFromFile; // check file stage
bool composedDataBlock; // the returned data block is a composed block or not
SSHashObj* pTableMap; // SHash<STableBlockScanInfo>
STableBlockScanInfo** pTableIter; // table iterator used in building in-memory buffer data blocks.
STableUidList uidList; // check tables in uid order, to avoid the repeatly load of blocks in STT.
SFileBlockDumpInfo fBlockDumpInfo;
STFileSet* pCurrentFileset; // current opened file set
SBlockData fileBlockData;
SFilesetIter fileIter;
SDataBlockIter blockIter;
SArray* pLDataIterArray;
SRowMerger merger;
SColumnInfoData* pPrimaryTsCol; // primary time stamp output col info data
} SReaderStatus;
struct STsdbReader {
STsdb* pTsdb;
STsdbReaderInfo info;
TdThreadMutex readerMutex;
EReaderStatus flag;
int32_t code;
uint64_t rowsNum;
SResultBlockInfo resBlockInfo;
SReaderStatus status;
char* idStr; // query info handle, for debug purpose
int32_t type; // query type: 1. retrieve all data blocks, 2. retrieve direct prev|next rows
SBlockLoadSuppInfo suppInfo;
STsdbReadSnap* pReadSnap;
SCostSummary cost;
SHashObj** pIgnoreTables;
SSHashObj* pSchemaMap; // keep the retrieved schema info, to avoid the overhead by repeatly load schema
SDataFileReader* pFileReader; // the file reader
SBlockInfoBuf blockInfoBuf;
EContentData step;
STsdbReader* innerReader[2];
};
typedef struct SBrinRecordIter {
SArray* pBrinBlockList;
SBrinBlk* pCurrentBlk;
int32_t blockIndex;
int32_t recordIndex;
SDataFileReader* pReader;
SBrinBlock block;
SBrinRecord record;
} SBrinRecordIter;
STableBlockScanInfo* getTableBlockScanInfo(SSHashObj* pTableMap, uint64_t uid, const char* id);
SSHashObj* createDataBlockScanInfo(STsdbReader* pTsdbReader, SBlockInfoBuf* pBuf, const STableKeyInfo* idList,
STableUidList* pUidList, int32_t numOfTables);
void clearBlockScanInfo(STableBlockScanInfo* p);
void destroyAllBlockScanInfo(SSHashObj* pTableMap);
void resetAllDataBlockScanInfo(SSHashObj* pTableMap, int64_t ts, int32_t step);
void cleanupInfoFoxNextFileset(SSHashObj* pTableMap);
int32_t ensureBlockScanInfoBuf(SBlockInfoBuf* pBuf, int32_t numOfTables);
void clearBlockScanInfoBuf(SBlockInfoBuf* pBuf);
void* getPosInBlockInfoBuf(SBlockInfoBuf* pBuf, int32_t index);
// brin records iterator
void initBrinRecordIter(SBrinRecordIter* pIter, SDataFileReader* pReader, SArray* pList);
SBrinRecord* getNextBrinRecord(SBrinRecordIter* pIter);
void clearBrinBlockIter(SBrinRecordIter* pIter);
// initialize block iterator API
int32_t initBlockIterator(STsdbReader* pReader, SDataBlockIter* pBlockIter, int32_t numOfBlocks, SArray* pTableList);
bool blockIteratorNext(SDataBlockIter* pBlockIter, const char* idStr);
// load tomb data API (stt/mem only for one table each, tomb data from data files are load for all tables at one time)
void loadMemTombData(SArray** ppMemDelData, STbData* pMemTbData, STbData* piMemTbData, int64_t ver);
int32_t loadDataFileTombDataForAll(STsdbReader* pReader);
int32_t loadSttTombDataForAll(STsdbReader* pReader, SSttFileReader* pSttFileReader, SSttBlockLoadInfo* pLoadInfo);
#ifdef __cplusplus
}
#endif
#endif // TDENGINE_TSDBREADUTIL_H

View File

@ -16,7 +16,7 @@
#include "tsdb.h"
// =============== PAGE-WISE FILE ===============
static int32_t tsdbOpenFile(const char *path, int32_t szPage, int32_t flag, STsdbFD **ppFD) {
int32_t tsdbOpenFile(const char *path, int32_t szPage, int32_t flag, STsdbFD **ppFD) {
int32_t code = 0;
STsdbFD *pFD = NULL;
@ -68,7 +68,7 @@ _exit:
return code;
}
static void tsdbCloseFile(STsdbFD **ppFD) {
void tsdbCloseFile(STsdbFD **ppFD) {
STsdbFD *pFD = *ppFD;
if (pFD) {
taosMemoryFree(pFD->pBuf);
@ -141,7 +141,7 @@ _exit:
return code;
}
static int32_t tsdbWriteFile(STsdbFD *pFD, int64_t offset, const uint8_t *pBuf, int64_t size) {
int32_t tsdbWriteFile(STsdbFD *pFD, int64_t offset, const uint8_t *pBuf, int64_t size) {
int32_t code = 0;
int64_t fOffset = LOGIC_TO_FILE_OFFSET(offset, pFD->szPage);
int64_t pgno = OFFSET_PGNO(fOffset, pFD->szPage);
@ -173,7 +173,7 @@ _exit:
return code;
}
static int32_t tsdbReadFile(STsdbFD *pFD, int64_t offset, uint8_t *pBuf, int64_t size) {
int32_t tsdbReadFile(STsdbFD *pFD, int64_t offset, uint8_t *pBuf, int64_t size) {
int32_t code = 0;
int64_t n = 0;
int64_t fOffset = LOGIC_TO_FILE_OFFSET(offset, pFD->szPage);
@ -202,7 +202,7 @@ _exit:
return code;
}
static int32_t tsdbFsyncFile(STsdbFD *pFD) {
int32_t tsdbFsyncFile(STsdbFD *pFD) {
int32_t code = 0;
code = tsdbWriteFilePage(pFD);
@ -1489,7 +1489,7 @@ int32_t tsdbDelFReaderClose(SDelFReader **ppReader) {
}
int32_t tsdbReadDelData(SDelFReader *pReader, SDelIdx *pDelIdx, SArray *aDelData) {
return tsdbReadDelDatav1(pReader, pDelIdx, aDelData, INT64_MAX);
return tsdbReadDelDatav1(pReader, pDelIdx, aDelData, INT64_MAX);
}
int32_t tsdbReadDelDatav1(SDelFReader *pReader, SDelIdx *pDelIdx, SArray *aDelData, int64_t maxVer) {
@ -1517,10 +1517,10 @@ int32_t tsdbReadDelDatav1(SDelFReader *pReader, SDelIdx *pDelIdx, SArray *aDelDa
if (delData.version > maxVer) {
continue;
}
if (taosArrayPush(aDelData, &delData) == NULL) {
code = TSDB_CODE_OUT_OF_MEMORY;
goto _err;
}
if (taosArrayPush(aDelData, &delData) == NULL) {
code = TSDB_CODE_OUT_OF_MEMORY;
goto _err;
}
}
ASSERT(n == size);

View File

@ -14,101 +14,265 @@
*/
#include "tsdb.h"
#include "tsdbFS2.h"
static bool tsdbShouldDoRetentionImpl(STsdb *pTsdb, int64_t now) {
for (int32_t iSet = 0; iSet < taosArrayGetSize(pTsdb->fs.aDFileSet); iSet++) {
SDFileSet *pSet = (SDFileSet *)taosArrayGet(pTsdb->fs.aDFileSet, iSet);
int32_t expLevel = tsdbFidLevel(pSet->fid, &pTsdb->keepCfg, now);
SDiskID did;
typedef struct {
STsdb *tsdb;
int32_t szPage;
int64_t now;
int64_t cid;
if (expLevel == pSet->diskId.level) continue;
TFileSetArray *fsetArr;
TFileOpArray fopArr[1];
if (expLevel < 0) {
return true;
} else {
if (tfsAllocDisk(pTsdb->pVnode->pTfs, expLevel, &did) < 0) {
return false;
}
struct {
int32_t fsetArrIdx;
STFileSet *fset;
} ctx[1];
} SRTNer;
if (did.level == pSet->diskId.level) continue;
static int32_t tsdbDoRemoveFileObject(SRTNer *rtner, const STFileObj *fobj) {
STFileOp op = {
.optype = TSDB_FOP_REMOVE,
.fid = fobj->f->fid,
.of = fobj->f[0],
};
return true;
}
}
return false;
}
bool tsdbShouldDoRetention(STsdb *pTsdb, int64_t now) {
bool should;
taosThreadRwlockRdlock(&pTsdb->rwLock);
should = tsdbShouldDoRetentionImpl(pTsdb, now);
taosThreadRwlockUnlock(&pTsdb->rwLock);
return should;
return TARRAY2_APPEND(rtner->fopArr, op);
}
int32_t tsdbDoRetention(STsdb *pTsdb, int64_t now) {
static int32_t tsdbDoCopyFile(SRTNer *rtner, const STFileObj *from, const STFile *to) {
int32_t code = 0;
int32_t lino = 0;
STsdbFS fs = {0};
code = tsdbFSCopy(pTsdb, &fs);
char fname[TSDB_FILENAME_LEN];
TdFilePtr fdFrom = NULL;
TdFilePtr fdTo = NULL;
tsdbTFileName(rtner->tsdb, to, fname);
fdFrom = taosOpenFile(from->fname, TD_FILE_READ);
if (fdFrom == NULL) code = terrno;
TSDB_CHECK_CODE(code, lino, _exit);
for (int32_t iSet = 0; iSet < taosArrayGetSize(fs.aDFileSet); iSet++) {
SDFileSet *pSet = (SDFileSet *)taosArrayGet(fs.aDFileSet, iSet);
int32_t expLevel = tsdbFidLevel(pSet->fid, &pTsdb->keepCfg, now);
SDiskID did;
fdTo = taosOpenFile(fname, TD_FILE_WRITE | TD_FILE_CREATE | TD_FILE_TRUNC);
if (fdTo == NULL) code = terrno;
TSDB_CHECK_CODE(code, lino, _exit);
if (expLevel < 0) {
taosMemoryFree(pSet->pHeadF);
taosMemoryFree(pSet->pDataF);
taosMemoryFree(pSet->pSmaF);
for (int32_t iStt = 0; iStt < pSet->nSttF; iStt++) {
taosMemoryFree(pSet->aSttF[iStt]);
}
taosArrayRemove(fs.aDFileSet, iSet);
iSet--;
} else {
if (expLevel == 0) continue;
if (tfsAllocDisk(pTsdb->pVnode->pTfs, expLevel, &did) < 0) {
code = terrno;
goto _exit;
}
if (did.level == pSet->diskId.level) continue;
// copy file to new disk (todo)
SDFileSet fSet = *pSet;
fSet.diskId = did;
code = tsdbDFileSetCopy(pTsdb, pSet, &fSet);
TSDB_CHECK_CODE(code, lino, _exit);
code = tsdbFSUpsertFSet(&fs, &fSet);
TSDB_CHECK_CODE(code, lino, _exit);
}
int64_t n = taosFSendFile(fdTo, fdFrom, 0, tsdbLogicToFileSize(from->f->size, rtner->szPage));
if (n < 0) {
code = TAOS_SYSTEM_ERROR(errno);
TSDB_CHECK_CODE(code, lino, _exit);
}
taosCloseFile(&fdFrom);
taosCloseFile(&fdTo);
// do change fs
code = tsdbFSPrepareCommit(pTsdb, &fs);
_exit:
if (code) {
TSDB_ERROR_LOG(TD_VID(rtner->tsdb->pVnode), lino, code);
taosCloseFile(&fdFrom);
taosCloseFile(&fdTo);
}
return code;
}
static int32_t tsdbDoMigrateFileObj(SRTNer *rtner, const STFileObj *fobj, const SDiskID *did) {
int32_t code = 0;
int32_t lino = 0;
STFileOp op = {0};
// remove old
op = (STFileOp){
.optype = TSDB_FOP_REMOVE,
.fid = fobj->f->fid,
.of = fobj->f[0],
};
code = TARRAY2_APPEND(rtner->fopArr, op);
TSDB_CHECK_CODE(code, lino, _exit);
// create new
op = (STFileOp){
.optype = TSDB_FOP_CREATE,
.fid = fobj->f->fid,
.nf =
{
.type = fobj->f->type,
.did = did[0],
.fid = fobj->f->fid,
.cid = rtner->cid,
.size = fobj->f->size,
.stt[0] =
{
.level = fobj->f->stt[0].level,
},
},
};
code = TARRAY2_APPEND(rtner->fopArr, op);
TSDB_CHECK_CODE(code, lino, _exit);
// do copy the file
code = tsdbDoCopyFile(rtner, fobj, &op.nf);
TSDB_CHECK_CODE(code, lino, _exit);
_exit:
if (code) {
tsdbError("vgId:%d %s failed at line %d since %s", TD_VID(pTsdb->pVnode), __func__, lino, tstrerror(code));
} else {
tsdbInfo("vgId:%d %s done", TD_VID(pTsdb->pVnode), __func__);
TSDB_ERROR_LOG(TD_VID(rtner->tsdb->pVnode), lino, code);
}
tsdbFSDestroy(&fs);
return code;
}
static int32_t tsdbCommitRetentionImpl(STsdb *pTsdb) { return tsdbFSCommit(pTsdb); }
typedef struct {
STsdb *tsdb;
int64_t now;
} SRtnArg;
int32_t tsdbCommitRetention(STsdb *pTsdb) {
taosThreadRwlockWrlock(&pTsdb->rwLock);
tsdbCommitRetentionImpl(pTsdb);
taosThreadRwlockUnlock(&pTsdb->rwLock);
tsdbInfo("vgId:%d %s done", TD_VID(pTsdb->pVnode), __func__);
return 0;
static int32_t tsdbDoRetentionBegin(SRtnArg *arg, SRTNer *rtner) {
int32_t code = 0;
int32_t lino = 0;
STsdb *tsdb = arg->tsdb;
rtner->tsdb = tsdb;
rtner->szPage = tsdb->pVnode->config.tsdbPageSize;
rtner->now = arg->now;
rtner->cid = tsdbFSAllocEid(tsdb->pFS);
code = tsdbFSCreateCopySnapshot(tsdb->pFS, &rtner->fsetArr);
TSDB_CHECK_CODE(code, lino, _exit);
_exit:
if (code) {
TSDB_ERROR_LOG(TD_VID(rtner->tsdb->pVnode), lino, code);
} else {
tsdbInfo("vid:%d, cid:%" PRId64 ", %s done", TD_VID(rtner->tsdb->pVnode), rtner->cid, __func__);
}
return code;
}
static int32_t tsdbDoRetentionEnd(SRTNer *rtner) {
int32_t code = 0;
int32_t lino = 0;
if (TARRAY2_SIZE(rtner->fopArr) == 0) goto _exit;
code = tsdbFSEditBegin(rtner->tsdb->pFS, rtner->fopArr, TSDB_FEDIT_MERGE);
TSDB_CHECK_CODE(code, lino, _exit);
taosThreadRwlockWrlock(&rtner->tsdb->rwLock);
code = tsdbFSEditCommit(rtner->tsdb->pFS);
if (code) {
taosThreadRwlockUnlock(&rtner->tsdb->rwLock);
TSDB_CHECK_CODE(code, lino, _exit);
}
taosThreadRwlockUnlock(&rtner->tsdb->rwLock);
TARRAY2_DESTROY(rtner->fopArr, NULL);
_exit:
if (code) {
TSDB_ERROR_LOG(TD_VID(rtner->tsdb->pVnode), lino, code);
} else {
tsdbInfo("vid:%d, cid:%" PRId64 ", %s done", TD_VID(rtner->tsdb->pVnode), rtner->cid, __func__);
}
tsdbFSDestroyCopySnapshot(&rtner->fsetArr);
return code;
}
static int32_t tsdbDoRetention2(void *arg) {
int32_t code = 0;
int32_t lino = 0;
SRTNer rtner[1] = {0};
code = tsdbDoRetentionBegin(arg, rtner);
TSDB_CHECK_CODE(code, lino, _exit);
for (rtner->ctx->fsetArrIdx = 0; rtner->ctx->fsetArrIdx < TARRAY2_SIZE(rtner->fsetArr); rtner->ctx->fsetArrIdx++) {
rtner->ctx->fset = TARRAY2_GET(rtner->fsetArr, rtner->ctx->fsetArrIdx);
STFileObj *fobj;
int32_t expLevel = tsdbFidLevel(rtner->ctx->fset->fid, &rtner->tsdb->keepCfg, rtner->now);
if (expLevel < 0) { // remove the file set
for (int32_t ftype = 0; (ftype < TSDB_FTYPE_MAX) && (fobj = rtner->ctx->fset->farr[ftype], 1); ++ftype) {
if (fobj == NULL) continue;
code = tsdbDoRemoveFileObject(rtner, fobj);
TSDB_CHECK_CODE(code, lino, _exit);
}
SSttLvl *lvl;
TARRAY2_FOREACH(rtner->ctx->fset->lvlArr, lvl) {
TARRAY2_FOREACH(lvl->fobjArr, fobj) {
code = tsdbDoRemoveFileObject(rtner, fobj);
TSDB_CHECK_CODE(code, lino, _exit);
}
}
} else if (expLevel == 0) {
continue;
} else {
SDiskID did;
if (tfsAllocDisk(rtner->tsdb->pVnode->pTfs, expLevel, &did) < 0) {
code = terrno;
TSDB_CHECK_CODE(code, lino, _exit);
}
tfsMkdirRecurAt(rtner->tsdb->pVnode->pTfs, rtner->tsdb->path, did);
// data
for (int32_t ftype = 0; ftype < TSDB_FTYPE_MAX && (fobj = rtner->ctx->fset->farr[ftype], 1); ++ftype) {
if (fobj == NULL) continue;
if (fobj->f->did.level == did.level) continue;
code = tsdbDoMigrateFileObj(rtner, fobj, &did);
TSDB_CHECK_CODE(code, lino, _exit);
}
// stt
SSttLvl *lvl;
TARRAY2_FOREACH(rtner->ctx->fset->lvlArr, lvl) {
TARRAY2_FOREACH(lvl->fobjArr, fobj) {
if (fobj->f->did.level == did.level) continue;
code = tsdbDoMigrateFileObj(rtner, fobj, &did);
TSDB_CHECK_CODE(code, lino, _exit);
}
}
}
}
code = tsdbDoRetentionEnd(rtner);
TSDB_CHECK_CODE(code, lino, _exit);
_exit:
if (code) {
TSDB_ERROR_LOG(TD_VID(rtner->tsdb->pVnode), lino, code);
}
taosMemoryFree(arg);
return code;
}
int32_t tsdbAsyncRetention(STsdb *tsdb, int64_t now, int64_t *taskid) {
SRtnArg *arg = taosMemoryMalloc(sizeof(*arg));
if (arg == NULL) return TSDB_CODE_OUT_OF_MEMORY;
arg->tsdb = tsdb;
arg->now = now;
int32_t code = tsdbFSScheduleBgTask(tsdb->pFS, TSDB_BG_TASK_RETENTION, tsdbDoRetention2, arg, taskid);
if (code) taosMemoryFree(arg);
return code;
}
int32_t tsdbSyncRetention(STsdb *tsdb, int64_t now) {
int64_t taskid;
int32_t code = tsdbAsyncRetention(tsdb, now, &taskid);
if (code) return code;
return tsdbFSWaitBgTask(tsdb->pFS, taskid);
}

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,982 @@
/*
* Copyright (c) 2019 TAOS Data, Inc. <jhtao@taosdata.com>
*
* This program is free software: you can use, redistribute, and/or modify
* it under the terms of the GNU Affero General Public License, version 3
* or later ("AGPL"), as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include "tsdbSttFileRW.h"
// SSttFReader ============================================================
struct SSttFileReader {
SSttFileReaderConfig config[1];
STsdbFD *fd;
SSttFooter footer[1];
struct {
bool sttBlkLoaded;
bool statisBlkLoaded;
bool tombBlkLoaded;
} ctx[1];
TSttBlkArray sttBlkArray[1];
TStatisBlkArray statisBlkArray[1];
TTombBlkArray tombBlkArray[1];
uint8_t *bufArr[5];
};
// SSttFileReader
int32_t tsdbSttFileReaderOpen(const char *fname, const SSttFileReaderConfig *config, SSttFileReader **reader) {
int32_t code = 0;
int32_t lino = 0;
reader[0] = taosMemoryCalloc(1, sizeof(*reader[0]));
if (reader[0] == NULL) return TSDB_CODE_OUT_OF_MEMORY;
reader[0]->config[0] = config[0];
if (reader[0]->config->bufArr == NULL) {
reader[0]->config->bufArr = reader[0]->bufArr;
}
// open file
if (fname) {
code = tsdbOpenFile(fname, config->szPage, TD_FILE_READ, &reader[0]->fd);
TSDB_CHECK_CODE(code, lino, _exit);
} else {
char fname1[TSDB_FILENAME_LEN];
tsdbTFileName(config->tsdb, config->file, fname1);
code = tsdbOpenFile(fname1, config->szPage, TD_FILE_READ, &reader[0]->fd);
TSDB_CHECK_CODE(code, lino, _exit);
}
// // open each segment reader
int64_t offset = config->file->size - sizeof(SSttFooter);
ASSERT(offset >= TSDB_FHDR_SIZE);
code = tsdbReadFile(reader[0]->fd, offset, (uint8_t *)(reader[0]->footer), sizeof(SSttFooter));
TSDB_CHECK_CODE(code, lino, _exit);
_exit:
if (code) {
TSDB_ERROR_LOG(TD_VID(config->tsdb->pVnode), lino, code);
tsdbSttFileReaderClose(reader);
}
return code;
}
int32_t tsdbSttFileReaderClose(SSttFileReader **reader) {
if (reader[0]) {
for (int32_t i = 0; i < ARRAY_SIZE(reader[0]->bufArr); ++i) {
tFree(reader[0]->bufArr[i]);
}
tsdbCloseFile(&reader[0]->fd);
TARRAY2_DESTROY(reader[0]->tombBlkArray, NULL);
TARRAY2_DESTROY(reader[0]->statisBlkArray, NULL);
TARRAY2_DESTROY(reader[0]->sttBlkArray, NULL);
taosMemoryFree(reader[0]);
reader[0] = NULL;
}
return 0;
}
// SSttFSegReader
int32_t tsdbSttFileReadStatisBlk(SSttFileReader *reader, const TStatisBlkArray **statisBlkArray) {
if (!reader->ctx->statisBlkLoaded) {
if (reader->footer->statisBlkPtr->size > 0) {
ASSERT(reader->footer->statisBlkPtr->size % sizeof(SStatisBlk) == 0);
int32_t size = reader->footer->statisBlkPtr->size / sizeof(SStatisBlk);
void *data = taosMemoryMalloc(reader->footer->statisBlkPtr->size);
if (!data) return TSDB_CODE_OUT_OF_MEMORY;
int32_t code =
tsdbReadFile(reader->fd, reader->footer->statisBlkPtr->offset, data, reader->footer->statisBlkPtr->size);
if (code) {
taosMemoryFree(data);
return code;
}
TARRAY2_INIT_EX(reader->statisBlkArray, size, size, data);
} else {
TARRAY2_INIT(reader->statisBlkArray);
}
reader->ctx->statisBlkLoaded = true;
}
statisBlkArray[0] = reader->statisBlkArray;
return 0;
}
int32_t tsdbSttFileReadTombBlk(SSttFileReader *reader, const TTombBlkArray **tombBlkArray) {
if (!reader->ctx->tombBlkLoaded) {
if (reader->footer->tombBlkPtr->size > 0) {
ASSERT(reader->footer->tombBlkPtr->size % sizeof(STombBlk) == 0);
int32_t size = reader->footer->tombBlkPtr->size / sizeof(STombBlk);
void *data = taosMemoryMalloc(reader->footer->tombBlkPtr->size);
if (!data) return TSDB_CODE_OUT_OF_MEMORY;
int32_t code =
tsdbReadFile(reader->fd, reader->footer->tombBlkPtr->offset, data, reader->footer->tombBlkPtr->size);
if (code) {
taosMemoryFree(data);
return code;
}
TARRAY2_INIT_EX(reader->tombBlkArray, size, size, data);
} else {
TARRAY2_INIT(reader->tombBlkArray);
}
reader->ctx->tombBlkLoaded = true;
}
tombBlkArray[0] = reader->tombBlkArray;
return 0;
}
int32_t tsdbSttFileReadSttBlk(SSttFileReader *reader, const TSttBlkArray **sttBlkArray) {
if (!reader->ctx->sttBlkLoaded) {
if (reader->footer->sttBlkPtr->size > 0) {
ASSERT(reader->footer->sttBlkPtr->size % sizeof(SSttBlk) == 0);
int32_t size = reader->footer->sttBlkPtr->size / sizeof(SSttBlk);
void *data = taosMemoryMalloc(reader->footer->sttBlkPtr->size);
if (!data) return TSDB_CODE_OUT_OF_MEMORY;
int32_t code = tsdbReadFile(reader->fd, reader->footer->sttBlkPtr->offset, data, reader->footer->sttBlkPtr->size);
if (code) {
taosMemoryFree(data);
return code;
}
TARRAY2_INIT_EX(reader->sttBlkArray, size, size, data);
} else {
TARRAY2_INIT(reader->sttBlkArray);
}
reader->ctx->sttBlkLoaded = true;
}
sttBlkArray[0] = reader->sttBlkArray;
return 0;
}
int32_t tsdbSttFileReadBlockData(SSttFileReader *reader, const SSttBlk *sttBlk, SBlockData *bData) {
int32_t code = 0;
int32_t lino = 0;
code = tRealloc(&reader->config->bufArr[0], sttBlk->bInfo.szBlock);
TSDB_CHECK_CODE(code, lino, _exit);
code = tsdbReadFile(reader->fd, sttBlk->bInfo.offset, reader->config->bufArr[0], sttBlk->bInfo.szBlock);
TSDB_CHECK_CODE(code, lino, _exit);
code = tDecmprBlockData(reader->config->bufArr[0], sttBlk->bInfo.szBlock, bData, &reader->config->bufArr[1]);
TSDB_CHECK_CODE(code, lino, _exit);
_exit:
if (code) {
TSDB_ERROR_LOG(TD_VID(reader->config->tsdb->pVnode), lino, code);
}
return code;
}
int32_t tsdbSttFileReadBlockDataByColumn(SSttFileReader *reader, const SSttBlk *sttBlk, SBlockData *bData,
STSchema *pTSchema, int16_t cids[], int32_t ncid) {
int32_t code = 0;
int32_t lino = 0;
TABLEID tbid = {.suid = sttBlk->suid};
if (tbid.suid == 0) {
tbid.uid = sttBlk->minUid;
} else {
tbid.uid = 0;
}
code = tBlockDataInit(bData, &tbid, pTSchema, cids, ncid);
TSDB_CHECK_CODE(code, lino, _exit);
// uid + version + tskey
code = tRealloc(&reader->config->bufArr[0], sttBlk->bInfo.szKey);
TSDB_CHECK_CODE(code, lino, _exit);
code = tsdbReadFile(reader->fd, sttBlk->bInfo.offset, reader->config->bufArr[0], sttBlk->bInfo.szKey);
TSDB_CHECK_CODE(code, lino, _exit);
// hdr
SDiskDataHdr hdr[1];
int32_t size = 0;
size += tGetDiskDataHdr(reader->config->bufArr[0] + size, hdr);
ASSERT(hdr->delimiter == TSDB_FILE_DLMT);
bData->nRow = hdr->nRow;
bData->uid = hdr->uid;
// uid
if (hdr->uid == 0) {
ASSERT(hdr->szUid);
code = tsdbDecmprData(reader->config->bufArr[0] + size, hdr->szUid, TSDB_DATA_TYPE_BIGINT, hdr->cmprAlg,
(uint8_t **)&bData->aUid, sizeof(int64_t) * hdr->nRow, &reader->config->bufArr[1]);
TSDB_CHECK_CODE(code, lino, _exit);
} else {
ASSERT(hdr->szUid == 0);
}
size += hdr->szUid;
// version
code = tsdbDecmprData(reader->config->bufArr[0] + size, hdr->szVer, TSDB_DATA_TYPE_BIGINT, hdr->cmprAlg,
(uint8_t **)&bData->aVersion, sizeof(int64_t) * hdr->nRow, &reader->config->bufArr[1]);
TSDB_CHECK_CODE(code, lino, _exit);
size += hdr->szVer;
// ts
code = tsdbDecmprData(reader->config->bufArr[0] + size, hdr->szKey, TSDB_DATA_TYPE_TIMESTAMP, hdr->cmprAlg,
(uint8_t **)&bData->aTSKEY, sizeof(TSKEY) * hdr->nRow, &reader->config->bufArr[1]);
TSDB_CHECK_CODE(code, lino, _exit);
size += hdr->szKey;
ASSERT(size == sttBlk->bInfo.szKey);
// other columns
if (bData->nColData > 0) {
if (hdr->szBlkCol > 0) {
code = tRealloc(&reader->config->bufArr[0], hdr->szBlkCol);
TSDB_CHECK_CODE(code, lino, _exit);
code = tsdbReadFile(reader->fd, sttBlk->bInfo.offset + sttBlk->bInfo.szKey, reader->config->bufArr[0],
hdr->szBlkCol);
TSDB_CHECK_CODE(code, lino, _exit);
}
SBlockCol bc[1] = {{.cid = 0}};
SBlockCol *blockCol = bc;
size = 0;
for (int32_t i = 0; i < bData->nColData; i++) {
SColData *colData = tBlockDataGetColDataByIdx(bData, i);
while (blockCol && blockCol->cid < colData->cid) {
if (size < hdr->szBlkCol) {
size += tGetBlockCol(reader->config->bufArr[0] + size, blockCol);
} else {
ASSERT(size == hdr->szBlkCol);
blockCol = NULL;
}
}
if (blockCol == NULL || blockCol->cid > colData->cid) {
for (int32_t iRow = 0; iRow < hdr->nRow; iRow++) {
code = tColDataAppendValue(colData, &COL_VAL_NONE(colData->cid, colData->type));
TSDB_CHECK_CODE(code, lino, _exit);
}
} else {
ASSERT(blockCol->type == colData->type);
ASSERT(blockCol->flag && blockCol->flag != HAS_NONE);
if (blockCol->flag == HAS_NULL) {
for (int32_t iRow = 0; iRow < hdr->nRow; iRow++) {
code = tColDataAppendValue(colData, &COL_VAL_NULL(blockCol->cid, blockCol->type));
TSDB_CHECK_CODE(code, lino, _exit);
}
} else {
int32_t size1 = blockCol->szBitmap + blockCol->szOffset + blockCol->szValue;
code = tRealloc(&reader->config->bufArr[1], size1);
TSDB_CHECK_CODE(code, lino, _exit);
code = tsdbReadFile(reader->fd, sttBlk->bInfo.offset + sttBlk->bInfo.szKey + hdr->szBlkCol + blockCol->offset,
reader->config->bufArr[1], size1);
TSDB_CHECK_CODE(code, lino, _exit);
code = tsdbDecmprColData(reader->config->bufArr[1], blockCol, hdr->cmprAlg, hdr->nRow, colData,
&reader->config->bufArr[2]);
TSDB_CHECK_CODE(code, lino, _exit);
}
}
}
}
_exit:
if (code) {
TSDB_ERROR_LOG(TD_VID(reader->config->tsdb->pVnode), lino, code);
}
return code;
}
int32_t tsdbSttFileReadTombBlock(SSttFileReader *reader, const STombBlk *tombBlk, STombBlock *tombBlock) {
int32_t code = 0;
int32_t lino = 0;
code = tRealloc(&reader->config->bufArr[0], tombBlk->dp->size);
TSDB_CHECK_CODE(code, lino, _exit);
code = tsdbReadFile(reader->fd, tombBlk->dp->offset, reader->config->bufArr[0], tombBlk->dp->size);
if (code) TSDB_CHECK_CODE(code, lino, _exit);
int64_t size = 0;
tTombBlockClear(tombBlock);
for (int32_t i = 0; i < ARRAY_SIZE(tombBlock->dataArr); ++i) {
code = tsdbDecmprData(reader->config->bufArr[0] + size, tombBlk->size[i], TSDB_DATA_TYPE_BIGINT, tombBlk->cmprAlg,
&reader->config->bufArr[1], sizeof(int64_t) * tombBlk->numRec, &reader->config->bufArr[2]);
TSDB_CHECK_CODE(code, lino, _exit);
code = TARRAY2_APPEND_BATCH(&tombBlock->dataArr[i], reader->config->bufArr[1], tombBlk->numRec);
TSDB_CHECK_CODE(code, lino, _exit);
size += tombBlk->size[i];
}
ASSERT(size == tombBlk->dp->size);
_exit:
if (code) {
TSDB_ERROR_LOG(TD_VID(reader->config->tsdb->pVnode), lino, code);
}
return code;
}
int32_t tsdbSttFileReadStatisBlock(SSttFileReader *reader, const SStatisBlk *statisBlk, STbStatisBlock *statisBlock) {
int32_t code = 0;
int32_t lino = 0;
code = tRealloc(&reader->config->bufArr[0], statisBlk->dp->size);
TSDB_CHECK_CODE(code, lino, _exit);
code = tsdbReadFile(reader->fd, statisBlk->dp->offset, reader->config->bufArr[0], statisBlk->dp->size);
TSDB_CHECK_CODE(code, lino, _exit);
int64_t size = 0;
tStatisBlockClear(statisBlock);
for (int32_t i = 0; i < ARRAY_SIZE(statisBlock->dataArr); ++i) {
code =
tsdbDecmprData(reader->config->bufArr[0] + size, statisBlk->size[i], TSDB_DATA_TYPE_BIGINT, statisBlk->cmprAlg,
&reader->config->bufArr[1], sizeof(int64_t) * statisBlk->numRec, &reader->config->bufArr[2]);
TSDB_CHECK_CODE(code, lino, _exit);
code = TARRAY2_APPEND_BATCH(statisBlock->dataArr + i, reader->config->bufArr[1], statisBlk->numRec);
TSDB_CHECK_CODE(code, lino, _exit);
size += statisBlk->size[i];
}
ASSERT(size == statisBlk->dp->size);
_exit:
if (code) {
TSDB_ERROR_LOG(TD_VID(reader->config->tsdb->pVnode), lino, code);
}
return code;
}
// SSttFWriter ============================================================
struct SSttFileWriter {
SSttFileWriterConfig config[1];
struct {
bool opened;
TABLEID tbid[1];
} ctx[1];
// file
STsdbFD *fd;
STFile file[1];
// data
SSttFooter footer[1];
TTombBlkArray tombBlkArray[1];
TSttBlkArray sttBlkArray[1];
TStatisBlkArray statisBlkArray[1];
STombBlock tombBlock[1];
STbStatisBlock staticBlock[1];
SBlockData blockData[1];
// helper data
SSkmInfo skmTb[1];
SSkmInfo skmRow[1];
uint8_t *bufArr[5];
};
int32_t tsdbFileDoWriteBlockData(STsdbFD *fd, SBlockData *blockData, int8_t cmprAlg, int64_t *fileSize,
TSttBlkArray *sttBlkArray, uint8_t **bufArr) {
if (blockData->nRow == 0) return 0;
int32_t code = 0;
SSttBlk sttBlk[1] = {{
.suid = blockData->suid,
.minUid = blockData->uid ? blockData->uid : blockData->aUid[0],
.maxUid = blockData->uid ? blockData->uid : blockData->aUid[blockData->nRow - 1],
.minKey = blockData->aTSKEY[0],
.maxKey = blockData->aTSKEY[0],
.minVer = blockData->aVersion[0],
.maxVer = blockData->aVersion[0],
.nRow = blockData->nRow,
}};
for (int32_t iRow = 1; iRow < blockData->nRow; iRow++) {
if (sttBlk->minKey > blockData->aTSKEY[iRow]) sttBlk->minKey = blockData->aTSKEY[iRow];
if (sttBlk->maxKey < blockData->aTSKEY[iRow]) sttBlk->maxKey = blockData->aTSKEY[iRow];
if (sttBlk->minVer > blockData->aVersion[iRow]) sttBlk->minVer = blockData->aVersion[iRow];
if (sttBlk->maxVer < blockData->aVersion[iRow]) sttBlk->maxVer = blockData->aVersion[iRow];
}
int32_t sizeArr[5] = {0};
code = tCmprBlockData(blockData, cmprAlg, NULL, NULL, bufArr, sizeArr);
if (code) return code;
sttBlk->bInfo.offset = *fileSize;
sttBlk->bInfo.szKey = sizeArr[2] + sizeArr[3];
sttBlk->bInfo.szBlock = sizeArr[0] + sizeArr[1] + sttBlk->bInfo.szKey;
for (int32_t i = 3; i >= 0; i--) {
if (sizeArr[i]) {
code = tsdbWriteFile(fd, *fileSize, bufArr[i], sizeArr[i]);
if (code) return code;
*fileSize += sizeArr[i];
}
}
code = TARRAY2_APPEND_PTR(sttBlkArray, sttBlk);
if (code) return code;
tBlockDataClear(blockData);
return 0;
}
static int32_t tsdbSttFileDoWriteBlockData(SSttFileWriter *writer) {
if (writer->blockData->nRow == 0) return 0;
int32_t code = 0;
int32_t lino = 0;
code = tsdbFileDoWriteBlockData(writer->fd, writer->blockData, writer->config->cmprAlg, &writer->file->size,
writer->sttBlkArray, writer->config->bufArr);
TSDB_CHECK_CODE(code, lino, _exit);
_exit:
if (code) {
TSDB_ERROR_LOG(TD_VID(writer->config->tsdb->pVnode), lino, code);
}
return code;
}
static int32_t tsdbSttFileDoWriteStatisBlock(SSttFileWriter *writer) {
if (STATIS_BLOCK_SIZE(writer->staticBlock) == 0) return 0;
int32_t code = 0;
int32_t lino = 0;
SStatisBlk statisBlk[1] = {{
.dp[0] =
{
.offset = writer->file->size,
.size = 0,
},
.minTbid =
{
.suid = TARRAY2_FIRST(writer->staticBlock->suid),
.uid = TARRAY2_FIRST(writer->staticBlock->uid),
},
.maxTbid =
{
.suid = TARRAY2_LAST(writer->staticBlock->suid),
.uid = TARRAY2_LAST(writer->staticBlock->uid),
},
.numRec = STATIS_BLOCK_SIZE(writer->staticBlock),
.cmprAlg = writer->config->cmprAlg,
}};
for (int32_t i = 0; i < STATIS_RECORD_NUM_ELEM; i++) {
code = tsdbCmprData((uint8_t *)TARRAY2_DATA(writer->staticBlock->dataArr + i),
TARRAY2_DATA_LEN(&writer->staticBlock->dataArr[i]), TSDB_DATA_TYPE_BIGINT, statisBlk->cmprAlg,
&writer->config->bufArr[0], 0, &statisBlk->size[i], &writer->config->bufArr[1]);
TSDB_CHECK_CODE(code, lino, _exit);
code = tsdbWriteFile(writer->fd, writer->file->size, writer->config->bufArr[0], statisBlk->size[i]);
TSDB_CHECK_CODE(code, lino, _exit);
statisBlk->dp->size += statisBlk->size[i];
writer->file->size += statisBlk->size[i];
}
code = TARRAY2_APPEND_PTR(writer->statisBlkArray, statisBlk);
TSDB_CHECK_CODE(code, lino, _exit);
tStatisBlockClear(writer->staticBlock);
_exit:
if (code) {
TSDB_ERROR_LOG(TD_VID(writer->config->tsdb->pVnode), lino, code);
}
return code;
}
int32_t tsdbFileWriteTombBlock(STsdbFD *fd, STombBlock *tombBlock, int8_t cmprAlg, int64_t *fileSize,
TTombBlkArray *tombBlkArray, uint8_t **bufArr) {
int32_t code;
if (TOMB_BLOCK_SIZE(tombBlock) == 0) return 0;
STombBlk tombBlk[1] = {{
.dp[0] =
{
.offset = *fileSize,
.size = 0,
},
.minTbid =
{
.suid = TARRAY2_FIRST(tombBlock->suid),
.uid = TARRAY2_FIRST(tombBlock->uid),
},
.maxTbid =
{
.suid = TARRAY2_LAST(tombBlock->suid),
.uid = TARRAY2_LAST(tombBlock->uid),
},
.minVer = TARRAY2_FIRST(tombBlock->version),
.maxVer = TARRAY2_FIRST(tombBlock->version),
.numRec = TOMB_BLOCK_SIZE(tombBlock),
.cmprAlg = cmprAlg,
}};
for (int32_t i = 1; i < TOMB_BLOCK_SIZE(tombBlock); i++) {
if (tombBlk->minVer > TARRAY2_GET(tombBlock->version, i)) {
tombBlk->minVer = TARRAY2_GET(tombBlock->version, i);
}
if (tombBlk->maxVer < TARRAY2_GET(tombBlock->version, i)) {
tombBlk->maxVer = TARRAY2_GET(tombBlock->version, i);
}
}
for (int32_t i = 0; i < ARRAY_SIZE(tombBlock->dataArr); i++) {
code = tsdbCmprData((uint8_t *)TARRAY2_DATA(&tombBlock->dataArr[i]), TARRAY2_DATA_LEN(&tombBlock->dataArr[i]),
TSDB_DATA_TYPE_BIGINT, tombBlk->cmprAlg, &bufArr[0], 0, &tombBlk->size[i], &bufArr[1]);
if (code) return code;
code = tsdbWriteFile(fd, *fileSize, bufArr[0], tombBlk->size[i]);
if (code) return code;
tombBlk->dp->size += tombBlk->size[i];
*fileSize += tombBlk->size[i];
}
code = TARRAY2_APPEND_PTR(tombBlkArray, tombBlk);
if (code) return code;
tTombBlockClear(tombBlock);
return 0;
}
static int32_t tsdbSttFileDoWriteTombBlock(SSttFileWriter *writer) {
if (TOMB_BLOCK_SIZE(writer->tombBlock) == 0) return 0;
int32_t code = 0;
int32_t lino = 0;
code = tsdbFileWriteTombBlock(writer->fd, writer->tombBlock, writer->config->cmprAlg, &writer->file->size,
writer->tombBlkArray, writer->config->bufArr);
TSDB_CHECK_CODE(code, lino, _exit);
_exit:
if (code) {
TSDB_ERROR_LOG(TD_VID(writer->config->tsdb->pVnode), lino, code);
}
return code;
}
int32_t tsdbFileWriteSttBlk(STsdbFD *fd, const TSttBlkArray *sttBlkArray, SFDataPtr *ptr, int64_t *fileSize) {
ptr->size = TARRAY2_DATA_LEN(sttBlkArray);
if (ptr->size > 0) {
ptr->offset = *fileSize;
int32_t code = tsdbWriteFile(fd, *fileSize, (const uint8_t *)TARRAY2_DATA(sttBlkArray), ptr->size);
if (code) {
return code;
}
*fileSize += ptr->size;
}
return 0;
}
static int32_t tsdbSttFileDoWriteSttBlk(SSttFileWriter *writer) {
int32_t code = 0;
int32_t lino;
code = tsdbFileWriteSttBlk(writer->fd, writer->sttBlkArray, writer->footer->sttBlkPtr, &writer->file->size);
TSDB_CHECK_CODE(code, lino, _exit);
_exit:
if (code) {
TSDB_ERROR_LOG(TD_VID(writer->config->tsdb->pVnode), lino, code);
}
return code;
}
static int32_t tsdbSttFileDoWriteStatisBlk(SSttFileWriter *writer) {
int32_t code = 0;
int32_t lino;
writer->footer->statisBlkPtr->size = TARRAY2_DATA_LEN(writer->statisBlkArray);
if (writer->footer->statisBlkPtr->size) {
writer->footer->statisBlkPtr->offset = writer->file->size;
code = tsdbWriteFile(writer->fd, writer->file->size, (const uint8_t *)TARRAY2_DATA(writer->statisBlkArray),
writer->footer->statisBlkPtr->size);
TSDB_CHECK_CODE(code, lino, _exit);
writer->file->size += writer->footer->statisBlkPtr->size;
}
_exit:
if (code) {
TSDB_ERROR_LOG(TD_VID(writer->config->tsdb->pVnode), lino, code);
}
return code;
}
int32_t tsdbFileWriteTombBlk(STsdbFD *fd, const TTombBlkArray *tombBlkArray, SFDataPtr *ptr, int64_t *fileSize) {
ptr->size = TARRAY2_DATA_LEN(tombBlkArray);
if (ptr->size > 0) {
ptr->offset = *fileSize;
int32_t code = tsdbWriteFile(fd, *fileSize, (const uint8_t *)TARRAY2_DATA(tombBlkArray), ptr->size);
if (code) {
return code;
}
*fileSize += ptr->size;
}
return 0;
}
static int32_t tsdbSttFileDoWriteTombBlk(SSttFileWriter *writer) {
int32_t code = 0;
int32_t lino = 0;
code = tsdbFileWriteTombBlk(writer->fd, writer->tombBlkArray, writer->footer->tombBlkPtr, &writer->file->size);
TSDB_CHECK_CODE(code, lino, _exit);
_exit:
if (code) {
TSDB_ERROR_LOG(TD_VID(writer->config->tsdb->pVnode), lino, code);
}
return code;
}
int32_t tsdbFileWriteSttFooter(STsdbFD *fd, const SSttFooter *footer, int64_t *fileSize) {
int32_t code = tsdbWriteFile(fd, *fileSize, (const uint8_t *)footer, sizeof(*footer));
if (code) return code;
*fileSize += sizeof(*footer);
return 0;
}
static int32_t tsdbSttFileDoWriteFooter(SSttFileWriter *writer) {
return tsdbFileWriteSttFooter(writer->fd, writer->footer, &writer->file->size);
}
static int32_t tsdbSttFWriterDoOpen(SSttFileWriter *writer) {
int32_t code = 0;
int32_t lino = 0;
// set
if (!writer->config->skmTb) writer->config->skmTb = writer->skmTb;
if (!writer->config->skmRow) writer->config->skmRow = writer->skmRow;
if (!writer->config->bufArr) writer->config->bufArr = writer->bufArr;
writer->file[0] = (STFile){
.type = TSDB_FTYPE_STT,
.did = writer->config->did,
.fid = writer->config->fid,
.cid = writer->config->cid,
.size = 0,
.stt[0] =
{
.level = writer->config->level,
},
};
// open file
int32_t flag = TD_FILE_READ | TD_FILE_WRITE | TD_FILE_CREATE | TD_FILE_TRUNC;
char fname[TSDB_FILENAME_LEN];
tsdbTFileName(writer->config->tsdb, writer->file, fname);
code = tsdbOpenFile(fname, writer->config->szPage, flag, &writer->fd);
TSDB_CHECK_CODE(code, lino, _exit);
uint8_t hdr[TSDB_FHDR_SIZE] = {0};
code = tsdbWriteFile(writer->fd, 0, hdr, sizeof(hdr));
TSDB_CHECK_CODE(code, lino, _exit);
writer->file->size += sizeof(hdr);
writer->ctx->opened = true;
_exit:
if (code) {
TSDB_ERROR_LOG(TD_VID(writer->config->tsdb->pVnode), lino, code);
}
return code;
}
static void tsdbSttFWriterDoClose(SSttFileWriter *writer) {
ASSERT(writer->fd == NULL);
for (int32_t i = 0; i < ARRAY_SIZE(writer->bufArr); ++i) {
tFree(writer->bufArr[i]);
}
tDestroyTSchema(writer->skmRow->pTSchema);
tDestroyTSchema(writer->skmTb->pTSchema);
tTombBlockDestroy(writer->tombBlock);
tStatisBlockDestroy(writer->staticBlock);
tBlockDataDestroy(writer->blockData);
TARRAY2_DESTROY(writer->tombBlkArray, NULL);
TARRAY2_DESTROY(writer->statisBlkArray, NULL);
TARRAY2_DESTROY(writer->sttBlkArray, NULL);
}
static int32_t tsdbSttFileDoUpdateHeader(SSttFileWriter *writer) {
// TODO
return 0;
}
static int32_t tsdbSttFWriterCloseCommit(SSttFileWriter *writer, TFileOpArray *opArray) {
int32_t lino;
int32_t code;
code = tsdbSttFileDoWriteBlockData(writer);
TSDB_CHECK_CODE(code, lino, _exit);
code = tsdbSttFileDoWriteStatisBlock(writer);
TSDB_CHECK_CODE(code, lino, _exit);
code = tsdbSttFileDoWriteTombBlock(writer);
TSDB_CHECK_CODE(code, lino, _exit);
code = tsdbSttFileDoWriteSttBlk(writer);
TSDB_CHECK_CODE(code, lino, _exit);
code = tsdbSttFileDoWriteStatisBlk(writer);
TSDB_CHECK_CODE(code, lino, _exit);
code = tsdbSttFileDoWriteTombBlk(writer);
TSDB_CHECK_CODE(code, lino, _exit);
code = tsdbSttFileDoWriteFooter(writer);
TSDB_CHECK_CODE(code, lino, _exit);
code = tsdbSttFileDoUpdateHeader(writer);
TSDB_CHECK_CODE(code, lino, _exit);
code = tsdbFsyncFile(writer->fd);
TSDB_CHECK_CODE(code, lino, _exit);
tsdbCloseFile(&writer->fd);
ASSERT(writer->file->size > 0);
STFileOp op = (STFileOp){
.optype = TSDB_FOP_CREATE,
.fid = writer->config->fid,
.nf = writer->file[0],
};
code = TARRAY2_APPEND(opArray, op);
TSDB_CHECK_CODE(code, lino, _exit);
_exit:
if (code) {
TSDB_ERROR_LOG(TD_VID(writer->config->tsdb->pVnode), lino, code);
}
return code;
}
static int32_t tsdbSttFWriterCloseAbort(SSttFileWriter *writer) {
char fname[TSDB_FILENAME_LEN];
tsdbTFileName(writer->config->tsdb, writer->file, fname);
tsdbCloseFile(&writer->fd);
taosRemoveFile(fname);
return 0;
}
int32_t tsdbSttFileWriterOpen(const SSttFileWriterConfig *config, SSttFileWriter **writer) {
writer[0] = taosMemoryCalloc(1, sizeof(*writer[0]));
if (writer[0] == NULL) return TSDB_CODE_OUT_OF_MEMORY;
writer[0]->config[0] = config[0];
writer[0]->ctx->opened = false;
return 0;
}
int32_t tsdbSttFileWriterClose(SSttFileWriter **writer, int8_t abort, TFileOpArray *opArray) {
int32_t code = 0;
int32_t lino = 0;
if (writer[0]->ctx->opened) {
if (abort) {
code = tsdbSttFWriterCloseAbort(writer[0]);
TSDB_CHECK_CODE(code, lino, _exit);
} else {
code = tsdbSttFWriterCloseCommit(writer[0], opArray);
TSDB_CHECK_CODE(code, lino, _exit);
}
tsdbSttFWriterDoClose(writer[0]);
}
taosMemoryFree(writer[0]);
writer[0] = NULL;
_exit:
if (code) {
TSDB_ERROR_LOG(TD_VID(writer[0]->config->tsdb->pVnode), lino, code);
}
return code;
}
int32_t tsdbSttFileWriteRow(SSttFileWriter *writer, SRowInfo *row) {
int32_t code = 0;
int32_t lino = 0;
if (!writer->ctx->opened) {
code = tsdbSttFWriterDoOpen(writer);
TSDB_CHECK_CODE(code, lino, _exit);
}
if (!TABLE_SAME_SCHEMA(row->suid, row->uid, writer->ctx->tbid->suid, writer->ctx->tbid->uid)) {
code = tsdbSttFileDoWriteBlockData(writer);
TSDB_CHECK_CODE(code, lino, _exit);
code = tsdbUpdateSkmTb(writer->config->tsdb, (TABLEID *)row, writer->config->skmTb);
TSDB_CHECK_CODE(code, lino, _exit);
TABLEID id = {.suid = row->suid, .uid = row->suid ? 0 : row->uid};
code = tBlockDataInit(writer->blockData, &id, writer->config->skmTb->pTSchema, NULL, 0);
TSDB_CHECK_CODE(code, lino, _exit);
}
TSDBKEY key[1];
if (row->row.type == TSDBROW_ROW_FMT) {
key->ts = row->row.pTSRow->ts;
key->version = row->row.version;
} else {
key->ts = row->row.pBlockData->aTSKEY[row->row.iRow];
key->version = row->row.pBlockData->aVersion[row->row.iRow];
}
if (writer->ctx->tbid->uid != row->uid) {
writer->ctx->tbid->suid = row->suid;
writer->ctx->tbid->uid = row->uid;
if (STATIS_BLOCK_SIZE(writer->staticBlock) >= writer->config->maxRow) {
code = tsdbSttFileDoWriteStatisBlock(writer);
TSDB_CHECK_CODE(code, lino, _exit);
}
STbStatisRecord record = {
.suid = row->suid,
.uid = row->uid,
.firstKey = key->ts,
.lastKey = key->ts,
.count = 1,
};
code = tStatisBlockPut(writer->staticBlock, &record);
TSDB_CHECK_CODE(code, lino, _exit);
} else {
ASSERT(key->ts >= TARRAY2_LAST(writer->staticBlock->lastKey));
if (key->ts > TARRAY2_LAST(writer->staticBlock->lastKey)) {
TARRAY2_LAST(writer->staticBlock->count)++;
TARRAY2_LAST(writer->staticBlock->lastKey) = key->ts;
}
}
if (row->row.type == TSDBROW_ROW_FMT) {
code = tsdbUpdateSkmRow(writer->config->tsdb, writer->ctx->tbid, //
TSDBROW_SVERSION(&row->row), writer->config->skmRow);
TSDB_CHECK_CODE(code, lino, _exit);
}
// row to col conversion
if (key->version <= writer->config->compactVersion //
&& writer->blockData->nRow > 0 //
&& writer->blockData->aTSKEY[writer->blockData->nRow - 1] == key->ts //
&& (writer->blockData->uid //
? writer->blockData->uid //
: writer->blockData->aUid[writer->blockData->nRow - 1]) == row->uid //
) {
code = tBlockDataUpdateRow(writer->blockData, &row->row, writer->config->skmRow->pTSchema);
TSDB_CHECK_CODE(code, lino, _exit);
} else {
if (writer->blockData->nRow >= writer->config->maxRow) {
code = tsdbSttFileDoWriteBlockData(writer);
TSDB_CHECK_CODE(code, lino, _exit);
}
code = tBlockDataAppendRow(writer->blockData, &row->row, writer->config->skmRow->pTSchema, row->uid);
TSDB_CHECK_CODE(code, lino, _exit);
}
_exit:
if (code) {
TSDB_ERROR_LOG(TD_VID(writer->config->tsdb->pVnode), lino, code);
}
return code;
}
int32_t tsdbSttFileWriteBlockData(SSttFileWriter *writer, SBlockData *bdata) {
int32_t code = 0;
int32_t lino = 0;
SRowInfo row[1];
row->suid = bdata->suid;
for (int32_t i = 0; i < bdata->nRow; i++) {
row->uid = bdata->uid ? bdata->uid : bdata->aUid[i];
row->row = tsdbRowFromBlockData(bdata, i);
code = tsdbSttFileWriteRow(writer, row);
TSDB_CHECK_CODE(code, lino, _exit);
}
_exit:
if (code) {
TSDB_ERROR_LOG(TD_VID(writer->config->tsdb->pVnode), lino, code);
}
return code;
}
int32_t tsdbSttFileWriteTombRecord(SSttFileWriter *writer, const STombRecord *record) {
int32_t code;
int32_t lino;
if (!writer->ctx->opened) {
code = tsdbSttFWriterDoOpen(writer);
TSDB_CHECK_CODE(code, lino, _exit);
} else {
if (writer->blockData->nRow > 0) {
code = tsdbSttFileDoWriteBlockData(writer);
TSDB_CHECK_CODE(code, lino, _exit);
}
if (STATIS_BLOCK_SIZE(writer->staticBlock) > 0) {
code = tsdbSttFileDoWriteStatisBlock(writer);
TSDB_CHECK_CODE(code, lino, _exit);
}
}
code = tTombBlockPut(writer->tombBlock, record);
TSDB_CHECK_CODE(code, lino, _exit);
if (TOMB_BLOCK_SIZE(writer->tombBlock) >= writer->config->maxRow) {
code = tsdbSttFileDoWriteTombBlock(writer);
TSDB_CHECK_CODE(code, lino, _exit);
}
_exit:
if (code) {
TSDB_ERROR_LOG(TD_VID(writer->config->tsdb->pVnode), lino, code);
}
return code;
}
bool tsdbSttFileWriterIsOpened(SSttFileWriter *writer) { return writer->ctx->opened; }

View File

@ -0,0 +1,93 @@
/*
* Copyright (c) 2019 TAOS Data, Inc. <jhtao@taosdata.com>
*
* This program is free software: you can use, redistribute, and/or modify
* it under the terms of the GNU Affero General Public License, version 3
* or later ("AGPL"), as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include "tsdbFS2.h"
#include "tsdbUtil2.h"
#ifndef _TSDB_STT_FILE_RW_H
#define _TSDB_STT_FILE_RW_H
#ifdef __cplusplus
extern "C" {
#endif
typedef TARRAY2(SSttBlk) TSttBlkArray;
typedef TARRAY2(SStatisBlk) TStatisBlkArray;
typedef struct {
SFDataPtr sttBlkPtr[1];
SFDataPtr statisBlkPtr[1];
SFDataPtr tombBlkPtr[1];
SFDataPtr rsrvd[2];
} SSttFooter;
// SSttFileReader ==========================================
typedef struct SSttFileReader SSttFileReader;
typedef struct SSttFileReaderConfig SSttFileReaderConfig;
typedef TARRAY2(SSttFileReader *) TSttFileReaderArray;
// SSttFileReader
int32_t tsdbSttFileReaderOpen(const char *fname, const SSttFileReaderConfig *config, SSttFileReader **reader);
int32_t tsdbSttFileReaderClose(SSttFileReader **reader);
// SSttSegReader
int32_t tsdbSttFileReadSttBlk(SSttFileReader *reader, const TSttBlkArray **sttBlkArray);
int32_t tsdbSttFileReadStatisBlk(SSttFileReader *reader, const TStatisBlkArray **statisBlkArray);
int32_t tsdbSttFileReadTombBlk(SSttFileReader *reader, const TTombBlkArray **delBlkArray);
int32_t tsdbSttFileReadBlockData(SSttFileReader *reader, const SSttBlk *sttBlk, SBlockData *bData);
int32_t tsdbSttFileReadBlockDataByColumn(SSttFileReader *reader, const SSttBlk *sttBlk, SBlockData *bData,
STSchema *pTSchema, int16_t cids[], int32_t ncid);
int32_t tsdbSttFileReadStatisBlock(SSttFileReader *reader, const SStatisBlk *statisBlk, STbStatisBlock *sData);
int32_t tsdbSttFileReadTombBlock(SSttFileReader *reader, const STombBlk *delBlk, STombBlock *dData);
struct SSttFileReaderConfig {
STsdb *tsdb;
int32_t szPage;
STFile file[1];
uint8_t **bufArr;
};
// SSttFileWriter ==========================================
typedef struct SSttFileWriter SSttFileWriter;
typedef struct SSttFileWriterConfig SSttFileWriterConfig;
int32_t tsdbSttFileWriterOpen(const SSttFileWriterConfig *config, SSttFileWriter **writer);
int32_t tsdbSttFileWriterClose(SSttFileWriter **writer, int8_t abort, TFileOpArray *opArray);
int32_t tsdbSttFileWriteRow(SSttFileWriter *writer, SRowInfo *row);
int32_t tsdbSttFileWriteBlockData(SSttFileWriter *writer, SBlockData *pBlockData);
int32_t tsdbSttFileWriteTombRecord(SSttFileWriter *writer, const STombRecord *record);
bool tsdbSttFileWriterIsOpened(SSttFileWriter *writer);
struct SSttFileWriterConfig {
STsdb *tsdb;
int32_t maxRow;
int32_t szPage;
int8_t cmprAlg;
int64_t compactVersion;
SDiskID did;
int32_t fid;
int64_t cid;
int32_t level;
SSkmInfo *skmTb;
SSkmInfo *skmRow;
uint8_t **bufArr;
};
#ifdef __cplusplus
}
#endif
#endif /*_TSDB_STT_FILE_RW_H*/

View File

@ -0,0 +1,640 @@
/*
* Copyright (c) 2019 TAOS Data, Inc. <jhtao@taosdata.com>
*
* This program is free software: you can use, redistribute, and/or modify
* it under the terms of the GNU Affero General Public License, version 3
* or later ("AGPL"), as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include "tsdbUpgrade.h"
// old
extern void tsdbGetCurrentFName(STsdb *pTsdb, char *current, char *current_t);
extern int32_t tsdbReadDataBlockEx(SDataFReader *pReader, SDataBlk *pDataBlk, SBlockData *pBlockData);
// new
extern int32_t save_fs(const TFileSetArray *arr, const char *fname);
extern int32_t current_fname(STsdb *pTsdb, char *fname, EFCurrentT ftype);
extern int32_t tsdbFileWriteBrinBlock(STsdbFD *fd, SBrinBlock *brinBlock, int8_t cmprAlg, int64_t *fileSize,
TBrinBlkArray *brinBlkArray, uint8_t **bufArr);
extern int32_t tsdbFileWriteBrinBlk(STsdbFD *fd, TBrinBlkArray *brinBlkArray, SFDataPtr *ptr, int64_t *fileSize);
extern int32_t tsdbFileWriteHeadFooter(STsdbFD *fd, int64_t *fileSize, const SHeadFooter *footer);
extern int32_t tsdbSttLvlInit(int32_t level, SSttLvl **lvl);
extern int32_t tsdbSttLvlClear(SSttLvl **lvl);
extern int32_t tsdbFileWriteSttBlk(STsdbFD *fd, const TSttBlkArray *sttBlkArray, SFDataPtr *ptr, int64_t *fileSize);
extern int32_t tsdbFileWriteSttFooter(STsdbFD *fd, const SSttFooter *footer, int64_t *fileSize);
extern int32_t tsdbFileWriteTombBlock(STsdbFD *fd, STombBlock *tombBlock, int8_t cmprAlg, int64_t *fileSize,
TTombBlkArray *tombBlkArray, uint8_t **bufArr);
extern int32_t tsdbFileWriteTombBlk(STsdbFD *fd, const TTombBlkArray *tombBlkArray, SFDataPtr *ptr, int64_t *fileSize);
extern int32_t tsdbFileWriteTombFooter(STsdbFD *fd, const STombFooter *footer, int64_t *fileSize);
static int32_t tsdbUpgradeHead(STsdb *tsdb, SDFileSet *pDFileSet, SDataFReader *reader, STFileSet *fset) {
int32_t code = 0;
int32_t lino = 0;
// init
struct {
// config
int32_t maxRow;
int8_t cmprAlg;
int32_t szPage;
uint8_t *bufArr[8];
// reader
SArray *aBlockIdx;
SMapData mDataBlk[1];
SBlockData blockData[1];
// writer
STsdbFD *fd;
SBrinBlock brinBlock[1];
TBrinBlkArray brinBlkArray[1];
SHeadFooter footer[1];
} ctx[1] = {{
.maxRow = tsdb->pVnode->config.tsdbCfg.maxRows,
.cmprAlg = tsdb->pVnode->config.tsdbCfg.compression,
.szPage = tsdb->pVnode->config.tsdbPageSize,
}};
// read SBlockIdx array
if ((ctx->aBlockIdx = taosArrayInit(0, sizeof(SBlockIdx))) == NULL) {
code = TSDB_CODE_OUT_OF_MEMORY;
TSDB_CHECK_CODE(code, lino, _exit);
}
code = tsdbReadBlockIdx(reader, ctx->aBlockIdx);
TSDB_CHECK_CODE(code, lino, _exit);
if (taosArrayGetSize(ctx->aBlockIdx) > 0) {
// init/open file fd
STFile file = {
.type = TSDB_FTYPE_HEAD,
.did = pDFileSet->diskId,
.fid = fset->fid,
.cid = pDFileSet->pHeadF->commitID,
.size = pDFileSet->pHeadF->size,
};
code = tsdbTFileObjInit(tsdb, &file, &fset->farr[TSDB_FTYPE_HEAD]);
TSDB_CHECK_CODE(code, lino, _exit);
// open fd
char fname[TSDB_FILENAME_LEN];
tsdbTFileName(tsdb, &file, fname);
code = tsdbOpenFile(fname, ctx->szPage, TD_FILE_READ | TD_FILE_WRITE, &ctx->fd);
TSDB_CHECK_CODE(code, lino, _exit);
// convert
for (int32_t iBlockIdx = 0; iBlockIdx < taosArrayGetSize(ctx->aBlockIdx); ++iBlockIdx) {
SBlockIdx *pBlockIdx = taosArrayGet(ctx->aBlockIdx, iBlockIdx);
code = tsdbReadDataBlk(reader, pBlockIdx, ctx->mDataBlk);
TSDB_CHECK_CODE(code, lino, _exit);
for (int32_t iDataBlk = 0; iDataBlk < ctx->mDataBlk->nItem; ++iDataBlk) {
SDataBlk dataBlk[1];
tMapDataGetItemByIdx(ctx->mDataBlk, iDataBlk, dataBlk, tGetDataBlk);
SBrinRecord record = {
.suid = pBlockIdx->suid,
.uid = pBlockIdx->uid,
.firstKey = dataBlk->minKey.ts,
.firstKeyVer = dataBlk->minKey.version,
.lastKey = dataBlk->maxKey.ts,
.lastKeyVer = dataBlk->maxKey.version,
.minVer = dataBlk->minVer,
.maxVer = dataBlk->maxVer,
.blockOffset = dataBlk->aSubBlock->offset,
.smaOffset = dataBlk->smaInfo.offset,
.blockSize = dataBlk->aSubBlock->szBlock,
.blockKeySize = dataBlk->aSubBlock->szKey,
.smaSize = dataBlk->smaInfo.size,
.numRow = dataBlk->nRow,
.count = dataBlk->nRow,
};
if (dataBlk->hasDup) {
code = tsdbReadDataBlockEx(reader, dataBlk, ctx->blockData);
TSDB_CHECK_CODE(code, lino, _exit);
record.count = 1;
for (int32_t i = 1; i < ctx->blockData->nRow; ++i) {
if (ctx->blockData->aTSKEY[i] != ctx->blockData->aTSKEY[i - 1]) {
record.count++;
}
}
}
code = tBrinBlockPut(ctx->brinBlock, &record);
TSDB_CHECK_CODE(code, lino, _exit);
if (BRIN_BLOCK_SIZE(ctx->brinBlock) >= ctx->maxRow) {
code = tsdbFileWriteBrinBlock(ctx->fd, ctx->brinBlock, ctx->cmprAlg, &fset->farr[TSDB_FTYPE_HEAD]->f->size,
ctx->brinBlkArray, ctx->bufArr);
TSDB_CHECK_CODE(code, lino, _exit);
}
}
}
if (BRIN_BLOCK_SIZE(ctx->brinBlock) > 0) {
code = tsdbFileWriteBrinBlock(ctx->fd, ctx->brinBlock, ctx->cmprAlg, &fset->farr[TSDB_FTYPE_HEAD]->f->size,
ctx->brinBlkArray, ctx->bufArr);
TSDB_CHECK_CODE(code, lino, _exit);
}
code = tsdbFileWriteBrinBlk(ctx->fd, ctx->brinBlkArray, ctx->footer->brinBlkPtr,
&fset->farr[TSDB_FTYPE_HEAD]->f->size);
TSDB_CHECK_CODE(code, lino, _exit);
code = tsdbFileWriteHeadFooter(ctx->fd, &fset->farr[TSDB_FTYPE_HEAD]->f->size, ctx->footer);
TSDB_CHECK_CODE(code, lino, _exit);
code = tsdbFsyncFile(ctx->fd);
TSDB_CHECK_CODE(code, lino, _exit);
tsdbCloseFile(&ctx->fd);
}
_exit:
if (code) {
TSDB_ERROR_LOG(TD_VID(tsdb->pVnode), lino, code);
}
TARRAY2_DESTROY(ctx->brinBlkArray, NULL);
tBrinBlockDestroy(ctx->brinBlock);
tBlockDataDestroy(ctx->blockData);
tMapDataClear(ctx->mDataBlk);
taosArrayDestroy(ctx->aBlockIdx);
for (int32_t i = 0; i < ARRAY_SIZE(ctx->bufArr); ++i) {
tFree(ctx->bufArr[i]);
}
return code;
}
static int32_t tsdbUpgradeData(STsdb *tsdb, SDFileSet *pDFileSet, SDataFReader *reader, STFileSet *fset) {
int32_t code = 0;
int32_t lino = 0;
if (fset->farr[TSDB_FTYPE_HEAD] == NULL) {
return 0;
}
STFile file = {
.type = TSDB_FTYPE_DATA,
.did = pDFileSet->diskId,
.fid = fset->fid,
.cid = pDFileSet->pDataF->commitID,
.size = pDFileSet->pDataF->size,
};
code = tsdbTFileObjInit(tsdb, &file, &fset->farr[TSDB_FTYPE_DATA]);
TSDB_CHECK_CODE(code, lino, _exit);
_exit:
if (code) {
TSDB_ERROR_LOG(TD_VID(tsdb->pVnode), lino, code);
}
return code;
}
static int32_t tsdbUpgradeSma(STsdb *tsdb, SDFileSet *pDFileSet, SDataFReader *reader, STFileSet *fset) {
int32_t code = 0;
int32_t lino = 0;
if (fset->farr[TSDB_FTYPE_HEAD] == NULL) {
return 0;
}
STFile file = {
.type = TSDB_FTYPE_SMA,
.did = pDFileSet->diskId,
.fid = fset->fid,
.cid = pDFileSet->pSmaF->commitID,
.size = pDFileSet->pSmaF->size,
};
code = tsdbTFileObjInit(tsdb, &file, &fset->farr[TSDB_FTYPE_SMA]);
TSDB_CHECK_CODE(code, lino, _exit);
_exit:
if (code) {
TSDB_ERROR_LOG(TD_VID(tsdb->pVnode), lino, code);
}
return code;
}
static int32_t tsdbUpgradeSttFile(STsdb *tsdb, SDFileSet *pDFileSet, SDataFReader *reader, STFileSet *fset,
int32_t iStt, SSttLvl *lvl) {
int32_t code = 0;
int32_t lino = 0;
SArray *aSttBlk = taosArrayInit(0, sizeof(SSttBlk));
if (aSttBlk == NULL) {
code = TSDB_CODE_OUT_OF_MEMORY;
TSDB_CHECK_CODE(code, lino, _exit);
}
code = tsdbReadSttBlk(reader, iStt, aSttBlk);
TSDB_CHECK_CODE(code, lino, _exit);
if (taosArrayGetSize(aSttBlk) > 0) {
SSttFile *pSttF = pDFileSet->aSttF[iStt];
STFileObj *fobj;
struct {
int32_t szPage;
// writer
STsdbFD *fd;
TSttBlkArray sttBlkArray[1];
SSttFooter footer[1];
} ctx[1] = {{
.szPage = tsdb->pVnode->config.tsdbPageSize,
}};
STFile file = {
.type = TSDB_FTYPE_STT,
.did = pDFileSet->diskId,
.fid = fset->fid,
.cid = pSttF->commitID,
.size = pSttF->size,
};
code = tsdbTFileObjInit(tsdb, &file, &fobj);
TSDB_CHECK_CODE(code, lino, _exit1);
code = tsdbOpenFile(fobj->fname, ctx->szPage, TD_FILE_READ | TD_FILE_WRITE, &ctx->fd);
TSDB_CHECK_CODE(code, lino, _exit1);
for (int32_t iSttBlk = 0; iSttBlk < taosArrayGetSize(aSttBlk); iSttBlk++) {
code = TARRAY2_APPEND_PTR(ctx->sttBlkArray, (SSttBlk *)taosArrayGet(aSttBlk, iSttBlk));
TSDB_CHECK_CODE(code, lino, _exit1);
}
code = tsdbFileWriteSttBlk(ctx->fd, ctx->sttBlkArray, ctx->footer->sttBlkPtr, &fobj->f->size);
TSDB_CHECK_CODE(code, lino, _exit1);
code = tsdbFileWriteSttFooter(ctx->fd, ctx->footer, &fobj->f->size);
TSDB_CHECK_CODE(code, lino, _exit1);
code = tsdbFsyncFile(ctx->fd);
TSDB_CHECK_CODE(code, lino, _exit1);
tsdbCloseFile(&ctx->fd);
code = TARRAY2_APPEND(lvl->fobjArr, fobj);
TSDB_CHECK_CODE(code, lino, _exit1);
_exit1:
TARRAY2_DESTROY(ctx->sttBlkArray, NULL);
}
_exit:
if (code) {
TSDB_ERROR_LOG(TD_VID(tsdb->pVnode), lino, code);
}
taosArrayDestroy(aSttBlk);
return code;
}
static int32_t tsdbUpgradeStt(STsdb *tsdb, SDFileSet *pDFileSet, SDataFReader *reader, STFileSet *fset) {
int32_t code = 0;
int32_t lino = 0;
if (pDFileSet->nSttF == 0) {
return 0;
}
SSttLvl *lvl;
code = tsdbSttLvlInit(0, &lvl);
TSDB_CHECK_CODE(code, lino, _exit);
for (int32_t iStt = 0; iStt < pDFileSet->nSttF; ++iStt) {
code = tsdbUpgradeSttFile(tsdb, pDFileSet, reader, fset, iStt, lvl);
TSDB_CHECK_CODE(code, lino, _exit);
}
if (TARRAY2_SIZE(lvl->fobjArr) > 0) {
code = TARRAY2_APPEND(fset->lvlArr, lvl);
TSDB_CHECK_CODE(code, lino, _exit);
} else {
tsdbSttLvlClear(&lvl);
}
_exit:
if (code) {
TSDB_ERROR_LOG(TD_VID(tsdb->pVnode), lino, code);
}
return code;
}
static int32_t tsdbUpgradeFileSet(STsdb *tsdb, SDFileSet *pDFileSet, TFileSetArray *fileSetArray) {
int32_t code = 0;
int32_t lino = 0;
SDataFReader *reader;
STFileSet *fset;
code = tsdbTFileSetInit(pDFileSet->fid, &fset);
TSDB_CHECK_CODE(code, lino, _exit);
code = tsdbDataFReaderOpen(&reader, tsdb, pDFileSet);
TSDB_CHECK_CODE(code, lino, _exit);
// .head
code = tsdbUpgradeHead(tsdb, pDFileSet, reader, fset);
TSDB_CHECK_CODE(code, lino, _exit);
// .data
code = tsdbUpgradeData(tsdb, pDFileSet, reader, fset);
TSDB_CHECK_CODE(code, lino, _exit);
// .sma
code = tsdbUpgradeSma(tsdb, pDFileSet, reader, fset);
TSDB_CHECK_CODE(code, lino, _exit);
// .stt
if (pDFileSet->nSttF > 0) {
code = tsdbUpgradeStt(tsdb, pDFileSet, reader, fset);
TSDB_CHECK_CODE(code, lino, _exit);
}
tsdbDataFReaderClose(&reader);
code = TARRAY2_APPEND(fileSetArray, fset);
TSDB_CHECK_CODE(code, lino, _exit);
_exit:
if (code) {
TSDB_ERROR_LOG(TD_VID(tsdb->pVnode), lino, code);
}
return code;
}
static int32_t tsdbUpgradeOpenTombFile(STsdb *tsdb, STFileSet *fset, STsdbFD **fd, STFileObj **fobj, bool *toStt) {
int32_t code = 0;
int32_t lino = 0;
if (TARRAY2_SIZE(fset->lvlArr) == 0) { // to .tomb file
*toStt = false;
STFile file = {
.type = TSDB_FTYPE_TOMB,
.did = fset->farr[TSDB_FTYPE_HEAD]->f->did,
.fid = fset->fid,
.cid = 0,
.size = 0,
};
code = tsdbTFileObjInit(tsdb, &file, fobj);
TSDB_CHECK_CODE(code, lino, _exit);
fset->farr[TSDB_FTYPE_TOMB] = *fobj;
} else { // to .stt file
*toStt = true;
SSttLvl *lvl = TARRAY2_GET(fset->lvlArr, 0);
STFile file = {
.type = TSDB_FTYPE_STT,
.did = TARRAY2_GET(lvl->fobjArr, 0)->f->did,
.fid = fset->fid,
.cid = 0,
.size = 0,
};
code = tsdbTFileObjInit(tsdb, &file, fobj);
TSDB_CHECK_CODE(code, lino, _exit);
code = TARRAY2_APPEND(lvl->fobjArr, fobj[0]);
TSDB_CHECK_CODE(code, lino, _exit);
}
char fname[TSDB_FILENAME_LEN] = {0};
code = tsdbOpenFile(fobj[0]->fname, tsdb->pVnode->config.tsdbPageSize,
TD_FILE_READ | TD_FILE_WRITE | TD_FILE_TRUNC | TD_FILE_CREATE, fd);
TSDB_CHECK_CODE(code, lino, _exit);
uint8_t hdr[TSDB_FHDR_SIZE] = {0};
code = tsdbWriteFile(fd[0], 0, hdr, TSDB_FHDR_SIZE);
TSDB_CHECK_CODE(code, lino, _exit);
fobj[0]->f->size += TSDB_FHDR_SIZE;
_exit:
if (code) {
TSDB_ERROR_LOG(TD_VID(tsdb->pVnode), lino, code);
}
return code;
}
static int32_t tsdbDumpTombDataToFSet(STsdb *tsdb, SDelFReader *reader, SArray *aDelIdx, STFileSet *fset) {
int32_t code = 0;
int32_t lino = 0;
struct {
// context
bool toStt;
int8_t cmprAlg;
int32_t maxRow;
int64_t minKey;
int64_t maxKey;
uint8_t *bufArr[8];
// reader
SArray *aDelData;
// writer
STsdbFD *fd;
STFileObj *fobj;
STombBlock tombBlock[1];
TTombBlkArray tombBlkArray[1];
STombFooter tombFooter[1];
SSttFooter sttFooter[1];
} ctx[1] = {{
.maxRow = tsdb->pVnode->config.tsdbCfg.maxRows,
.cmprAlg = tsdb->pVnode->config.tsdbCfg.compression,
}};
tsdbFidKeyRange(fset->fid, tsdb->keepCfg.days, tsdb->keepCfg.precision, &ctx->minKey, &ctx->maxKey);
if ((ctx->aDelData = taosArrayInit(0, sizeof(SDelData))) == NULL) {
code = TSDB_CODE_OUT_OF_MEMORY;
TSDB_CHECK_CODE(code, lino, _exit);
}
for (int32_t iDelIdx = 0; iDelIdx < taosArrayGetSize(aDelIdx); iDelIdx++) {
SDelIdx *pDelIdx = (SDelIdx *)taosArrayGet(aDelIdx, iDelIdx);
code = tsdbReadDelData(reader, pDelIdx, ctx->aDelData);
TSDB_CHECK_CODE(code, lino, _exit);
for (int32_t iDelData = 0; iDelData < taosArrayGetSize(ctx->aDelData); iDelData++) {
SDelData *pDelData = (SDelData *)taosArrayGet(ctx->aDelData, iDelData);
STombRecord record = {
.suid = pDelIdx->suid,
.uid = pDelIdx->uid,
.version = pDelData->version,
.skey = pDelData->sKey,
.ekey = pDelData->eKey,
};
code = tTombBlockPut(ctx->tombBlock, &record);
TSDB_CHECK_CODE(code, lino, _exit);
if (TOMB_BLOCK_SIZE(ctx->tombBlock) > ctx->maxRow) {
if (ctx->fd == NULL) {
code = tsdbUpgradeOpenTombFile(tsdb, fset, &ctx->fd, &ctx->fobj, &ctx->toStt);
TSDB_CHECK_CODE(code, lino, _exit);
}
code = tsdbFileWriteTombBlock(ctx->fd, ctx->tombBlock, ctx->cmprAlg, &ctx->fobj->f->size, ctx->tombBlkArray,
ctx->bufArr);
TSDB_CHECK_CODE(code, lino, _exit);
}
}
}
if (TOMB_BLOCK_SIZE(ctx->tombBlock) > 0) {
if (ctx->fd == NULL) {
code = tsdbUpgradeOpenTombFile(tsdb, fset, &ctx->fd, &ctx->fobj, &ctx->toStt);
TSDB_CHECK_CODE(code, lino, _exit);
}
code = tsdbFileWriteTombBlock(ctx->fd, ctx->tombBlock, ctx->cmprAlg, &ctx->fobj->f->size, ctx->tombBlkArray,
ctx->bufArr);
TSDB_CHECK_CODE(code, lino, _exit);
}
if (ctx->fd != NULL) {
if (ctx->toStt) {
code = tsdbFileWriteTombBlk(ctx->fd, ctx->tombBlkArray, ctx->sttFooter->tombBlkPtr, &ctx->fobj->f->size);
TSDB_CHECK_CODE(code, lino, _exit);
code = tsdbFileWriteSttFooter(ctx->fd, ctx->sttFooter, &ctx->fobj->f->size);
TSDB_CHECK_CODE(code, lino, _exit);
} else {
code = tsdbFileWriteTombBlk(ctx->fd, ctx->tombBlkArray, ctx->tombFooter->tombBlkPtr, &ctx->fobj->f->size);
TSDB_CHECK_CODE(code, lino, _exit);
code = tsdbFileWriteTombFooter(ctx->fd, ctx->tombFooter, &ctx->fobj->f->size);
TSDB_CHECK_CODE(code, lino, _exit);
}
code = tsdbFsyncFile(ctx->fd);
TSDB_CHECK_CODE(code, lino, _exit);
tsdbCloseFile(&ctx->fd);
}
_exit:
if (code) {
TSDB_ERROR_LOG(TD_VID(tsdb->pVnode), lino, code);
}
for (int32_t i = 0; i < ARRAY_SIZE(ctx->bufArr); i++) {
tFree(ctx->bufArr[i]);
}
TARRAY2_DESTROY(ctx->tombBlkArray, NULL);
tTombBlockDestroy(ctx->tombBlock);
taosArrayDestroy(ctx->aDelData);
return code;
}
static int32_t tsdbUpgradeTombFile(STsdb *tsdb, SDelFile *pDelFile, TFileSetArray *fileSetArray) {
int32_t code = 0;
int32_t lino = 0;
SDelFReader *reader = NULL;
SArray *aDelIdx = NULL;
if ((aDelIdx = taosArrayInit(0, sizeof(SDelIdx))) == NULL) {
code = TSDB_CODE_OUT_OF_MEMORY;
TSDB_CHECK_CODE(code, lino, _exit);
}
code = tsdbDelFReaderOpen(&reader, pDelFile, tsdb);
TSDB_CHECK_CODE(code, lino, _exit);
code = tsdbReadDelIdx(reader, aDelIdx);
TSDB_CHECK_CODE(code, lino, _exit);
if (taosArrayGetSize(aDelIdx) > 0) {
STFileSet *fset;
TARRAY2_FOREACH(fileSetArray, fset) {
code = tsdbDumpTombDataToFSet(tsdb, reader, aDelIdx, fset);
TSDB_CHECK_CODE(code, lino, _exit);
}
}
_exit:
if (code) {
TSDB_ERROR_LOG(TD_VID(tsdb->pVnode), lino, code);
}
tsdbDelFReaderClose(&reader);
taosArrayDestroy(aDelIdx);
return code;
}
static int32_t tsdbDoUpgradeFileSystem(STsdb *tsdb, TFileSetArray *fileSetArray) {
int32_t code = 0;
int32_t lino = 0;
// upgrade each file set
for (int32_t i = 0; i < taosArrayGetSize(tsdb->fs.aDFileSet); i++) {
code = tsdbUpgradeFileSet(tsdb, taosArrayGet(tsdb->fs.aDFileSet, i), fileSetArray);
TSDB_CHECK_CODE(code, lino, _exit);
}
// upgrade tomb file
if (tsdb->fs.pDelFile != NULL) {
code = tsdbUpgradeTombFile(tsdb, tsdb->fs.pDelFile, fileSetArray);
TSDB_CHECK_CODE(code, lino, _exit);
}
_exit:
if (code) {
TSDB_ERROR_LOG(TD_VID(tsdb->pVnode), lino, code);
}
return code;
}
static int32_t tsdbUpgradeFileSystem(STsdb *tsdb, int8_t rollback) {
int32_t code = 0;
int32_t lino = 0;
TFileSetArray fileSetArray[1] = {0};
// open old file system
code = tsdbFSOpen(tsdb, rollback);
TSDB_CHECK_CODE(code, lino, _exit);
code = tsdbDoUpgradeFileSystem(tsdb, fileSetArray);
TSDB_CHECK_CODE(code, lino, _exit);
// close file system
code = tsdbFSClose(tsdb);
TSDB_CHECK_CODE(code, lino, _exit);
// save new file system
char fname[TSDB_FILENAME_LEN];
current_fname(tsdb, fname, TSDB_FCURRENT);
code = save_fs(fileSetArray, fname);
TSDB_CHECK_CODE(code, lino, _exit);
_exit:
if (code) {
TSDB_ERROR_LOG(TD_VID(tsdb->pVnode), lino, code);
}
TARRAY2_DESTROY(fileSetArray, tsdbTFileSetClear);
return code;
}
int32_t tsdbCheckAndUpgradeFileSystem(STsdb *tsdb, int8_t rollback) {
char fname[TSDB_FILENAME_LEN];
tsdbGetCurrentFName(tsdb, fname, NULL);
if (!taosCheckExistFile(fname)) return 0;
int32_t code = tsdbUpgradeFileSystem(tsdb, rollback);
if (code) return code;
taosRemoveFile(fname);
return 0;
}

View File

@ -0,0 +1,35 @@
/*
* Copyright (c) 2019 TAOS Data, Inc. <jhtao@taosdata.com>
*
* This program is free software: you can use, redistribute, and/or modify
* it under the terms of the GNU Affero General Public License, version 3
* or later ("AGPL"), as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include "tsdb.h"
#include "tsdbDataFileRW.h"
#include "tsdbDef.h"
#include "tsdbFS2.h"
#include "tsdbUtil2.h"
#ifndef _TSDB_UPGRADE_H_
#define _TSDB_UPGRADE_H_
#ifdef __cplusplus
extern "C" {
#endif
int32_t tsdbCheckAndUpgradeFileSystem(STsdb *tsdb, int8_t rollback);
#ifdef __cplusplus
}
#endif
#endif /*_TSDB_UPGRADE_H_*/

View File

@ -0,0 +1,191 @@
/*
* Copyright (c) 2019 TAOS Data, Inc. <jhtao@taosdata.com>
*
* This program is free software: you can use, redistribute, and/or modify
* it under the terms of the GNU Affero General Public License, version 3
* or later ("AGPL"), as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include "tsdbUtil2.h"
// SDelBlock ----------
int32_t tTombBlockInit(STombBlock *tombBlock) {
for (int32_t i = 0; i < TOMB_RECORD_ELEM_NUM; ++i) {
TARRAY2_INIT(&tombBlock->dataArr[i]);
}
return 0;
}
int32_t tTombBlockDestroy(STombBlock *tombBlock) {
for (int32_t i = 0; i < TOMB_RECORD_ELEM_NUM; ++i) {
TARRAY2_DESTROY(&tombBlock->dataArr[i], NULL);
}
return 0;
}
int32_t tTombBlockClear(STombBlock *tombBlock) {
for (int32_t i = 0; i < TOMB_RECORD_ELEM_NUM; ++i) {
TARRAY2_CLEAR(&tombBlock->dataArr[i], NULL);
}
return 0;
}
int32_t tTombBlockPut(STombBlock *tombBlock, const STombRecord *record) {
int32_t code;
for (int32_t i = 0; i < TOMB_RECORD_ELEM_NUM; ++i) {
code = TARRAY2_APPEND(&tombBlock->dataArr[i], record->dataArr[i]);
if (code) return code;
}
return 0;
}
int32_t tTombBlockGet(STombBlock *tombBlock, int32_t idx, STombRecord *record) {
if (idx >= TOMB_BLOCK_SIZE(tombBlock)) return TSDB_CODE_OUT_OF_RANGE;
for (int32_t i = 0; i < TOMB_RECORD_ELEM_NUM; ++i) {
record->dataArr[i] = TARRAY2_GET(&tombBlock->dataArr[i], idx);
}
return 0;
}
int32_t tTombRecordCompare(const STombRecord *r1, const STombRecord *r2) {
if (r1->suid < r2->suid) return -1;
if (r1->suid > r2->suid) return 1;
if (r1->uid < r2->uid) return -1;
if (r1->uid > r2->uid) return 1;
if (r1->version < r2->version) return -1;
if (r1->version > r2->version) return 1;
return 0;
}
// STbStatisBlock ----------
int32_t tStatisBlockInit(STbStatisBlock *statisBlock) {
for (int32_t i = 0; i < STATIS_RECORD_NUM_ELEM; ++i) {
TARRAY2_INIT(&statisBlock->dataArr[i]);
}
return 0;
}
int32_t tStatisBlockDestroy(STbStatisBlock *statisBlock) {
for (int32_t i = 0; i < STATIS_RECORD_NUM_ELEM; ++i) {
TARRAY2_DESTROY(&statisBlock->dataArr[i], NULL);
}
return 0;
}
int32_t tStatisBlockClear(STbStatisBlock *statisBlock) {
for (int32_t i = 0; i < STATIS_RECORD_NUM_ELEM; ++i) {
TARRAY2_CLEAR(&statisBlock->dataArr[i], NULL);
}
return 0;
}
int32_t tStatisBlockPut(STbStatisBlock *statisBlock, const STbStatisRecord *record) {
int32_t code;
for (int32_t i = 0; i < STATIS_RECORD_NUM_ELEM; ++i) {
code = TARRAY2_APPEND(&statisBlock->dataArr[i], record->dataArr[i]);
if (code) return code;
}
return 0;
}
int32_t tStatisBlockGet(STbStatisBlock *statisBlock, int32_t idx, STbStatisRecord *record) {
if (idx >= STATIS_BLOCK_SIZE(statisBlock)) return TSDB_CODE_OUT_OF_RANGE;
for (int32_t i = 0; i < STATIS_RECORD_NUM_ELEM; ++i) {
record->dataArr[i] = TARRAY2_GET(&statisBlock->dataArr[i], idx);
}
return 0;
}
// SBrinRecord ----------
int32_t tBrinBlockInit(SBrinBlock *brinBlock) {
for (int32_t i = 0; i < ARRAY_SIZE(brinBlock->dataArr1); ++i) {
TARRAY2_INIT(&brinBlock->dataArr1[i]);
}
for (int32_t i = 0; i < ARRAY_SIZE(brinBlock->dataArr2); ++i) {
TARRAY2_INIT(&brinBlock->dataArr2[i]);
}
return 0;
}
int32_t tBrinBlockDestroy(SBrinBlock *brinBlock) {
for (int32_t i = 0; i < ARRAY_SIZE(brinBlock->dataArr1); ++i) {
TARRAY2_DESTROY(&brinBlock->dataArr1[i], NULL);
}
for (int32_t i = 0; i < ARRAY_SIZE(brinBlock->dataArr2); ++i) {
TARRAY2_DESTROY(&brinBlock->dataArr2[i], NULL);
}
return 0;
}
int32_t tBrinBlockClear(SBrinBlock *brinBlock) {
for (int32_t i = 0; i < ARRAY_SIZE(brinBlock->dataArr1); ++i) {
TARRAY2_CLEAR(&brinBlock->dataArr1[i], NULL);
}
for (int32_t i = 0; i < ARRAY_SIZE(brinBlock->dataArr2); ++i) {
TARRAY2_CLEAR(&brinBlock->dataArr2[i], NULL);
}
return 0;
}
int32_t tBrinBlockPut(SBrinBlock *brinBlock, const SBrinRecord *record) {
int32_t code;
for (int32_t i = 0; i < ARRAY_SIZE(brinBlock->dataArr1); ++i) {
code = TARRAY2_APPEND(&brinBlock->dataArr1[i], record->dataArr1[i]);
if (code) return code;
}
for (int32_t i = 0; i < ARRAY_SIZE(brinBlock->dataArr2); ++i) {
code = TARRAY2_APPEND(&brinBlock->dataArr2[i], record->dataArr2[i]);
if (code) return code;
}
return 0;
}
int32_t tBrinBlockGet(SBrinBlock *brinBlock, int32_t idx, SBrinRecord *record) {
if (idx >= BRIN_BLOCK_SIZE(brinBlock)) return TSDB_CODE_OUT_OF_RANGE;
for (int32_t i = 0; i < ARRAY_SIZE(brinBlock->dataArr1); ++i) {
record->dataArr1[i] = TARRAY2_GET(&brinBlock->dataArr1[i], idx);
}
for (int32_t i = 0; i < ARRAY_SIZE(brinBlock->dataArr2); ++i) {
record->dataArr2[i] = TARRAY2_GET(&brinBlock->dataArr2[i], idx);
}
return 0;
}
// other apis ----------
int32_t tsdbUpdateSkmTb(STsdb *pTsdb, const TABLEID *tbid, SSkmInfo *pSkmTb) {
if (tbid->suid) {
if (pSkmTb->suid == tbid->suid) {
pSkmTb->uid = tbid->uid;
return 0;
}
} else if (pSkmTb->uid == tbid->uid) {
return 0;
}
pSkmTb->suid = tbid->suid;
pSkmTb->uid = tbid->uid;
tDestroyTSchema(pSkmTb->pTSchema);
return metaGetTbTSchemaEx(pTsdb->pVnode->pMeta, tbid->suid, tbid->uid, -1, &pSkmTb->pTSchema);
}
int32_t tsdbUpdateSkmRow(STsdb *pTsdb, const TABLEID *tbid, int32_t sver, SSkmInfo *pSkmRow) {
if (pSkmRow->pTSchema && pSkmRow->suid == tbid->suid) {
if (pSkmRow->suid) {
if (sver == pSkmRow->pTSchema->version) return 0;
} else if (pSkmRow->uid == tbid->uid && pSkmRow->pTSchema->version == sver) {
return 0;
}
}
pSkmRow->suid = tbid->suid;
pSkmRow->uid = tbid->uid;
tDestroyTSchema(pSkmRow->pTSchema);
return metaGetTbTSchemaEx(pTsdb->pVnode->pMeta, tbid->suid, tbid->uid, sver, &pSkmRow->pTSchema);
}

View File

@ -0,0 +1,193 @@
/*
* Copyright (c) 2019 TAOS Data, Inc. <jhtao@taosdata.com>
*
* This program is free software: you can use, redistribute, and/or modify
* it under the terms of the GNU Affero General Public License, version 3
* or later ("AGPL"), as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef _TSDB_UTIL_H
#define _TSDB_UTIL_H
#include "tsdbDef.h"
#ifdef __cplusplus
extern "C" {
#endif
// STombRecord ----------
#define TOMB_RECORD_ELEM_NUM 5
typedef union {
int64_t dataArr[TOMB_RECORD_ELEM_NUM];
struct {
int64_t suid;
int64_t uid;
int64_t version;
int64_t skey;
int64_t ekey;
};
} STombRecord;
typedef union {
TARRAY2(int64_t) dataArr[TOMB_RECORD_ELEM_NUM];
struct {
TARRAY2(int64_t) suid[1];
TARRAY2(int64_t) uid[1];
TARRAY2(int64_t) version[1];
TARRAY2(int64_t) skey[1];
TARRAY2(int64_t) ekey[1];
};
} STombBlock;
typedef struct {
SFDataPtr dp[1];
TABLEID minTbid;
TABLEID maxTbid;
int64_t minVer;
int64_t maxVer;
int32_t numRec;
int32_t size[TOMB_RECORD_ELEM_NUM];
int8_t cmprAlg;
int8_t rsvd[7];
} STombBlk;
typedef TARRAY2(STombBlk) TTombBlkArray;
#define TOMB_BLOCK_SIZE(db) TARRAY2_SIZE((db)->suid)
int32_t tTombBlockInit(STombBlock *tombBlock);
int32_t tTombBlockDestroy(STombBlock *tombBlock);
int32_t tTombBlockClear(STombBlock *tombBlock);
int32_t tTombBlockPut(STombBlock *tombBlock, const STombRecord *record);
int32_t tTombBlockGet(STombBlock *tombBlock, int32_t idx, STombRecord *record);
int32_t tTombRecordCompare(const STombRecord *record1, const STombRecord *record2);
// STbStatisRecord ----------
#define STATIS_RECORD_NUM_ELEM 5
typedef union {
int64_t dataArr[STATIS_RECORD_NUM_ELEM];
struct {
int64_t suid;
int64_t uid;
int64_t firstKey;
int64_t lastKey;
int64_t count;
};
} STbStatisRecord;
typedef union {
TARRAY2(int64_t) dataArr[STATIS_RECORD_NUM_ELEM];
struct {
TARRAY2(int64_t) suid[1];
TARRAY2(int64_t) uid[1];
TARRAY2(int64_t) firstKey[1];
TARRAY2(int64_t) lastKey[1];
TARRAY2(int64_t) count[1];
};
} STbStatisBlock;
typedef struct {
SFDataPtr dp[1];
TABLEID minTbid;
TABLEID maxTbid;
int32_t numRec;
int32_t size[STATIS_RECORD_NUM_ELEM];
int8_t cmprAlg;
int8_t rsvd[7];
} SStatisBlk;
#define STATIS_BLOCK_SIZE(db) TARRAY2_SIZE((db)->suid)
int32_t tStatisBlockInit(STbStatisBlock *statisBlock);
int32_t tStatisBlockDestroy(STbStatisBlock *statisBlock);
int32_t tStatisBlockClear(STbStatisBlock *statisBlock);
int32_t tStatisBlockPut(STbStatisBlock *statisBlock, const STbStatisRecord *record);
int32_t tStatisBlockGet(STbStatisBlock *statisBlock, int32_t idx, STbStatisRecord *record);
// SBrinRecord ----------
typedef union {
struct {
int64_t dataArr1[10];
int32_t dataArr2[5];
};
struct {
int64_t suid;
int64_t uid;
int64_t firstKey;
int64_t firstKeyVer;
int64_t lastKey;
int64_t lastKeyVer;
int64_t minVer;
int64_t maxVer;
int64_t blockOffset;
int64_t smaOffset;
int32_t blockSize;
int32_t blockKeySize;
int32_t smaSize;
int32_t numRow;
int32_t count;
};
} SBrinRecord;
typedef union {
struct {
TARRAY2(int64_t) dataArr1[10];
TARRAY2(int32_t) dataArr2[5];
};
struct {
TARRAY2(int64_t) suid[1];
TARRAY2(int64_t) uid[1];
TARRAY2(int64_t) firstKey[1];
TARRAY2(int64_t) firstKeyVer[1];
TARRAY2(int64_t) lastKey[1];
TARRAY2(int64_t) lastKeyVer[1];
TARRAY2(int64_t) minVer[1];
TARRAY2(int64_t) maxVer[1];
TARRAY2(int64_t) blockOffset[1];
TARRAY2(int64_t) smaOffset[1];
TARRAY2(int32_t) blockSize[1];
TARRAY2(int32_t) blockKeySize[1];
TARRAY2(int32_t) smaSize[1];
TARRAY2(int32_t) numRow[1];
TARRAY2(int32_t) count[1];
};
} SBrinBlock;
typedef struct {
SFDataPtr dp[1];
TABLEID minTbid;
TABLEID maxTbid;
int64_t minVer;
int64_t maxVer;
int32_t numRec;
int32_t size[15];
int8_t cmprAlg;
int8_t rsvd[7];
} SBrinBlk;
typedef TARRAY2(SBrinBlk) TBrinBlkArray;
#define BRIN_BLOCK_SIZE(db) TARRAY2_SIZE((db)->suid)
int32_t tBrinBlockInit(SBrinBlock *brinBlock);
int32_t tBrinBlockDestroy(SBrinBlock *brinBlock);
int32_t tBrinBlockClear(SBrinBlock *brinBlock);
int32_t tBrinBlockPut(SBrinBlock *brinBlock, const SBrinRecord *record);
int32_t tBrinBlockGet(SBrinBlock *brinBlock, int32_t idx, SBrinRecord *record);
// other apis
int32_t tsdbUpdateSkmTb(STsdb *pTsdb, const TABLEID *tbid, SSkmInfo *pSkmTb);
int32_t tsdbUpdateSkmRow(STsdb *pTsdb, const TABLEID *tbid, int32_t sver, SSkmInfo *pSkmRow);
#ifdef __cplusplus
}
#endif
#endif /*_TSDB_UTIL_H*/

View File

@ -49,7 +49,7 @@ const SVnodeCfg vnodeCfgDefault = {.vgId = -1,
.hashBegin = 0,
.hashEnd = 0,
.hashMethod = 0,
.sttTrigger = TSDB_DEFAULT_STT_FILE,
.sttTrigger = TSDB_DEFAULT_SST_TRIGGER,
.tsdbPageSize = TSDB_DEFAULT_PAGE_SIZE};
int vnodeCheckCfg(const SVnodeCfg *pCfg) {
@ -57,7 +57,7 @@ int vnodeCheckCfg(const SVnodeCfg *pCfg) {
return 0;
}
const char* vnodeRoleToStr(ESyncRole role) {
const char *vnodeRoleToStr(ESyncRole role) {
switch (role) {
case TAOS_SYNC_ROLE_VOTER:
return "true";
@ -68,11 +68,11 @@ const char* vnodeRoleToStr(ESyncRole role) {
}
}
const ESyncRole vnodeStrToRole(char* str) {
if(strcmp(str, "true") == 0){
const ESyncRole vnodeStrToRole(char *str) {
if (strcmp(str, "true") == 0) {
return TAOS_SYNC_ROLE_VOTER;
}
if(strcmp(str, "false") == 0){
if (strcmp(str, "false") == 0) {
return TAOS_SYNC_ROLE_LEARNER;
}
@ -295,10 +295,9 @@ int vnodeDecodeConfig(const SJson *pJson, void *pObj) {
char role[10] = {0};
code = tjsonGetStringValue(info, "isReplica", role);
if (code < 0) return -1;
if(strlen(role) != 0){
if (strlen(role) != 0) {
pNode->nodeRole = vnodeStrToRole(role);
}
else{
} else {
pNode->nodeRole = TAOS_SYNC_ROLE_VOTER;
}
vDebug("vgId:%d, decode config, replica:%d ep:%s:%u dnode:%d", pCfg->vgId, i, pNode->nodeFqdn, pNode->nodePort,

View File

@ -16,6 +16,13 @@
#include "vnd.h"
#include "vnodeInt.h"
extern int32_t tsdbPreCommit(STsdb *pTsdb);
extern int32_t tsdbCommitBegin(STsdb *pTsdb, SCommitInfo *pInfo);
extern int32_t tsdbCommitCommit(STsdb *pTsdb);
extern int32_t tsdbCommitAbort(STsdb *pTsdb);
#define VND_INFO_FNAME_TMP "vnode_tmp.json"
static int vnodeEncodeInfo(const SVnodeInfo *pInfo, char **ppData);
static int vnodeCommitImpl(SCommitInfo *pInfo);
@ -296,7 +303,7 @@ static int32_t vnodePrepareCommit(SVnode *pVnode, SCommitInfo *pInfo) {
TSDB_CHECK_CODE(code, lino, _exit);
}
tsdbPrepareCommit(pVnode->pTsdb);
tsdbPreCommit(pVnode->pTsdb);
metaPrepareAsyncCommit(pVnode->pMeta);
@ -430,8 +437,7 @@ static int vnodeCommitImpl(SCommitInfo *pInfo) {
syncBeginSnapshot(pVnode->sync, pInfo->info.state.committed);
// commit each sub-system
code = tsdbCommit(pVnode->pTsdb, pInfo);
code = tsdbCommitBegin(pVnode->pTsdb, pInfo);
TSDB_CHECK_CODE(code, lino, _exit);
if (!TSDB_CACHE_NO(pVnode->config)) {
@ -455,7 +461,7 @@ static int vnodeCommitImpl(SCommitInfo *pInfo) {
TSDB_CHECK_CODE(code, lino, _exit);
}
code = tsdbFinishCommit(pVnode->pTsdb);
code = tsdbCommitCommit(pVnode->pTsdb);
TSDB_CHECK_CODE(code, lino, _exit);
if (VND_IS_RSMA(pVnode)) {

View File

@ -42,24 +42,24 @@ void initStorageAPI(SStorageAPI* pAPI) {
void initTsdbReaderAPI(TsdReader* pReader) {
pReader->tsdReaderOpen = (int32_t(*)(void*, SQueryTableDataCond*, void*, int32_t, SSDataBlock*, void**, const char*,
bool, SHashObj**))tsdbReaderOpen;
pReader->tsdReaderClose = tsdbReaderClose;
bool, SHashObj**))tsdbReaderOpen2;
pReader->tsdReaderClose = tsdbReaderClose2;
pReader->tsdNextDataBlock = tsdbNextDataBlock;
pReader->tsdNextDataBlock = tsdbNextDataBlock2;
pReader->tsdReaderRetrieveDataBlock = tsdbRetrieveDataBlock;
pReader->tsdReaderReleaseDataBlock = tsdbReleaseDataBlock;
pReader->tsdReaderRetrieveDataBlock = tsdbRetrieveDataBlock2;
pReader->tsdReaderReleaseDataBlock = tsdbReleaseDataBlock2;
pReader->tsdReaderRetrieveBlockSMAInfo = tsdbRetrieveDatablockSMA;
pReader->tsdReaderRetrieveBlockSMAInfo = tsdbRetrieveDatablockSMA2;
pReader->tsdReaderNotifyClosing = tsdbReaderSetCloseFlag;
pReader->tsdReaderResetStatus = tsdbReaderReset;
pReader->tsdReaderResetStatus = tsdbReaderReset2;
pReader->tsdReaderGetDataBlockDistInfo = tsdbGetFileBlocksDistInfo;
pReader->tsdReaderGetNumOfInMemRows = tsdbGetNumOfRowsInMemTable; // todo this function should be moved away
pReader->tsdReaderGetDataBlockDistInfo = tsdbGetFileBlocksDistInfo2;
pReader->tsdReaderGetNumOfInMemRows = tsdbGetNumOfRowsInMemTable2; // todo this function should be moved away
pReader->tsdSetQueryTableList = tsdbSetTableList;
pReader->tsdSetReaderTaskId = (void (*)(void*, const char*))tsdbReaderSetId;
pReader->tsdSetQueryTableList = tsdbSetTableList2;
pReader->tsdSetReaderTaskId = (void (*)(void*, const char*))tsdbReaderSetId2;
}
void initMetadataAPI(SStoreMeta* pMeta) {

View File

@ -23,26 +23,24 @@ struct SVnodeTask {
void* arg;
};
struct SVnodeGlobal {
int8_t init;
int8_t stop;
typedef struct {
int nthreads;
TdThread* threads;
TdThreadMutex mutex;
TdThreadCond hasTask;
SVnodeTask queue;
} SVnodeThreadPool;
struct SVnodeGlobal {
int8_t init;
int8_t stop;
SVnodeThreadPool tp[2];
};
struct SVnodeGlobal vnodeGlobal;
static void* loop(void* arg);
static tsem_t canCommit = {0};
static void vnodeInitCommit() { tsem_init(&canCommit, 0, 4); };
void vnode_wait_commit() { tsem_wait(&canCommit); }
void vnode_done_commit() { tsem_wait(&canCommit); }
int vnodeInit(int nthreads) {
int8_t init;
int ret;
@ -51,28 +49,30 @@ int vnodeInit(int nthreads) {
if (init) {
return 0;
}
taosThreadMutexInit(&vnodeGlobal.mutex, NULL);
taosThreadCondInit(&vnodeGlobal.hasTask, NULL);
taosThreadMutexLock(&vnodeGlobal.mutex);
vnodeGlobal.stop = 0;
vnodeGlobal.queue.next = &vnodeGlobal.queue;
vnodeGlobal.queue.prev = &vnodeGlobal.queue;
taosThreadMutexUnlock(&(vnodeGlobal.mutex));
for (int32_t i = 0; i < ARRAY_SIZE(vnodeGlobal.tp); i++) {
taosThreadMutexInit(&vnodeGlobal.tp[i].mutex, NULL);
taosThreadCondInit(&vnodeGlobal.tp[i].hasTask, NULL);
vnodeGlobal.nthreads = nthreads;
vnodeGlobal.threads = taosMemoryCalloc(nthreads, sizeof(TdThread));
if (vnodeGlobal.threads == NULL) {
terrno = TSDB_CODE_OUT_OF_MEMORY;
vError("failed to init vnode module since:%s", tstrerror(terrno));
return -1;
}
taosThreadMutexLock(&vnodeGlobal.tp[i].mutex);
for (int i = 0; i < nthreads; i++) {
taosThreadCreate(&(vnodeGlobal.threads[i]), NULL, loop, NULL);
vnodeGlobal.tp[i].queue.next = &vnodeGlobal.tp[i].queue;
vnodeGlobal.tp[i].queue.prev = &vnodeGlobal.tp[i].queue;
taosThreadMutexUnlock(&(vnodeGlobal.tp[i].mutex));
vnodeGlobal.tp[i].nthreads = nthreads;
vnodeGlobal.tp[i].threads = taosMemoryCalloc(nthreads, sizeof(TdThread));
if (vnodeGlobal.tp[i].threads == NULL) {
terrno = TSDB_CODE_OUT_OF_MEMORY;
vError("failed to init vnode module since:%s", tstrerror(terrno));
return -1;
}
for (int j = 0; j < nthreads; j++) {
taosThreadCreate(&(vnodeGlobal.tp[i].threads[j]), NULL, loop, &vnodeGlobal.tp[i]);
}
}
if (walInit() < 0) {
@ -92,27 +92,29 @@ void vnodeCleanup() {
if (init == 0) return;
// set stop
taosThreadMutexLock(&(vnodeGlobal.mutex));
vnodeGlobal.stop = 1;
taosThreadCondBroadcast(&(vnodeGlobal.hasTask));
taosThreadMutexUnlock(&(vnodeGlobal.mutex));
for (int32_t i = 0; i < ARRAY_SIZE(vnodeGlobal.tp); i++) {
taosThreadMutexLock(&(vnodeGlobal.tp[i].mutex));
taosThreadCondBroadcast(&(vnodeGlobal.tp[i].hasTask));
taosThreadMutexUnlock(&(vnodeGlobal.tp[i].mutex));
// wait for threads
for (int i = 0; i < vnodeGlobal.nthreads; i++) {
taosThreadJoin(vnodeGlobal.threads[i], NULL);
// wait for threads
for (int j = 0; j < vnodeGlobal.tp[i].nthreads; j++) {
taosThreadJoin(vnodeGlobal.tp[i].threads[j], NULL);
}
// clear source
taosMemoryFreeClear(vnodeGlobal.tp[i].threads);
taosThreadCondDestroy(&(vnodeGlobal.tp[i].hasTask));
taosThreadMutexDestroy(&(vnodeGlobal.tp[i].mutex));
}
// clear source
taosMemoryFreeClear(vnodeGlobal.threads);
taosThreadCondDestroy(&(vnodeGlobal.hasTask));
taosThreadMutexDestroy(&(vnodeGlobal.mutex));
walCleanUp();
tqCleanUp();
smaCleanUp();
}
int vnodeScheduleTask(int (*execute)(void*), void* arg) {
int vnodeScheduleTaskEx(int tpid, int (*execute)(void*), void* arg) {
SVnodeTask* pTask;
ASSERT(!vnodeGlobal.stop);
@ -126,35 +128,42 @@ int vnodeScheduleTask(int (*execute)(void*), void* arg) {
pTask->execute = execute;
pTask->arg = arg;
taosThreadMutexLock(&(vnodeGlobal.mutex));
pTask->next = &vnodeGlobal.queue;
pTask->prev = vnodeGlobal.queue.prev;
vnodeGlobal.queue.prev->next = pTask;
vnodeGlobal.queue.prev = pTask;
taosThreadCondSignal(&(vnodeGlobal.hasTask));
taosThreadMutexUnlock(&(vnodeGlobal.mutex));
taosThreadMutexLock(&(vnodeGlobal.tp[tpid].mutex));
pTask->next = &vnodeGlobal.tp[tpid].queue;
pTask->prev = vnodeGlobal.tp[tpid].queue.prev;
vnodeGlobal.tp[tpid].queue.prev->next = pTask;
vnodeGlobal.tp[tpid].queue.prev = pTask;
taosThreadCondSignal(&(vnodeGlobal.tp[tpid].hasTask));
taosThreadMutexUnlock(&(vnodeGlobal.tp[tpid].mutex));
return 0;
}
int vnodeScheduleTask(int (*execute)(void*), void* arg) { return vnodeScheduleTaskEx(0, execute, arg); }
/* ------------------------ STATIC METHODS ------------------------ */
static void* loop(void* arg) {
SVnodeTask* pTask;
int ret;
SVnodeThreadPool* tp = (SVnodeThreadPool*)arg;
SVnodeTask* pTask;
int ret;
setThreadName("vnode-commit");
if (tp == &vnodeGlobal.tp[0]) {
setThreadName("vnode-commit");
} else if (tp == &vnodeGlobal.tp[1]) {
setThreadName("vnode-merge");
}
for (;;) {
taosThreadMutexLock(&(vnodeGlobal.mutex));
taosThreadMutexLock(&(tp->mutex));
for (;;) {
pTask = vnodeGlobal.queue.next;
if (pTask == &vnodeGlobal.queue) {
pTask = tp->queue.next;
if (pTask == &tp->queue) {
// no task
if (vnodeGlobal.stop) {
taosThreadMutexUnlock(&(vnodeGlobal.mutex));
taosThreadMutexUnlock(&(tp->mutex));
return NULL;
} else {
taosThreadCondWait(&(vnodeGlobal.hasTask), &(vnodeGlobal.mutex));
taosThreadCondWait(&(tp->hasTask), &(tp->mutex));
}
} else {
// has task
@ -164,7 +173,7 @@ static void* loop(void* arg) {
}
}
taosThreadMutexUnlock(&(vnodeGlobal.mutex));
taosThreadMutexUnlock(&(tp->mutex));
pTask->execute(pTask->arg);
taosMemoryFree(pTask);

View File

@ -15,111 +15,27 @@
#include "vnd.h"
typedef struct {
SVnode *pVnode;
int64_t now;
int64_t commitID;
SVnodeInfo info;
} SRetentionInfo;
extern int32_t tsdbSyncRetention(STsdb *tsdb, int64_t now);
extern int32_t tsdbAsyncRetention(STsdb *tsdb, int64_t now, int64_t *taskid);
extern bool tsdbShouldDoRetention(STsdb *pTsdb, int64_t now);
extern int32_t tsdbDoRetention(STsdb *pTsdb, int64_t now);
extern int32_t tsdbCommitRetention(STsdb *pTsdb);
int32_t vnodeDoRetention(SVnode *pVnode, int64_t now) {
int32_t code;
int32_t lino;
static int32_t vnodePrepareRentention(SVnode *pVnode, SRetentionInfo *pInfo) {
int32_t code = 0;
int32_t lino = 0;
tsem_wait(&pVnode->canCommit);
pInfo->commitID = ++pVnode->state.commitID;
char dir[TSDB_FILENAME_LEN] = {0};
vnodeGetPrimaryDir(pVnode->path, pVnode->diskPrimary, pVnode->pTfs, dir, TSDB_FILENAME_LEN);
if (vnodeLoadInfo(dir, &pInfo->info) < 0) {
code = terrno;
if (pVnode->config.sttTrigger == 1) {
tsem_wait(&pVnode->canCommit);
code = tsdbSyncRetention(pVnode->pTsdb, now);
TSDB_CHECK_CODE(code, lino, _exit);
}
_exit:
if (code) {
vError("vgId:%d %s failed at line %d since %s", TD_VID(pVnode), __func__, lino, tstrerror(code));
// code = smaDoRetention(pVnode->pSma, now);
// TSDB_CHECK_CODE(code, lino, _exit);
tsem_post(&pVnode->canCommit);
} else {
vInfo("vgId:%d %s done", TD_VID(pVnode), __func__);
}
return code;
}
static int32_t vnodeRetentionTask(void *param) {
int32_t code = 0;
int32_t lino = 0;
SRetentionInfo *pInfo = (SRetentionInfo *)param;
SVnode *pVnode = pInfo->pVnode;
char dir[TSDB_FILENAME_LEN] = {0};
vnodeGetPrimaryDir(pVnode->path, pVnode->diskPrimary, pVnode->pTfs, dir, TSDB_FILENAME_LEN);
// save info
pInfo->info.state.commitID = pInfo->commitID;
if (vnodeSaveInfo(dir, &pInfo->info) < 0) {
code = terrno;
int64_t taskid;
code = tsdbAsyncRetention(pVnode->pTsdb, now, &taskid);
TSDB_CHECK_CODE(code, lino, _exit);
}
// do job
code = tsdbDoRetention(pInfo->pVnode->pTsdb, pInfo->now);
TSDB_CHECK_CODE(code, lino, _exit);
code = smaDoRetention(pInfo->pVnode->pSma, pInfo->now);
TSDB_CHECK_CODE(code, lino, _exit);
// commit info
vnodeCommitInfo(dir);
// commit sub-job
tsdbCommitRetention(pVnode->pTsdb);
_exit:
if (code) {
vError("vgId:%d %s failed at line %d since %s", TD_VID(pInfo->pVnode), __func__, lino, tstrerror(code));
} else {
vInfo("vgId:%d %s done", TD_VID(pInfo->pVnode), __func__);
}
tsem_post(&pInfo->pVnode->canCommit);
taosMemoryFree(pInfo);
return code;
}
int32_t vnodeAsyncRentention(SVnode *pVnode, int64_t now) {
int32_t code = 0;
int32_t lino = 0;
if (!tsdbShouldDoRetention(pVnode->pTsdb, now)) return code;
SRetentionInfo *pInfo = (SRetentionInfo *)taosMemoryCalloc(1, sizeof(*pInfo));
if (pInfo == NULL) {
code = TSDB_CODE_OUT_OF_MEMORY;
TSDB_CHECK_CODE(code, lino, _exit);
}
pInfo->pVnode = pVnode;
pInfo->now = now;
code = vnodePrepareRentention(pVnode, pInfo);
TSDB_CHECK_CODE(code, lino, _exit);
vnodeScheduleTask(vnodeRetentionTask, pInfo);
_exit:
if (code) {
vError("vgId:%d %s failed at line %d since %s", TD_VID(pVnode), __func__, lino, tstrerror(code));
if (pInfo) taosMemoryFree(pInfo);
} else {
vInfo("vgId:%d %s done", TD_VID(pInfo->pVnode), __func__);
}
return 0;
}

View File

@ -689,7 +689,8 @@ void vnodeUpdateMetaRsp(SVnode *pVnode, STableMetaRsp *pMetaRsp) {
pMetaRsp->precision = pVnode->config.tsdbCfg.precision;
}
extern int32_t vnodeAsyncRentention(SVnode *pVnode, int64_t now);
extern int32_t vnodeDoRetention(SVnode *pVnode, int64_t now);
static int32_t vnodeProcessTrimReq(SVnode *pVnode, int64_t ver, void *pReq, int32_t len, SRpcMsg *pRsp) {
int32_t code = 0;
SVTrimDbReq trimReq = {0};
@ -702,10 +703,7 @@ static int32_t vnodeProcessTrimReq(SVnode *pVnode, int64_t ver, void *pReq, int3
vInfo("vgId:%d, trim vnode request will be processed, time:%d", pVnode->config.vgId, trimReq.timestamp);
// process
vnodeAsyncRentention(pVnode, trimReq.timestamp);
tsem_wait(&pVnode->canCommit);
tsem_post(&pVnode->canCommit);
code = vnodeDoRetention(pVnode, trimReq.timestamp);
_exit:
return code;
@ -730,7 +728,7 @@ static int32_t vnodeProcessDropTtlTbReq(SVnode *pVnode, int64_t ver, void *pReq,
tqUpdateTbUidList(pVnode->pTq, tbUids, false);
}
vnodeAsyncRentention(pVnode, ttlReq.timestampSec);
vnodeDoRetention(pVnode, ttlReq.timestampSec);
end:
taosArrayDestroy(tbUids);

View File

@ -938,7 +938,7 @@ int32_t ctgInitJob(SCatalog* pCtg, SRequestConnInfo* pConn, SCtgJob** job, const
void* param);
int32_t ctgLaunchJob(SCtgJob* pJob);
int32_t ctgMakeAsyncRes(SCtgJob* pJob);
int32_t ctgLaunchSubTask(SCtgTask* pTask, CTG_TASK_TYPE type, ctgSubTaskCbFp fp, void* param);
int32_t ctgLaunchSubTask(SCtgTask** ppTask, CTG_TASK_TYPE type, ctgSubTaskCbFp fp, void* param);
int32_t ctgGetTbCfgCb(SCtgTask* pTask);
void ctgFreeHandle(SCatalog* pCatalog);

View File

@ -2097,7 +2097,7 @@ int32_t ctgLaunchGetTbCfgTask(SCtgTask* pTask) {
SCtgTbMetaParam param;
param.pName = pCtx->pName;
param.flag = 0;
CTG_ERR_JRET(ctgLaunchSubTask(pTask, CTG_TASK_GET_TB_META, ctgGetTbCfgCb, &param));
CTG_ERR_JRET(ctgLaunchSubTask(&pTask, CTG_TASK_GET_TB_META, ctgGetTbCfgCb, &param));
return TSDB_CODE_SUCCESS;
}
}
@ -2108,7 +2108,7 @@ int32_t ctgLaunchGetTbCfgTask(SCtgTask* pTask) {
if (NULL == pCtx->pVgInfo) {
CTG_ERR_JRET(ctgGetTbHashVgroupFromCache(pCtg, pCtx->pName, &pCtx->pVgInfo));
if (NULL == pCtx->pVgInfo) {
CTG_ERR_JRET(ctgLaunchSubTask(pTask, CTG_TASK_GET_DB_VGROUP, ctgGetTbCfgCb, dbFName));
CTG_ERR_JRET(ctgLaunchSubTask(&pTask, CTG_TASK_GET_DB_VGROUP, ctgGetTbCfgCb, dbFName));
return TSDB_CODE_SUCCESS;
}
}
@ -2145,7 +2145,7 @@ int32_t ctgLaunchGetTbTagTask(SCtgTask* pTask) {
if (NULL == pCtx->pVgInfo) {
CTG_ERR_JRET(ctgGetTbHashVgroupFromCache(pCtg, pCtx->pName, &pCtx->pVgInfo));
if (NULL == pCtx->pVgInfo) {
CTG_ERR_JRET(ctgLaunchSubTask(pTask, CTG_TASK_GET_DB_VGROUP, ctgGetTbTagCb, dbFName));
CTG_ERR_JRET(ctgLaunchSubTask(&pTask, CTG_TASK_GET_DB_VGROUP, ctgGetTbTagCb, dbFName));
return TSDB_CODE_SUCCESS;
}
}
@ -2331,7 +2331,7 @@ int32_t ctgLaunchGetUserTask(SCtgTask* pTask) {
SCtgTbMetaParam param;
param.pName = &pCtx->user.tbName;
param.flag = CTG_FLAG_SYNC_OP;
CTG_ERR_RET(ctgLaunchSubTask(pTask, CTG_TASK_GET_TB_META, ctgGetUserCb, &param));
CTG_ERR_RET(ctgLaunchSubTask(&pTask, CTG_TASK_GET_TB_META, ctgGetUserCb, &param));
} else {
CTG_ERR_RET(ctgGetUserDbAuthFromMnode(pCtg, pConn, pCtx->user.user, NULL, pTask));
}
@ -2541,19 +2541,35 @@ _return:
CTG_RET(code);
}
int32_t ctgLaunchSubTask(SCtgTask* pTask, CTG_TASK_TYPE type, ctgSubTaskCbFp fp, void* param) {
SCtgJob* pJob = pTask->pJob;
SCtgTask* ctgGetTask(SCtgJob* pJob, int32_t taskId) {
int32_t taskNum = taosArrayGetSize(pJob->pTasks);
for (int32_t i = 0; i < taskNum; ++i) {
SCtgTask* pTask = taosArrayGet(pJob->pTasks, i);
if (pTask->taskId == taskId) {
return pTask;
}
}
return NULL;
}
int32_t ctgLaunchSubTask(SCtgTask** ppTask, CTG_TASK_TYPE type, ctgSubTaskCbFp fp, void* param) {
SCtgJob* pJob = (*ppTask)->pJob;
int32_t subTaskId = -1;
bool newTask = false;
int32_t taskId = (*ppTask)->taskId;
ctgClearSubTaskRes(&pTask->subRes);
pTask->subRes.type = type;
pTask->subRes.fp = fp;
ctgClearSubTaskRes(&(*ppTask)->subRes);
(*ppTask)->subRes.type = type;
(*ppTask)->subRes.fp = fp;
CTG_ERR_RET(ctgSearchExistingTask(pJob, type, param, &subTaskId));
if (subTaskId < 0) {
CTG_ERR_RET(ctgInitTask(pJob, type, param, &subTaskId));
newTask = true;
*ppTask = ctgGetTask(pJob, taskId);
}
SCtgTask* pSub = taosArrayGet(pJob->pTasks, subTaskId);
@ -2561,10 +2577,10 @@ int32_t ctgLaunchSubTask(SCtgTask* pTask, CTG_TASK_TYPE type, ctgSubTaskCbFp fp,
pSub->subTask = true;
}
CTG_ERR_RET(ctgSetSubTaskCb(pSub, pTask));
CTG_ERR_RET(ctgSetSubTaskCb(pSub, *ppTask));
if (newTask) {
SCtgMsgCtx* pMsgCtx = CTG_GET_TASK_MSGCTX(pTask, -1);
SCtgMsgCtx* pMsgCtx = CTG_GET_TASK_MSGCTX(*ppTask, -1);
SCtgMsgCtx* pSubMsgCtx = CTG_GET_TASK_MSGCTX(pSub, -1);
pSubMsgCtx->pBatchs = pMsgCtx->pBatchs;
@ -2584,6 +2600,7 @@ int32_t ctgLaunchJob(SCtgJob* pJob) {
qDebug("QID:0x%" PRIx64 " ctg launch [%dth] task", pJob->queryId, pTask->taskId);
CTG_ERR_RET((*gCtgAsyncFps[pTask->type].launchFp)(pTask));
pTask = taosArrayGet(pJob->pTasks, i);
pTask->status = CTG_TASK_LAUNCHED;
}

View File

@ -191,7 +191,7 @@ void* taosArrayGet(const SArray* pArray, size_t index) {
}
if (index >= pArray->size) {
uError("index is out of range, current:%"PRIzu" max:%d", index, pArray->capacity);
uError("index is out of range, current:%" PRIzu " max:%d", index, pArray->capacity);
return NULL;
}
@ -221,7 +221,7 @@ size_t taosArrayGetSize(const SArray* pArray) {
return TARRAY_SIZE(pArray);
}
void* taosArrayInsert(SArray* pArray, size_t index, void* pData) {
void* taosArrayInsert(SArray* pArray, size_t index, const void* pData) {
if (pArray == NULL || pData == NULL) {
return NULL;
}
@ -492,7 +492,7 @@ void* taosDecodeArray(const void* buf, SArray** pArray, FDecode decode, int32_t
// order array<type *>
void taosArraySortPWithExt(SArray* pArray, __ext_compar_fn_t fn, const void* param) {
taosqsort(pArray->pData, pArray->size, pArray->elemSize, param, fn);
// taosArrayGetSize(pArray) > 8 ? taosArrayQuickSort(pArray, fn, param) : taosArrayInsertSort(pArray, fn, param);
// taosArrayGetSize(pArray) > 8 ? taosArrayQuickSort(pArray, fn, param) : taosArrayInsertSort(pArray, fn, param);
}
void taosArraySwap(SArray* a, SArray* b) {

View File

@ -105,7 +105,7 @@ static void tRBTreeTransplant(SRBTree *pTree, SRBTreeNode *u, SRBTreeNode *v) {
v->parent = u->parent;
}
static SRBTreeNode *tRBTreeSuccessor(SRBTree *pTree, SRBTreeNode *pNode) {
static SRBTreeNode *tRBTreeSuccessor(const SRBTree *pTree, SRBTreeNode *pNode) {
if (pNode->right != pTree->NIL) {
pNode = pNode->right;
while (pNode->left != pTree->NIL) {
@ -125,7 +125,7 @@ static SRBTreeNode *tRBTreeSuccessor(SRBTree *pTree, SRBTreeNode *pNode) {
return pNode;
}
static SRBTreeNode *tRBTreePredecessor(SRBTree *pTree, SRBTreeNode *pNode) {
static SRBTreeNode *tRBTreePredecessor(const SRBTree *pTree, SRBTreeNode *pNode) {
if (pNode->left != pTree->NIL) {
pNode = pNode->left;
while (pNode->right != pTree->NIL) {
@ -443,7 +443,7 @@ SRBTreeNode *tRBTreeDropMax(SRBTree *pTree) {
return pNode;
}
SRBTreeNode *tRBTreeGet(SRBTree *pTree, const SRBTreeNode *pKeyNode) {
SRBTreeNode *tRBTreeGet(const SRBTree *pTree, const SRBTreeNode *pKeyNode) {
SRBTreeNode *pNode = pTree->root;
while (pNode != pTree->NIL) {

View File

@ -42,7 +42,7 @@ class TDTestCase:
tdSql.query('show create database scd;')
tdSql.checkRows(1)
tdSql.checkData(0, 0, 'scd')
tdSql.checkData(0, 1, "CREATE DATABASE `scd` BUFFER 256 CACHESIZE 1 CACHEMODEL 'none' COMP 2 DURATION 14400m WAL_FSYNC_PERIOD 3000 MAXROWS 4096 MINROWS 100 STT_TRIGGER 1 KEEP 5256000m,5256000m,5256000m PAGES 256 PAGESIZE 4 PRECISION 'ms' REPLICA 1 WAL_LEVEL 1 VGROUPS 2 SINGLE_STABLE 0 TABLE_PREFIX 0 TABLE_SUFFIX 0 TSDB_PAGESIZE 4 WAL_RETENTION_PERIOD 0 WAL_RETENTION_SIZE 0")
tdSql.checkData(0, 1, "CREATE DATABASE `scd` BUFFER 256 CACHESIZE 1 CACHEMODEL 'none' COMP 2 DURATION 14400m WAL_FSYNC_PERIOD 3000 MAXROWS 4096 MINROWS 100 STT_TRIGGER 2 KEEP 5256000m,5256000m,5256000m PAGES 256 PAGESIZE 4 PRECISION 'ms' REPLICA 1 WAL_LEVEL 1 VGROUPS 2 SINGLE_STABLE 0 TABLE_PREFIX 0 TABLE_SUFFIX 0 TSDB_PAGESIZE 4 WAL_RETENTION_PERIOD 0 WAL_RETENTION_SIZE 0")
tdSql.query('show create database scd2;')
tdSql.checkRows(1)
@ -60,7 +60,7 @@ class TDTestCase:
tdSql.query('show create database scd;')
tdSql.checkRows(1)
tdSql.checkData(0, 0, 'scd')
tdSql.checkData(0, 1, "CREATE DATABASE `scd` BUFFER 256 CACHESIZE 1 CACHEMODEL 'none' COMP 2 DURATION 14400m WAL_FSYNC_PERIOD 3000 MAXROWS 4096 MINROWS 100 STT_TRIGGER 1 KEEP 5256000m,5256000m,5256000m PAGES 256 PAGESIZE 4 PRECISION 'ms' REPLICA 1 WAL_LEVEL 1 VGROUPS 2 SINGLE_STABLE 0 TABLE_PREFIX 0 TABLE_SUFFIX 0 TSDB_PAGESIZE 4 WAL_RETENTION_PERIOD 0 WAL_RETENTION_SIZE 0")
tdSql.checkData(0, 1, "CREATE DATABASE `scd` BUFFER 256 CACHESIZE 1 CACHEMODEL 'none' COMP 2 DURATION 14400m WAL_FSYNC_PERIOD 3000 MAXROWS 4096 MINROWS 100 STT_TRIGGER 2 KEEP 5256000m,5256000m,5256000m PAGES 256 PAGESIZE 4 PRECISION 'ms' REPLICA 1 WAL_LEVEL 1 VGROUPS 2 SINGLE_STABLE 0 TABLE_PREFIX 0 TABLE_SUFFIX 0 TSDB_PAGESIZE 4 WAL_RETENTION_PERIOD 0 WAL_RETENTION_SIZE 0")
tdSql.query('show create database scd2;')
tdSql.checkRows(1)

View File

@ -283,14 +283,14 @@ class TDTestCase:
# normal table
# all rows
sql = "select * from @db_name.ta"
self.queryDouble(sql)
# count
sql = "select count(*) from @db_name.ta"
self.queryDouble(sql)
# all rows
sql = "select * from @db_name.ta"
self.queryDouble(sql)
# sum
sql = "select sum(c1) from @db_name.ta"
self.queryDouble(sql)
@ -316,7 +316,8 @@ class TDTestCase:
tdSql.execute(sql)
# wait end
for i in range(100):
seconds = 300
for i in range(seconds):
sql ="show transactions;"
rows = tdSql.query(sql)
if rows == 0:
@ -325,7 +326,7 @@ class TDTestCase:
#tdLog.info(f"i={i} wait split vgroup ...")
time.sleep(1)
tdLog.exit("split vgroup transaction is not finished after executing 50s")
tdLog.exit(f"split vgroup transaction is not finished after executing {seconds}s")
return False
# split error
@ -383,6 +384,14 @@ class TDTestCase:
tdSql.execute("drop topic toa;")
self.expectSplitOk("topicdb")
# compact and check db2
def compactAndCheck(self):
tdLog.info("compact db2 and check result ...")
# compact
tdSql.execute(f"compact database {self.db2};")
# check result
self.checkResult()
# run
def run(self):
# prepare env
@ -390,12 +399,15 @@ class TDTestCase:
for i in range(5):
# split vgroup on db2
start = time.time()
self.splitVGroup(self.db2)
end = time.time()
self.vgroups2 += 1
# check two db query result same
self.checkResult()
tdLog.info(f"split vgroup i={i} passed.")
spend = "%.3f"%(end-start)
tdLog.info(f"split vgroup i={i} passed. spend = {spend}s")
# split empty db
self.splitEmptyDB()
@ -403,6 +415,9 @@ class TDTestCase:
# check topic and stream forib
self.checkForbid()
# compact database
self.compactAndCheck()
# stop
def stop(self):
tdSql.close()

View File

@ -24,10 +24,11 @@ from util.common import *
from util.sqlset import TDSetSql
class TDTestCase:
updatecfgDict = {'tsdbdebugFlag': 143}
def init(self, conn, logSql, replicaVar=1):
self.replicaVar = int(replicaVar)
tdLog.debug("start to execute %s" % __file__)
tdSql.init(conn.cursor())
tdSql.init(conn.cursor(), True)
self.dbname = 'db_test'
self.ns_dbname = 'ns_test'
self.us_dbname = 'us_test'

View File

@ -127,7 +127,7 @@ class TDTestCase:
self.c2Sum = None
# create database db
sql = f"create database db vgroups 5 replica 3"
sql = f"create database db vgroups 5 replica 3 stt_trigger 1"
tdLog.info(sql)
tdSql.execute(sql)
sql = f"use db"