fix:conflict from 3.0

This commit is contained in:
wangmm0220 2023-11-13 18:42:59 +08:00
commit 56381dea90
122 changed files with 6490 additions and 1118 deletions

View File

@ -149,6 +149,8 @@ ELSE ()
CHECK_C_COMPILER_FLAG("-mfma" COMPILER_SUPPORT_FMA)
CHECK_C_COMPILER_FLAG("-mavx" COMPILER_SUPPORT_AVX)
CHECK_C_COMPILER_FLAG("-mavx2" COMPILER_SUPPORT_AVX2)
CHECK_C_COMPILER_FLAG("-mavx512f" COMPILER_SUPPORT_AVX512F)
CHECK_C_COMPILER_FLAG("-mavx512vbmi" COMPILER_SUPPORT_AVX512BMI)
IF (COMPILER_SUPPORT_SSE42)
SET(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -msse4.2")
@ -168,7 +170,13 @@ ELSE ()
SET(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -mavx2")
SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mavx2")
ENDIF()
MESSAGE(STATUS "SIMD instructions (FMA/AVX/AVX2) is ACTIVATED")
MESSAGE(STATUS "SIMD instructions (FMA/AVX/AVX2/AVX512) is ACTIVATED")
IF (COMPILER_SUPPORT_AVX512F AND COMPILER_SUPPORT_AVX512BMI)
SET(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -mavx512f -mavx512vbmi")
SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mavx512f -mavx512vbmi")
MESSAGE(STATUS "avx512 supported by gcc")
ENDIF()
ENDIF()
# build mode

Binary file not shown.

View File

@ -565,7 +565,7 @@ TO_TIMESTAMP(ts_str_literal, format_str_literal)
- The uppercase or lowercase of `MONTH`, `MON`, `DAY`, `DY` and formtas that output digits have same effect when used in `to_timestamp`, like `to_timestamp('2023-JANUARY-01', 'YYYY-month-dd')`, `month` can be replaced by `MONTH`, or `month`. The cases are ignored.
- If multi times are specified for one component, the previous will be overwritten. Like `to_timestamp('2023-22-10-10', 'yyyy-yy-MM-dd')`, the output year will be `2022`.
- To avoid unexpected time zone used during the convertion, it's recommended to put time zone in the ts string, e.g. '2023-10-10 10:10:10+08'. If time zone not specified, default will be that in server or client.
- The default timestamp if some components are not specified will be: `1970-01-01 00:00:00` with the timezone specified or default to local timezone.
- The default timestamp if some components are not specified will be: `1970-01-01 00:00:00` with the timezone specified or default to local timezone. Only `DDD` is specified without `DD` is not supported currently, e.g. format 'yyyy-mm-ddd' is not supported, but 'yyyy-mm-dd' is supported.
- If `AM` or `PM` is specified in formats, the Hour must between `1-12`.
- In some cases, `to_timestamp` can convert correctly even the format and the timestamp string are not totally matched. Like `to_timetamp('200101/2', 'yyyyMM1/dd')`, the digit `1` in format string are ignored, and the output timestsamp is `2001-01-02 00:00:00`. Spaces and tabs in formats and tiemstamp string are also ignored automatically.
- The precision of the output timestamp will be the same as the table in SELECT stmt, millisecond will be used if no table is specified. The output of `select to_timestamp('2023-08-1 10:10:10.123456789', 'yyyy-mm-dd hh:mi:ss.ns')` will be truncated to millisecond precision. If a nano precision table is specified, no truncation will be applied. Like `select to_timestamp('2023-08-1 10:10:10.123456789', 'yyyy-mm-dd hh:mi:ss.ns') from db_ns.table_ns limit 1`.

View File

@ -38,11 +38,16 @@ Aggregation by time window is supported in TDengine. For example, in the case wh
window_clause: {
SESSION(ts_col, tol_val)
| STATE_WINDOW(col)
| INTERVAL(interval [, offset]) [SLIDING sliding] [FILL({NONE | VALUE | PREV | NULL | LINEAR | NEXT})]
| INTERVAL(interval_val [, offset]) [SLIDING (sliding_value)] [FILL({NONE | VALUE | PREV | NULL | LINEAR | NEXT})]
| EVENT_WINDOW START WITH start_trigger_condition END WITH end_trigger_condition
}
```
Both interval_val and sliding_value are time durations which have 3 forms of representation.
- INTERVAL(1s, 500a) SLIDING(1s), the unit char should be any one of a (millisecond), b (nanosecond), d (day), h (hour), m (minute), n (month), s (second), u (microsecond), w (week), y (year).
- INTERVAL(1000, 500) SLIDING(1000), the unit will the same as the queried database, if there are more than one databases, higher precision will be used.
- INTERVAL('1s', '500a') SLIDING('1s'), unit must be specified, no spaces allowed.
The following restrictions apply:
### Other Rules

View File

@ -565,7 +565,7 @@ TO_TIMESTAMP(ts_str_literal, format_str_literal)
- `MONTH`, `MON`, `DAY`, `DY` 以及其他输出为数字的格式的大小写意义相同, 如 `to_timestamp('2023-JANUARY-01', 'YYYY-month-dd')`, `month`可以被替换为`MONTH` 或者`Month`.
- 如果同一字段被指定了多次, 那么前面的指定将会被覆盖. 如 `to_timestamp('2023-22-10-10', 'yyyy-yy-MM-dd')`, 输出年份是`2022`.
- 为避免转换时使用了非预期的时区,推荐在时间中携带时区信息,例如'2023-10-10 10:10:10+08',如果未指定时区则默认时区为服务端或客户端指定的时区。
- 如果没有指定完整的时间,那么默认时间值为指定或默认时区的 `1970-01-01 00:00:00`, 未指定部分使用该默认值中的对应部分.
- 如果没有指定完整的时间,那么默认时间值为指定或默认时区的 `1970-01-01 00:00:00`, 未指定部分使用该默认值中的对应部分. 暂不支持只指定年日而不指定月日的格式, 如'yyyy-mm-DDD', 支持'yyyy-mm-DD'.
- 如果格式串中有`AM`, `PM`等, 那么小时必须是12小时制, 范围必须是01-12.
- `to_timestamp`转换具有一定的容错机制, 在格式串和时间戳串不完全对应时, 有时也可转换, 如: `to_timestamp('200101/2', 'yyyyMM1/dd')`, 格式串中多出来的1会被丢弃. 格式串与时间戳串中多余的空格字符(空格, tab等)也会被 自动忽略. 如`to_timestamp(' 23 年 - 1 月 - 01 日 ', 'yy 年-MM月-dd日')` 可以被成功转换. 虽然`MM`等字段需要两个数字对应(只有一位时前面补0), 在`to_timestamp`时, 一个数字也可以成功转换.
- 输出时间戳的精度与查询表的精度相同, 若查询未指定表, 则输出精度为毫秒. 如`select to_timestamp('2023-08-1 10:10:10.123456789', 'yyyy-mm-dd hh:mi:ss.ns')`的输出将会把微妙和纳秒进行截断. 如果指定一张纳秒表, 那么就不会发生截断, 如`select to_timestamp('2023-08-1 10:10:10.123456789', 'yyyy-mm-dd hh:mi:ss.ns') from db_ns.table_ns limit 1`.

View File

@ -44,7 +44,11 @@ window_clause: {
}
```
在上述语法中的具体限制如下
其中interval_val 和 sliding_val 都表示时间段, 语法上支持三种方式,举例说明如下:
- INTERVAL(1s, 500a) SLIDING(1s), 自带时间单位的形式,其中的时间单位是单字符表示, 分别为: a (毫秒), b (纳秒), d (天), h (小时), m (分钟), n (月), s (秒), u (微妙), w (周), y (年).
- INTERVAL(1000, 500) SLIDING(1000), 不带时间单位的形式,将使用查询库的时间精度作为默认时间单位,当存在多个库时默认采用精度更高的库.
- INTERVAL('1s', '500a') SLIDING('1s'), 自带时间单位的字符串形式,字符串内部不能有任何空格等其它字符.
### 窗口子句的规则

View File

@ -39,8 +39,8 @@ void s3DeleteObjectsByPrefix(const char *prefix);
void s3DeleteObjects(const char *object_name[], int nobject);
bool s3Exists(const char *object_name);
bool s3Get(const char *object_name, const char *path);
int32_t s3GetObjectBlock(const char *object_name, int64_t offset, int64_t size, bool check, uint8_t **ppBlock);
int32_t s3GetObjectsByPrefix(const char *prefix, const char *path);
int32_t s3GetObjectBlock(const char *object_name, int64_t offset, int64_t size, uint8_t **ppBlock);
void s3EvictCache(const char *path, long object_size);
long s3Size(const char *object_name);

View File

@ -249,6 +249,7 @@ typedef struct SQueryTableDataCond {
SColumnInfo* colList;
int32_t* pSlotList; // the column output destation slot, and it may be null
int32_t type; // data block load type:
bool skipRollup;
STimeWindow twindows;
int64_t startVersion;
int64_t endVersion;
@ -364,6 +365,11 @@ typedef struct SSortExecInfo {
int32_t readBytes; // read io bytes
} SSortExecInfo;
typedef struct SNonSortExecInfo {
int32_t blkNums;
} SNonSortExecInfo;
typedef struct STUidTagInfo {
char* name;
uint64_t uid;

View File

@ -3774,6 +3774,7 @@ typedef struct {
int64_t suid;
SArray* deleteReqs; // SArray<SSingleDeleteReq>
int64_t ctimeMs; // fill by vnode
int8_t level; // 0 tsdb(default), 1 rsma1 , 2 rsma2
} SBatchDeleteReq;
int32_t tEncodeSBatchDeleteReq(SEncoder* pCoder, const SBatchDeleteReq* pReq);

View File

@ -49,6 +49,7 @@ typedef struct {
uint64_t checkpointId;
bool initTableReader;
bool initTqReader;
bool skipRollup;
int32_t numOfVgroups;
void* sContext; // SSnapContext*
void* pStateBackend;

View File

@ -168,7 +168,6 @@ typedef struct {
struct SStreamFileState *pFileState;
int32_t number;
SSHashObj *parNameMap;
int64_t checkPointId;
int32_t taskId;
int64_t streamId;
int64_t streamBackendRid;

View File

@ -121,6 +121,7 @@ int32_t nodesListMakeAppend(SNodeList** pList, SNode* pNode);
int32_t nodesListMakeStrictAppend(SNodeList** pList, SNode* pNode);
int32_t nodesListAppendList(SNodeList* pTarget, SNodeList* pSrc);
int32_t nodesListStrictAppendList(SNodeList* pTarget, SNodeList* pSrc);
int32_t nodesListMakeStrictAppendList(SNodeList** pTarget, SNodeList* pSrc);
int32_t nodesListPushFront(SNodeList* pList, SNode* pNode);
SListCell* nodesListErase(SNodeList* pList, SListCell* pCell);
void nodesListInsertList(SNodeList* pTarget, SListCell* pPos, SNodeList* pSrc);

View File

@ -40,6 +40,13 @@ typedef enum EGroupAction {
GROUP_ACTION_CLEAR
} EGroupAction;
typedef enum EMergeType {
MERGE_TYPE_SORT = 1,
MERGE_TYPE_NON_SORT,
MERGE_TYPE_COLUMNS,
MERGE_TYPE_MAX_VALUE
} EMergeType;
typedef struct SLogicNode {
ENodeType type;
bool dynamicOp;
@ -138,6 +145,7 @@ typedef struct SAggLogicNode {
bool hasGroupKeyOptimized;
bool isGroupTb;
bool isPartTb; // true if partition keys has tbname
bool hasGroup;
} SAggLogicNode;
typedef struct SProjectLogicNode {
@ -221,6 +229,8 @@ typedef struct SMergeLogicNode {
SNodeList* pInputs;
int32_t numOfChannels;
int32_t srcGroupId;
bool colsMerge;
bool needSort;
bool groupSort;
bool ignoreGroupId;
bool inputWithGroupId;
@ -388,6 +398,7 @@ typedef struct SLastRowScanPhysiNode {
SNodeList* pGroupTags;
bool groupSort;
bool ignoreNull;
SNodeList* pTargets;
} SLastRowScanPhysiNode;
typedef SLastRowScanPhysiNode STableCountScanPhysiNode;
@ -531,6 +542,7 @@ typedef struct SExchangePhysiNode {
typedef struct SMergePhysiNode {
SPhysiNode node;
EMergeType type;
SNodeList* pMergeKeys;
SNodeList* pTargets;
int32_t numOfChannels;

View File

@ -304,6 +304,7 @@ typedef struct SCheckpointInfo {
int64_t startTs;
int64_t checkpointId;
int64_t checkpointVer; // latest checkpointId version
int64_t processedVer; // already processed ver, that has generated results version.
int64_t nextProcessVer; // current offset in WAL, not serialize it
int64_t failedId; // record the latest failed checkpoint id
} SCheckpointInfo;
@ -460,7 +461,7 @@ typedef struct STaskStartInfo {
int32_t taskStarting; // restart flag, sentinel to guard the restart procedure.
SHashObj* pReadyTaskSet; // tasks that are all ready for running stream processing
SHashObj* pFailedTaskSet; // tasks that are done the check downstream process, may be successful or failed
int32_t elapsedTime;
int64_t elapsedTime;
} STaskStartInfo;
typedef struct STaskUpdateInfo {
@ -826,6 +827,7 @@ void streamMetaReleaseTask(SStreamMeta* pMeta, SStreamTask* pTask);
int32_t streamMetaReopen(SStreamMeta* pMeta);
int32_t streamMetaCommit(SStreamMeta* pMeta);
int32_t streamMetaLoadAllTasks(SStreamMeta* pMeta);
int64_t streamMetaGetLatestCheckpointId(SStreamMeta* pMeta);
void streamMetaNotifyClose(SStreamMeta* pMeta);
void streamMetaStartHb(SStreamMeta* pMeta);
bool streamMetaTaskInTimer(SStreamMeta* pMeta);
@ -839,6 +841,7 @@ void streamMetaResetStartInfo(STaskStartInfo* pMeta);
// checkpoint
int32_t streamProcessCheckpointSourceReq(SStreamTask* pTask, SStreamCheckpointSourceReq* pReq);
int32_t streamProcessCheckpointReadyMsg(SStreamTask* pTask);
int32_t streamTaskBuildCheckpoint(SStreamTask* pTask);
void streamTaskClearCheckInfo(SStreamTask* pTask);
int32_t streamAlignTransferState(SStreamTask* pTask);
int32_t streamBuildAndSendDropTaskMsg(SMsgCb* pMsgCb, int32_t vgId, SStreamTaskId* pTaskId);

View File

@ -36,11 +36,12 @@ extern int64_t tsStreamMax;
extern float tsNumOfCores;
extern int64_t tsTotalMemoryKB;
extern char *tsProcPath;
extern char tsSIMDBuiltins;
extern char tsSIMDEnable;
extern char tsSSE42Enable;
extern char tsAVXEnable;
extern char tsAVX2Enable;
extern char tsFMAEnable;
extern char tsAVX512Enable;
extern char tsTagFilterCache;
extern char configDir[];

View File

@ -41,7 +41,7 @@ int32_t taosGetOsReleaseName(char *releaseName, char* sName, char* ver, int32_t
int32_t taosGetCpuInfo(char *cpuModel, int32_t maxLen, float *numOfCores);
int32_t taosGetCpuCores(float *numOfCores, bool physical);
void taosGetCpuUsage(double *cpu_system, double *cpu_engine);
int32_t taosGetCpuInstructions(char* sse42, char* avx, char* avx2, char* fma);
int32_t taosGetCpuInstructions(char* sse42, char* avx, char* avx2, char* fma, char* avx512);
int32_t taosGetTotalMemory(int64_t *totalKB);
int32_t taosGetProcMemory(int64_t *usedKB);
int32_t taosGetSysMemory(int64_t *usedKB);

View File

@ -754,6 +754,7 @@ int32_t* taosGetErrno();
#define TSDB_CODE_FUNC_DUP_TIMESTAMP TAOS_DEF_ERROR_CODE(0, 0x2805)
#define TSDB_CODE_FUNC_TO_TIMESTAMP_FAILED_FORMAT_ERR TAOS_DEF_ERROR_CODE(0, 0x2806)
#define TSDB_CODE_FUNC_TO_TIMESTAMP_FAILED_TS_ERR TAOS_DEF_ERROR_CODE(0, 0x2807)
#define TSDB_CODE_FUNC_TO_TIMESTAMP_FAILED_NOT_SUPPORTED TAOS_DEF_ERROR_CODE(0, 0x2808)
//udf
#define TSDB_CODE_UDF_STOPPING TAOS_DEF_ERROR_CODE(0, 0x2901)

View File

@ -25,7 +25,7 @@ static S3UriStyle uriStyleG = S3UriStylePath;
static int retriesG = 5;
static int timeoutMsG = 0;
static int32_t s3Begin() {
int32_t s3Begin() {
S3Status status;
const char *hostname = tsS3Hostname;
const char *env_hn = getenv("S3_HOSTNAME");
@ -44,10 +44,12 @@ static int32_t s3Begin() {
return 0;
}
static void s3End() { S3_deinitialize(); }
int32_t s3Init() { return s3Begin(); }
void s3End() { S3_deinitialize(); }
void s3CleanUp() { s3End(); }
int32_t s3Init() { return 0; /*s3Begin();*/ }
void s3CleanUp() { /*s3End();*/
}
static int should_retry() {
/*
@ -73,8 +75,9 @@ static void s3PrintError(const char *func, S3Status status, char error_details[]
}
typedef struct {
char err_msg[128];
char err_msg[512];
S3Status status;
uint64_t content_length;
TdFilePtr file;
} TS3GetData;
@ -83,6 +86,7 @@ typedef struct {
S3Status status;
uint64_t content_length;
char *buf;
int64_t buf_pos;
} TS3SizeCBD;
static S3Status responsePropertiesCallbackNull(const S3ResponseProperties *properties, void *callbackData) {
@ -109,24 +113,26 @@ static void responseCompleteCallback(S3Status status, const S3ErrorDetails *erro
int len = 0;
const int elen = sizeof(cbd->err_msg);
if (error) {
if (error->message) {
if (error->message && elen - len > 0) {
len += snprintf(&(cbd->err_msg[len]), elen - len, " Message: %s\n", error->message);
}
if (error->resource) {
if (error->resource && elen - len > 0) {
len += snprintf(&(cbd->err_msg[len]), elen - len, " Resource: %s\n", error->resource);
}
if (error->furtherDetails) {
if (error->furtherDetails && elen - len > 0) {
len += snprintf(&(cbd->err_msg[len]), elen - len, " Further Details: %s\n", error->furtherDetails);
}
if (error->extraDetailsCount) {
if (error->extraDetailsCount && elen - len > 0) {
len += snprintf(&(cbd->err_msg[len]), elen - len, "%s", " Extra Details:\n");
for (int i = 0; i < error->extraDetailsCount; i++) {
if (elen - len > 0) {
len += snprintf(&(cbd->err_msg[len]), elen - len, " %s: %s\n", error->extraDetails[i].name,
error->extraDetails[i].value);
}
}
}
}
}
typedef struct growbuffer {
// The total number of bytes, and the start byte
@ -213,8 +219,9 @@ static void growbuffer_destroy(growbuffer *gb) {
}
typedef struct put_object_callback_data {
char err_msg[128];
char err_msg[512];
S3Status status;
uint64_t content_length;
// FILE *infile;
TdFilePtr infileFD;
growbuffer *gb;
@ -226,8 +233,9 @@ typedef struct put_object_callback_data {
#define MULTIPART_CHUNK_SIZE (768 << 20) // multipart is 768M
typedef struct UploadManager {
char err_msg[128];
char err_msg[512];
S3Status status;
uint64_t content_length;
// used for initial multipart
char *upload_id;
@ -241,8 +249,9 @@ typedef struct UploadManager {
} UploadManager;
typedef struct list_parts_callback_data {
char err_msg[128];
char err_msg[512];
S3Status status;
uint64_t content_length;
int isTruncated;
char nextPartNumberMarker[24];
char initiatorId[256];
@ -258,8 +267,6 @@ typedef struct list_parts_callback_data {
} list_parts_callback_data;
typedef struct MultipartPartData {
char err_msg[128];
S3Status status;
put_object_callback_data put_object_data;
int seq;
UploadManager *manager;
@ -267,11 +274,12 @@ typedef struct MultipartPartData {
static int putObjectDataCallback(int bufferSize, char *buffer, void *callbackData) {
put_object_callback_data *data = (put_object_callback_data *)callbackData;
/*
if (data->infileFD == 0) {
MultipartPartData *mpd = (MultipartPartData *)callbackData;
data = &mpd->put_object_data;
}
*/
int ret = 0;
if (data->contentLength) {
@ -402,7 +410,8 @@ static int try_get_parts_info(const char *bucketName, const char *key, UploadMan
S3BucketContext bucketContext = {0, tsS3BucketName, protocolG, uriStyleG, tsS3AccessKeyId, tsS3AccessKeySecret,
0, awsRegionG};
S3ListPartsHandler listPartsHandler = {{&responsePropertiesCallbackNull, &responseCompleteCallback}, &listPartsCallback};
S3ListPartsHandler listPartsHandler = {{&responsePropertiesCallbackNull, &responseCompleteCallback},
&listPartsCallback};
list_parts_callback_data data;
@ -448,13 +457,13 @@ int32_t s3PutObjectFromFile2(const char *file, const char *object) {
int metaPropertiesCount = 0;
S3NameValue metaProperties[S3_MAX_METADATA_COUNT];
char useServerSideEncryption = 0;
int noStatus = 0;
put_object_callback_data data;
put_object_callback_data data = {0};
// int noStatus = 0;
// data.infile = 0;
data.infileFD = NULL;
data.gb = 0;
data.noStatus = noStatus;
// data.gb = 0;
// data.infileFD = NULL;
// data.noStatus = noStatus;
if (taosStatFile(file, &contentLength, NULL, NULL) < 0) {
uError("ERROR: %s Failed to stat file %s: ", __func__, file);
@ -571,9 +580,9 @@ int32_t s3PutObjectFromFile2(const char *file, const char *object) {
do {
S3_upload_part(&bucketContext, key, &putProperties, &putObjectHandler, seq, manager.upload_id,
partContentLength, 0, timeoutMsG, &partData);
} while (S3_status_is_retryable(partData.status) && should_retry());
if (partData.status != S3StatusOK) {
s3PrintError(__func__, partData.status, partData.err_msg);
} while (S3_status_is_retryable(partData.put_object_data.status) && should_retry());
if (partData.put_object_data.status != S3StatusOK) {
s3PrintError(__func__, partData.put_object_data.status, partData.put_object_data.err_msg);
code = TAOS_SYSTEM_ERROR(EIO);
goto clean;
}
@ -621,7 +630,7 @@ int32_t s3PutObjectFromFile2(const char *file, const char *object) {
}
typedef struct list_bucket_callback_data {
char err_msg[128];
char err_msg[512];
S3Status status;
int isTruncated;
char nextMarker[1024];
@ -739,15 +748,19 @@ void s3DeleteObjectsByPrefix(const char *prefix) {
static S3Status getObjectDataCallback(int bufferSize, const char *buffer, void *callbackData) {
TS3SizeCBD *cbd = callbackData;
/*
if (cbd->content_length != bufferSize) {
cbd->status = S3StatusAbortedByCallback;
return S3StatusAbortedByCallback;
}
*/
if (!cbd->buf) {
cbd->buf = taosMemoryCalloc(1, cbd->content_length);
}
char *buf = taosMemoryCalloc(1, bufferSize);
if (buf) {
memcpy(buf, buffer, bufferSize);
cbd->buf = buf;
if (cbd->buf) {
memcpy(cbd->buf + cbd->buf_pos, buffer, bufferSize);
cbd->buf_pos += bufferSize;
cbd->status = S3StatusOK;
return S3StatusOK;
} else {
@ -756,7 +769,7 @@ static S3Status getObjectDataCallback(int bufferSize, const char *buffer, void *
}
}
int32_t s3GetObjectBlock(const char *object_name, int64_t offset, int64_t size, uint8_t **ppBlock) {
int32_t s3GetObjectBlock(const char *object_name, int64_t offset, int64_t size, bool check, uint8_t **ppBlock) {
int status = 0;
int64_t ifModifiedSince = -1, ifNotModifiedSince = -1;
const char *ifMatch = 0, *ifNotMatch = 0;
@ -769,6 +782,7 @@ int32_t s3GetObjectBlock(const char *object_name, int64_t offset, int64_t size,
TS3SizeCBD cbd = {0};
cbd.content_length = size;
cbd.buf_pos = 0;
do {
S3_get_object(&bucketContext, object_name, &getConditions, offset, size, 0, 0, &getObjectHandler, &cbd);
} while (S3_status_is_retryable(cbd.status) && should_retry());
@ -778,6 +792,11 @@ int32_t s3GetObjectBlock(const char *object_name, int64_t offset, int64_t size,
return TAOS_SYSTEM_ERROR(EIO);
}
if (check && cbd.buf_pos != size) {
uError("%s: %d(%s)", __func__, cbd.status, cbd.err_msg);
return TAOS_SYSTEM_ERROR(EIO);
}
*ppBlock = cbd.buf;
return 0;
@ -786,8 +805,7 @@ int32_t s3GetObjectBlock(const char *object_name, int64_t offset, int64_t size,
static S3Status getObjectCallback(int bufferSize, const char *buffer, void *callbackData) {
TS3GetData *cbd = (TS3GetData *)callbackData;
size_t wrote = taosWriteFile(cbd->file, buffer, bufferSize);
return ((wrote < (size_t) bufferSize) ?
S3StatusAbortedByCallback : S3StatusOK);
return ((wrote < (size_t)bufferSize) ? S3StatusAbortedByCallback : S3StatusOK);
}
int32_t s3GetObjectToFile(const char *object_name, char *fileName) {
@ -1122,7 +1140,8 @@ bool s3Get(const char *object_name, const char *path) {
return ret;
}
int32_t s3GetObjectBlock(const char *object_name, int64_t offset, int64_t block_size, uint8_t **ppBlock) {
int32_t s3GetObjectBlock(const char *object_name, int64_t offset, int64_t block_size, bool check, uint8_t **ppBlock) {
(void)check;
int32_t code = 0;
cos_pool_t *p = NULL;
int is_cname = 0;
@ -1314,9 +1333,11 @@ void s3DeleteObjectsByPrefix(const char *prefix) {}
void s3DeleteObjects(const char *object_name[], int nobject) {}
bool s3Exists(const char *object_name) { return false; }
bool s3Get(const char *object_name, const char *path) { return false; }
int32_t s3GetObjectsByPrefix(const char *prefix, const char* path) { return 0; }
int32_t s3GetObjectBlock(const char *object_name, int64_t offset, int64_t size, uint8_t **ppBlock) { return 0; }
int32_t s3GetObjectBlock(const char *object_name, int64_t offset, int64_t size, bool check, uint8_t **ppBlock) {
return 0;
}
void s3EvictCache(const char *path, long object_size) {}
long s3Size(const char *object_name) { return 0; }
int32_t s3GetObjectsByPrefix(const char *prefix, const char *path) { return 0; }
#endif

View File

@ -2121,6 +2121,7 @@ _end:
char* buildCtbNameByGroupId(const char* stbFullName, uint64_t groupId) {
char* pBuf = taosMemoryCalloc(1, TSDB_TABLE_NAME_LEN + 1);
if (!pBuf) {
terrno = TSDB_CODE_OUT_OF_MEMORY;
return NULL;
}
int32_t code = buildCtbNameByGroupIdImpl(stbFullName, groupId, pBuf);
@ -2133,6 +2134,7 @@ char* buildCtbNameByGroupId(const char* stbFullName, uint64_t groupId) {
int32_t buildCtbNameByGroupIdImpl(const char* stbFullName, uint64_t groupId, char* cname) {
if (stbFullName[0] == 0) {
terrno = TSDB_CODE_INVALID_PARA;
return TSDB_CODE_FAILED;
}
@ -2142,6 +2144,7 @@ int32_t buildCtbNameByGroupIdImpl(const char* stbFullName, uint64_t groupId, cha
}
if (cname == NULL) {
terrno = TSDB_CODE_INVALID_PARA;
taosArrayDestroy(tags);
return TSDB_CODE_FAILED;
}

View File

@ -249,7 +249,7 @@ int32_t tsTransPullupInterval = 2;
int32_t tsMqRebalanceInterval = 2;
int32_t tsStreamCheckpointInterval = 60;
float tsSinkDataRate = 2.0;
int32_t tsStreamNodeCheckInterval = 30;
int32_t tsStreamNodeCheckInterval = 15;
int32_t tsTtlUnit = 86400;
int32_t tsTtlPushIntervalSec = 10;
int32_t tsTrimVDbIntervalSec = 60 * 60; // interval of trimming db in all vgroups
@ -343,7 +343,9 @@ int32_t taosSetS3Cfg(SConfig *pCfg) {
return 0;
}
struct SConfig *taosGetCfg() { return tsCfg; }
struct SConfig *taosGetCfg() {
return tsCfg;
}
static int32_t taosLoadCfg(SConfig *pCfg, const char **envCmd, const char *inputCfgDir, const char *envFile,
char *apolloUrl) {
@ -453,8 +455,8 @@ static int32_t taosAddClientCfg(SConfig *pCfg) {
if (cfgAddBool(pCfg, "enableScience", tsEnableScience, CFG_SCOPE_CLIENT, CFG_DYN_NONE) != 0) return -1;
if (cfgAddInt32(pCfg, "querySmaOptimize", tsQuerySmaOptimize, 0, 1, CFG_SCOPE_CLIENT, CFG_DYN_CLIENT) != 0) return -1;
if (cfgAddBool(pCfg, "queryPlannerTrace", tsQueryPlannerTrace, CFG_SCOPE_CLIENT, CFG_DYN_CLIENT) != 0) return -1;
if (cfgAddInt32(pCfg, "queryNodeChunkSize", tsQueryNodeChunkSize, 1024, 128 * 1024, CFG_SCOPE_CLIENT, CFG_DYN_CLIENT) !=
0)
if (cfgAddInt32(pCfg, "queryNodeChunkSize", tsQueryNodeChunkSize, 1024, 128 * 1024, CFG_SCOPE_CLIENT,
CFG_DYN_CLIENT) != 0)
return -1;
if (cfgAddBool(pCfg, "queryUseNodeAllocator", tsQueryUseNodeAllocator, CFG_SCOPE_CLIENT, CFG_DYN_CLIENT) != 0)
return -1;
@ -470,7 +472,8 @@ static int32_t taosAddClientCfg(SConfig *pCfg) {
// if (cfgAddInt32(pCfg, "smlBatchSize", tsSmlBatchSize, 1, INT32_MAX, CFG_SCOPE_CLIENT, CFG_DYN_NONE) != 0)
// return -1;
if (cfgAddInt32(pCfg, "maxShellConns", tsMaxShellConns, 10, 50000000, CFG_SCOPE_CLIENT, CFG_DYN_NONE) != 0) return -1;
if (cfgAddInt32(pCfg, "maxInsertBatchRows", tsMaxInsertBatchRows, 1, INT32_MAX, CFG_SCOPE_CLIENT, CFG_DYN_CLIENT) != 0)
if (cfgAddInt32(pCfg, "maxInsertBatchRows", tsMaxInsertBatchRows, 1, INT32_MAX, CFG_SCOPE_CLIENT, CFG_DYN_CLIENT) !=
0)
return -1;
if (cfgAddInt32(pCfg, "maxRetryWaitTime", tsMaxRetryWaitTime, 0, 86400000, CFG_SCOPE_BOTH, CFG_DYN_CLIENT) != 0)
return -1;
@ -526,7 +529,8 @@ static int32_t taosAddSystemCfg(SConfig *pCfg) {
if (cfgAddBool(pCfg, "avx", tsAVXEnable, CFG_SCOPE_BOTH, CFG_DYN_NONE) != 0) return -1;
if (cfgAddBool(pCfg, "avx2", tsAVX2Enable, CFG_SCOPE_BOTH, CFG_DYN_NONE) != 0) return -1;
if (cfgAddBool(pCfg, "fma", tsFMAEnable, CFG_SCOPE_BOTH, CFG_DYN_NONE) != 0) return -1;
if (cfgAddBool(pCfg, "simdEnable", tsSIMDBuiltins, CFG_SCOPE_BOTH, CFG_DYN_NONE) != 0) return -1;
if (cfgAddBool(pCfg, "avx512", tsAVX512Enable, CFG_SCOPE_BOTH, CFG_DYN_NONE) != 0) return -1;
if (cfgAddBool(pCfg, "simdEnable", tsSIMDEnable, CFG_SCOPE_BOTH, CFG_DYN_NONE) != 0) return -1;
if (cfgAddBool(pCfg, "tagFilterCache", tsTagFilterCache, CFG_SCOPE_BOTH, CFG_DYN_NONE) != 0) return -1;
if (cfgAddInt64(pCfg, "openMax", tsOpenMax, 0, INT64_MAX, CFG_SCOPE_BOTH, CFG_DYN_NONE) != 0) return -1;
@ -557,7 +561,8 @@ static int32_t taosAddServerCfg(SConfig *pCfg) {
if (cfgAddInt32(pCfg, "supportVnodes", tsNumOfSupportVnodes, 0, 4096, CFG_SCOPE_SERVER, CFG_DYN_NONE) != 0) return -1;
if (cfgAddInt32(pCfg, "statusInterval", tsStatusInterval, 1, 30, CFG_SCOPE_SERVER, CFG_DYN_NONE) != 0) return -1;
if (cfgAddInt32(pCfg, "minSlidingTime", tsMinSlidingTime, 1, 1000000, CFG_SCOPE_CLIENT, CFG_DYN_CLIENT) != 0) return -1;
if (cfgAddInt32(pCfg, "minSlidingTime", tsMinSlidingTime, 1, 1000000, CFG_SCOPE_CLIENT, CFG_DYN_CLIENT) != 0)
return -1;
if (cfgAddInt32(pCfg, "minIntervalTime", tsMinIntervalTime, 1, 1000000, CFG_SCOPE_CLIENT, CFG_DYN_CLIENT) != 0)
return -1;
@ -605,7 +610,7 @@ static int32_t taosAddServerCfg(SConfig *pCfg) {
0)
return -1;
tsNumOfVnodeRsmaThreads = tsNumOfCores;
tsNumOfVnodeRsmaThreads = tsNumOfCores / 4;
tsNumOfVnodeRsmaThreads = TMAX(tsNumOfVnodeRsmaThreads, 4);
if (cfgAddInt32(pCfg, "numOfVnodeRsmaThreads", tsNumOfVnodeRsmaThreads, 1, 1024, CFG_SCOPE_SERVER, CFG_DYN_NONE) != 0)
return -1;
@ -674,7 +679,8 @@ static int32_t taosAddServerCfg(SConfig *pCfg) {
if (cfgAddInt32(pCfg, "rsyncPort", tsRsyncPort, 1, 65535, CFG_SCOPE_BOTH, CFG_DYN_SERVER) != 0) return -1;
if (cfgAddString(pCfg, "snodeAddress", tsSnodeAddress, CFG_SCOPE_SERVER, CFG_DYN_SERVER) != 0) return -1;
if (cfgAddString(pCfg, "checkpointBackupDir", tsCheckpointBackupDir, CFG_SCOPE_SERVER, CFG_DYN_SERVER) != 0) return -1;
if (cfgAddString(pCfg, "checkpointBackupDir", tsCheckpointBackupDir, CFG_SCOPE_SERVER, CFG_DYN_SERVER) != 0)
return -1;
if (cfgAddInt32(pCfg, "tmqMaxTopicNum", tmqMaxTopicNum, 1, 10000, CFG_SCOPE_SERVER, CFG_DYN_ENT_SERVER) != 0)
return -1;
@ -700,7 +706,8 @@ static int32_t taosAddServerCfg(SConfig *pCfg) {
if (cfgAddInt32(pCfg, "uptimeInterval", tsUptimeInterval, 1, 100000, CFG_SCOPE_SERVER, CFG_DYN_NONE) != 0) return -1;
if (cfgAddInt32(pCfg, "queryRsmaTolerance", tsQueryRsmaTolerance, 0, 900000, CFG_SCOPE_SERVER, CFG_DYN_NONE) != 0)
return -1;
if (cfgAddInt32(pCfg, "timeseriesThreshold", tsTimeSeriesThreshold, 0, 2000, CFG_SCOPE_SERVER, CFG_DYN_ENT_SERVER) != 0)
if (cfgAddInt32(pCfg, "timeseriesThreshold", tsTimeSeriesThreshold, 0, 2000, CFG_SCOPE_SERVER, CFG_DYN_ENT_SERVER) !=
0)
return -1;
if (cfgAddInt64(pCfg, "walFsyncDataSizeLimit", tsWalFsyncDataSizeLimit, 100 * 1024 * 1024, INT64_MAX,
@ -743,7 +750,7 @@ static int32_t taosAddServerCfg(SConfig *pCfg) {
if (cfgAddString(pCfg, "s3Accesskey", tsS3AccessKey, CFG_SCOPE_SERVER, CFG_DYN_NONE) != 0) return -1;
if (cfgAddString(pCfg, "s3Endpoint", tsS3Endpoint, CFG_SCOPE_SERVER, CFG_DYN_NONE) != 0) return -1;
if (cfgAddString(pCfg, "s3BucketName", tsS3BucketName, CFG_SCOPE_SERVER, CFG_DYN_NONE) != 0) return -1;
if (cfgAddInt32(pCfg, "s3BlockSize", tsS3BlockSize, -100, 1024 * 1024, CFG_SCOPE_SERVER, CFG_DYN_NONE) != 0)
if (cfgAddInt32(pCfg, "s3BlockSize", tsS3BlockSize, -1, 1024 * 1024, CFG_SCOPE_SERVER, CFG_DYN_ENT_SERVER) != 0)
return -1;
if (cfgAddInt32(pCfg, "s3BlockCacheSize", tsS3BlockCacheSize, 4, 1024 * 1024, CFG_SCOPE_SERVER, CFG_DYN_ENT_SERVER) !=
0)
@ -1095,7 +1102,7 @@ static int32_t taosSetServerCfg(SConfig *pCfg) {
tsNumOfSnodeWriteThreads = cfgGetItem(pCfg, "numOfSnodeUniqueThreads")->i32;
tsRpcQueueMemoryAllowed = cfgGetItem(pCfg, "rpcQueueMemoryAllowed")->i64;
tsSIMDBuiltins = (bool)cfgGetItem(pCfg, "simdEnable")->bval;
tsSIMDEnable = (bool)cfgGetItem(pCfg, "simdEnable")->bval;
tsTagFilterCache = (bool)cfgGetItem(pCfg, "tagFilterCache")->bval;
tsEnableMonitor = cfgGetItem(pCfg, "monitor")->bval;
@ -1687,6 +1694,7 @@ void taosCfgDynamicOptions(const char *option, const char *value) {
{"ttlBatchDropNum", &tsTtlBatchDropNum},
{"ttlFlushThreshold", &tsTtlFlushThreshold},
{"ttlPushInterval", &tsTtlPushIntervalSec},
//{"s3BlockSize", &tsS3BlockSize},
{"s3BlockCacheSize", &tsS3BlockCacheSize},
{"s3PageCacheSize", &tsS3PageCacheSize},
{"s3UploadDelaySec", &tsS3UploadDelaySec},

View File

@ -8337,6 +8337,7 @@ int32_t tEncodeSBatchDeleteReq(SEncoder *pEncoder, const SBatchDeleteReq *pReq)
if (tEncodeSSingleDeleteReq(pEncoder, pOneReq) < 0) return -1;
}
if (tEncodeI64(pEncoder, pReq->ctimeMs) < 0) return -1;
if (tEncodeI8(pEncoder, pReq->level) < 0) return -1;
return 0;
}
@ -8361,6 +8362,9 @@ int32_t tDecodeSBatchDeleteReq(SDecoder *pDecoder, SBatchDeleteReq *pReq) {
if (!tDecodeIsEnd(pDecoder)) {
if (tDecodeI64(pDecoder, &pReq->ctimeMs) < 0) return -1;
}
if (!tDecodeIsEnd(pDecoder)) {
if (tDecodeI8(pDecoder, &pReq->level) < 0) return -1;
}
return 0;
}

View File

@ -296,7 +296,10 @@ static int compareKv(const void* p1, const void* p2) {
void buildChildTableName(RandTableName* rName) {
SStringBuilder sb = {0};
taosStringBuilderAppendStringLen(&sb, rName->stbFullName, rName->stbFullNameLen);
if (sb.buf == NULL) return;
if (sb.buf == NULL) {
terrno = TSDB_CODE_OUT_OF_MEMORY;
return;
}
taosArraySort(rName->tags, compareKv);
for (int j = 0; j < taosArrayGetSize(rName->tags); ++j) {
taosStringBuilderAppendChar(&sb, ',');

View File

@ -1580,6 +1580,7 @@ static bool needMoreDigits(SArray* formats, int32_t curIdx) {
/// @retval 0 for success
/// @retval -1 for format and s mismatch error
/// @retval -2 if datetime err, like 2023-13-32 25:61:69
/// @retval -3 if not supported
static int32_t char2ts(const char* s, SArray* formats, int64_t* ts, int32_t precision, const char** sErrPos,
int32_t* fErrIdx) {
int32_t size = taosArrayGetSize(formats);
@ -1589,6 +1590,7 @@ static int32_t char2ts(const char* s, SArray* formats, int64_t* ts, int32_t prec
int32_t hour = 0, min = 0, sec = 0, us = 0, ms = 0, ns = 0;
int32_t tzSign = 1, tz = tsTimezone;
int32_t err = 0;
bool withYD = false, withMD = false;
for (int32_t i = 0; i < size && *s != '\0'; ++i) {
while (isspace(*s) && *s != '\0') {
@ -1782,6 +1784,7 @@ static int32_t char2ts(const char* s, SArray* formats, int64_t* ts, int32_t prec
} else {
s = newPos;
}
withYD = true;
} break;
case TSFKW_DD: {
const char* newPos = tsFormatStr2Int32(&md, s, 2, needMoreDigits(formats, i));
@ -1790,6 +1793,7 @@ static int32_t char2ts(const char* s, SArray* formats, int64_t* ts, int32_t prec
} else {
s = newPos;
}
withMD = true;
} break;
case TSFKW_D: {
const char* newPos = tsFormatStr2Int32(&wd, s, 1, needMoreDigits(formats, i));
@ -1843,6 +1847,10 @@ static int32_t char2ts(const char* s, SArray* formats, int64_t* ts, int32_t prec
return err;
}
}
if (!withMD) {
// yyyy-mm-DDD, currently, the c api can't convert to correct timestamp, return not supported
if (withYD) return -3;
}
struct STm tm = {0};
tm.tm.tm_year = year - 1900;
tm.tm.tm_mon = mon;
@ -1892,8 +1900,13 @@ int32_t taosChar2Ts(const char* format, SArray** formats, const char* tsStr, int
TSFormatNode* fNode = (taosArrayGet(*formats, fErrIdx));
snprintf(errMsg, errMsgLen, "mismatch format for: %s and %s", sErrPos,
fErrIdx < taosArrayGetSize(*formats) ? ((TSFormatNode*)taosArrayGet(*formats, fErrIdx))->key->name : "");
code = TSDB_CODE_FUNC_TO_TIMESTAMP_FAILED_FORMAT_ERR;
} else if (code == -2) {
snprintf(errMsg, errMsgLen, "timestamp format error: %s -> %s", tsStr, format);
code = TSDB_CODE_FUNC_TO_TIMESTAMP_FAILED_TS_ERR;
} else if (code == -3) {
snprintf(errMsg, errMsgLen, "timestamp format not supported");
code = TSDB_CODE_FUNC_TO_TIMESTAMP_FAILED_NOT_SUPPORTED;
}
return code;
}

View File

@ -489,6 +489,7 @@ TEST(timeTest, char2ts) {
ASSERT_EQ(ts, 0);
ASSERT_EQ(0, TEST_char2ts("yyyy年 a a a MM/ddTZH", &ts, TSDB_TIME_PRECISION_MICRO, "1970年 a a a 1/1+0"));
ASSERT_EQ(0, TEST_char2ts("yyyy年 a a a a a a a a a a a a a a a MM/ddTZH", &ts, TSDB_TIME_PRECISION_MICRO, "1970年 a "));
ASSERT_EQ(-3, TEST_char2ts("yyyy-mm-DDD", &ts, TSDB_TIME_PRECISION_MILLI, "1970-01-001"));
}
#pragma GCC diagnostic pop

View File

@ -144,7 +144,7 @@ void vmCloseVnode(SVnodeMgmt *pMgmt, SVnodeObj *pVnode, bool commitAndRemoveWal)
char path[TSDB_FILENAME_LEN] = {0};
bool atExit = true;
if (vnodeIsLeader(pVnode->pImpl)) {
if (pVnode->pImpl && vnodeIsLeader(pVnode->pImpl)) {
vnodeProposeCommitOnNeed(pVnode->pImpl, atExit);
}
@ -153,6 +153,10 @@ void vmCloseVnode(SVnodeMgmt *pMgmt, SVnodeObj *pVnode, bool commitAndRemoveWal)
taosThreadRwlockUnlock(&pMgmt->lock);
vmReleaseVnode(pMgmt, pVnode);
if (pVnode->failed) {
ASSERT(pVnode->pImpl == NULL);
goto _closed;
}
dInfo("vgId:%d, pre close", pVnode->vgId);
vnodePreClose(pVnode->pImpl);
@ -202,6 +206,8 @@ void vmCloseVnode(SVnodeMgmt *pMgmt, SVnodeObj *pVnode, bool commitAndRemoveWal)
vnodeClose(pVnode->pImpl);
pVnode->pImpl = NULL;
_closed:
dInfo("vgId:%d, vnode is closed", pVnode->vgId);
if (commitAndRemoveWal) {
@ -386,7 +392,6 @@ static void *vmCloseVnodeInThread(void *param) {
for (int32_t v = 0; v < pThread->vnodeNum; ++v) {
SVnodeObj *pVnode = pThread->ppVnodes[v];
if (pVnode->failed) continue;
char stepDesc[TSDB_STEP_DESC_LEN] = {0};
snprintf(stepDesc, TSDB_STEP_DESC_LEN, "vgId:%d, start to close, %d of %d have been closed", pVnode->vgId,

View File

@ -3,6 +3,17 @@ add_library(dnode STATIC ${IMPLEMENT_SRC})
target_link_libraries(
dnode mgmt_mnode mgmt_qnode mgmt_snode mgmt_vnode mgmt_dnode
)
IF (TD_STORAGE)
IF(${BUILD_WITH_S3})
add_definitions(-DUSE_S3)
ELSEIF(${BUILD_WITH_COS})
add_definitions(-DUSE_COS)
ENDIF()
ENDIF ()
target_include_directories(
dnode
PRIVATE "${CMAKE_CURRENT_SOURCE_DIR}/inc"

View File

@ -146,6 +146,13 @@ static bool dmCheckDataDirVersion() {
return true;
}
#if defined(USE_S3)
extern int32_t s3Begin();
extern void s3End();
#endif
int32_t dmInit() {
dInfo("start to init dnode env");
if (dmDiskInit() != 0) return -1;
@ -156,6 +163,9 @@ int32_t dmInit() {
if (dmInitMonitor() != 0) return -1;
if (dmInitAudit() != 0) return -1;
if (dmInitDnode(dmInstance()) != 0) return -1;
#if defined(USE_S3)
if (s3Begin() != 0) return -1;
#endif
dInfo("dnode env is initialized");
return 0;
@ -181,6 +191,9 @@ void dmCleanup() {
udfStopUdfd();
taosStopCacheRefreshWorker();
dmDiskClose();
#if defined(USE_S3)
s3End();
#endif
dInfo("dnode env is cleaned up");
taosCleanupCfg();
@ -394,7 +407,4 @@ void dmReportStartup(const char *pName, const char *pDesc) {
dDebug("step:%s, %s", pStartup->name, pStartup->desc);
}
int64_t dmGetClusterId() {
return globalDnode.data.clusterId;
}
int64_t dmGetClusterId() { return globalDnode.data.clusterId; }

View File

@ -1310,6 +1310,22 @@ static int32_t mndProcessConfigDnodeReq(SRpcMsg *pReq) {
}
tFreeSMCfgDnodeReq(&cfgReq);
return 0;
} else if (strncasecmp(cfgReq.config, "s3blocksize", 11) == 0) {
int32_t optLen = strlen("s3blocksize");
int32_t flag = -1;
int32_t code = mndMCfgGetValInt32(&cfgReq, optLen, &flag);
if (code < 0) return code;
if (flag > 1024 * 1024 || (flag > -1 && flag < 4) || flag < -1) {
mError("dnode:%d, failed to config s3blocksize since value:%d. Valid range: -1 or [4, 1024 * 1024]",
cfgReq.dnodeId, flag);
terrno = TSDB_CODE_INVALID_CFG;
tFreeSMCfgDnodeReq(&cfgReq);
return -1;
}
strcpy(dcfgReq.config, "s3blocksize");
snprintf(dcfgReq.value, TSDB_DNODE_VALUE_LEN, "%d", flag);
#endif
} else {
mndMCfg2DCfg(&cfgReq, &dcfgReq);

View File

@ -313,7 +313,7 @@ _CONNECT:
code = 0;
char detail[1000] = {0};
sprintf(detail, "%s:%d, app:%s", ip, pConn->port, connReq.app);
sprintf(detail, "app:%s", connReq.app);
auditRecord(pReq, pMnode->clusterId, "login", "", "", detail, strlen(detail));

View File

@ -43,7 +43,7 @@ typedef struct SNodeEntry {
} SNodeEntry;
typedef struct SStreamExecInfo {
SArray *pNodeEntryList;
SArray *pNodeList;
int64_t ts; // snapshot ts
int64_t activeCheckpoint; // active check point id
SHashObj * pTaskMap;
@ -850,7 +850,7 @@ static int32_t mndProcessCreateStreamReq(SRpcMsg *pReq) {
mndTransDrop(pTrans);
taosThreadMutexLock(&execInfo.lock);
mDebug("register to stream task node list");
mDebug("stream tasks register into node list");
keepStreamTasksInBuf(&streamObj, &execInfo);
taosThreadMutexUnlock(&execInfo.lock);
@ -862,8 +862,8 @@ static int32_t mndProcessCreateStreamReq(SRpcMsg *pReq) {
// TODO
if (createStreamReq.sql != NULL) {
auditRecord(pReq, pMnode->clusterId, "createStream", name.dbname, name.tname,
createStreamReq.sql, strlen(createStreamReq.sql));
auditRecord(pReq, pMnode->clusterId, "createStream", name.dbname, name.tname, createStreamReq.sql,
strlen(createStreamReq.sql));
}
_OVER:
if (code != 0 && code != TSDB_CODE_ACTION_IN_PROGRESS) {
@ -877,6 +877,21 @@ _OVER:
return code;
}
int64_t mndStreamGenChkpId(SMnode *pMnode) {
SStreamObj *pStream = NULL;
void * pIter = NULL;
SSdb * pSdb = pMnode->pSdb;
int64_t maxChkpId = 0;
while (1) {
pIter = sdbFetch(pSdb, SDB_STREAM, pIter, (void **)&pStream);
if (pIter == NULL) break;
maxChkpId = TMAX(maxChkpId, pStream->checkpointId);
sdbRelease(pSdb, pStream);
}
return maxChkpId + 1;
}
static int32_t mndProcessStreamCheckpointTmr(SRpcMsg *pReq) {
SMnode *pMnode = pReq->info.node;
SSdb * pSdb = pMnode->pSdb;
@ -885,7 +900,7 @@ static int32_t mndProcessStreamCheckpointTmr(SRpcMsg *pReq) {
}
SMStreamDoCheckpointMsg *pMsg = rpcMallocCont(sizeof(SMStreamDoCheckpointMsg));
pMsg->checkpointId = taosGetTimestampMs();
pMsg->checkpointId = mndStreamGenChkpId(pMnode);
int32_t size = sizeof(SMStreamDoCheckpointMsg);
SRpcMsg rpcMsg = {.msgType = TDMT_MND_STREAM_BEGIN_CHECKPOINT, .pCont = pMsg, .contLen = size};
@ -1070,7 +1085,8 @@ static int32_t mndAddStreamCheckpointToTrans(STrans *pTrans, SStreamObj *pStream
STransAction action = {0};
SEpSet epset = mndGetVgroupEpset(pMnode, pVgObj);
initTransAction(&action, buf, tlen, TDMT_VND_STREAM_CHECK_POINT_SOURCE, &epset, TSDB_CODE_SYN_PROPOSE_NOT_READY);
initTransAction(&action, buf, tlen, TDMT_VND_STREAM_CHECK_POINT_SOURCE, &epset,
TSDB_CODE_SYN_PROPOSE_NOT_READY);
mndReleaseVgroup(pMnode, pVgObj);
if (mndTransAppendRedoAction(pTrans, &action) != 0) {
@ -1125,6 +1141,15 @@ static const char *mndGetStreamDB(SMnode *pMnode) {
return p;
}
static int32_t initStreamNodeList(SMnode* pMnode) {
if (execInfo.pNodeList == NULL || (taosArrayGetSize(execInfo.pNodeList) == 0)) {
execInfo.pNodeList = taosArrayDestroy(execInfo.pNodeList);
execInfo.pNodeList = extractNodeListFromStream(pMnode);
}
return taosArrayGetSize(execInfo.pNodeList);
}
static int32_t mndProcessStreamDoCheckpoint(SRpcMsg *pReq) {
SMnode * pMnode = pReq->info.node;
SSdb * pSdb = pMnode->pSdb;
@ -1135,22 +1160,18 @@ static int32_t mndProcessStreamDoCheckpoint(SRpcMsg *pReq) {
{ // check if the node update happens or not
int64_t ts = taosGetTimestampSec();
if (execInfo.pNodeEntryList == NULL || (taosArrayGetSize(execInfo.pNodeEntryList) == 0)) {
if (execInfo.pNodeEntryList != NULL) {
execInfo.pNodeEntryList = taosArrayDestroy(execInfo.pNodeEntryList);
}
taosThreadMutexLock(&execInfo.lock);
int32_t numOfNodes = initStreamNodeList(pMnode);
taosThreadMutexUnlock(&execInfo.lock);
execInfo.pNodeEntryList = extractNodeListFromStream(pMnode);
}
if (taosArrayGetSize(execInfo.pNodeEntryList) == 0) {
if (numOfNodes == 0) {
mDebug("stream task node change checking done, no vgroups exist, do nothing");
execInfo.ts = ts;
return 0;
}
for(int32_t i = 0; i < taosArrayGetSize(execInfo.pNodeEntryList); ++i) {
SNodeEntry* pNodeEntry = taosArrayGet(execInfo.pNodeEntryList, i);
for(int32_t i = 0; i < numOfNodes; ++i) {
SNodeEntry* pNodeEntry = taosArrayGet(execInfo.pNodeList, i);
if (pNodeEntry->stageUpdated) {
mDebug("stream task not ready due to node update detected, checkpoint not issued");
return 0;
@ -1160,12 +1181,11 @@ static int32_t mndProcessStreamDoCheckpoint(SRpcMsg *pReq) {
bool allReady = true;
SArray *pNodeSnapshot = mndTakeVgroupSnapshot(pMnode, &allReady);
if (!allReady) {
mWarn("not all vnodes are ready, ignore the checkpoint")
taosArrayDestroy(pNodeSnapshot);
mWarn("not all vnodes are ready, ignore the checkpoint") taosArrayDestroy(pNodeSnapshot);
return 0;
}
SVgroupChangeInfo changeInfo = mndFindChangedNodeInfo(pMnode, execInfo.pNodeEntryList, pNodeSnapshot);
SVgroupChangeInfo changeInfo = mndFindChangedNodeInfo(pMnode, execInfo.pNodeList, pNodeSnapshot);
bool nodeUpdated = (taosArrayGetSize(changeInfo.pUpdateNodeList) > 0);
taosArrayDestroy(changeInfo.pUpdateNodeList);
taosHashCleanup(changeInfo.pDBMap);
@ -2083,20 +2103,21 @@ static SArray *mndTakeVgroupSnapshot(SMnode *pMnode, bool* allReady) {
break;
}
SNodeEntry entry = {0};
SNodeEntry entry = {.nodeId = pVgroup->vgId, .hbTimestamp = pVgroup->updateTime};
entry.epset = mndGetVgroupEpset(pMnode, pVgroup);
entry.nodeId = pVgroup->vgId;
entry.hbTimestamp = pVgroup->updateTime;
// if not all ready till now, no need to check the remaining vgroups.
if (*allReady) {
for (int32_t i = 0; i < pVgroup->replica; ++i) {
if (!pVgroup->vnodeGid[i].syncRestore) {
mInfo("vgId:%d not restored, not ready for checkpoint or other operations", pVgroup->vgId);
*allReady = false;
break;
}
ESyncState state = pVgroup->vnodeGid[i].syncState;
if (state == TAOS_SYNC_STATE_OFFLINE || state == TAOS_SYNC_STATE_ERROR) {
mInfo("vgId:%d offline/err, not ready for checkpoint or other operations", pVgroup->vgId);
*allReady = false;
break;
}
@ -2323,8 +2344,8 @@ int32_t removeExpirednodeEntryAndTask(SArray *pNodeSnapshot) {
int32_t size = taosArrayGetSize(pNodeSnapshot);
SArray* pValidNodeEntryList = taosArrayInit(4, sizeof(SNodeEntry));
for(int32_t i = 0; i < taosArrayGetSize(execInfo.pNodeEntryList); ++i) {
SNodeEntry* p = taosArrayGet(execInfo.pNodeEntryList, i);
for(int32_t i = 0; i < taosArrayGetSize(execInfo.pNodeList); ++i) {
SNodeEntry* p = taosArrayGet(execInfo.pNodeList, i);
for (int32_t j = 0; j < size; ++j) {
SNodeEntry *pEntry = taosArrayGet(pNodeSnapshot, j);
@ -2335,8 +2356,8 @@ int32_t removeExpirednodeEntryAndTask(SArray *pNodeSnapshot) {
}
}
execInfo.pNodeEntryList = taosArrayDestroy(execInfo.pNodeEntryList);
execInfo.pNodeEntryList = pValidNodeEntryList;
taosArrayDestroy(execInfo.pNodeList);
execInfo.pNodeList = pValidNodeEntryList;
mDebug("remain %d valid node entries", (int32_t)taosArrayGetSize(pValidNodeEntryList));
taosArrayDestroy(pRemovedTasks);
@ -2346,6 +2367,7 @@ int32_t removeExpirednodeEntryAndTask(SArray *pNodeSnapshot) {
// this function runs by only one thread, so it is not multi-thread safe
static int32_t mndProcessNodeCheckReq(SRpcMsg *pMsg) {
int32_t code = 0;
int32_t old = atomic_val_compare_exchange_32(&mndNodeCheckSentinel, 0, 1);
if (old != 0) {
mDebug("still in checking node change");
@ -2356,23 +2378,21 @@ static int32_t mndProcessNodeCheckReq(SRpcMsg *pMsg) {
int64_t ts = taosGetTimestampSec();
SMnode *pMnode = pMsg->info.node;
if (execInfo.pNodeEntryList == NULL || (taosArrayGetSize(execInfo.pNodeEntryList) == 0)) {
if (execInfo.pNodeEntryList != NULL) {
execInfo.pNodeEntryList = taosArrayDestroy(execInfo.pNodeEntryList);
}
execInfo.pNodeEntryList = extractNodeListFromStream(pMnode);
}
if (taosArrayGetSize(execInfo.pNodeEntryList) == 0) {
taosThreadMutexLock(&execInfo.lock);
int32_t numOfNodes = initStreamNodeList(pMnode);
taosThreadMutexUnlock(&execInfo.lock);
if (numOfNodes == 0) {
mDebug("end to do stream task node change checking, no vgroup exists, do nothing");
execInfo.ts = ts;
atomic_store_32(&mndNodeCheckSentinel, 0);
return 0;
}
bool allVnodeReady = true;
SArray *pNodeSnapshot = mndTakeVgroupSnapshot(pMnode, &allVnodeReady);
if (!allVnodeReady) {
bool allVgroupsReady = true;
SArray *pNodeSnapshot = mndTakeVgroupSnapshot(pMnode, &allVgroupsReady);
if (!allVgroupsReady) {
taosArrayDestroy(pNodeSnapshot);
atomic_store_32(&mndNodeCheckSentinel, 0);
mWarn("not all vnodes are ready, ignore the exec nodeUpdate check");
@ -2382,9 +2402,8 @@ static int32_t mndProcessNodeCheckReq(SRpcMsg *pMsg) {
taosThreadMutexLock(&execInfo.lock);
removeExpirednodeEntryAndTask(pNodeSnapshot);
SVgroupChangeInfo changeInfo = mndFindChangedNodeInfo(pMnode, execInfo.pNodeEntryList, pNodeSnapshot);
SVgroupChangeInfo changeInfo = mndFindChangedNodeInfo(pMnode, execInfo.pNodeList, pNodeSnapshot);
if (taosArrayGetSize(changeInfo.pUpdateNodeList) > 0) {
// kill current active checkpoint transaction, since the transaction is vnode wide.
doKillActiveCheckpointTrans(pMnode);
code = mndProcessVgroupChange(pMnode, &changeInfo);
@ -2392,8 +2411,8 @@ static int32_t mndProcessNodeCheckReq(SRpcMsg *pMsg) {
// keep the new vnode snapshot
if (code == TSDB_CODE_SUCCESS || code == TSDB_CODE_ACTION_IN_PROGRESS) {
mDebug("create trans successfully, update cached node list");
taosArrayDestroy(execInfo.pNodeEntryList);
execInfo.pNodeEntryList = pNodeSnapshot;
taosArrayDestroy(execInfo.pNodeList);
execInfo.pNodeList = pNodeSnapshot;
execInfo.ts = ts;
} else {
mDebug("unexpect code during create nodeUpdate trans, code:%s", tstrerror(code));
@ -2480,7 +2499,6 @@ void removeStreamTasksInBuf(SStreamObj* pStream, SStreamExecInfo * pExecNode) {
break;
}
}
}
}
}
@ -2594,7 +2612,7 @@ int32_t doKillActiveCheckpointTrans(SMnode *pMnode) {
}
if (transId == 0) {
mError("failed to find the checkpoint trans, reset not executed");
mDebug("failed to find the checkpoint trans, reset not executed");
return TSDB_CODE_SUCCESS;
}
@ -2633,16 +2651,18 @@ int32_t mndResetFromCheckpoint(SMnode* pMnode) {
int32_t setNodeEpsetExpiredFlag(const SArray *pNodeList) {
int32_t num = taosArrayGetSize(pNodeList);
mInfo("set node expired for %d nodes", num);
for (int k = 0; k < num; ++k) {
int32_t* pVgId = taosArrayGet(pNodeList, k);
mInfo("set node expired for nodeId:%d, total:%d", *pVgId, num);
int32_t numOfNodes = taosArrayGetSize(execInfo.pNodeEntryList);
int32_t numOfNodes = taosArrayGetSize(execInfo.pNodeList);
for (int i = 0; i < numOfNodes; ++i) {
SNodeEntry* pNodeEntry = taosArrayGet(execInfo.pNodeEntryList, i);
SNodeEntry* pNodeEntry = taosArrayGet(execInfo.pNodeList, i);
if (pNodeEntry->nodeId == *pVgId) {
mInfo("vgId:%d expired in stream task, needs update nodeEp", *pVgId);
mInfo("vgId:%d expired for some stream tasks, needs update nodeEp", *pVgId);
pNodeEntry->stageUpdated = true;
break;
}
@ -2653,11 +2673,10 @@ int32_t setNodeEpsetExpiredFlag(const SArray* pNodeList) {
}
static void updateStageInfo(STaskStatusEntry* pTaskEntry, int64_t stage) {
int32_t numOfNodes = taosArrayGetSize(execInfo.pNodeEntryList);
int32_t numOfNodes = taosArrayGetSize(execInfo.pNodeList);
for(int32_t j = 0; j < numOfNodes; ++j) {
SNodeEntry* pNodeEntry = taosArrayGet(execInfo.pNodeEntryList, j);
SNodeEntry* pNodeEntry = taosArrayGet(execInfo.pNodeList, j);
if (pNodeEntry->nodeId == pTaskEntry->nodeId) {
mInfo("vgId:%d stage updated from %"PRId64 " to %"PRId64 ", nodeUpdate trigger by s-task:0x%" PRIx64, pTaskEntry->nodeId,
pTaskEntry->stage, stage, pTaskEntry->id.taskId);
@ -2688,12 +2707,20 @@ int32_t mndProcessStreamHb(SRpcMsg *pReq) {
mTrace("receive stream-meta hb from vgId:%d, active numOfTasks:%d", req.vgId, req.numOfTasks);
taosThreadMutexLock(&execInfo.lock);
// extract stream task list
int32_t numOfExisted = taosHashGetSize(execInfo.pTaskMap);
if (numOfExisted == 0) {
doExtractTasksFromStream(pMnode);
}
initStreamNodeList(pMnode);
int32_t numOfUpdated = taosArrayGetSize(req.pUpdateNodes);
if (numOfUpdated > 0) {
mDebug("%d stream node(s) need updated from report of hbMsg(vgId:%d)", numOfUpdated, req.vgId);
setNodeEpsetExpiredFlag(req.pUpdateNodes);
}
bool snodeChanged = false;
for (int32_t i = 0; i < req.numOfTasks; ++i) {

View File

@ -1561,7 +1561,11 @@ static int32_t mndProcessCreateUserReq(SRpcMsg *pReq) {
code = mndCreateUser(pMnode, pOperUser->acct, &createReq, pReq);
if (code == 0) code = TSDB_CODE_ACTION_IN_PROGRESS;
auditRecord(pReq, pMnode->clusterId, "createUser", "", createReq.user, createReq.sql, createReq.sqlLen);
char detail[1000] = {0};
sprintf(detail, "enable:%d, superUser:%d, sysInfo:%d, password:xxx",
createReq.enable, createReq.superUser, createReq.sysInfo);
auditRecord(pReq, pMnode->clusterId, "createUser", "", createReq.user, detail, strlen(detail));
_OVER:
if (code != 0 && code != TSDB_CODE_ACTION_IN_PROGRESS) {

View File

@ -108,6 +108,7 @@ struct SRSmaStat {
int64_t refId; // shared by fetch tasks
volatile int64_t nBufItems; // number of items in queue buffer
SRWLatch lock; // r/w lock for rsma fs(e.g. qtaskinfo)
volatile int32_t execStat; // 0 succeed, other failed
volatile int32_t nFetchAll; // active number of fetch all
volatile int8_t triggerStat; // shared by fetch tasks
volatile int8_t commitStat; // 0 not in committing, 1 in committing
@ -115,6 +116,7 @@ struct SRSmaStat {
SRSmaFS fs; // for recovery/snapshot r/w
SHashObj *infoHash; // key: suid, value: SRSmaInfo
tsem_t notEmpty; // has items in queue buffer
SArray *blocks; // SArray<SSDataBlock>
};
struct SSmaStat {
@ -136,13 +138,18 @@ struct SSmaStat {
#define RSMA_FS_LOCK(r) (&(r)->lock)
struct SRSmaInfoItem {
int8_t level : 4;
int8_t fetchLevel : 4;
int8_t level;
int8_t fetchLevel;
int8_t triggerStat;
uint16_t nScanned;
int32_t maxDelay; // ms
int32_t nScanned;
int32_t streamFlushed : 1;
int32_t maxDelay : 31; // ms
int64_t submitReqVer;
int64_t fetchResultVer;
tmr_h tmrId;
void *pStreamState;
void *pStreamTask; // SStreamTask
SArray *pResList;
};
struct SRSmaInfo {
@ -160,11 +167,9 @@ struct SRSmaInfo {
STaosQall *qall; // buffer qall of SubmitReq
};
#define RSMA_INFO_HEAD_LEN offsetof(SRSmaInfo, items)
#define RSMA_INFO_IS_DEL(r) ((r)->delFlag == 1)
#define RSMA_INFO_SET_DEL(r) ((r)->delFlag = 1)
#define RSMA_INFO_QTASK(r, i) ((r)->taskInfo[i])
#define RSMA_INFO_IQTASK(r, i) ((r)->iTaskInfo[i])
#define RSMA_INFO_ITEM(r, i) (&(r)->items[i])
enum {
@ -214,11 +219,11 @@ static FORCE_INLINE void tdUnRefSmaStat(SSma *pSma, SSmaStat *pStat) {
int32_t smaPreClose(SSma *pSma);
// rsma
void *tdFreeRSmaInfo(SSma *pSma, SRSmaInfo *pInfo, bool isDeepFree);
void *tdFreeRSmaInfo(SSma *pSma, SRSmaInfo *pInfo);
int32_t tdRSmaRestore(SSma *pSma, int8_t type, int64_t committedVer, int8_t rollback);
int32_t tdRSmaProcessCreateImpl(SSma *pSma, SRSmaParam *param, int64_t suid, const char *tbName);
int32_t tdRSmaProcessExecImpl(SSma *pSma, ERsmaExecType type);
// int32_t tdRSmaPersistExecImpl(SRSmaStat *pRSmaStat, SHashObj *pInfoHash);
int32_t tdRSmaPersistExecImpl(SRSmaStat *pRSmaStat, SHashObj *pInfoHash);
int32_t tdRSmaProcessRestoreImpl(SSma *pSma, int8_t type, int64_t qtaskFileVer, int8_t rollback);
void tdRSmaQTaskInfoGetFullPath(SVnode *pVnode, tb_uid_t suid, int8_t level, STfs *pTfs, char *outputName);

View File

@ -147,7 +147,7 @@ int32_t tqOffsetDelete(STqOffsetStore* pStore, const char* subscribeKey)
int32_t tqOffsetCommitFile(STqOffsetStore* pStore);
// tqSink
int32_t tqBuildDeleteReq(const char* stbFullName, const SSDataBlock* pDataBlock, SBatchDeleteReq* deleteReq,
int32_t tqBuildDeleteReq(STQ* pTq, const char* stbFullName, const SSDataBlock* pDataBlock, SBatchDeleteReq* deleteReq,
const char* pIdStr);
void tqSinkDataIntoDstTable(SStreamTask* pTask, void* vnode, void* data);
@ -160,7 +160,7 @@ int32_t tqResetStreamTaskStatus(STQ* pTq);
int32_t tqStopStreamTasks(STQ* pTq);
// tq util
int32_t extractDelDataBlock(const void* pData, int32_t len, int64_t ver, SStreamRefDataBlock** pRefBlock);
int32_t extractDelDataBlock(const void* pData, int32_t len, int64_t ver, void** pRefBlock, int32_t type);
int32_t tqExtractDataForMq(STQ* pTq, STqHandle* pHandle, const SMqPollReq* pRequest, SRpcMsg* pMsg);
int32_t tqDoSendDataRsp(const SRpcHandleInfo* pRpcHandleInfo, const SMqDataRsp* pRsp, int32_t epoch, int64_t consumerId,
int32_t type, int64_t sver, int64_t ever);

View File

@ -209,6 +209,7 @@ int32_t tsdbBegin(STsdb* pTsdb);
// int32_t tsdbCommit(STsdb* pTsdb, SCommitInfo* pInfo);
int32_t tsdbCacheCommit(STsdb* pTsdb);
int32_t tsdbCompact(STsdb* pTsdb, SCompactInfo* pInfo);
int32_t tsdbRetention(STsdb *tsdb, int64_t now, int32_t sync);
// int32_t tsdbFinishCommit(STsdb* pTsdb);
// int32_t tsdbRollbackCommit(STsdb* pTsdb);
int tsdbScanAndConvertSubmitMsg(STsdb* pTsdb, SSubmitReq2* pMsg);
@ -279,13 +280,14 @@ int32_t smaPrepareAsyncCommit(SSma* pSma);
int32_t smaCommit(SSma* pSma, SCommitInfo* pInfo);
int32_t smaFinishCommit(SSma* pSma);
int32_t smaPostCommit(SSma* pSma);
int32_t smaDoRetention(SSma* pSma, int64_t now);
int32_t smaRetention(SSma* pSma, int64_t now);
int32_t tdProcessTSmaCreate(SSma* pSma, int64_t version, const char* msg);
int32_t tdProcessTSmaInsert(SSma* pSma, int64_t indexUid, const char* msg);
int32_t tdProcessRSmaCreate(SSma* pSma, SVCreateStbReq* pReq);
int32_t tdProcessRSmaSubmit(SSma* pSma, int64_t version, void* pReq, void* pMsg, int32_t len, int32_t inputType);
int32_t tdProcessRSmaSubmit(SSma* pSma, int64_t version, void* pReq, void* pMsg, int32_t len);
int32_t tdProcessRSmaDelete(SSma* pSma, int64_t version, void* pReq, void* pMsg, int32_t len);
int32_t tdProcessRSmaDrop(SSma* pSma, SVDropStbReq* pReq);
int32_t tdFetchTbUidList(SSma* pSma, STbUidStore** ppStore, tb_uid_t suid, tb_uid_t uid);
int32_t tdUpdateTbUidList(SSma* pSma, STbUidStore* pUidStore, bool isAdd);

View File

@ -156,10 +156,10 @@ static int32_t tdProcessRSmaAsyncPreCommitImpl(SSma *pSma, bool isCommit) {
nLoops = 0;
while (1) {
if (atomic_load_32(&pRSmaStat->nFetchAll) <= 0) {
smaDebug("vgId:%d, rsma commit:%d, fetch tasks are all finished", SMA_VID(pSma), isCommit);
smaDebug("vgId:%d, rsma commit, type:%d, fetch tasks are all finished", SMA_VID(pSma), isCommit);
break;
} else {
smaDebug("vgId:%d, rsma commit%d, fetch tasks are not all finished yet", SMA_VID(pSma), isCommit);
smaDebug("vgId:%d, rsma commit, type:%d, fetch tasks are not all finished yet", SMA_VID(pSma), isCommit);
}
TD_SMA_LOOPS_CHECK(nLoops, 1000);
}
@ -169,22 +169,24 @@ static int32_t tdProcessRSmaAsyncPreCommitImpl(SSma *pSma, bool isCommit) {
* 1) This is high cost task and should not put in asyncPreCommit originally.
* 2) But, if put in asyncCommit, would trigger taskInfo cloning frequently.
*/
smaInfo("vgId:%d, rsma commit:%d, wait for all items to be consumed, TID:%p", SMA_VID(pSma), isCommit,
smaInfo("vgId:%d, rsma commit, type:%d, wait for all items to be consumed, TID:%p", SMA_VID(pSma), isCommit,
(void *)taosGetSelfPthreadId());
nLoops = 0;
while (atomic_load_64(&pRSmaStat->nBufItems) > 0) {
TD_SMA_LOOPS_CHECK(nLoops, 1000);
}
smaInfo("vgId:%d, rsma commit, all items are consumed, TID:%p", SMA_VID(pSma), (void *)taosGetSelfPthreadId());
if (!isCommit) goto _exit;
// code = tdRSmaPersistExecImpl(pRSmaStat, RSMA_INFO_HASH(pRSmaStat));
code = atomic_load_32(&pRSmaStat->execStat);
TSDB_CHECK_CODE(code, lino, _exit);
code = tdRSmaPersistExecImpl(pRSmaStat, RSMA_INFO_HASH(pRSmaStat));
TSDB_CHECK_CODE(code, lino, _exit);
smaInfo("vgId:%d, rsma commit, operator state committed, TID:%p", SMA_VID(pSma), (void *)taosGetSelfPthreadId());
smaInfo("vgId:%d, rsma commit, all items are consumed, TID:%p", SMA_VID(pSma), (void *)taosGetSelfPthreadId());
// all rsma results are written completely
STsdb *pTsdb = NULL;
if ((pTsdb = VND_RSMA1(pSma->pVnode))) {
@ -215,10 +217,7 @@ static int32_t tdProcessRSmaAsyncCommitImpl(SSma *pSma, SCommitInfo *pInfo) {
int32_t lino = 0;
SVnode *pVnode = pSma->pVnode;
SSmaEnv *pSmaEnv = SMA_RSMA_ENV(pSma);
if (!pSmaEnv) {
goto _exit;
}
if (!SMA_RSMA_ENV(pSma)) goto _exit;
code = tsdbCommitBegin(VND_RSMA1(pVnode), pInfo);
TSDB_CHECK_CODE(code, lino, _exit);

View File

@ -179,7 +179,7 @@ static void tRSmaInfoHashFreeNode(void *data) {
if ((pItem = RSMA_INFO_ITEM((SRSmaInfo *)pRSmaInfo, 1)) && pItem->level) {
taosHashRemove(smaMgmt.refHash, &pItem, POINTER_BYTES);
}
tdFreeRSmaInfo(pRSmaInfo->pSma, pRSmaInfo, true);
tdFreeRSmaInfo(pRSmaInfo->pSma, pRSmaInfo);
}
}
@ -209,6 +209,12 @@ static int32_t tdInitSmaStat(SSmaStat **pSmaStat, int8_t smaType, const SSma *pS
pRSmaStat->pSma = (SSma *)pSma;
atomic_store_8(RSMA_TRIGGER_STAT(pRSmaStat), TASK_TRIGGER_STAT_INIT);
tsem_init(&pRSmaStat->notEmpty, 0, 0);
if (!(pRSmaStat->blocks = taosArrayInit(1, sizeof(SSDataBlock)))) {
code = TSDB_CODE_OUT_OF_MEMORY;
TSDB_CHECK_CODE(code, lino, _exit);
}
SSDataBlock datablock = {.info.type = STREAM_CHECKPOINT};
taosArrayPush(pRSmaStat->blocks, &datablock);
// init smaMgmt
smaInit();
@ -290,6 +296,7 @@ static void tdDestroyRSmaStat(void *pRSmaStat) {
// step 5: free pStat
tsem_destroy(&(pStat->notEmpty));
taosArrayDestroy(pStat->blocks);
taosMemoryFreeClear(pStat);
}
}

View File

@ -15,12 +15,19 @@
#include "sma.h"
#include "tq.h"
#include "tstream.h"
#define RSMA_QTASKEXEC_SMOOTH_SIZE (100) // cnt
#define RSMA_SUBMIT_BATCH_SIZE (1024) // cnt
#define RSMA_EXEC_SMOOTH_SIZE (100) // cnt
#define RSMA_EXEC_BATCH_SIZE (1024) // cnt
#define RSMA_FETCH_DELAY_MAX (120000) // ms
#define RSMA_FETCH_ACTIVE_MAX (1000) // ms
#define RSMA_FETCH_INTERVAL (5000) // ms
#define RSMA_EXEC_TASK_FLAG "rsma"
#define RSMA_EXEC_MSG_HLEN (13) // type(int8_t) + len(int32_t) + version(int64_t)
#define RSMA_EXEC_MSG_TYPE(msg) (*(int8_t *)(msg))
#define RSMA_EXEC_MSG_LEN(msg) (*(int32_t *)POINTER_SHIFT((msg), sizeof(int8_t)))
#define RSMA_EXEC_MSG_VER(msg) (*(int64_t *)POINTER_SHIFT((msg), sizeof(int8_t) + sizeof(int32_t)))
#define RSMA_EXEC_MSG_BODY(msg) (POINTER_SHIFT((msg), RSMA_EXEC_MSG_HLEN))
#define RSMA_NEED_FETCH(r) (RSMA_INFO_ITEM((r), 0)->fetchLevel || RSMA_INFO_ITEM((r), 1)->fetchLevel)
@ -36,14 +43,14 @@ static void tdUidStoreDestory(STbUidStore *pStore);
static int32_t tdUpdateTbUidListImpl(SSma *pSma, tb_uid_t *suid, SArray *tbUids, bool isAdd);
static int32_t tdSetRSmaInfoItemParams(SSma *pSma, SRSmaParam *param, SRSmaStat *pStat, SRSmaInfo *pRSmaInfo,
int8_t idx);
static int32_t tdExecuteRSmaImpl(SSma *pSma, const void *pMsg, int32_t msgSize, int32_t inputType, SRSmaInfo *pInfo,
ERsmaExecType type, int8_t level);
static int32_t tdExecuteRSmaImpl(SSma *pSma, const void *pMsg, int32_t msgSize, int64_t version, int32_t inputType,
SRSmaInfo *pInfo, ERsmaExecType type, int8_t level);
static SRSmaInfo *tdAcquireRSmaInfoBySuid(SSma *pSma, int64_t suid);
static void tdReleaseRSmaInfo(SSma *pSma, SRSmaInfo *pInfo);
static void tdFreeRSmaSubmitItems(SArray *pItems);
static void tdFreeRSmaSubmitItems(SArray *pItems, int32_t type);
static int32_t tdRSmaFetchAllResult(SSma *pSma, SRSmaInfo *pInfo);
static int32_t tdRSmaExecAndSubmitResult(SSma *pSma, qTaskInfo_t taskInfo, SRSmaInfoItem *pItem, STSchema *pTSchema,
int64_t suid);
static int32_t tdRSmaExecAndSubmitResult(SSma *pSma, qTaskInfo_t taskInfo, SRSmaInfoItem *pItem, SRSmaInfo *pInfo,
int32_t execType, int8_t *streamFlushed);
static void tdRSmaFetchTrigger(void *param, void *tmrId);
static void tdRSmaQTaskInfoFree(qTaskInfo_t *taskHandle, int32_t vgId, int32_t level);
static int32_t tdRSmaRestoreQTaskInfoInit(SSma *pSma, int64_t *nTables);
@ -72,33 +79,32 @@ static void tdRSmaQTaskInfoFree(qTaskInfo_t *taskHandle, int32_t vgId, int32_t l
*
* @param pSma
* @param pInfo
* @param isDeepFree Only stop tmrId and free pTSchema for deep free
* @return void*
*/
void *tdFreeRSmaInfo(SSma *pSma, SRSmaInfo *pInfo, bool isDeepFree) {
void *tdFreeRSmaInfo(SSma *pSma, SRSmaInfo *pInfo) {
if (pInfo) {
for (int32_t i = 0; i < TSDB_RETENTION_L2; ++i) {
SRSmaInfoItem *pItem = &pInfo->items[i];
if (isDeepFree && pItem->tmrId) {
if (pItem->tmrId) {
smaDebug("vgId:%d, stop fetch timer %p for table %" PRIi64 " level %d", SMA_VID(pSma), pItem->tmrId,
pInfo->suid, i + 1);
taosTmrStopA(&pItem->tmrId);
}
if (isDeepFree && pItem->pStreamState) {
if (pItem->pStreamState) {
streamStateClose(pItem->pStreamState, false);
}
if (isDeepFree && pInfo->taskInfo[i]) {
if (pItem->pStreamTask) {
tFreeStreamTask(pItem->pStreamTask);
}
taosArrayDestroy(pItem->pResList);
tdRSmaQTaskInfoFree(&pInfo->taskInfo[i], SMA_VID(pSma), i + 1);
}
}
if (isDeepFree) {
taosMemoryFreeClear(pInfo->pTSchema);
}
if (isDeepFree) {
taosMemoryFreeClear(pInfo->pTSchema);
if (pInfo->queue) {
taosCloseQueue(pInfo->queue);
pInfo->queue = NULL;
@ -107,7 +113,6 @@ void *tdFreeRSmaInfo(SSma *pSma, SRSmaInfo *pInfo, bool isDeepFree) {
taosFreeQall(pInfo->qall);
pInfo->qall = NULL;
}
}
taosMemoryFree(pInfo);
}
@ -135,7 +140,9 @@ static int32_t tdUpdateTbUidListImpl(SSma *pSma, tb_uid_t *suid, SArray *tbUids,
return TSDB_CODE_FAILED;
}
if (!taosArrayGetSize(tbUids)) {
int32_t nTables = taosArrayGetSize(tbUids);
if (0 == nTables) {
smaDebug("vgId:%d, no need to update tbUidList for suid:%" PRIi64 " since Empty tbUids", SMA_VID(pSma), *suid);
return TSDB_CODE_SUCCESS;
}
@ -156,8 +163,9 @@ static int32_t tdUpdateTbUidListImpl(SSma *pSma, tb_uid_t *suid, SArray *tbUids,
terrstr());
return TSDB_CODE_FAILED;
}
smaDebug("vgId:%d, update tbUidList succeed for qTaskInfo:%p with suid:%" PRIi64 " uid:%" PRIi64 " level %d",
SMA_VID(pSma), pRSmaInfo->taskInfo[i], *suid, *(int64_t *)taosArrayGet(tbUids, 0), i);
smaDebug("vgId:%d, update tbUidList succeed for qTaskInfo:%p. suid:%" PRIi64 " uid:%" PRIi64
"nTables:%d level %d",
SMA_VID(pSma), pRSmaInfo->taskInfo[i], *suid, *(int64_t *)TARRAY_GET_ELEM(tbUids, 0), nTables, i);
}
}
@ -174,8 +182,8 @@ int32_t tdUpdateTbUidList(SSma *pSma, STbUidStore *pStore, bool isAdd) {
return TSDB_CODE_FAILED;
}
void *pIter = taosHashIterate(pStore->uidHash, NULL);
while (pIter) {
void *pIter = NULL;
while ((pIter = taosHashIterate(pStore->uidHash, pIter))) {
tb_uid_t *pTbSuid = (tb_uid_t *)taosHashGetKey(pIter, NULL);
SArray *pTbUids = *(SArray **)pIter;
@ -183,8 +191,6 @@ int32_t tdUpdateTbUidList(SSma *pSma, STbUidStore *pStore, bool isAdd) {
taosHashCancelIterate(pStore->uidHash, pIter);
return TSDB_CODE_FAILED;
}
pIter = taosHashIterate(pStore->uidHash, pIter);
}
return TSDB_CODE_SUCCESS;
}
@ -232,9 +238,32 @@ int32_t tdFetchTbUidList(SSma *pSma, STbUidStore **ppStore, tb_uid_t suid, tb_ui
return TSDB_CODE_SUCCESS;
}
static void tdRSmaTaskInit(SStreamMeta *pMeta, SRSmaInfoItem *pItem, SStreamTaskId *pId) {
STaskId id = {.streamId = pId->streamId, .taskId = pId->taskId};
taosRLockLatch(&pMeta->lock);
SStreamTask **ppTask = (SStreamTask **)taosHashGet(pMeta->pTasksMap, &id, sizeof(id));
if (ppTask && *ppTask) {
pItem->submitReqVer = (*ppTask)->chkInfo.checkpointVer;
pItem->fetchResultVer = (*ppTask)->info.triggerParam;
}
taosRUnLockLatch(&pMeta->lock);
}
static void tdRSmaTaskRemove(SStreamMeta *pMeta, int64_t streamId, int32_t taskId) {
streamMetaUnregisterTask(pMeta, streamId, taskId);
taosWLockLatch(&pMeta->lock);
int32_t numOfTasks = streamMetaGetNumOfTasks(pMeta);
if (streamMetaCommit(pMeta) < 0) {
// persist to disk
}
taosWUnLockLatch(&pMeta->lock);
smaDebug("vgId:%d, rsma task:%" PRIi64 ",%d dropped, remain tasks:%d", pMeta->vgId, streamId, taskId, numOfTasks);
}
static int32_t tdSetRSmaInfoItemParams(SSma *pSma, SRSmaParam *param, SRSmaStat *pStat, SRSmaInfo *pRSmaInfo,
int8_t idx) {
if ((param->qmsgLen > 0) && param->qmsg[idx]) {
SRSmaInfoItem *pItem = &(pRSmaInfo->items[idx]);
SRetention *pRetention = SMA_RETENTION(pSma);
STsdbCfg *pTsdbCfg = SMA_TSDB_CFG(pSma);
SVnode *pVnode = pSma->pVnode;
@ -254,25 +283,46 @@ static int32_t tdSetRSmaInfoItemParams(SSma *pSma, SRSmaParam *param, SRSmaStat
taosMemoryFree(s);
}
SStreamTask task = {.id.taskId = 0, .id.streamId = 0}; // TODO: assign value
task.pMeta = pVnode->pTq->pStreamMeta;
pStreamState = streamStateOpen(taskInfDir, &task, true, -1, -1);
SStreamTask *pStreamTask = taosMemoryCalloc(1, sizeof(*pStreamTask));
if (!pStreamTask) {
terrno = TSDB_CODE_OUT_OF_MEMORY;
return TSDB_CODE_FAILED;
}
pItem->pStreamTask = pStreamTask;
pStreamTask->id.taskId = 0;
pStreamTask->id.streamId = pRSmaInfo->suid + idx;
pStreamTask->chkInfo.startTs = taosGetTimestampMs();
pStreamTask->pMeta = pVnode->pTq->pStreamMeta;
pStreamTask->exec.qmsg = taosMemoryMalloc(strlen(RSMA_EXEC_TASK_FLAG) + 1);
sprintf(pStreamTask->exec.qmsg, "%s", RSMA_EXEC_TASK_FLAG);
pStreamTask->chkInfo.checkpointId = streamMetaGetLatestCheckpointId(pStreamTask->pMeta);
tdRSmaTaskInit(pStreamTask->pMeta, pItem, &pStreamTask->id);
pStreamState = streamStateOpen(taskInfDir, pStreamTask, true, -1, -1);
if (!pStreamState) {
terrno = TSDB_CODE_RSMA_STREAM_STATE_OPEN;
return TSDB_CODE_FAILED;
}
pItem->pStreamState = pStreamState;
SReadHandle handle = {.vnode = pVnode, .initTqReader = 1, .pStateBackend = pStreamState};
tdRSmaTaskRemove(pStreamTask->pMeta, pStreamTask->id.streamId, pStreamTask->id.taskId);
SReadHandle handle = {.vnode = pVnode, .initTqReader = 1, .skipRollup = 1, .pStateBackend = pStreamState};
initStorageAPI(&handle.api);
pRSmaInfo->taskInfo[idx] = qCreateStreamExecTaskInfo(param->qmsg[idx], &handle, TD_VID(pVnode), 0);
if (!pRSmaInfo->taskInfo[idx]) {
terrno = TSDB_CODE_RSMA_QTASKINFO_CREATE;
return TSDB_CODE_FAILED;
}
SRSmaInfoItem *pItem = &(pRSmaInfo->items[idx]);
pItem->triggerStat = TASK_TRIGGER_STAT_ACTIVE; // fetch the data when reboot
pItem->pStreamState = pStreamState;
if (!(pItem->pResList = taosArrayInit(1, POINTER_BYTES))) {
return TSDB_CODE_FAILED;
}
if (pItem->fetchResultVer < pItem->submitReqVer) {
// fetch the data when reboot
pItem->triggerStat = TASK_TRIGGER_STAT_ACTIVE;
}
if (param->maxdelay[idx] < TSDB_MIN_ROLLUP_MAX_DELAY) {
int64_t msInterval =
convertTimeFromPrecisionToUnit(pRetention[idx + 1].freq, pTsdbCfg->precision, TIME_UNIT_MILLISECOND);
@ -291,10 +341,11 @@ static int32_t tdSetRSmaInfoItemParams(SSma *pSma, SRSmaParam *param, SRSmaStat
taosTmrReset(tdRSmaFetchTrigger, RSMA_FETCH_INTERVAL, pItem, smaMgmt.tmrHandle, &pItem->tmrId);
smaInfo("vgId:%d, item:%p table:%" PRIi64 " level:%" PRIi8 " maxdelay:%" PRIi64 " watermark:%" PRIi64
smaInfo("vgId:%d, open rsma task:%p table:%" PRIi64 " level:%" PRIi8 ", checkpointId:%" PRIi64
", submitReqVer:%" PRIi64 ", fetchResultVer:%" PRIi64 ", maxdelay:%" PRIi64 " watermark:%" PRIi64
", finally maxdelay:%" PRIi32,
TD_VID(pVnode), pItem, pRSmaInfo->suid, (int8_t)(idx + 1), param->maxdelay[idx], param->watermark[idx],
pItem->maxDelay);
TD_VID(pVnode), pItem->pStreamTask, pRSmaInfo->suid, (int8_t)(idx + 1), pStreamTask->chkInfo.checkpointId,
pItem->submitReqVer, pItem->fetchResultVer, param->maxdelay[idx], param->watermark[idx], pItem->maxDelay);
}
return TSDB_CODE_SUCCESS;
}
@ -362,7 +413,7 @@ int32_t tdRSmaProcessCreateImpl(SSma *pSma, SRSmaParam *param, int64_t suid, con
return TSDB_CODE_SUCCESS;
_err:
tdFreeRSmaInfo(pSma, pRSmaInfo, true);
tdFreeRSmaInfo(pSma, pRSmaInfo);
return TSDB_CODE_FAILED;
}
@ -498,11 +549,10 @@ static void tdUidStoreDestory(STbUidStore *pStore) {
if (pStore->uidHash) {
if (pStore->tbUids) {
// When pStore->tbUids not NULL, the pStore->uidHash has k/v; otherwise pStore->uidHash only has keys.
void *pIter = taosHashIterate(pStore->uidHash, NULL);
while (pIter) {
void *pIter = NULL;
while ((pIter = taosHashIterate(pStore->uidHash, pIter))) {
SArray *arr = *(SArray **)pIter;
taosArrayDestroy(arr);
pIter = taosHashIterate(pStore->uidHash, pIter);
}
}
taosHashCleanup(pStore->uidHash);
@ -562,7 +612,7 @@ static int32_t tdFetchSubmitReqSuids(SSubmitReq2 *pMsg, STbUidStore *pStore) {
* @param now
* @return int32_t
*/
int32_t smaDoRetention(SSma *pSma, int64_t now) {
int32_t smaRetention(SSma *pSma, int64_t now) {
int32_t code = TSDB_CODE_SUCCESS;
if (!VND_IS_RSMA(pSma->pVnode)) {
return code;
@ -570,8 +620,8 @@ int32_t smaDoRetention(SSma *pSma, int64_t now) {
for (int32_t i = 0; i < TSDB_RETENTION_L2; ++i) {
if (pSma->pRSmaTsdb[i]) {
// code = tsdbDoRetention(pSma->pRSmaTsdb[i], now);
// if (code) goto _end;
code = tsdbRetention(pSma->pRSmaTsdb[i], now, pSma->pVnode->config.sttTrigger == 1);
if (code) goto _end;
}
}
@ -579,17 +629,53 @@ _end:
return code;
}
static int32_t tdRSmaExecAndSubmitResult(SSma *pSma, qTaskInfo_t taskInfo, SRSmaInfoItem *pItem, STSchema *pTSchema,
int64_t suid) {
static int32_t tdRSmaProcessDelReq(SSma *pSma, int64_t suid, int8_t level, SBatchDeleteReq *pDelReq) {
int32_t code = 0;
int32_t lino = 0;
if (taosArrayGetSize(pDelReq->deleteReqs) > 0) {
int32_t len = 0;
tEncodeSize(tEncodeSBatchDeleteReq, pDelReq, len, code);
TSDB_CHECK_CODE(code, lino, _exit);
void *pBuf = rpcMallocCont(len + sizeof(SMsgHead));
if (!pBuf) {
code = terrno;
TSDB_CHECK_CODE(code, lino, _exit);
}
SEncoder encoder;
tEncoderInit(&encoder, POINTER_SHIFT(pBuf, sizeof(SMsgHead)), len);
tEncodeSBatchDeleteReq(&encoder, pDelReq);
tEncoderClear(&encoder);
((SMsgHead *)pBuf)->vgId = TD_VID(pSma->pVnode);
SRpcMsg delMsg = {.msgType = TDMT_VND_BATCH_DEL,
.pCont = pBuf,
.contLen = len + sizeof(SMsgHead)};
code = tmsgPutToQueue(&pSma->pVnode->msgCb, WRITE_QUEUE, &delMsg);
TSDB_CHECK_CODE(code, lino, _exit);
}
_exit:
taosArrayDestroy(pDelReq->deleteReqs);
if (code) {
smaError("vgId:%d, failed at line %d to process delete req for table:%" PRIi64 ", level:%" PRIi8 " since %s",
SMA_VID(pSma), lino, suid, level, tstrerror(code));
}
return code;
}
static int32_t tdRSmaExecAndSubmitResult(SSma *pSma, qTaskInfo_t taskInfo, SRSmaInfoItem *pItem, SRSmaInfo *pInfo,
int32_t execType, int8_t *streamFlushed) {
int32_t code = 0;
int32_t lino = 0;
SSDataBlock *output = NULL;
SArray *pResList = taosArrayInit(1, POINTER_BYTES);
if (pResList == NULL) {
code = TSDB_CODE_OUT_OF_MEMORY;
TSDB_CHECK_CODE(code, lino, _exit);
}
SArray *pResList = pItem->pResList;
STSchema *pTSchema = pInfo->pTSchema;
int64_t suid = pInfo->suid;
while (1) {
uint64_t ts;
@ -604,34 +690,76 @@ static int32_t tdRSmaExecAndSubmitResult(SSma *pSma, qTaskInfo_t taskInfo, SRSma
if (taosArrayGetSize(pResList) == 0) {
break;
}
#if 0
char flag[10] = {0};
snprintf(flag, 10, "level %" PRIi8, pItem->level);
blockDebugShowDataBlocks(pResList, flag);
#endif
for (int32_t i = 0; i < taosArrayGetSize(pResList); ++i) {
output = taosArrayGetP(pResList, i);
smaDebug("vgId:%d, result block, uid:%" PRIu64 ", groupid:%" PRIu64 ", rows:%" PRIi64, SMA_VID(pSma),
output->info.id.uid, output->info.id.groupId, output->info.rows);
if (output->info.type == STREAM_CHECKPOINT) {
if (streamFlushed) *streamFlushed = 1;
continue;
} else if (output->info.type == STREAM_DELETE_RESULT) {
SBatchDeleteReq deleteReq = {.suid = suid, .level = pItem->level};
deleteReq.deleteReqs = taosArrayInit(0, sizeof(SSingleDeleteReq));
if (!deleteReq.deleteReqs) {
code = terrno;
TSDB_CHECK_CODE(code, lino, _exit);
}
code = tqBuildDeleteReq(pSma->pVnode->pTq, NULL, output, &deleteReq, "");
TSDB_CHECK_CODE(code, lino, _exit);
code = tdRSmaProcessDelReq(pSma, suid, pItem->level, &deleteReq);
TSDB_CHECK_CODE(code, lino, _exit);
continue;
}
smaDebug("vgId:%d, result block, execType:%d, ver:%" PRIi64 ", submitReqVer:%" PRIi64 ", fetchResultVer:%" PRIi64
", suid:%" PRIi64 ", level:%" PRIi8 ", uid:%" PRIu64 ", groupid:%" PRIu64 ", rows:%" PRIi64,
SMA_VID(pSma), execType, output->info.version, pItem->submitReqVer, pItem->fetchResultVer, suid,
pItem->level, output->info.id.uid, output->info.id.groupId, output->info.rows);
if (STREAM_GET_ALL == execType) {
/**
* 1. reset the output version when reboot
* 2. delete msg version not updated from the result
*/
if (output->info.version < pItem->submitReqVer) {
// submitReqVer keeps unchanged since tdExecuteRSmaImpl and tdRSmaFetchAllResult are executed synchronously
output->info.version = pItem->submitReqVer;
} else if (output->info.version == pItem->fetchResultVer) {
smaWarn("vgId:%d, result block, skip dup version, execType:%d, ver:%" PRIi64 ", submitReqVer:%" PRIi64
", fetchResultVer:%" PRIi64 ", suid:%" PRIi64 ", level:%" PRIi8 ", uid:%" PRIu64 ", groupid:%" PRIu64
", rows:%" PRIi64,
SMA_VID(pSma), execType, output->info.version, pItem->submitReqVer, pItem->fetchResultVer, suid,
pItem->level, output->info.id.uid, output->info.id.groupId, output->info.rows);
continue;
}
}
STsdb *sinkTsdb = (pItem->level == TSDB_RETENTION_L1 ? pSma->pRSmaTsdb[0] : pSma->pRSmaTsdb[1]);
SSubmitReq2 *pReq = NULL;
// TODO: the schema update should be handled later(TD-17965)
if (buildSubmitReqFromDataBlock(&pReq, output, pTSchema, output->info.id.groupId, SMA_VID(pSma), suid) < 0) {
code = terrno ? terrno : TSDB_CODE_RSMA_RESULT;
TSDB_CHECK_CODE(code, lino, _exit);
}
if (pReq && tdProcessSubmitReq(sinkTsdb, output->info.version, pReq) < 0) {
code = terrno ? terrno : TSDB_CODE_RSMA_RESULT;
if (terrno == TSDB_CODE_TDB_TIMESTAMP_OUT_OF_RANGE) {
// TODO: reconfigure SSubmitReq2
} else {
if (terrno == 0) terrno = TSDB_CODE_RSMA_RESULT;
code = terrno;
}
tDestroySubmitReq(pReq, TSDB_MSG_FLG_ENCODE);
taosMemoryFree(pReq);
TSDB_CHECK_CODE(code, lino, _exit);
}
smaDebug("vgId:%d, process submit req for rsma suid:%" PRIu64 ",uid:%" PRIu64 ", level %" PRIi8 " ver %" PRIi64,
SMA_VID(pSma), suid, output->info.id.groupId, pItem->level, output->info.version);
if (STREAM_GET_ALL == execType) {
atomic_store_64(&pItem->fetchResultVer, output->info.version);
}
smaDebug("vgId:%d, process submit req for rsma suid:%" PRIu64 ",uid:%" PRIu64 ", level:%" PRIi8
", execType:%d, ver:%" PRIi64,
SMA_VID(pSma), suid, output->info.id.groupId, pItem->level, execType, output->info.version);
if (pReq) {
tDestroySubmitReq(pReq, TSDB_MSG_FLG_ENCODE);
@ -648,7 +776,6 @@ _exit:
} else {
smaDebug("vgId:%d, %s succeed, suid:%" PRIi64 ", level:%" PRIi8, SMA_VID(pSma), __func__, suid, pItem->level);
}
taosArrayDestroy(pResList);
qCleanExecTaskBlockBuf(taskInfo);
return code;
}
@ -667,7 +794,7 @@ _exit:
*/
static int32_t tdExecuteRSmaImplAsync(SSma *pSma, int64_t version, const void *pMsg, int32_t len, int32_t inputType,
SRSmaInfo *pInfo, tb_uid_t suid) {
int32_t size = sizeof(int32_t) + sizeof(int64_t) + len;
int32_t size = RSMA_EXEC_MSG_HLEN + len; // header + payload
void *qItem = taosAllocateQitem(size, DEF_QITEM, 0);
if (!qItem) {
@ -676,6 +803,8 @@ static int32_t tdExecuteRSmaImplAsync(SSma *pSma, int64_t version, const void *p
void *pItem = qItem;
*(int8_t *)pItem = (int8_t)inputType;
pItem = POINTER_SHIFT(pItem, sizeof(int8_t));
*(int32_t *)pItem = len;
pItem = POINTER_SHIFT(pItem, sizeof(int32_t));
*(int64_t *)pItem = version;
@ -694,7 +823,7 @@ static int32_t tdExecuteRSmaImplAsync(SSma *pSma, int64_t version, const void *p
}
// smoothing consume
int32_t n = nItems / RSMA_QTASKEXEC_SMOOTH_SIZE;
int32_t n = nItems / RSMA_EXEC_SMOOTH_SIZE;
if (n > 1) {
if (n > 10) {
n = 10;
@ -741,10 +870,11 @@ static int32_t tdRsmaPrintSubmitReq(SSma *pSma, SSubmitReq *pReq) {
* @param level
* @return int32_t
*/
static int32_t tdExecuteRSmaImpl(SSma *pSma, const void *pMsg, int32_t msgSize, int32_t inputType, SRSmaInfo *pInfo,
static int32_t tdExecuteRSmaImpl(SSma *pSma, const void *pMsg, int32_t msgSize, int64_t version, int32_t inputType, SRSmaInfo *pInfo,
ERsmaExecType type, int8_t level) {
int32_t idx = level - 1;
void *qTaskInfo = RSMA_INFO_QTASK(pInfo, idx);
SRSmaInfoItem *pItem = RSMA_INFO_ITEM(pInfo, idx);
if (!qTaskInfo) {
smaDebug("vgId:%d, no qTaskInfo to execute rsma %" PRIi8 " task for suid:%" PRIu64, SMA_VID(pSma), level,
@ -757,25 +887,19 @@ static int32_t tdExecuteRSmaImpl(SSma *pSma, const void *pMsg, int32_t msgSize,
return TSDB_CODE_FAILED;
}
smaDebug("vgId:%d, execute rsma %" PRIi8 " task for qTaskInfo:%p suid:%" PRIu64 " nMsg:%d", SMA_VID(pSma), level,
RSMA_INFO_QTASK(pInfo, idx), pInfo->suid, msgSize);
smaDebug("vgId:%d, execute rsma %" PRIi8 " task for qTaskInfo:%p, suid:%" PRIu64 ", nMsg:%d, submitReqVer:%" PRIi64 ", inputType:%d", SMA_VID(pSma), level,
RSMA_INFO_QTASK(pInfo, idx), pInfo->suid, msgSize, version, inputType);
#if 0
for (int32_t i = 0; i < msgSize; ++i) {
SSubmitReq *pReq = *(SSubmitReq **)((char *)pMsg + i * sizeof(void *));
smaDebug("vgId:%d, [%d][%d] version %" PRIi64, SMA_VID(pSma), msgSize, i, pReq->version);
tdRsmaPrintSubmitReq(pSma, pReq);
}
#endif
if ((terrno = qSetSMAInput(qTaskInfo, pMsg, msgSize, inputType)) < 0) {
smaError("vgId:%d, rsma %" PRIi8 " qSetStreamInput failed since %s", SMA_VID(pSma), level, tstrerror(terrno));
return TSDB_CODE_FAILED;
}
SRSmaInfoItem *pItem = RSMA_INFO_ITEM(pInfo, idx);
tdRSmaExecAndSubmitResult(pSma, qTaskInfo, pItem, pInfo->pTSchema, pInfo->suid);
atomic_store_64(&pItem->submitReqVer, version);
return TSDB_CODE_SUCCESS;
terrno = tdRSmaExecAndSubmitResult(pSma, qTaskInfo, pItem, pInfo, STREAM_NORMAL, NULL);
return terrno ? TSDB_CODE_FAILED : TDB_CODE_SUCCESS;
}
/**
@ -850,7 +974,7 @@ static int32_t tdExecuteRSmaAsync(SSma *pSma, int64_t version, const void *pMsg,
return TSDB_CODE_SUCCESS;
}
if (inputType == STREAM_INPUT__DATA_SUBMIT) {
if (inputType == STREAM_INPUT__DATA_SUBMIT || inputType == STREAM_INPUT__REF_DATA_BLOCK) {
if (tdExecuteRSmaImplAsync(pSma, version, pMsg, len, inputType, pRSmaInfo, suid) < 0) {
tdReleaseRSmaInfo(pSma, pRSmaInfo);
return TSDB_CODE_FAILED;
@ -877,23 +1001,23 @@ static int32_t tdExecuteRSmaAsync(SSma *pSma, int64_t version, const void *pMsg,
return TSDB_CODE_SUCCESS;
}
int32_t tdProcessRSmaSubmit(SSma *pSma, int64_t version, void *pReq, void *pMsg, int32_t len, int32_t inputType) {
SSmaEnv *pEnv = SMA_RSMA_ENV(pSma);
if (!pEnv) {
// only applicable when rsma env exists
return TSDB_CODE_SUCCESS;
int32_t tdProcessRSmaSubmit(SSma *pSma, int64_t version, void *pReq, void *pMsg, int32_t len) {
if (!SMA_RSMA_ENV(pSma)) return TSDB_CODE_SUCCESS;
if ((terrno = atomic_load_32(&SMA_RSMA_STAT(pSma)->execStat))) {
smaError("vgId:%d, failed to process rsma submit since invalid exec code: %s", SMA_VID(pSma), terrstr());
goto _err;
}
STbUidStore uidStore = {0};
if (inputType == STREAM_INPUT__DATA_SUBMIT) {
if (tdFetchSubmitReqSuids(pReq, &uidStore) < 0) {
smaError("vgId:%d, failed to process rsma submit fetch suid since: %s", SMA_VID(pSma), terrstr());
goto _err;
}
if (uidStore.suid != 0) {
if (tdExecuteRSmaAsync(pSma, version, pMsg, len, inputType, uidStore.suid) < 0) {
if (tdExecuteRSmaAsync(pSma, version, pMsg, len, STREAM_INPUT__DATA_SUBMIT, uidStore.suid) < 0) {
smaError("vgId:%d, failed to process rsma submit exec 1 since: %s", SMA_VID(pSma), terrstr());
goto _err;
}
@ -901,19 +1025,36 @@ int32_t tdProcessRSmaSubmit(SSma *pSma, int64_t version, void *pReq, void *pMsg,
void *pIter = NULL;
while ((pIter = taosHashIterate(uidStore.uidHash, pIter))) {
tb_uid_t *pTbSuid = (tb_uid_t *)taosHashGetKey(pIter, NULL);
if (tdExecuteRSmaAsync(pSma, version, pMsg, len, inputType, *pTbSuid) < 0) {
if (tdExecuteRSmaAsync(pSma, version, pMsg, len, STREAM_INPUT__DATA_SUBMIT, *pTbSuid) < 0) {
smaError("vgId:%d, failed to process rsma submit exec 2 since: %s", SMA_VID(pSma), terrstr());
taosHashCancelIterate(uidStore.uidHash, pIter);
goto _err;
}
}
}
}
tdUidStoreDestory(&uidStore);
return TSDB_CODE_SUCCESS;
_err:
tdUidStoreDestory(&uidStore);
return TSDB_CODE_FAILED;
return terrno;
}
int32_t tdProcessRSmaDelete(SSma *pSma, int64_t version, void *pReq, void *pMsg, int32_t len) {
if (!SMA_RSMA_ENV(pSma)) return TSDB_CODE_SUCCESS;
if ((terrno = atomic_load_32(&SMA_RSMA_STAT(pSma)->execStat))) {
smaError("vgId:%d, failed to process rsma delete since invalid exec code: %s", SMA_VID(pSma), terrstr());
goto _err;
}
SDeleteRes *pDelRes = pReq;
if (tdExecuteRSmaAsync(pSma, version, pMsg, len, STREAM_INPUT__REF_DATA_BLOCK, pDelRes->suid) < 0) {
smaError("vgId:%d, failed to process rsma submit exec 1 since: %s", SMA_VID(pSma), terrstr());
goto _err;
}
return TSDB_CODE_SUCCESS;
_err:
return terrno;
}
/**
@ -988,7 +1129,7 @@ static int32_t tdRSmaRestoreQTaskInfoInit(SSma *pSma, int64_t *nTables) {
code = terrno;
TSDB_CHECK_CODE(code, lino, _exit);
}
#if 0
// reload all ctbUids for suid
uidStore.suid = suid;
if (vnodeGetCtbIdList(pVnode, suid, uidStore.tbUids) < 0) {
@ -1002,7 +1143,7 @@ static int32_t tdRSmaRestoreQTaskInfoInit(SSma *pSma, int64_t *nTables) {
}
taosArrayClear(uidStore.tbUids);
#endif
smaDebug("vgId:%d, rsma restore env success for %" PRIi64, TD_VID(pVnode), suid);
}
}
@ -1050,39 +1191,141 @@ _err:
return code;
}
#if 0
int32_t tdRSmaPersistExecImpl(SRSmaStat *pRSmaStat, SHashObj *pInfoHash) {
int32_t code = 0;
int32_t lino = 0;
int32_t nTaskInfo = 0;
SSma *pSma = pRSmaStat->pSma;
SVnode *pVnode = pSma->pVnode;
SRSmaFS fs = {0};
if (taosHashGetSize(pInfoHash) <= 0) {
return TSDB_CODE_SUCCESS;
}
// stream state: trigger checkpoint
do {
void *infoHash = NULL;
while ((infoHash = taosHashIterate(pInfoHash, infoHash))) {
SRSmaInfo *pRSmaInfo = *(SRSmaInfo **)infoHash;
if (RSMA_INFO_IS_DEL(pRSmaInfo)) {
continue;
}
for (int32_t i = 0; i < TSDB_RETENTION_L2; ++i) {
if (pRSmaInfo->taskInfo[i]) {
code = qSetSMAInput(pRSmaInfo->taskInfo[i], pRSmaStat->blocks, 1, STREAM_INPUT__CHECKPOINT);
if (code) {
taosHashCancelIterate(pInfoHash, infoHash);
TSDB_CHECK_CODE(code, lino, _exit);
}
pRSmaInfo->items[i].streamFlushed = 0;
++nTaskInfo;
}
}
}
} while (0);
// stream state: wait checkpoint ready in async mode
do {
int32_t nStreamFlushed = 0;
int32_t nSleep = 0;
void *infoHash = NULL;
while (true) {
while ((infoHash = taosHashIterate(pInfoHash, infoHash))) {
SRSmaInfo *pRSmaInfo = *(SRSmaInfo **)infoHash;
if (RSMA_INFO_IS_DEL(pRSmaInfo)) {
continue;
}
for (int32_t i = 0; i < TSDB_RETENTION_L2; ++i) {
if (pRSmaInfo->taskInfo[i] && (0 == pRSmaInfo->items[i].streamFlushed)) {
int8_t streamFlushed = 0;
code = tdRSmaExecAndSubmitResult(pSma, pRSmaInfo->taskInfo[i], &pRSmaInfo->items[i], pRSmaInfo,
STREAM_CHECKPOINT, &streamFlushed);
if (code) {
taosHashCancelIterate(pInfoHash, infoHash);
TSDB_CHECK_CODE(code, lino, _exit);
}
if (streamFlushed) {
pRSmaInfo->items[i].streamFlushed = 1;
if (++nStreamFlushed >= nTaskInfo) {
smaInfo("vgId:%d, rsma commit, checkpoint ready, %d us consumed, received/total: %d/%d", TD_VID(pVnode),
nSleep * 10, nStreamFlushed, nTaskInfo);
taosHashCancelIterate(pInfoHash, infoHash);
goto _checkpoint;
}
}
}
}
}
taosUsleep(10);
++nSleep;
smaDebug("vgId:%d, rsma commit, wait for checkpoint ready, %d us elapsed, received/total: %d/%d", TD_VID(pVnode),
nSleep * 10, nStreamFlushed, nTaskInfo);
}
} while (0);
_checkpoint:
// stream state: build checkpoint in backend
do {
SStreamMeta *pMeta = NULL;
int64_t checkpointId = taosGetTimestampNs();
bool checkpointBuilt = false;
void *infoHash = NULL;
while ((infoHash = taosHashIterate(pInfoHash, infoHash))) {
SRSmaInfo *pRSmaInfo = *(SRSmaInfo **)infoHash;
if (RSMA_INFO_IS_DEL(pRSmaInfo)) {
continue;
}
for (int32_t i = 0; i < TSDB_RETENTION_L2; ++i) {
SRSmaInfoItem *pItem = RSMA_INFO_ITEM(pRSmaInfo, i);
if (pItem && pItem->pStreamState) {
if (streamStateCommit(pItem->pStreamState) < 0) {
code = TSDB_CODE_RSMA_STREAM_STATE_COMMIT;
if (pItem && pItem->pStreamTask) {
SStreamTask *pTask = pItem->pStreamTask;
atomic_store_32(&pTask->pMeta->chkptNotReadyTasks, 1);
pTask->checkpointingId = checkpointId;
pTask->chkInfo.checkpointId = pTask->checkpointingId;
pTask->chkInfo.checkpointVer = pItem->submitReqVer;
pTask->info.triggerParam = pItem->fetchResultVer;
if (!checkpointBuilt) {
// the stream states share one checkpoint
code = streamTaskBuildCheckpoint(pTask);
if (code) {
taosHashCancelIterate(pInfoHash, infoHash);
TSDB_CHECK_CODE(code, lino, _exit);
}
smaDebug("vgId:%d, rsma persist, stream state commit success, table %" PRIi64 ", level %d", TD_VID(pVnode),
pRSmaInfo->suid, i + 1);
}
}
pMeta = pTask->pMeta;
checkpointBuilt = true;
}
taosWLockLatch(&pMeta->lock);
if (streamMetaSaveTask(pMeta, pTask)) {
taosWUnLockLatch(&pMeta->lock);
code = terrno ? terrno : TSDB_CODE_OUT_OF_MEMORY;
taosHashCancelIterate(pInfoHash, infoHash);
TSDB_CHECK_CODE(code, lino, _exit);
}
taosWUnLockLatch(&pMeta->lock);
smaDebug("vgId:%d, rsma commit, succeed to commit task:%p, submitReqVer:%" PRIi64 ", fetchResultVer:%" PRIi64
", table:%" PRIi64 ", level:%d",
TD_VID(pVnode), pTask, pItem->submitReqVer, pItem->fetchResultVer, pRSmaInfo->suid, i + 1);
}
}
}
if (pMeta) {
taosWLockLatch(&pMeta->lock);
if (streamMetaCommit(pMeta)) {
taosWUnLockLatch(&pMeta->lock);
code = terrno ? terrno : TSDB_CODE_OUT_OF_MEMORY;
TSDB_CHECK_CODE(code, lino, _exit);
}
taosWUnLockLatch(&pMeta->lock);
}
if (checkpointBuilt) {
smaInfo("vgId:%d, rsma commit, succeed to commit checkpoint:%" PRIi64, TD_VID(pVnode), checkpointId);
}
} while (0);
_exit:
if (code) {
smaError("vgId:%d, %s failed at line %d since %s", TD_VID(pVnode), __func__, lino, tstrerror(code));
@ -1091,7 +1334,7 @@ _exit:
terrno = code;
return code;
}
#endif
/**
* @brief trigger to get rsma result in async mode
*
@ -1165,14 +1408,8 @@ static void tdRSmaFetchTrigger(void *param, void *tmrId) {
smaDebug("vgId:%d, rsma fetch task planned for level:%" PRIi8 " suid:%" PRIi64 " since stat is active",
SMA_VID(pSma), pItem->level, pRSmaInfo->suid);
// async process
pItem->fetchLevel = pItem->level;
#if 0
// debugging codes
SRSmaInfo *qInfo = tdAcquireRSmaInfoBySuid(pSma, pRSmaInfo->suid);
SRSmaInfoItem *qItem = RSMA_INFO_ITEM(qInfo, pItem->level - 1);
make sure(qItem->level == pItem->level);
make sure(qItem->fetchLevel == pItem->fetchLevel);
#endif
atomic_store_8(&pItem->fetchLevel, 1);
if (atomic_load_8(&pRSmaInfo->assigned) == 0) {
tsem_post(&(pStat->notEmpty));
}
@ -1198,10 +1435,20 @@ _end:
tdReleaseSmaRef(smaMgmt.rsetId, pRSmaRef->refId);
}
static void tdFreeRSmaSubmitItems(SArray *pItems) {
for (int32_t i = 0; i < taosArrayGetSize(pItems); ++i) {
SPackedData *packData = taosArrayGet(pItems, i);
taosFreeQitem(POINTER_SHIFT(packData->msgStr, -sizeof(int32_t) - sizeof(int64_t)));
static void tdFreeRSmaSubmitItems(SArray *pItems, int32_t type) {
int32_t arrSize = taosArrayGetSize(pItems);
if (type == STREAM_INPUT__MERGED_SUBMIT) {
for (int32_t i = 0; i < arrSize; ++i) {
SPackedData *packData = TARRAY_GET_ELEM(pItems, i);
taosFreeQitem(POINTER_SHIFT(packData->msgStr, -RSMA_EXEC_MSG_HLEN));
}
} else if (type == STREAM_INPUT__REF_DATA_BLOCK) {
for (int32_t i = 0; i < arrSize; ++i) {
SPackedData *packData = TARRAY_GET_ELEM(pItems, i);
blockDataDestroy(packData->pDataBlock);
}
} else {
ASSERTS(0, "unknown type:%d", type);
}
taosArrayClear(pItems);
}
@ -1217,15 +1464,15 @@ static int32_t tdRSmaFetchAllResult(SSma *pSma, SRSmaInfo *pInfo) {
SSDataBlock dataBlock = {.info.type = STREAM_GET_ALL};
for (int8_t i = 1; i <= TSDB_RETENTION_L2; ++i) {
SRSmaInfoItem *pItem = RSMA_INFO_ITEM(pInfo, i - 1);
if (pItem->fetchLevel) {
pItem->fetchLevel = 0;
if (1 == atomic_val_compare_exchange_8(&pItem->fetchLevel, 1, 0)) {
qTaskInfo_t taskInfo = RSMA_INFO_QTASK(pInfo, i - 1);
if (!taskInfo) {
continue;
}
if ((++pItem->nScanned * pItem->maxDelay) > RSMA_FETCH_DELAY_MAX) {
smaDebug("vgId:%d, suid:%" PRIi64 " level:%" PRIi8 " nScanned:%" PRIi16 " maxDelay:%d, fetch executed",
smaDebug("vgId:%d, suid:%" PRIi64 " level:%" PRIi8 " nScanned:%" PRIi32 " maxDelay:%d, fetch executed",
SMA_VID(pSma), pInfo->suid, i, pItem->nScanned, pItem->maxDelay);
} else {
int64_t curMs = taosGetTimestampMs();
@ -1245,14 +1492,15 @@ static int32_t tdRSmaFetchAllResult(SSma *pSma, SRSmaInfo *pInfo) {
if ((terrno = qSetSMAInput(taskInfo, &dataBlock, 1, STREAM_INPUT__DATA_BLOCK)) < 0) {
goto _err;
}
if (tdRSmaExecAndSubmitResult(pSma, taskInfo, pItem, pInfo->pTSchema, pInfo->suid) < 0) {
if (tdRSmaExecAndSubmitResult(pSma, taskInfo, pItem, pInfo, STREAM_GET_ALL, NULL) < 0) {
atomic_store_32(&SMA_RSMA_STAT(pSma)->execStat, terrno);
goto _err;
}
smaDebug("vgId:%d, suid:%" PRIi64 " level:%" PRIi8 " nScanned:%" PRIi16 " maxDelay:%d, fetch finished",
smaDebug("vgId:%d, suid:%" PRIi64 " level:%" PRIi8 " nScanned:%" PRIi32 " maxDelay:%d, fetch finished",
SMA_VID(pSma), pInfo->suid, i, pItem->nScanned, pItem->maxDelay);
} else {
smaDebug("vgId:%d, suid:%" PRIi64 " level:%" PRIi8 " nScanned:%" PRIi16
smaDebug("vgId:%d, suid:%" PRIi64 " level:%" PRIi8 " nScanned:%" PRIi32
" maxDelay:%d, fetch not executed as fetch level is %" PRIi8,
SMA_VID(pSma), pInfo->suid, i, pItem->nScanned, pItem->maxDelay, pItem->fetchLevel);
}
@ -1265,38 +1513,98 @@ _err:
}
static int32_t tdRSmaBatchExec(SSma *pSma, SRSmaInfo *pInfo, STaosQall *qall, SArray *pSubmitArr, ERsmaExecType type) {
taosArrayClear(pSubmitArr);
while (1) {
void *msg = NULL;
int8_t resume = 0;
int32_t nSubmit = 0;
int32_t nDelete = 0;
int64_t version = 0;
SPackedData packData;
taosArrayClear(pSubmitArr);
// the submitReq/deleteReq msg may exsit alternately in the msg queue, consume them sequentially in batch mode
while (1) {
taosGetQitem(qall, (void **)&msg);
if (msg) {
SPackedData packData = {.msgLen = *(int32_t *)msg,
.ver = *(int64_t *)POINTER_SHIFT(msg, sizeof(int32_t)),
.msgStr = POINTER_SHIFT(msg, sizeof(int32_t) + sizeof(int64_t))};
int8_t inputType = RSMA_EXEC_MSG_TYPE(msg);
if (inputType == STREAM_INPUT__DATA_SUBMIT) {
if (nDelete > 0) {
resume = 1;
break;
}
_resume_submit:
packData.msgLen = RSMA_EXEC_MSG_LEN(msg);
packData.ver = RSMA_EXEC_MSG_VER(msg);
packData.msgStr = RSMA_EXEC_MSG_BODY(msg);
version = packData.ver;
if (!taosArrayPush(pSubmitArr, &packData)) {
tdFreeRSmaSubmitItems(pSubmitArr);
taosFreeQitem(msg);
terrno = TSDB_CODE_OUT_OF_MEMORY;
goto _err;
}
++nSubmit;
} else if (inputType == STREAM_INPUT__REF_DATA_BLOCK) {
if (nSubmit > 0) {
resume = 2;
break;
}
_resume_delete:
version = RSMA_EXEC_MSG_VER(msg);
if ((terrno = extractDelDataBlock(RSMA_EXEC_MSG_BODY(msg), RSMA_EXEC_MSG_LEN(msg), version,
&packData.pDataBlock, 1))) {
taosFreeQitem(msg);
goto _err;
}
if (packData.pDataBlock && !taosArrayPush(pSubmitArr, &packData)) {
taosFreeQitem(msg);
terrno = TSDB_CODE_OUT_OF_MEMORY;
goto _err;
}
taosFreeQitem(msg);
if (packData.pDataBlock) {
// packData.pDataBlock is NULL if delete affects 0 row
++nDelete;
}
} else {
ASSERTS(0, "unknown msg type:%d", inputType);
break;
}
}
int32_t size = taosArrayGetSize(pSubmitArr);
if (size > 0) {
if (nSubmit > 0 || nDelete > 0) {
int32_t size = TARRAY_SIZE(pSubmitArr);
ASSERTS(size > 0, "size is %d", size);
int32_t inputType = nSubmit > 0 ? STREAM_INPUT__MERGED_SUBMIT : STREAM_INPUT__REF_DATA_BLOCK;
for (int32_t i = 1; i <= TSDB_RETENTION_L2; ++i) {
if (tdExecuteRSmaImpl(pSma, pSubmitArr->pData, size, STREAM_INPUT__MERGED_SUBMIT, pInfo, type, i) < 0) {
if (tdExecuteRSmaImpl(pSma, pSubmitArr->pData, size, version, inputType, pInfo, type, i) < 0) {
goto _err;
}
}
tdFreeRSmaSubmitItems(pSubmitArr);
tdFreeRSmaSubmitItems(pSubmitArr, inputType);
nSubmit = 0;
nDelete = 0;
} else {
goto _rtn;
}
if (resume == 1) {
resume = 0;
goto _resume_submit;
} else if (resume == 2) {
resume = 0;
goto _resume_delete;
}
}
_rtn:
return TSDB_CODE_SUCCESS;
_err:
atomic_store_32(&SMA_RSMA_STAT(pSma)->execStat, terrno);
smaError("vgId:%d, batch exec for suid:%" PRIi64 " execType:%d size:%d failed since %s", SMA_VID(pSma), pInfo->suid,
type, (int32_t)taosArrayGetSize(pSubmitArr), terrstr());
tdFreeRSmaSubmitItems(pSubmitArr);
tdFreeRSmaSubmitItems(pSubmitArr, nSubmit ? STREAM_INPUT__MERGED_SUBMIT : STREAM_INPUT__REF_DATA_BLOCK);
while (1) {
void *msg = NULL;
taosGetQitem(qall, (void **)&msg);
@ -1333,7 +1641,7 @@ int32_t tdRSmaProcessExecImpl(SSma *pSma, ERsmaExecType type) {
}
if (!(pSubmitArr =
taosArrayInit(TMIN(RSMA_SUBMIT_BATCH_SIZE, atomic_load_64(&pRSmaStat->nBufItems)), sizeof(SPackedData)))) {
taosArrayInit(TMIN(RSMA_EXEC_BATCH_SIZE, atomic_load_64(&pRSmaStat->nBufItems)), sizeof(SPackedData)))) {
code = TSDB_CODE_OUT_OF_MEMORY;
TSDB_CHECK_CODE(code, lino, _exit);
}
@ -1369,15 +1677,11 @@ int32_t tdRSmaProcessExecImpl(SSma *pSma, ERsmaExecType type) {
if (ASSERTS(oldVal >= 0, "oldVal of nFetchAll: %d < 0", oldVal)) {
code = TSDB_CODE_APP_ERROR;
taosHashCancelIterate(infoHash, pIter);
TSDB_CHECK_CODE(code, lino, _exit);
}
int8_t curStat = atomic_load_8(RSMA_COMMIT_STAT(pRSmaStat));
if (curStat == 1) {
smaDebug("vgId:%d, fetch all not exec as commit stat is %" PRIi8, SMA_VID(pSma), curStat);
} else {
tdRSmaFetchAllResult(pSma, pInfo);
}
if (0 == atomic_sub_fetch_32(&pRSmaStat->nFetchAll, 1)) {
atomic_store_8(RSMA_COMMIT_STAT(pRSmaStat), 0);

View File

@ -188,7 +188,8 @@ int32_t smaBlockToSubmit(SVnode *pVnode, const SArray *pBlocks, const STSchema *
if (pDataBlock->info.type == STREAM_DELETE_RESULT) {
pDeleteReq->suid = suid;
pDeleteReq->deleteReqs = taosArrayInit(0, sizeof(SSingleDeleteReq));
tqBuildDeleteReq(stbFullName, pDataBlock, pDeleteReq, "");
code = tqBuildDeleteReq(pVnode->pTq, stbFullName, pDataBlock, pDeleteReq, "");
TSDB_CHECK_CODE(code, lino, _exit);
continue;
}

View File

@ -1668,7 +1668,7 @@ int32_t tqProcessTaskCheckPointSourceReq(STQ* pTq, SRpcMsg* pMsg, SRpcMsg* pRsp)
SStreamCheckpointSourceReq req = {0};
if (!vnodeIsRoleLeader(pTq->pVnode)) {
tqDebug("vgId:%d not leader, ignore checkpoint-source msg", vgId);
tqDebug("vgId:%d not leader, ignore checkpoint-source msg, s-task:0x%x", vgId, req.taskId);
SRpcMsg rsp = {0};
buildCheckpointSourceRsp(&req, &pMsg->info, &rsp, 0);
tmsgSendRsp(&rsp); // error occurs
@ -1676,7 +1676,7 @@ int32_t tqProcessTaskCheckPointSourceReq(STQ* pTq, SRpcMsg* pMsg, SRpcMsg* pRsp)
}
if (!pTq->pVnode->restored) {
tqDebug("vgId:%d checkpoint-source msg received during restoring, ignore it", vgId);
tqDebug("vgId:%d checkpoint-source msg received during restoring, s-task:0x%x ignore it", vgId, req.taskId);
SRpcMsg rsp = {0};
buildCheckpointSourceRsp(&req, &pMsg->info, &rsp, 0);
tmsgSendRsp(&rsp); // error occurs
@ -1696,7 +1696,6 @@ int32_t tqProcessTaskCheckPointSourceReq(STQ* pTq, SRpcMsg* pMsg, SRpcMsg* pRsp)
}
tDecoderClear(&decoder);
// todo handle failure to reset from checkpoint procedure
SStreamTask* pTask = streamMetaAcquireTask(pMeta, req.streamId, req.taskId);
if (pTask == NULL) {
tqError("vgId:%d failed to find s-task:0x%x, ignore checkpoint msg. it may have been destroyed already", vgId,
@ -1707,7 +1706,6 @@ int32_t tqProcessTaskCheckPointSourceReq(STQ* pTq, SRpcMsg* pMsg, SRpcMsg* pRsp)
return TSDB_CODE_SUCCESS;
}
// todo handle failure to reset from checkpoint procedure
// downstream not ready, current the stream tasks are not all ready. Ignore this checkpoint req.
if (pTask->status.downstreamReady != 1) {
pTask->chkInfo.failedId = req.checkpointId; // record the latest failed checkpoint id
@ -1728,17 +1726,32 @@ int32_t tqProcessTaskCheckPointSourceReq(STQ* pTq, SRpcMsg* pMsg, SRpcMsg* pRsp)
ETaskStatus status = streamTaskGetStatus(pTask, NULL);
if (status == TASK_STATUS__HALT || status == TASK_STATUS__PAUSE) {
qError("s-task:%s not ready for checkpoint, since it is halt, ignore this checkpoint:%" PRId64 ", set it failure",
tqError("s-task:%s not ready for checkpoint, since it is halt, ignore this checkpoint:%" PRId64 ", set it failure",
pTask->id.idStr, req.checkpointId);
taosThreadMutexUnlock(&pTask->lock);
taosThreadMutexUnlock(&pTask->lock);
streamMetaReleaseTask(pMeta, pTask);
SRpcMsg rsp = {0};
buildCheckpointSourceRsp(&req, &pMsg->info, &rsp, 0);
tmsgSendRsp(&rsp); // error occurs
return TSDB_CODE_SUCCESS;
}
// check if the checkpoint msg already sent or not.
if (status == TASK_STATUS__CK) {
ASSERT(pTask->checkpointingId == req.checkpointId);
tqWarn("s-task:%s recv checkpoint-source msg again checkpointId:%" PRId64
" already received, ignore this msg and continue process checkpoint",
pTask->id.idStr, pTask->checkpointingId);
taosThreadMutexUnlock(&pTask->lock);
streamMetaReleaseTask(pMeta, pTask);
return TSDB_CODE_SUCCESS;
}
streamProcessCheckpointSourceReq(pTask, &req);
taosThreadMutexUnlock(&pTask->lock);
@ -1924,8 +1937,59 @@ int32_t tqProcessTaskUpdateReq(STQ* pTq, SRpcMsg* pMsg) {
pMeta->startInfo.tasksWillRestart = 0;
streamMetaWUnLock(pMeta);
} else {
streamMetaWUnLock(pMeta);
tqDebug("vgId:%d all %d task(s) nodeEp updated and closed", vgId, numOfTasks);
#if 1
tqStartStreamTaskAsync(pTq, true);
streamMetaWUnLock(pMeta);
#else
streamMetaWUnLock(pMeta);
// For debug purpose.
// the following procedure consume many CPU resource, result in the re-election of leader
// with high probability. So we employ it as a test case for the stream processing framework, with
// checkpoint/restart/nodeUpdate etc.
while(1) {
int32_t startVal = atomic_val_compare_exchange_32(&pMeta->startInfo.taskStarting, 0, 1);
if (startVal == 0) {
break;
}
tqDebug("vgId:%d in start stream tasks procedure, wait for 500ms and recheck", vgId);
taosMsleep(500);
}
while (streamMetaTaskInTimer(pMeta)) {
tqDebug("vgId:%d some tasks in timer, wait for 100ms and recheck", pMeta->vgId);
taosMsleep(100);
}
streamMetaWLock(pMeta);
int32_t code = streamMetaReopen(pMeta);
if (code != 0) {
tqError("vgId:%d failed to reopen stream meta", vgId);
streamMetaWUnLock(pMeta);
taosArrayDestroy(req.pNodeList);
return -1;
}
if (streamMetaLoadAllTasks(pTq->pStreamMeta) < 0) {
tqError("vgId:%d failed to load stream tasks", vgId);
streamMetaWUnLock(pMeta);
taosArrayDestroy(req.pNodeList);
return -1;
}
if (vnodeIsRoleLeader(pTq->pVnode) && !tsDisableStream) {
tqInfo("vgId:%d start all stream tasks after all being updated", vgId);
tqResetStreamTaskStatus(pTq);
tqStartStreamTaskAsync(pTq, false);
} else {
tqInfo("vgId:%d, follower node not start stream tasks", vgId);
}
streamMetaWUnLock(pMeta);
#endif
}
}

View File

@ -343,7 +343,7 @@ int32_t extractMsgFromWal(SWalReader* pReader, void** pItem, int64_t maxVer, con
void* pBody = POINTER_SHIFT(pCont->body, sizeof(SMsgHead));
int32_t len = pCont->bodyLen - sizeof(SMsgHead);
code = extractDelDataBlock(pBody, len, ver, (SStreamRefDataBlock**)pItem);
code = extractDelDataBlock(pBody, len, ver, (void**)pItem, 0);
if (code == TSDB_CODE_SUCCESS) {
if (*pItem == NULL) {
tqDebug("s-task:%s empty delete msg, discard it, len:%d, ver:%" PRId64, id, len, ver);

View File

@ -43,7 +43,7 @@ static SArray* createDefaultTagColName();
static void setCreateTableMsgTableName(SVCreateTbReq* pCreateTableReq, SSDataBlock* pDataBlock, const char* stbFullName,
int64_t gid);
int32_t tqBuildDeleteReq(const char* stbFullName, const SSDataBlock* pDataBlock, SBatchDeleteReq* deleteReq,
int32_t tqBuildDeleteReq(STQ* pTq, const char* stbFullName, const SSDataBlock* pDataBlock, SBatchDeleteReq* deleteReq,
const char* pIdStr) {
int32_t totalRows = pDataBlock->info.rows;
SColumnInfoData* pStartTsCol = taosArrayGet(pDataBlock->pDataBlock, START_TS_COLUMN_INDEX);
@ -58,7 +58,8 @@ int32_t tqBuildDeleteReq(const char* stbFullName, const SSDataBlock* pDataBlock,
int64_t ekey = *(int64_t*)colDataGetData(pEndTsCol, row);
int64_t groupId = *(int64_t*)colDataGetData(pGidCol, row);
char* name;
char* name = NULL;
char* originName = NULL;
void* varTbName = NULL;
if (!colDataIsNull(pTbNameCol, totalRows, row, NULL)) {
varTbName = colDataGetVarData(pTbNameCol, row);
@ -67,19 +68,30 @@ int32_t tqBuildDeleteReq(const char* stbFullName, const SSDataBlock* pDataBlock,
if (varTbName != NULL && varTbName != (void*)-1) {
name = taosMemoryCalloc(1, TSDB_TABLE_NAME_LEN);
memcpy(name, varDataVal(varTbName), varDataLen(varTbName));
} else {
} else if (stbFullName) {
name = buildCtbNameByGroupId(stbFullName, groupId);
} else {
originName = taosMemoryCalloc(1, TSDB_TABLE_NAME_LEN + VARSTR_HEADER_SIZE);
if (metaGetTableNameByUid(pTq->pVnode, groupId, originName) == 0) {
name = varDataVal(originName);
}
}
tqDebug("s-task:%s build delete msg groupId:%" PRId64 ", name:%s, skey:%" PRId64 " ekey:%" PRId64,
pIdStr, groupId, name, skey, ekey);
if (!name || *name == '\0') {
tqWarn("s-task:%s failed to build delete msg groupId:%" PRId64 ", skey:%" PRId64 " ekey:%" PRId64
" since invalid tbname:%s",
pIdStr, groupId, skey, ekey, name ? name : "NULL");
} else {
tqDebug("s-task:%s build delete msg groupId:%" PRId64 ", name:%s, skey:%" PRId64 " ekey:%" PRId64, pIdStr,
groupId, name, skey, ekey);
SSingleDeleteReq req = {.startTs = skey, .endTs = ekey};
strncpy(req.tbname, name, TSDB_TABLE_NAME_LEN - 1);
taosMemoryFree(name);
taosArrayPush(deleteReq->deleteReqs, &req);
}
if (originName) name = originName;
taosMemoryFreeClear(name);
}
return 0;
}
@ -345,7 +357,7 @@ int32_t doBuildAndSendDeleteMsg(SVnode* pVnode, char* stbFullName, SSDataBlock*
int64_t suid) {
SBatchDeleteReq deleteReq = {.suid = suid, .deleteReqs = taosArrayInit(0, sizeof(SSingleDeleteReq))};
int32_t code = tqBuildDeleteReq(stbFullName, pDataBlock, &deleteReq, pTask->id.idStr);
int32_t code = tqBuildDeleteReq(pVnode->pTq, stbFullName, pDataBlock, &deleteReq, pTask->id.idStr);
if (code != TSDB_CODE_SUCCESS) {
return code;
}

View File

@ -299,7 +299,7 @@ int32_t tqResetStreamTaskStatus(STQ* pTq) {
int32_t vgId = TD_VID(pTq->pVnode);
int32_t numOfTasks = taosArrayGetSize(pMeta->pTaskList);
tqDebug("vgId:%d start all %d stream task(s)", vgId, numOfTasks);
tqDebug("vgId:%d reset all %d stream task(s) status to be uninit", vgId, numOfTasks);
if (numOfTasks == 0) {
return TSDB_CODE_SUCCESS;
}
@ -448,7 +448,7 @@ bool doPutDataIntoInputQFromWal(SStreamTask* pTask, int64_t maxVer, int32_t* num
numOfNewItems += 1;
int64_t ver = walReaderGetCurrentVer(pTask->exec.pWalReader);
pTask->chkInfo.nextProcessVer = ver;
tqDebug("s-task:%s set the ver:%" PRId64 " from WALReader after extract block from WAL", id, ver);
tqDebug("s-task:%s set ver:%" PRId64 " for reader after extract data from WAL", id, ver);
bool itemInFillhistory = handleFillhistoryScanComplete(pTask, ver);
if (itemInFillhistory) {

View File

@ -399,7 +399,7 @@ int32_t tqDoSendDataRsp(const SRpcHandleInfo* pRpcHandleInfo, const SMqDataRsp*
return 0;
}
int32_t extractDelDataBlock(const void* pData, int32_t len, int64_t ver, SStreamRefDataBlock** pRefBlock) {
int32_t extractDelDataBlock(const void* pData, int32_t len, int64_t ver, void** pRefBlock, int32_t type) {
SDecoder* pCoder = &(SDecoder){0};
SDeleteRes* pRes = &(SDeleteRes){0};
@ -442,6 +442,7 @@ int32_t extractDelDataBlock(const void* pData, int32_t len, int64_t ver, SStream
}
taosArrayDestroy(pRes->uidList);
if (type == 0) {
*pRefBlock = taosAllocateQitem(sizeof(SStreamRefDataBlock), DEF_QITEM, 0);
if (*pRefBlock == NULL) {
blockDataCleanup(pDelBlock);
@ -449,7 +450,13 @@ int32_t extractDelDataBlock(const void* pData, int32_t len, int64_t ver, SStream
return TSDB_CODE_OUT_OF_MEMORY;
}
(*pRefBlock)->type = STREAM_INPUT__REF_DATA_BLOCK;
(*pRefBlock)->pBlock = pDelBlock;
((SStreamRefDataBlock*)(*pRefBlock))->type = STREAM_INPUT__REF_DATA_BLOCK;
((SStreamRefDataBlock*)(*pRefBlock))->pBlock = pDelBlock;
} else if (type == 1) {
*pRefBlock = pDelBlock;
} else {
ASSERTS(0, "unknown type:%d", type);
}
return TSDB_CODE_SUCCESS;
}

View File

@ -3099,7 +3099,7 @@ static int32_t tsdbCacheLoadBlockS3(STsdbFD *pFD, uint8_t **ppBlock) {
}
*/
int64_t block_offset = (pFD->blkno - 1) * tsS3BlockSize * pFD->szPage;
code = s3GetObjectBlock(pFD->objName, block_offset, tsS3BlockSize * pFD->szPage, ppBlock);
code = s3GetObjectBlock(pFD->objName, block_offset, tsS3BlockSize * pFD->szPage, 0, ppBlock);
if (code != TSDB_CODE_SUCCESS) {
// taosMemoryFree(pBlock);
// code = TSDB_CODE_OUT_OF_MEMORY;

View File

@ -28,13 +28,18 @@ static int32_t saveOneRow(SArray* pRow, SSDataBlock* pBlock, SCacheRowsReader* p
// bool allNullRow = true;
if (HASTYPE(pReader->type, CACHESCAN_RETRIEVE_LAST)) {
uint64_t ts = 0;
SFirstLastRes* p;
col_id_t colId;
for (int32_t i = 0; i < pReader->numOfCols; ++i) {
SColumnInfoData* pColInfoData = taosArrayGet(pBlock->pDataBlock, dstSlotIds[i]);
SFirstLastRes* p = (SFirstLastRes*)varDataVal(pRes[i]);
int32_t slotId = slotIds[i];
SLastCol* pColVal = (SLastCol*)taosArrayGet(pRow, i);
colId = pColVal->colVal.cid;
p = (SFirstLastRes*)varDataVal(pRes[i]);
p->ts = pColVal->ts;
ts = p->ts;
p->isNull = !COL_VAL_IS_VALUE(&pColVal->colVal);
// allNullRow = p->isNull & allNullRow;
@ -55,6 +60,19 @@ static int32_t saveOneRow(SArray* pRow, SSDataBlock* pBlock, SCacheRowsReader* p
varDataSetLen(pRes[i], pColInfoData->info.bytes - VARSTR_HEADER_SIZE);
colDataSetVal(pColInfoData, numOfRows, (const char*)pRes[i], false);
}
for (int32_t idx = 0; idx < taosArrayGetSize(pBlock->pDataBlock); ++idx) {
SColumnInfoData* pCol = taosArrayGet(pBlock->pDataBlock, idx);
if (pCol->info.colId == PRIMARYKEY_TIMESTAMP_COL_ID && pCol->info.type == TSDB_DATA_TYPE_TIMESTAMP) {
colDataSetVal(pCol, numOfRows, (const char*)&ts, false);
continue;
} else if (pReader->numOfCols == 1 && idx != dstSlotIds[0] && pCol->info.colId == colId) {
if (!p->isNull) {
colDataSetVal(pCol, numOfRows, p->buf, false);
} else {
colDataSetNULL(pCol, numOfRows);
}
}
}
// pBlock->info.rows += allNullRow ? 0 : 1;
++pBlock->info.rows;

View File

@ -192,7 +192,7 @@ static int32_t tsdbCommitTombData(SCommitter2 *committer) {
committer->ctx->tbid->uid = record->uid;
if (metaGetInfo(committer->tsdb->pVnode->pMeta, record->uid, &info, NULL) != 0) {
code = tsdbIterMergerSkipTableData(committer->dataIterMerger, committer->ctx->tbid);
code = tsdbIterMergerSkipTableData(committer->tombIterMerger, committer->ctx->tbid);
TSDB_CHECK_CODE(code, lino, _exit);
continue;
}

View File

@ -22,7 +22,7 @@
extern int vnodeScheduleTask(int (*execute)(void *), void *arg);
extern int vnodeScheduleTaskEx(int tpid, int (*execute)(void *), void *arg);
extern void remove_file(const char *fname);
extern void remove_file(const char *fname, bool last_level);
#define TSDB_FS_EDIT_MIN TSDB_FEDIT_COMMIT
#define TSDB_FS_EDIT_MAX (TSDB_FEDIT_MERGE + 1)
@ -532,7 +532,8 @@ static int32_t tsdbFSDoSanAndFix(STFileSystem *fs) {
if (taosIsDir(file->aname)) continue;
if (tsdbFSGetFileObjHashEntry(&fobjHash, file->aname) == NULL) {
remove_file(file->aname);
int32_t nlevel = tfsGetLevel(fs->tsdb->pVnode->pTfs);
remove_file(file->aname, nlevel > 1 && file->did.level == nlevel - 1);
}
}

View File

@ -14,6 +14,7 @@
*/
#include "tsdbFile2.h"
#include "cos.h"
// to_json
static int32_t head_to_json(const STFile *file, cJSON *json);
@ -41,10 +42,20 @@ static const struct {
[TSDB_FTYPE_STT] = {"stt", stt_to_json, stt_from_json},
};
void remove_file(const char *fname) {
void remove_file(const char *fname, bool last_level) {
int32_t code = taosRemoveFile(fname);
if (code) {
if (tsS3Enabled && last_level) {
const char *object_name = taosDirEntryBaseName((char *)fname);
long s3_size = tsS3Enabled ? s3Size(object_name) : 0;
if (!strncmp(fname + strlen(fname) - 5, ".data", 5) && s3_size > 0) {
s3DeleteObjects(&object_name, 1);
} else {
tsdbError("file:%s remove failed", fname);
}
} else {
tsdbError("file:%s remove failed", fname);
}
} else {
tsdbInfo("file:%s is removed", fname);
}
@ -224,6 +235,7 @@ int32_t tsdbTFileObjInit(STsdb *pTsdb, const STFile *f, STFileObj **fobj) {
fobj[0]->state = TSDB_FSTATE_LIVE;
fobj[0]->ref = 1;
tsdbTFileName(pTsdb, f, fobj[0]->fname);
fobj[0]->nlevel = tfsGetLevel(pTsdb->pVnode->pTfs);
return 0;
}
@ -245,7 +257,7 @@ int32_t tsdbTFileObjUnref(STFileObj *fobj) {
tsdbTrace("unref file %s, fobj:%p ref %d", fobj->fname, fobj, nRef);
if (nRef == 0) {
if (fobj->state == TSDB_FSTATE_DEAD) {
remove_file(fobj->fname);
remove_file(fobj->fname, fobj->nlevel > 1 && fobj->f->did.level == fobj->nlevel - 1);
}
taosMemoryFree(fobj);
}
@ -261,7 +273,7 @@ int32_t tsdbTFileObjRemove(STFileObj *fobj) {
taosThreadMutexUnlock(&fobj->mutex);
tsdbTrace("remove unref file %s, fobj:%p ref %d", fobj->fname, fobj, nRef);
if (nRef == 0) {
remove_file(fobj->fname);
remove_file(fobj->fname, fobj->nlevel > 1 && fobj->f->did.level == fobj->nlevel - 1);
taosMemoryFree(fobj);
}
return 0;

View File

@ -76,6 +76,7 @@ struct STFileObj {
STFile f[1];
int32_t state;
int32_t ref;
int32_t nlevel;
char fname[TSDB_FILENAME_LEN];
};

View File

@ -89,6 +89,8 @@ int tsdbOpen(SVnode *pVnode, STsdb **ppTsdb, const char *dir, STsdbKeepCfg *pKee
return 0;
_err:
tsdbCloseFS(&pTsdb->pFS);
taosThreadMutexDestroy(&pTsdb->mutex);
taosMemoryFree(pTsdb);
return -1;
}

View File

@ -49,7 +49,7 @@ static int32_t mergeRowsInFileBlocks(SBlockData* pBlockData, STableBlockScanInfo
STsdbReader* pReader);
static int32_t initDelSkylineIterator(STableBlockScanInfo* pBlockScanInfo, int32_t order, SCostSummary* pCost);
static STsdb* getTsdbByRetentions(SVnode* pVnode, TSKEY winSKey, SRetention* retentions, const char* idstr,
static STsdb* getTsdbByRetentions(SVnode* pVnode, SQueryTableDataCond* pCond, SRetention* retentions, const char* idstr,
int8_t* pLevel);
static SVersionRange getQueryVerRange(SVnode* pVnode, SQueryTableDataCond* pCond, int8_t level);
static bool hasDataInLastBlock(SLastBlockReader* pLastBlockReader);
@ -384,7 +384,7 @@ static int32_t tsdbReaderCreate(SVnode* pVnode, SQueryTableDataCond* pCond, void
initReaderStatus(&pReader->status);
pReader->pTsdb = getTsdbByRetentions(pVnode, pCond->twindows.skey, pVnode->config.tsdbCfg.retentions, idstr, &level);
pReader->pTsdb = getTsdbByRetentions(pVnode, pCond, pVnode->config.tsdbCfg.retentions, idstr, &level);
pReader->info.suid = pCond->suid;
pReader->info.order = pCond->order;
@ -3152,9 +3152,9 @@ static int32_t buildBlockFromFiles(STsdbReader* pReader) {
}
}
static STsdb* getTsdbByRetentions(SVnode* pVnode, TSKEY winSKey, SRetention* retentions, const char* idStr,
static STsdb* getTsdbByRetentions(SVnode* pVnode, SQueryTableDataCond* pCond, SRetention* retentions, const char* idStr,
int8_t* pLevel) {
if (VND_IS_RSMA(pVnode)) {
if (VND_IS_RSMA(pVnode) && !pCond->skipRollup) {
int8_t level = 0;
int8_t precision = pVnode->config.tsdbCfg.precision;
int64_t now = taosGetTimestamp(precision);
@ -3170,7 +3170,7 @@ static STsdb* getTsdbByRetentions(SVnode* pVnode, TSKEY winSKey, SRetention* ret
}
break;
}
if ((now - pRetention->keep) <= (winSKey + offset)) {
if ((now - pRetention->keep) <= (pCond->twindows.skey + offset)) {
break;
}
++level;

View File

@ -338,9 +338,9 @@ static int32_t tsdbReadFileS3(STsdbFD *pFD, int64_t offset, uint8_t *pBuf, int64
// 2, retrieve pgs from s3
uint8_t *pBlock = NULL;
int64_t retrieve_offset = PAGE_OFFSET(pgno, pFD->szPage);
int64_t pgnoEnd = pgno - 1 + (size - n + szPgCont - 1) / szPgCont;
int64_t pgnoEnd = pgno - 1 + (bOffset + size - n + szPgCont - 1) / szPgCont;
int64_t retrieve_size = (pgnoEnd - pgno + 1) * pFD->szPage;
code = s3GetObjectBlock(pFD->objName, retrieve_offset, retrieve_size, &pBlock);
code = s3GetObjectBlock(pFD->objName, retrieve_offset, retrieve_size, 1, &pBlock);
if (code != TSDB_CODE_SUCCESS) {
goto _exit;
}

View File

@ -13,9 +13,9 @@
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include "cos.h"
#include "tsdb.h"
#include "tsdbFS2.h"
#include "cos.h"
#include "vnd.h"
typedef struct {
@ -292,15 +292,15 @@ static int32_t tsdbDoRetentionOnFileSet(SRTNer *rtner, STFileSet *fset) {
if (expLevel < 0) { // remove the fileset
for (int32_t ftype = 0; (ftype < TSDB_FTYPE_MAX) && (fobj = fset->farr[ftype], 1); ++ftype) {
if (fobj == NULL) continue;
/*
int32_t nlevel = tfsGetLevel(rtner->tsdb->pVnode->pTfs);
if (tsS3Enabled && nlevel > 1 && TSDB_FTYPE_DATA == ftype && fobj->f->did.level == nlevel - 1) {
code = tsdbRemoveFileObjectS3(rtner, fobj);
TSDB_CHECK_CODE(code, lino, _exit);
} else {
} else {*/
code = tsdbDoRemoveFileObject(rtner, fobj);
TSDB_CHECK_CODE(code, lino, _exit);
}
//}
}
SSttLvl *lvl;
@ -388,6 +388,8 @@ _exit:
return code;
}
static void tsdbFreeRtnArg(void *arg) { taosMemoryFree(arg); }
static int32_t tsdbDoRetentionSync(void *arg) {
int32_t code = 0;
int32_t lino = 0;
@ -410,6 +412,7 @@ _exit:
TSDB_ERROR_LOG(TD_VID(rtner->tsdb->pVnode), lino, code);
}
tsem_post(&((SRtnArg *)arg)->tsdb->pVnode->canCommit);
tsdbFreeRtnArg(arg);
return code;
}
@ -439,7 +442,7 @@ _exit:
return code;
}
static void tsdbFreeRtnArg(void *arg) { taosMemoryFree(arg); }
int32_t tsdbRetention(STsdb *tsdb, int64_t now, int32_t sync) {
int32_t code = 0;

View File

@ -39,7 +39,7 @@ int tsdbInsertData(STsdb *pTsdb, int64_t version, SSubmitReq2 *pMsg, SSubmitRsp2
arrSize = taosArrayGetSize(pMsg->aSubmitTbData);
// scan and convert
if (tsdbScanAndConvertSubmitMsg(pTsdb, pMsg) < 0) {
if ((terrno = tsdbScanAndConvertSubmitMsg(pTsdb, pMsg)) < 0) {
if (terrno != TSDB_CODE_TDB_TABLE_RECONFIGURE) {
tsdbError("vgId:%d, failed to insert data since %s", TD_VID(pTsdb->pVnode), tstrerror(terrno));
}

View File

@ -15,8 +15,12 @@
#include "vnd.h"
extern int32_t tsdbRetention(STsdb *tsdb, int64_t now, int32_t sync);
int32_t vnodeDoRetention(SVnode *pVnode, int64_t now) {
return tsdbRetention(pVnode->pTsdb, now, pVnode->config.sttTrigger == 1);
int32_t code = TSDB_CODE_SUCCESS;
code = tsdbRetention(pVnode->pTsdb, now, pVnode->config.sttTrigger == 1);
if (TSDB_CODE_SUCCESS == code) code = smaRetention(pVnode->pSma, now);
return code;
}

View File

@ -25,9 +25,11 @@
static int32_t vnodeProcessCreateStbReq(SVnode *pVnode, int64_t ver, void *pReq, int32_t len, SRpcMsg *pRsp);
static int32_t vnodeProcessAlterStbReq(SVnode *pVnode, int64_t ver, void *pReq, int32_t len, SRpcMsg *pRsp);
static int32_t vnodeProcessDropStbReq(SVnode *pVnode, int64_t ver, void *pReq, int32_t len, SRpcMsg *pRsp);
static int32_t vnodeProcessCreateTbReq(SVnode *pVnode, int64_t ver, void *pReq, int32_t len, SRpcMsg *pRsp);
static int32_t vnodeProcessCreateTbReq(SVnode *pVnode, int64_t ver, void *pReq, int32_t len, SRpcMsg *pRsp,
SRpcMsg *pOriginRpc);
static int32_t vnodeProcessAlterTbReq(SVnode *pVnode, int64_t ver, void *pReq, int32_t len, SRpcMsg *pRsp);
static int32_t vnodeProcessDropTbReq(SVnode *pVnode, int64_t ver, void *pReq, int32_t len, SRpcMsg *pRsp);
static int32_t vnodeProcessDropTbReq(SVnode *pVnode, int64_t ver, void *pReq, int32_t len, SRpcMsg *pRsp,
SRpcMsg *pOriginRpc);
static int32_t vnodeProcessSubmitReq(SVnode *pVnode, int64_t ver, void *pReq, int32_t len, SRpcMsg *pRsp);
static int32_t vnodeProcessCreateTSmaReq(SVnode *pVnode, int64_t ver, void *pReq, int32_t len, SRpcMsg *pRsp);
static int32_t vnodeProcessAlterConfirmReq(SVnode *pVnode, int64_t ver, void *pReq, int32_t len, SRpcMsg *pRsp);
@ -380,7 +382,7 @@ static int32_t vnodePreProcessDeleteMsg(SVnode *pVnode, SRpcMsg *pMsg) {
SEncoder *pCoder = &(SEncoder){0};
SDeleteRes res = {0};
SReadHandle handle = {.vnode = pVnode, .pMsgCb = &pVnode->msgCb};
SReadHandle handle = {.vnode = pVnode, .pMsgCb = &pVnode->msgCb, .skipRollup = 1};
initStorageAPI(&handle.api);
code = qWorkerProcessDeleteMsg(&handle, pVnode->pQuery, pMsg, &res);
@ -509,13 +511,13 @@ int32_t vnodeProcessWriteMsg(SVnode *pVnode, SRpcMsg *pMsg, int64_t ver, SRpcMsg
if (vnodeProcessDropStbReq(pVnode, ver, pReq, len, pRsp) < 0) goto _err;
break;
case TDMT_VND_CREATE_TABLE:
if (vnodeProcessCreateTbReq(pVnode, ver, pReq, len, pRsp) < 0) goto _err;
if (vnodeProcessCreateTbReq(pVnode, ver, pReq, len, pRsp, pMsg) < 0) goto _err;
break;
case TDMT_VND_ALTER_TABLE:
if (vnodeProcessAlterTbReq(pVnode, ver, pReq, len, pRsp) < 0) goto _err;
break;
case TDMT_VND_DROP_TABLE:
if (vnodeProcessDropTbReq(pVnode, ver, pReq, len, pRsp) < 0) goto _err;
if (vnodeProcessDropTbReq(pVnode, ver, pReq, len, pRsp, pMsg) < 0) goto _err;
break;
case TDMT_VND_DROP_TTL_TABLE:
if (vnodeProcessDropTtlTbReq(pVnode, ver, pReq, len, pRsp) < 0) goto _err;
@ -878,7 +880,8 @@ _err:
return -1;
}
static int32_t vnodeProcessCreateTbReq(SVnode *pVnode, int64_t ver, void *pReq, int32_t len, SRpcMsg *pRsp) {
static int32_t vnodeProcessCreateTbReq(SVnode *pVnode, int64_t ver, void *pReq, int32_t len, SRpcMsg *pRsp,
SRpcMsg *pOriginRpc) {
SDecoder decoder = {0};
SEncoder encoder = {0};
int32_t rcode = 0;
@ -928,6 +931,17 @@ static int32_t vnodeProcessCreateTbReq(SVnode *pVnode, int64_t ver, void *pReq,
goto _exit;
}
if(tsEnableAudit && tsEnableAuditCreateTable){
char* str = taosMemoryCalloc(1, TSDB_TABLE_FNAME_LEN);
if (str == NULL) {
terrno = TSDB_CODE_OUT_OF_MEMORY;
rcode = -1;
goto _exit;
}
strcpy(str, pCreateReq->name);
taosArrayPush(tbNames, &str);
}
// validate hash
sprintf(tbName, "%s.%s", pVnode->config.dbname, pCreateReq->name);
if (vnodeValidateTableHash(pVnode, tbName) < 0) {
@ -951,12 +965,6 @@ static int32_t vnodeProcessCreateTbReq(SVnode *pVnode, int64_t ver, void *pReq,
}
taosArrayPush(rsp.pArray, &cRsp);
if (tsEnableAuditCreateTable) {
char *str = taosMemoryCalloc(1, TSDB_TABLE_FNAME_LEN);
strcpy(str, pCreateReq->name);
taosArrayPush(tbNames, &str);
}
}
vDebug("vgId:%d, add %d new created tables into query table list", TD_VID(pVnode), (int32_t)taosArrayGetSize(tbUids));
@ -978,17 +986,17 @@ static int32_t vnodeProcessCreateTbReq(SVnode *pVnode, int64_t ver, void *pReq,
tEncoderInit(&encoder, pRsp->pCont, pRsp->contLen);
tEncodeSVCreateTbBatchRsp(&encoder, &rsp);
if (tsEnableAuditCreateTable) {
if(tsEnableAudit && tsEnableAuditCreateTable){
int64_t clusterId = pVnode->config.syncCfg.nodeInfo[0].clusterId;
SName name = {0};
tNameFromString(&name, pVnode->config.dbname, T_NAME_ACCT | T_NAME_DB);
SStringBuilder sb = {0};
for (int32_t iReq = 0; iReq < req.nReqs; iReq++) {
char **key = (char **)taosArrayGet(tbNames, iReq);
for(int32_t i = 0; i < tbNames->size; i++){
char** key = (char**)taosArrayGet(tbNames, i);
taosStringBuilderAppendStringLen(&sb, *key, strlen(*key));
if (iReq < req.nReqs - 1) {
if(i < tbNames->size - 1){
taosStringBuilderAppendChar(&sb, ',');
}
taosMemoryFreeClear(*key);
@ -997,7 +1005,7 @@ static int32_t vnodeProcessCreateTbReq(SVnode *pVnode, int64_t ver, void *pReq,
size_t len = 0;
char *keyJoined = taosStringBuilderGetResult(&sb, &len);
auditRecord(NULL, clusterId, "createTable", name.dbname, "", keyJoined, len);
auditRecord(pOriginRpc, clusterId, "createTable", name.dbname, "", keyJoined, len);
taosStringBuilderDestroy(&sb);
}
@ -1139,7 +1147,8 @@ _exit:
return 0;
}
static int32_t vnodeProcessDropTbReq(SVnode *pVnode, int64_t ver, void *pReq, int32_t len, SRpcMsg *pRsp) {
static int32_t vnodeProcessDropTbReq(SVnode *pVnode, int64_t ver, void *pReq, int32_t len, SRpcMsg *pRsp,
SRpcMsg *pOriginRpc) {
SVDropTbBatchReq req = {0};
SVDropTbBatchRsp rsp = {0};
SDecoder decoder = {0};
@ -1218,7 +1227,7 @@ static int32_t vnodeProcessDropTbReq(SVnode *pVnode, int64_t ver, void *pReq, in
size_t len = 0;
char *keyJoined = taosStringBuilderGetResult(&sb, &len);
auditRecord(NULL, clusterId, "dropTable", name.dbname, "", keyJoined, len);
auditRecord(pOriginRpc, clusterId, "dropTable", name.dbname, "", keyJoined, len);
taosStringBuilderDestroy(&sb);
}
@ -1669,7 +1678,7 @@ _exit:
atomic_add_fetch_64(&pVnode->statis.nBatchInsert, 1);
if (code == 0) {
atomic_add_fetch_64(&pVnode->statis.nBatchInsertSuccess, 1);
tdProcessRSmaSubmit(pVnode->pSma, ver, pSubmitReq, pReq, len, STREAM_INPUT__DATA_SUBMIT);
code = tdProcessRSmaSubmit(pVnode->pSma, ver, pSubmitReq, pReq, len);
}
// clear
@ -1886,6 +1895,11 @@ static int32_t vnodeProcessBatchDeleteReq(SVnode *pVnode, int64_t ver, void *pRe
SMetaReader mr = {0};
metaReaderDoInit(&mr, pVnode->pMeta, META_READER_NOLOCK);
STsdb *pTsdb = pVnode->pTsdb;
if (deleteReq.level) {
pTsdb = deleteReq.level == 1 ? VND_RSMA1(pVnode) : VND_RSMA2(pVnode);
}
int32_t sz = taosArrayGetSize(deleteReq.deleteReqs);
for (int32_t i = 0; i < sz; i++) {
@ -1898,13 +1912,14 @@ static int32_t vnodeProcessBatchDeleteReq(SVnode *pVnode, int64_t ver, void *pRe
int64_t uid = mr.me.uid;
int32_t code = tsdbDeleteTableData(pVnode->pTsdb, ver, deleteReq.suid, uid, pOneReq->startTs, pOneReq->endTs);
int32_t code = tsdbDeleteTableData(pTsdb, ver, deleteReq.suid, uid, pOneReq->startTs, pOneReq->endTs);
if (code < 0) {
terrno = code;
vError("vgId:%d, delete error since %s, suid:%" PRId64 ", uid:%" PRId64 ", start ts:%" PRId64 ", end ts:%" PRId64,
TD_VID(pVnode), terrstr(), deleteReq.suid, uid, pOneReq->startTs, pOneReq->endTs);
}
if (deleteReq.level == 0) {
code = metaUpdateChangeTimeWithLock(pVnode->pMeta, uid, deleteReq.ctimeMs);
if (code < 0) {
terrno = code;
@ -1912,7 +1927,7 @@ static int32_t vnodeProcessBatchDeleteReq(SVnode *pVnode, int64_t ver, void *pRe
", end ts:%" PRId64,
TD_VID(pVnode), terrstr(), deleteReq.suid, uid, pOneReq->startTs, pOneReq->endTs);
}
}
tDecoderClear(&mr.coder);
}
metaReaderClear(&mr);
@ -1947,6 +1962,8 @@ static int32_t vnodeProcessDeleteReq(SVnode *pVnode, int64_t ver, void *pReq, in
if (code) goto _err;
}
code = tdProcessRSmaDelete(pVnode->pSma, ver, pRes, pReq, len);
tDecoderClear(pCoder);
taosArrayDestroy(pRes->uidList);

View File

@ -37,7 +37,7 @@ extern "C" {
#define EXPLAIN_TABLE_COUNT_SCAN_FORMAT "Table Count Row Scan on %s"
#define EXPLAIN_PROJECTION_FORMAT "Projection"
#define EXPLAIN_JOIN_FORMAT "%s"
#define EXPLAIN_AGG_FORMAT "Aggragate"
#define EXPLAIN_AGG_FORMAT "%s"
#define EXPLAIN_INDEF_ROWS_FORMAT "Indefinite Rows Function"
#define EXPLAIN_EXCHANGE_FORMAT "Data Exchange %d:1"
#define EXPLAIN_SORT_FORMAT "Sort"
@ -59,7 +59,7 @@ extern "C" {
#define EXPLAIN_TIME_WINDOWS_FORMAT "Time Window: interval=%" PRId64 "%c offset=%" PRId64 "%c sliding=%" PRId64 "%c"
#define EXPLAIN_WINDOW_FORMAT "Window: gap=%" PRId64
#define EXPLAIN_RATIO_TIME_FORMAT "Ratio: %f"
#define EXPLAIN_MERGE_FORMAT "SortMerge"
#define EXPLAIN_MERGE_FORMAT "Merge"
#define EXPLAIN_MERGE_KEYS_FORMAT "Merge Key: "
#define EXPLAIN_IGNORE_GROUPID_FORMAT "Ignore Group Id: %s"
#define EXPLAIN_PARTITION_KETS_FORMAT "Partition Key: "
@ -85,7 +85,9 @@ extern "C" {
#define EXPLAIN_COLUMNS_FORMAT "columns=%d"
#define EXPLAIN_PSEUDO_COLUMNS_FORMAT "pseudo_columns=%d"
#define EXPLAIN_WIDTH_FORMAT "width=%d"
#define EXPLAIN_TABLE_SCAN_FORMAT "order=[asc|%d desc|%d]"
#define EXPLAIN_SCAN_ORDER_FORMAT "order=[asc|%d desc|%d]"
#define EXPLAIN_SCAN_MODE_FORMAT "mode=%s"
#define EXPLAIN_SCAN_DATA_LOAD_FORMAT "data_load=%s"
#define EXPLAIN_GROUPS_FORMAT "groups=%d"
#define EXPLAIN_WIDTH_FORMAT "width=%d"
#define EXPLAIN_INTERVAL_VALUE_FORMAT "interval=%" PRId64 "%c"
@ -105,6 +107,7 @@ extern "C" {
#define EXPLAIN_UID_SLOT_FORMAT "uid_slot=%d,%d"
#define EXPLAIN_SRC_SCAN_FORMAT "src_scan=%d,%d"
#define EXPLAIN_PLAN_BLOCKING "blocking=%d"
#define EXPLAIN_MERGE_MODE_FORMAT "mode=%s"
#define COMMAND_RESET_LOG "resetLog"
#define COMMAND_SCHEDULE_POLICY "schedulePolicy"
@ -156,6 +159,7 @@ typedef struct SExplainCtx {
#define EXPLAIN_ORDER_STRING(_order) ((ORDER_ASC == _order) ? "asc" : ORDER_DESC == _order ? "desc" : "unknown")
#define EXPLAIN_JOIN_STRING(_type) ((JOIN_TYPE_INNER == _type) ? "Inner join" : "Join")
#define EXPLAIN_MERGE_MODE_STRING(_mode) ((_mode) == MERGE_TYPE_SORT ? "sort" : ((_mode) == MERGE_TYPE_NON_SORT ? "merge" : "column"))
#define INVERAL_TIME_FROM_PRECISION_TO_UNIT(_t, _u, _p) (((_u) == 'n' || (_u) == 'y') ? (_t) : (convertTimeFromPrecisionToUnit(_t, _p, _u)))

View File

@ -20,6 +20,7 @@
#include "tcommon.h"
#include "tdatablock.h"
#include "systable.h"
#include "functionMgt.h"
int32_t qExplainGenerateResNode(SPhysiNode *pNode, SExplainGroup *group, SExplainResNode **pRes);
int32_t qExplainAppendGroupResRows(void *pCtx, int32_t groupId, int32_t level, bool singleChannel);
@ -284,10 +285,49 @@ int32_t qExplainResAppendRow(SExplainCtx *ctx, char *tbuf, int32_t len, int32_t
return TSDB_CODE_SUCCESS;
}
static uint8_t getIntervalPrecision(SIntervalPhysiNode *pIntNode) {
static uint8_t qExplainGetIntervalPrecision(SIntervalPhysiNode *pIntNode) {
return ((SColumnNode *)pIntNode->window.pTspk)->node.resType.precision;
}
static char* qExplainGetScanMode(STableScanPhysiNode* pScan) {
bool isGroupByTbname = false;
bool isGroupByTag = false;
bool seq = false;
bool groupOrder = false;
if (pScan->pGroupTags && LIST_LENGTH(pScan->pGroupTags) == 1) {
SNode* p = nodesListGetNode(pScan->pGroupTags, 0);
if (QUERY_NODE_FUNCTION == nodeType(p) && (strcmp(((struct SFunctionNode*)p)->functionName, "tbname") == 0)) {
isGroupByTbname = true;
}
}
isGroupByTag = (NULL != pScan->pGroupTags) && !isGroupByTbname;
if ((((!isGroupByTag) || isGroupByTbname) && pScan->groupSort) || (isGroupByTag && (pScan->groupSort || pScan->scan.groupOrderScan))) {
return "seq_grp_order";
}
if ((isGroupByTbname && (pScan->groupSort || pScan->scan.groupOrderScan)) || (isGroupByTag && (pScan->groupSort || pScan->scan.groupOrderScan))) {
return "grp_order";
}
return "ts_order";
}
static char* qExplainGetScanDataLoad(STableScanPhysiNode* pScan) {
switch (pScan->dataRequired) {
case FUNC_DATA_REQUIRED_DATA_LOAD:
return "data";
case FUNC_DATA_REQUIRED_SMA_LOAD:
return "sma";
case FUNC_DATA_REQUIRED_NOT_LOAD:
return "no";
default:
break;
}
return "unknown";
}
int32_t qExplainResNodeToRowsImpl(SExplainResNode *pResNode, SExplainCtx *ctx, int32_t level) {
int32_t tlen = 0;
bool isVerboseLine = false;
@ -360,7 +400,11 @@ int32_t qExplainResNodeToRowsImpl(SExplainResNode *pResNode, SExplainCtx *ctx, i
}
EXPLAIN_ROW_APPEND(EXPLAIN_WIDTH_FORMAT, pTblScanNode->scan.node.pOutputDataBlockDesc->totalRowSize);
EXPLAIN_ROW_APPEND(EXPLAIN_BLANK_FORMAT);
EXPLAIN_ROW_APPEND(EXPLAIN_TABLE_SCAN_FORMAT, pTblScanNode->scanSeq[0], pTblScanNode->scanSeq[1]);
EXPLAIN_ROW_APPEND(EXPLAIN_SCAN_ORDER_FORMAT, pTblScanNode->scanSeq[0], pTblScanNode->scanSeq[1]);
EXPLAIN_ROW_APPEND(EXPLAIN_BLANK_FORMAT);
EXPLAIN_ROW_APPEND(EXPLAIN_SCAN_MODE_FORMAT, qExplainGetScanMode(pTblScanNode));
EXPLAIN_ROW_APPEND(EXPLAIN_BLANK_FORMAT);
EXPLAIN_ROW_APPEND(EXPLAIN_SCAN_DATA_LOAD_FORMAT, qExplainGetScanDataLoad(pTblScanNode));
EXPLAIN_ROW_APPEND(EXPLAIN_RIGHT_PARENTHESIS_FORMAT);
EXPLAIN_ROW_END();
QRY_ERR_RET(qExplainResAppendRow(ctx, tbuf, tlen, level));
@ -599,7 +643,7 @@ int32_t qExplainResNodeToRowsImpl(SExplainResNode *pResNode, SExplainCtx *ctx, i
}
case QUERY_NODE_PHYSICAL_PLAN_HASH_AGG: {
SAggPhysiNode *pAggNode = (SAggPhysiNode *)pNode;
EXPLAIN_ROW_NEW(level, EXPLAIN_AGG_FORMAT);
EXPLAIN_ROW_NEW(level, EXPLAIN_AGG_FORMAT, (pAggNode->pGroupKeys ? "GroupAggragate" : "Aggragate"));
EXPLAIN_ROW_APPEND(EXPLAIN_LEFT_PARENTHESIS_FORMAT);
if (pResNode->pExecInfo) {
QRY_ERR_RET(qExplainBufAppendExecInfo(pResNode->pExecInfo, tbuf, &tlen));
@ -841,7 +885,7 @@ int32_t qExplainResNodeToRowsImpl(SExplainResNode *pResNode, SExplainCtx *ctx, i
EXPLAIN_ROW_APPEND_SLIMIT(pIntNode->window.node.pSlimit);
EXPLAIN_ROW_END();
QRY_ERR_RET(qExplainResAppendRow(ctx, tbuf, tlen, level + 1));
uint8_t precision = getIntervalPrecision(pIntNode);
uint8_t precision = qExplainGetIntervalPrecision(pIntNode);
EXPLAIN_ROW_NEW(level + 1, EXPLAIN_TIME_WINDOWS_FORMAT,
INVERAL_TIME_FROM_PRECISION_TO_UNIT(pIntNode->interval, pIntNode->intervalUnit, precision),
pIntNode->intervalUnit, pIntNode->offset, getPrecisionUnit(precision),
@ -893,7 +937,7 @@ int32_t qExplainResNodeToRowsImpl(SExplainResNode *pResNode, SExplainCtx *ctx, i
EXPLAIN_ROW_APPEND_SLIMIT(pIntNode->window.node.pSlimit);
EXPLAIN_ROW_END();
QRY_ERR_RET(qExplainResAppendRow(ctx, tbuf, tlen, level + 1));
uint8_t precision = getIntervalPrecision(pIntNode);
uint8_t precision = qExplainGetIntervalPrecision(pIntNode);
EXPLAIN_ROW_NEW(level + 1, EXPLAIN_TIME_WINDOWS_FORMAT,
INVERAL_TIME_FROM_PRECISION_TO_UNIT(pIntNode->interval, pIntNode->intervalUnit, precision),
pIntNode->intervalUnit, pIntNode->offset, getPrecisionUnit(precision),
@ -1119,23 +1163,14 @@ int32_t qExplainResNodeToRowsImpl(SExplainResNode *pResNode, SExplainCtx *ctx, i
EXPLAIN_ROW_APPEND(EXPLAIN_INPUT_ORDER_FORMAT, EXPLAIN_ORDER_STRING(pMergeNode->node.inputTsOrder));
EXPLAIN_ROW_APPEND(EXPLAIN_BLANK_FORMAT);
EXPLAIN_ROW_APPEND(EXPLAIN_OUTPUT_ORDER_TYPE_FORMAT, EXPLAIN_ORDER_STRING(pMergeNode->node.outputTsOrder));
EXPLAIN_ROW_APPEND(EXPLAIN_BLANK_FORMAT);
EXPLAIN_ROW_APPEND(EXPLAIN_MERGE_MODE_FORMAT, EXPLAIN_MERGE_MODE_STRING(pMergeNode->type));
EXPLAIN_ROW_APPEND(EXPLAIN_RIGHT_PARENTHESIS_FORMAT);
EXPLAIN_ROW_END();
QRY_ERR_RET(qExplainResAppendRow(ctx, tbuf, tlen, level));
if (EXPLAIN_MODE_ANALYZE == ctx->mode) {
// sort key
EXPLAIN_ROW_NEW(level + 1, "Merge Key: ");
if (pResNode->pExecInfo) {
for (int32_t i = 0; i < LIST_LENGTH(pMergeNode->pMergeKeys); ++i) {
SOrderByExprNode *ptn = (SOrderByExprNode *)nodesListGetNode(pMergeNode->pMergeKeys, i);
EXPLAIN_ROW_APPEND("%s ", nodesGetNameFromColumnNode(ptn->pExpr));
}
}
EXPLAIN_ROW_END();
QRY_ERR_RET(qExplainResAppendRow(ctx, tbuf, tlen, level));
if (MERGE_TYPE_SORT == pMergeNode->type) {
// sort method
EXPLAIN_ROW_NEW(level + 1, "Sort Method: ");
@ -1155,6 +1190,7 @@ int32_t qExplainResNodeToRowsImpl(SExplainResNode *pResNode, SExplainCtx *ctx, i
EXPLAIN_ROW_END();
QRY_ERR_RET(qExplainResAppendRow(ctx, tbuf, tlen, level));
}
}
if (verbose) {
EXPLAIN_ROW_NEW(level + 1, EXPLAIN_OUTPUT_FORMAT);
@ -1167,6 +1203,7 @@ int32_t qExplainResNodeToRowsImpl(SExplainResNode *pResNode, SExplainCtx *ctx, i
EXPLAIN_ROW_END();
QRY_ERR_RET(qExplainResAppendRow(ctx, tbuf, tlen, level + 1));
if (MERGE_TYPE_SORT == pMergeNode->type) {
EXPLAIN_ROW_NEW(level + 1, EXPLAIN_OUTPUT_FORMAT);
EXPLAIN_ROW_APPEND(EXPLAIN_IGNORE_GROUPID_FORMAT, pMergeNode->ignoreGroupId ? "true" : "false");
EXPLAIN_ROW_END();
@ -1190,6 +1227,7 @@ int32_t qExplainResNodeToRowsImpl(SExplainResNode *pResNode, SExplainCtx *ctx, i
}
EXPLAIN_ROW_END();
QRY_ERR_RET(qExplainResAppendRow(ctx, tbuf, tlen, level + 1));
}
if (pMergeNode->node.pConditions) {
EXPLAIN_ROW_NEW(level + 1, EXPLAIN_FILTER_FORMAT);
@ -1419,7 +1457,7 @@ int32_t qExplainResNodeToRowsImpl(SExplainResNode *pResNode, SExplainCtx *ctx, i
EXPLAIN_ROW_APPEND_SLIMIT(pIntNode->window.node.pSlimit);
EXPLAIN_ROW_END();
QRY_ERR_RET(qExplainResAppendRow(ctx, tbuf, tlen, level + 1));
uint8_t precision = getIntervalPrecision(pIntNode);
uint8_t precision = qExplainGetIntervalPrecision(pIntNode);
EXPLAIN_ROW_NEW(level + 1, EXPLAIN_TIME_WINDOWS_FORMAT,
INVERAL_TIME_FROM_PRECISION_TO_UNIT(pIntNode->interval, pIntNode->intervalUnit, precision),
pIntNode->intervalUnit, pIntNode->offset, getPrecisionUnit(precision),

View File

@ -40,7 +40,9 @@
#define GET_RES_WINDOW_KEY_LEN(_l) ((_l) + sizeof(uint64_t))
typedef struct SGroupResInfo {
int32_t index;
int32_t index; // rows consumed in func:doCopyToSDataBlockXX
int32_t iter; // relate to index-1, last consumed data's slot id in hash table
void* dataPos; // relate to index-1, last consumed data's position, in the nodelist of cur slot
SArray* pRows; // SArray<SResKeyPos>
char* pBuf;
bool freeItem;
@ -178,7 +180,7 @@ void initExecTimeWindowInfo(SColumnInfoData* pColData, STimeWindow* pQueryWindow
SInterval extractIntervalInfo(const STableScanPhysiNode* pTableScanNode);
SColumn extractColumnFromColumnNode(SColumnNode* pColNode);
int32_t initQueryTableDataCond(SQueryTableDataCond* pCond, const STableScanPhysiNode* pTableScanNode);
int32_t initQueryTableDataCond(SQueryTableDataCond* pCond, const STableScanPhysiNode* pTableScanNode, const SReadHandle* readHandle);
void cleanupQueryTableDataCond(SQueryTableDataCond* pCond);
int32_t convertFillType(int32_t mode);

View File

@ -679,6 +679,12 @@ void initResultSizeInfo(SResultInfo* pResultInfo, int32_t numOfRows);
void doBuildResultDatablock(struct SOperatorInfo* pOperator, SOptrBasicInfo* pbInfo, SGroupResInfo* pGroupResInfo,
SDiskbasedBuf* pBuf);
/**
* @brief copydata from hash table, instead of copying from SGroupResInfo's pRow
*/
int32_t doCopyToSDataBlockByHash(SExecTaskInfo* pTaskInfo, SSDataBlock* pBlock, SExprSupp* pSup, SDiskbasedBuf* pBuf,
SGroupResInfo* pGroupResInfo, SSHashObj* pHashmap, int32_t threshold, bool ignoreGroup);
bool hasLimitOffsetInfo(SLimitInfo* pLimitInfo);
bool hasSlimitOffsetInfo(SLimitInfo* pLimitInfo);
void initLimitInfo(const SNode* pLimit, const SNode* pSLimit, SLimitInfo* pLimitInfo);

View File

@ -54,6 +54,19 @@ static int32_t removeRedundantTsCol(SLastRowScanPhysiNode* pScanNode, SColM
#define SCAN_ROW_TYPE(_t) ((_t) ? CACHESCAN_RETRIEVE_LAST : CACHESCAN_RETRIEVE_LAST_ROW)
static void setColIdForCacheReadBlock(SSDataBlock* pBlock, SNodeList* pTargets) {
SNode* pNode;
int32_t idx = 0;
FOREACH(pNode, pTargets) {
if (nodeType(pNode) == QUERY_NODE_COLUMN) {
SColumnNode* pCol = (SColumnNode*)pNode;
SColumnInfoData* pColInfo = taosArrayGet(pBlock->pDataBlock, idx);
pColInfo->info.colId = pCol->colId;
}
idx++;
}
}
SOperatorInfo* createCacherowsScanOperator(SLastRowScanPhysiNode* pScanNode, SReadHandle* readHandle,
STableListInfo* pTableListInfo, SExecTaskInfo* pTaskInfo) {
int32_t code = TSDB_CODE_SUCCESS;
@ -114,10 +127,12 @@ SOperatorInfo* createCacherowsScanOperator(SLastRowScanPhysiNode* pScanNode, SRe
capacity = TMIN(totalTables, 4096);
pInfo->pBufferredRes = createOneDataBlock(pInfo->pRes, false);
setColIdForCacheReadBlock(pInfo->pBufferredRes, pScanNode->pTargets);
blockDataEnsureCapacity(pInfo->pBufferredRes, capacity);
} else { // by tags
pInfo->retrieveType = CACHESCAN_RETRIEVE_TYPE_SINGLE | SCAN_ROW_TYPE(pScanNode->ignoreNull);
capacity = 1; // only one row output
setColIdForCacheReadBlock(pInfo->pRes, pScanNode->pTargets);
}
initResultSizeInfo(&pOperator->resultInfo, capacity);
@ -191,9 +206,9 @@ SSDataBlock* doScanCache(SOperatorInfo* pOperator) {
SSDataBlock* pRes = pInfo->pRes;
if (pInfo->indexOfBufferedRes < pInfo->pBufferredRes->info.rows) {
for (int32_t i = 0; i < taosArrayGetSize(pInfo->matchInfo.pList); ++i) {
SColMatchItem* pMatchInfo = taosArrayGet(pInfo->matchInfo.pList, i);
int32_t slotId = pMatchInfo->dstSlotId;
for (int32_t i = 0; i < taosArrayGetSize(pInfo->pBufferredRes->pDataBlock); ++i) {
SColumnInfoData* pCol = taosArrayGet(pRes->pDataBlock, i);
int32_t slotId = pCol->info.slotId;
SColumnInfoData* pSrc = taosArrayGet(pInfo->pBufferredRes->pDataBlock, slotId);
SColumnInfoData* pDst = taosArrayGet(pRes->pDataBlock, slotId);
@ -201,10 +216,12 @@ SSDataBlock* doScanCache(SOperatorInfo* pOperator) {
if (colDataIsNull_s(pSrc, pInfo->indexOfBufferedRes)) {
colDataSetNULL(pDst, 0);
} else {
if (pSrc->pData) {
char* p = colDataGetData(pSrc, pInfo->indexOfBufferedRes);
colDataSetVal(pDst, 0, p, false);
}
}
}
pRes->info.id.uid = *(tb_uid_t*)taosArrayGet(pInfo->pUidList, pInfo->indexOfBufferedRes);
pRes->info.rows = 1;

View File

@ -1713,7 +1713,7 @@ SColumn extractColumnFromColumnNode(SColumnNode* pColNode) {
return c;
}
int32_t initQueryTableDataCond(SQueryTableDataCond* pCond, const STableScanPhysiNode* pTableScanNode) {
int32_t initQueryTableDataCond(SQueryTableDataCond* pCond, const STableScanPhysiNode* pTableScanNode, const SReadHandle* readHandle) {
pCond->order = pTableScanNode->scanSeq[0] > 0 ? TSDB_ORDER_ASC : TSDB_ORDER_DESC;
pCond->numOfCols = LIST_LENGTH(pTableScanNode->scan.pScanCols);
@ -1732,6 +1732,7 @@ int32_t initQueryTableDataCond(SQueryTableDataCond* pCond, const STableScanPhysi
pCond->type = TIMEWINDOW_RANGE_CONTAINED;
pCond->startVersion = -1;
pCond->endVersion = -1;
pCond->skipRollup = readHandle->skipRollup;
int32_t j = 0;
for (int32_t i = 0; i < pCond->numOfCols; ++i) {

View File

@ -69,12 +69,20 @@ static int32_t doSetSMABlock(SOperatorInfo* pOperator, void* input, size_t numOf
} else if (type == STREAM_INPUT__DATA_BLOCK) {
for (int32_t i = 0; i < numOfBlocks; ++i) {
SSDataBlock* pDataBlock = &((SSDataBlock*)input)[i];
SPackedData tmp = {
.pDataBlock = pDataBlock,
};
SPackedData tmp = {.pDataBlock = pDataBlock};
taosArrayPush(pInfo->pBlockLists, &tmp);
}
pInfo->blockType = STREAM_INPUT__DATA_BLOCK;
} else if (type == STREAM_INPUT__CHECKPOINT) {
SPackedData tmp = {.pDataBlock = input};
taosArrayPush(pInfo->pBlockLists, &tmp);
pInfo->blockType = STREAM_INPUT__CHECKPOINT;
} else if (type == STREAM_INPUT__REF_DATA_BLOCK) {
for (int32_t i = 0; i < numOfBlocks; ++i) {
SPackedData* pReq = POINTER_SHIFT(input, i * sizeof(SPackedData));
taosArrayPush(pInfo->pBlockLists, pReq);
}
pInfo->blockType = STREAM_INPUT__DATA_BLOCK;
}
return TSDB_CODE_SUCCESS;
@ -633,7 +641,7 @@ int32_t qExecTaskOpt(qTaskInfo_t tinfo, SArray* pResList, uint64_t* useconds, bo
blockIndex += 1;
current += p->info.rows;
ASSERT(p->info.rows > 0);
ASSERT(p->info.rows > 0 || p->info.type == STREAM_CHECKPOINT);
taosArrayPush(pResList, &p);
if (current >= rowsThreshold) {

View File

@ -655,6 +655,85 @@ int32_t finalizeResultRows(SDiskbasedBuf* pBuf, SResultRowPosition* resultRowPos
return 0;
}
int32_t doCopyToSDataBlockByHash(SExecTaskInfo* pTaskInfo, SSDataBlock* pBlock, SExprSupp* pSup, SDiskbasedBuf* pBuf,
SGroupResInfo* pGroupResInfo, SSHashObj* pHashmap, int32_t threshold,
bool ignoreGroup) {
SExprInfo* pExprInfo = pSup->pExprInfo;
int32_t numOfExprs = pSup->numOfExprs;
int32_t* rowEntryOffset = pSup->rowEntryInfoOffset;
SqlFunctionCtx* pCtx = pSup->pCtx;
size_t keyLen = 0;
int32_t numOfRows = tSimpleHashGetSize(pHashmap);
// begin from last iter
void* pData = pGroupResInfo->dataPos;
int32_t iter = pGroupResInfo->iter;
while ((pData = tSimpleHashIterate(pHashmap, pData, &iter)) != NULL) {
void* key = tSimpleHashGetKey(pData, &keyLen);
SResultRowPosition* pos = pData;
uint64_t groupId = *(uint64_t*)key;
SFilePage* page = getBufPage(pBuf, pos->pageId);
if (page == NULL) {
qError("failed to get buffer, code:%s, %s", tstrerror(terrno), GET_TASKID(pTaskInfo));
T_LONG_JMP(pTaskInfo->env, terrno);
}
SResultRow* pRow = (SResultRow*)((char*)page + pos->offset);
doUpdateNumOfRows(pCtx, pRow, numOfExprs, rowEntryOffset);
// no results, continue to check the next one
if (pRow->numOfRows == 0) {
pGroupResInfo->index += 1;
pGroupResInfo->iter = iter;
pGroupResInfo->dataPos = pData;
releaseBufPage(pBuf, page);
continue;
}
if (!ignoreGroup) {
if (pBlock->info.id.groupId == 0) {
pBlock->info.id.groupId = groupId;
} else {
// current value belongs to different group, it can't be packed into one datablock
if (pBlock->info.id.groupId != groupId) {
releaseBufPage(pBuf, page);
break;
}
}
}
if (pBlock->info.rows + pRow->numOfRows > pBlock->info.capacity) {
uint32_t newSize = pBlock->info.rows + pRow->numOfRows + ((numOfRows - iter) > 1 ? 1 : 0);
blockDataEnsureCapacity(pBlock, newSize);
qDebug("datablock capacity not sufficient, expand to required:%d, current capacity:%d, %s", newSize,
pBlock->info.capacity, GET_TASKID(pTaskInfo));
// todo set the pOperator->resultInfo size
}
pGroupResInfo->index += 1;
pGroupResInfo->iter = iter;
pGroupResInfo->dataPos = pData;
copyResultrowToDataBlock(pExprInfo, numOfExprs, pRow, pCtx, pBlock, rowEntryOffset, pTaskInfo);
releaseBufPage(pBuf, page);
pBlock->info.rows += pRow->numOfRows;
if (pBlock->info.rows >= threshold) {
break;
}
}
qDebug("%s result generated, rows:%" PRId64 ", groupId:%" PRIu64, GET_TASKID(pTaskInfo), pBlock->info.rows,
pBlock->info.id.groupId);
pBlock->info.dataLoad = 1;
blockDataUpdateTsWindow(pBlock, 0);
return 0;
}
int32_t doCopyToSDataBlock(SExecTaskInfo* pTaskInfo, SSDataBlock* pBlock, SExprSupp* pSup, SDiskbasedBuf* pBuf,
SGroupResInfo* pGroupResInfo, int32_t threshold, bool ignoreGroup) {
SExprInfo* pExprInfo = pSup->pExprInfo;

View File

@ -370,6 +370,72 @@ static SSDataBlock* buildGroupResultDataBlock(SOperatorInfo* pOperator) {
return (pRes->info.rows == 0) ? NULL : pRes;
}
bool hasRemainResultByHash(SOperatorInfo* pOperator) {
SGroupbyOperatorInfo* pInfo = pOperator->info;
SSHashObj* pHashmap = pInfo->aggSup.pResultRowHashTable;
return pInfo->groupResInfo.index < tSimpleHashGetSize(pHashmap);
}
void doBuildResultDatablockByHash(SOperatorInfo* pOperator, SOptrBasicInfo* pbInfo, SGroupResInfo* pGroupResInfo,
SDiskbasedBuf* pBuf) {
SGroupbyOperatorInfo* pInfo = pOperator->info;
SSHashObj* pHashmap = pInfo->aggSup.pResultRowHashTable;
SExecTaskInfo* pTaskInfo = pOperator->pTaskInfo;
SSDataBlock* pBlock = pInfo->binfo.pRes;
// set output datablock version
pBlock->info.version = pTaskInfo->version;
blockDataCleanup(pBlock);
if (!hasRemainResultByHash(pOperator)) {
return;
}
pBlock->info.id.groupId = 0;
if (!pInfo->binfo.mergeResultBlock) {
doCopyToSDataBlockByHash(pTaskInfo, pBlock, &pOperator->exprSupp, pInfo->aggSup.pResultBuf, &pInfo->groupResInfo,
pHashmap, pOperator->resultInfo.threshold, false);
} else {
while (hasRemainResultByHash(pOperator)) {
doCopyToSDataBlockByHash(pTaskInfo, pBlock, &pOperator->exprSupp, pInfo->aggSup.pResultBuf, &pInfo->groupResInfo,
pHashmap, pOperator->resultInfo.threshold, true);
if (pBlock->info.rows >= pOperator->resultInfo.threshold) {
break;
}
pBlock->info.id.groupId = 0;
}
// clear the group id info in SSDataBlock, since the client does not need it
pBlock->info.id.groupId = 0;
}
}
static SSDataBlock* buildGroupResultDataBlockByHash(SOperatorInfo* pOperator) {
SGroupbyOperatorInfo* pInfo = pOperator->info;
SSDataBlock* pRes = pInfo->binfo.pRes;
// after filter, if result block turn to null, get next from whole set
while (1) {
doBuildResultDatablockByHash(pOperator, &pInfo->binfo, &pInfo->groupResInfo, pInfo->aggSup.pResultBuf);
doFilter(pRes, pOperator->exprSupp.pFilterInfo, NULL);
if (!hasRemainResultByHash(pOperator)) {
setOperatorCompleted(pOperator);
// clean hash after completed
tSimpleHashCleanup(pInfo->aggSup.pResultRowHashTable);
pInfo->aggSup.pResultRowHashTable = NULL;
break;
}
if (pRes->info.rows > 0) {
break;
}
}
pOperator->resultInfo.totalRows += pRes->info.rows;
return (pRes->info.rows == 0) ? NULL : pRes;
}
static SSDataBlock* hashGroupbyAggregate(SOperatorInfo* pOperator) {
if (pOperator->status == OP_EXEC_DONE) {
return NULL;
@ -379,8 +445,9 @@ static SSDataBlock* hashGroupbyAggregate(SOperatorInfo* pOperator) {
SGroupbyOperatorInfo* pInfo = pOperator->info;
if (pOperator->status == OP_RES_TO_RETURN) {
return buildGroupResultDataBlock(pOperator);
return buildGroupResultDataBlockByHash(pOperator);
}
SGroupResInfo* pGroupResInfo = &pInfo->groupResInfo;
int32_t order = pInfo->binfo.inputTsOrder;
int64_t st = taosGetTimestampUs();
@ -425,10 +492,20 @@ static SSDataBlock* hashGroupbyAggregate(SOperatorInfo* pOperator) {
}
}
#endif
initGroupedResultInfo(&pInfo->groupResInfo, pInfo->aggSup.pResultRowHashTable, 0);
// initGroupedResultInfo(&pInfo->groupResInfo, pInfo->aggSup.pResultRowHashTable, 0);
if (pGroupResInfo->pRows != NULL) {
taosArrayDestroy(pGroupResInfo->pRows);
}
if (pGroupResInfo->pBuf) {
taosMemoryFree(pGroupResInfo->pBuf);
pGroupResInfo->pBuf = NULL;
}
pGroupResInfo->index = 0;
pGroupResInfo->iter = 0;
pGroupResInfo->dataPos = NULL;
pOperator->cost.openCost = (taosGetTimestampUs() - st) / 1000.0;
return buildGroupResultDataBlock(pOperator);
return buildGroupResultDataBlockByHash(pOperator);
}
SOperatorInfo* createGroupOperatorInfo(SOperatorInfo* downstream, SAggPhysiNode* pAggNode, SExecTaskInfo* pTaskInfo) {

View File

@ -239,7 +239,7 @@ SOperatorInfo* createMergeJoinOperatorInfo(SOperatorInfo** pDownstream, int32_t
numOfDownstream = 2;
} else {
pInfo->downstreamResBlkId[0] = getOperatorResultBlockId(pDownstream[0], 0);
pInfo->downstreamResBlkId[1] = getOperatorResultBlockId(pDownstream[1], 1);
pInfo->downstreamResBlkId[1] = getOperatorResultBlockId(pDownstream[1], 0);
}
int32_t numOfCols = 0;

View File

@ -0,0 +1,531 @@
/*
* Copyright (c) 2019 TAOS Data, Inc. <jhtao@taosdata.com>
*
* This program is free software: you can use, redistribute, and/or modify
* it under the terms of the GNU Affero General Public License, version 3
* or later ("AGPL"), as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include "executorInt.h"
#include "filter.h"
#include "operator.h"
#include "querytask.h"
#include "tdatablock.h"
typedef struct SSortMergeInfo {
SArray* pSortInfo;
SSortHandle* pSortHandle;
STupleHandle* prefetchedTuple;
int32_t bufPageSize;
uint32_t sortBufSize; // max buffer size for in-memory sort
SSDataBlock* pIntermediateBlock; // to hold the intermediate result
SSDataBlock* pInputBlock;
SColMatchInfo matchInfo;
} SSortMergeInfo;
typedef struct SNonSortMergeInfo {
int32_t lastSourceIdx;
int32_t sourceWorkIdx;
int32_t sourceNum;
int32_t* pSourceStatus;
} SNonSortMergeInfo;
typedef struct SColsMergeInfo {
SNodeList* pTargets;
uint64_t srcBlkIds[2];
} SColsMergeInfo;
typedef struct SMultiwayMergeOperatorInfo {
SOptrBasicInfo binfo;
EMergeType type;
union {
SSortMergeInfo sortMergeInfo;
SNonSortMergeInfo nsortMergeInfo;
SColsMergeInfo colsMergeInfo;
};
SLimitInfo limitInfo;
bool groupMerge;
bool ignoreGroupId;
uint64_t groupId;
bool inputWithGroupId;
} SMultiwayMergeOperatorInfo;
SSDataBlock* sortMergeloadNextDataBlock(void* param) {
SOperatorInfo* pOperator = (SOperatorInfo*)param;
SSDataBlock* pBlock = pOperator->fpSet.getNextFn(pOperator);
return pBlock;
}
int32_t openSortMergeOperator(SOperatorInfo* pOperator) {
SMultiwayMergeOperatorInfo* pInfo = pOperator->info;
SExecTaskInfo* pTaskInfo = pOperator->pTaskInfo;
SSortMergeInfo* pSortMergeInfo = &pInfo->sortMergeInfo;
int32_t numOfBufPage = pSortMergeInfo->sortBufSize / pSortMergeInfo->bufPageSize;
pSortMergeInfo->pSortHandle = tsortCreateSortHandle(pSortMergeInfo->pSortInfo, SORT_MULTISOURCE_MERGE, pSortMergeInfo->bufPageSize, numOfBufPage,
pSortMergeInfo->pInputBlock, pTaskInfo->id.str, 0, 0, 0);
tsortSetFetchRawDataFp(pSortMergeInfo->pSortHandle, sortMergeloadNextDataBlock, NULL, NULL);
tsortSetCompareGroupId(pSortMergeInfo->pSortHandle, pInfo->groupMerge);
for (int32_t i = 0; i < pOperator->numOfDownstream; ++i) {
SOperatorInfo* pDownstream = pOperator->pDownstream[i];
if (pDownstream->operatorType == QUERY_NODE_PHYSICAL_PLAN_EXCHANGE) {
pDownstream->fpSet._openFn(pDownstream);
}
SSortSource* ps = taosMemoryCalloc(1, sizeof(SSortSource));
ps->param = pDownstream;
ps->onlyRef = true;
tsortAddSource(pSortMergeInfo->pSortHandle, ps);
}
return tsortOpen(pSortMergeInfo->pSortHandle);
}
static void doGetSortedBlockData(SMultiwayMergeOperatorInfo* pInfo, SSortHandle* pHandle, int32_t capacity,
SSDataBlock* p, bool* newgroup) {
SSortMergeInfo* pSortMergeInfo = &pInfo->sortMergeInfo;
*newgroup = false;
while (1) {
STupleHandle* pTupleHandle = NULL;
if (pInfo->groupMerge || pInfo->inputWithGroupId) {
if (pSortMergeInfo->prefetchedTuple == NULL) {
pTupleHandle = tsortNextTuple(pHandle);
} else {
pTupleHandle = pSortMergeInfo->prefetchedTuple;
pSortMergeInfo->prefetchedTuple = NULL;
uint64_t gid = tsortGetGroupId(pTupleHandle);
if (gid != pInfo->groupId) {
*newgroup = true;
pInfo->groupId = gid;
}
}
} else {
pTupleHandle = tsortNextTuple(pHandle);
pInfo->groupId = 0;
}
if (pTupleHandle == NULL) {
break;
}
if (pInfo->groupMerge || pInfo->inputWithGroupId) {
uint64_t tupleGroupId = tsortGetGroupId(pTupleHandle);
if (pInfo->groupId == 0 || pInfo->groupId == tupleGroupId) {
appendOneRowToDataBlock(p, pTupleHandle);
p->info.id.groupId = tupleGroupId;
pInfo->groupId = tupleGroupId;
} else {
if (p->info.rows == 0) {
appendOneRowToDataBlock(p, pTupleHandle);
p->info.id.groupId = pInfo->groupId = tupleGroupId;
} else {
pSortMergeInfo->prefetchedTuple = pTupleHandle;
break;
}
}
} else {
appendOneRowToDataBlock(p, pTupleHandle);
}
if (p->info.rows >= capacity) {
break;
}
}
}
SSDataBlock* doSortMerge(SOperatorInfo* pOperator) {
SExecTaskInfo* pTaskInfo = pOperator->pTaskInfo;
SMultiwayMergeOperatorInfo* pInfo = pOperator->info;
SSortMergeInfo* pSortMergeInfo = &pInfo->sortMergeInfo;
SSortHandle* pHandle = pSortMergeInfo->pSortHandle;
SSDataBlock* pDataBlock = pInfo->binfo.pRes;
SArray* pColMatchInfo = pSortMergeInfo->matchInfo.pList;
int32_t capacity = pOperator->resultInfo.capacity;
qDebug("start to merge final sorted rows, %s", GET_TASKID(pTaskInfo));
blockDataCleanup(pDataBlock);
if (pSortMergeInfo->pIntermediateBlock == NULL) {
pSortMergeInfo->pIntermediateBlock = tsortGetSortedDataBlock(pHandle);
if (pSortMergeInfo->pIntermediateBlock == NULL) {
terrno = TSDB_CODE_OUT_OF_MEMORY;
return NULL;
}
blockDataEnsureCapacity(pSortMergeInfo->pIntermediateBlock, capacity);
} else {
blockDataCleanup(pSortMergeInfo->pIntermediateBlock);
}
SSDataBlock* p = pSortMergeInfo->pIntermediateBlock;
bool newgroup = false;
while (1) {
doGetSortedBlockData(pInfo, pHandle, capacity, p, &newgroup);
if (p->info.rows == 0) {
break;
}
if (newgroup) {
resetLimitInfoForNextGroup(&pInfo->limitInfo);
}
applyLimitOffset(&pInfo->limitInfo, p, pTaskInfo);
if (p->info.rows > 0) {
break;
}
}
if (p->info.rows > 0) {
int32_t numOfCols = taosArrayGetSize(pColMatchInfo);
for (int32_t i = 0; i < numOfCols; ++i) {
SColMatchItem* pmInfo = taosArrayGet(pColMatchInfo, i);
SColumnInfoData* pSrc = taosArrayGet(p->pDataBlock, pmInfo->srcSlotId);
SColumnInfoData* pDst = taosArrayGet(pDataBlock->pDataBlock, pmInfo->dstSlotId);
colDataAssign(pDst, pSrc, p->info.rows, &pDataBlock->info);
}
pDataBlock->info.rows = p->info.rows;
pDataBlock->info.scanFlag = p->info.scanFlag;
if (pInfo->ignoreGroupId) {
pDataBlock->info.id.groupId = 0;
} else {
pDataBlock->info.id.groupId = pInfo->groupId;
}
pDataBlock->info.dataLoad = 1;
}
qDebug("%s get sorted block, groupId:0x%" PRIx64 " rows:%" PRId64 , GET_TASKID(pTaskInfo), pDataBlock->info.id.groupId,
pDataBlock->info.rows);
return (pDataBlock->info.rows > 0) ? pDataBlock : NULL;
}
int32_t getSortMergeExplainExecInfo(SOperatorInfo* pOptr, void** pOptrExplain, uint32_t* len) {
SSortExecInfo* pSortExecInfo = taosMemoryCalloc(1, sizeof(SSortExecInfo));
SMultiwayMergeOperatorInfo* pInfo = (SMultiwayMergeOperatorInfo*)pOptr->info;
SSortMergeInfo* pSortMergeInfo = &pInfo->sortMergeInfo;
*pSortExecInfo = tsortGetSortExecInfo(pSortMergeInfo->pSortHandle);
*pOptrExplain = pSortExecInfo;
*len = sizeof(SSortExecInfo);
return TSDB_CODE_SUCCESS;
}
void destroySortMergeOperatorInfo(void* param) {
SSortMergeInfo* pSortMergeInfo = param;
pSortMergeInfo->pInputBlock = blockDataDestroy(pSortMergeInfo->pInputBlock);
pSortMergeInfo->pIntermediateBlock = blockDataDestroy(pSortMergeInfo->pIntermediateBlock);
taosArrayDestroy(pSortMergeInfo->matchInfo.pList);
tsortDestroySortHandle(pSortMergeInfo->pSortHandle);
taosArrayDestroy(pSortMergeInfo->pSortInfo);
}
#define NON_SORT_NEXT_SRC(_info, _idx) ((++(_idx) >= (_info)->sourceNum) ? ((_info)->sourceWorkIdx) : (_idx))
int32_t openNonSortMergeOperator(SOperatorInfo* pOperator) {
SMultiwayMergeOperatorInfo* pInfo = pOperator->info;
SExecTaskInfo* pTaskInfo = pOperator->pTaskInfo;
SNonSortMergeInfo* pNonSortMergeInfo = &pInfo->nsortMergeInfo;
pNonSortMergeInfo->sourceWorkIdx = 0;
pNonSortMergeInfo->sourceNum = pOperator->numOfDownstream;
pNonSortMergeInfo->lastSourceIdx = -1;
pNonSortMergeInfo->pSourceStatus = taosMemoryCalloc(pOperator->numOfDownstream, sizeof(*pNonSortMergeInfo->pSourceStatus));
if (NULL == pNonSortMergeInfo->pSourceStatus) {
terrno = TSDB_CODE_OUT_OF_MEMORY;
return -1;
}
for (int32_t i = 0; i < pOperator->numOfDownstream; ++i) {
pNonSortMergeInfo->pSourceStatus[i] = i;
}
return TSDB_CODE_SUCCESS;
}
SSDataBlock* doNonSortMerge(SOperatorInfo* pOperator) {
SExecTaskInfo* pTaskInfo = pOperator->pTaskInfo;
SMultiwayMergeOperatorInfo* pInfo = pOperator->info;
SNonSortMergeInfo* pNonSortMerge = &pInfo->nsortMergeInfo;
SSDataBlock* pBlock = NULL;
qDebug("start to merge no sorted rows, %s", GET_TASKID(pTaskInfo));
int32_t idx = NON_SORT_NEXT_SRC(pNonSortMerge, pNonSortMerge->lastSourceIdx);
while (idx < pNonSortMerge->sourceNum) {
pBlock = getNextBlockFromDownstream(pOperator, pNonSortMerge->pSourceStatus[idx]);
if (NULL == pBlock) {
TSWAP(pNonSortMerge->pSourceStatus[pNonSortMerge->sourceWorkIdx], pNonSortMerge->pSourceStatus[idx]);
pNonSortMerge->sourceWorkIdx++;
idx = NON_SORT_NEXT_SRC(pNonSortMerge, idx);
continue;
}
break;
}
return pBlock;
}
void destroyNonSortMergeOperatorInfo(void* param) {
SNonSortMergeInfo* pNonSortMerge = param;
taosMemoryFree(pNonSortMerge->pSourceStatus);
}
int32_t getNonSortMergeExplainExecInfo(SOperatorInfo* pOptr, void** pOptrExplain, uint32_t* len) {
return TSDB_CODE_SUCCESS;
}
int32_t openColsMergeOperator(SOperatorInfo* pOperator) {
return TSDB_CODE_SUCCESS;
}
int32_t copyColumnsValue(SNodeList* pNodeList, uint64_t targetBlkId, SSDataBlock* pDst, SSDataBlock* pSrc) {
bool isNull = (NULL == pSrc || pSrc->info.rows <= 0);
size_t numOfCols = LIST_LENGTH(pNodeList);
for (int32_t i = 0; i < numOfCols; ++i) {
STargetNode* pNode = (STargetNode*)nodesListGetNode(pNodeList, i);
if (nodeType(pNode->pExpr) == QUERY_NODE_COLUMN && ((SColumnNode*)pNode->pExpr)->dataBlockId == targetBlkId) {
SColumnInfoData* pDstCol = taosArrayGet(pDst->pDataBlock, pNode->slotId);
if (isNull) {
colDataSetVal(pDstCol, 0, NULL, true);
} else {
SColumnInfoData* pSrcCol = taosArrayGet(pSrc->pDataBlock, ((SColumnNode*)pNode->pExpr)->slotId);
colDataAssign(pDstCol, pSrcCol, 1, &pDst->info);
}
}
}
return TSDB_CODE_SUCCESS;
}
SSDataBlock* doColsMerge(SOperatorInfo* pOperator) {
SExecTaskInfo* pTaskInfo = pOperator->pTaskInfo;
SMultiwayMergeOperatorInfo* pInfo = pOperator->info;
SSDataBlock* pBlock = NULL;
SColsMergeInfo* pColsMerge = &pInfo->colsMergeInfo;
int32_t nullBlkNum = 0;
qDebug("start to merge columns, %s", GET_TASKID(pTaskInfo));
for (int32_t i = 0; i < 2; ++i) {
pBlock = getNextBlockFromDownstream(pOperator, i);
if (pBlock && pBlock->info.rows > 1) {
qError("more than 1 row returned from downstream, rows:%" PRId64, pBlock->info.rows);
T_LONG_JMP(pTaskInfo->env, TSDB_CODE_QRY_EXECUTOR_INTERNAL_ERROR);
} else if (NULL == pBlock) {
nullBlkNum++;
}
copyColumnsValue(pColsMerge->pTargets, pColsMerge->srcBlkIds[i], pInfo->binfo.pRes, pBlock);
}
setOperatorCompleted(pOperator);
if (2 == nullBlkNum) {
return NULL;
}
pInfo->binfo.pRes->info.rows = 1;
return pInfo->binfo.pRes;
}
void destroyColsMergeOperatorInfo(void* param) {
}
int32_t getColsMergeExplainExecInfo(SOperatorInfo* pOptr, void** pOptrExplain, uint32_t* len) {
return TSDB_CODE_SUCCESS;
}
SOperatorFpSet gMultiwayMergeFps[MERGE_TYPE_MAX_VALUE] = {
{0},
{._openFn = openSortMergeOperator, .getNextFn = doSortMerge, .closeFn = destroySortMergeOperatorInfo, .getExplainFn = getSortMergeExplainExecInfo},
{._openFn = openNonSortMergeOperator, .getNextFn = doNonSortMerge, .closeFn = destroyNonSortMergeOperatorInfo, .getExplainFn = getNonSortMergeExplainExecInfo},
{._openFn = openColsMergeOperator, .getNextFn = doColsMerge, .closeFn = destroyColsMergeOperatorInfo, .getExplainFn = getColsMergeExplainExecInfo},
};
int32_t openMultiwayMergeOperator(SOperatorInfo* pOperator) {
int32_t code = 0;
SMultiwayMergeOperatorInfo* pInfo = pOperator->info;
SExecTaskInfo* pTaskInfo = pOperator->pTaskInfo;
if (OPTR_IS_OPENED(pOperator)) {
return TSDB_CODE_SUCCESS;
}
int64_t startTs = taosGetTimestampUs();
if (NULL != gMultiwayMergeFps[pInfo->type]._openFn) {
code = (*gMultiwayMergeFps[pInfo->type]._openFn)(pOperator);
}
pOperator->cost.openCost = (taosGetTimestampUs() - startTs) / 1000.0;
pOperator->status = OP_RES_TO_RETURN;
if (code != TSDB_CODE_SUCCESS) {
T_LONG_JMP(pTaskInfo->env, terrno);
}
OPTR_SET_OPENED(pOperator);
return code;
}
SSDataBlock* doMultiwayMerge(SOperatorInfo* pOperator) {
if (pOperator->status == OP_EXEC_DONE) {
return NULL;
}
SSDataBlock* pBlock = NULL;
SExecTaskInfo* pTaskInfo = pOperator->pTaskInfo;
SMultiwayMergeOperatorInfo* pInfo = pOperator->info;
int32_t code = pOperator->fpSet._openFn(pOperator);
if (code != TSDB_CODE_SUCCESS) {
T_LONG_JMP(pTaskInfo->env, code);
}
if (NULL != gMultiwayMergeFps[pInfo->type].getNextFn) {
pBlock = (*gMultiwayMergeFps[pInfo->type].getNextFn)(pOperator);
}
if (pBlock != NULL) {
pOperator->resultInfo.totalRows += pBlock->info.rows;
} else {
setOperatorCompleted(pOperator);
}
return pBlock;
}
void destroyMultiwayMergeOperatorInfo(void* param) {
SMultiwayMergeOperatorInfo* pInfo = (SMultiwayMergeOperatorInfo*)param;
pInfo->binfo.pRes = blockDataDestroy(pInfo->binfo.pRes);
if (NULL != gMultiwayMergeFps[pInfo->type].closeFn) {
(*gMultiwayMergeFps[pInfo->type].closeFn)(&pInfo->sortMergeInfo);
}
taosMemoryFreeClear(param);
}
int32_t getMultiwayMergeExplainExecInfo(SOperatorInfo* pOptr, void** pOptrExplain, uint32_t* len) {
int32_t code = 0;
SMultiwayMergeOperatorInfo* pInfo = (SMultiwayMergeOperatorInfo*)pOptr->info;
if (NULL != gMultiwayMergeFps[pInfo->type].getExplainFn) {
code = (*gMultiwayMergeFps[pInfo->type].getExplainFn)(pOptr, pOptrExplain, len);
}
return code;
}
SOperatorInfo* createMultiwayMergeOperatorInfo(SOperatorInfo** downStreams, size_t numStreams,
SMergePhysiNode* pMergePhyNode, SExecTaskInfo* pTaskInfo) {
SPhysiNode* pPhyNode = (SPhysiNode*)pMergePhyNode;
SMultiwayMergeOperatorInfo* pInfo = taosMemoryCalloc(1, sizeof(SMultiwayMergeOperatorInfo));
SOperatorInfo* pOperator = taosMemoryCalloc(1, sizeof(SOperatorInfo));
SDataBlockDescNode* pDescNode = pPhyNode->pOutputDataBlockDesc;
int32_t code = TSDB_CODE_SUCCESS;
if (pInfo == NULL || pOperator == NULL) {
code = TSDB_CODE_OUT_OF_MEMORY;
goto _error;
}
pInfo->groupMerge = pMergePhyNode->groupSort;
pInfo->ignoreGroupId = pMergePhyNode->ignoreGroupId;
pInfo->binfo.inputTsOrder = pMergePhyNode->node.inputTsOrder;
pInfo->binfo.outputTsOrder = pMergePhyNode->node.outputTsOrder;
pInfo->inputWithGroupId = pMergePhyNode->inputWithGroupId;
pInfo->type = pMergePhyNode->type;
switch (pInfo->type) {
case MERGE_TYPE_SORT: {
SSortMergeInfo* pSortMergeInfo = &pInfo->sortMergeInfo;
initLimitInfo(pMergePhyNode->node.pLimit, pMergePhyNode->node.pSlimit, &pInfo->limitInfo);
pInfo->binfo.pRes = createDataBlockFromDescNode(pDescNode);
SPhysiNode* pChildNode = (SPhysiNode*)nodesListGetNode(pPhyNode->pChildren, 0);
SSDataBlock* pInputBlock = createDataBlockFromDescNode(pChildNode->pOutputDataBlockDesc);
initResultSizeInfo(&pOperator->resultInfo, 1024);
blockDataEnsureCapacity(pInfo->binfo.pRes, pOperator->resultInfo.capacity);
size_t numOfCols = taosArrayGetSize(pInfo->binfo.pRes->pDataBlock);
int32_t rowSize = pInfo->binfo.pRes->info.rowSize;
int32_t numOfOutputCols = 0;
pSortMergeInfo->pSortInfo = createSortInfo(pMergePhyNode->pMergeKeys);
pSortMergeInfo->bufPageSize = getProperSortPageSize(rowSize, numOfCols);
pSortMergeInfo->sortBufSize = pSortMergeInfo->bufPageSize * (numStreams + 1); // one additional is reserved for merged result.
pSortMergeInfo->pInputBlock = pInputBlock;
code = extractColMatchInfo(pMergePhyNode->pTargets, pDescNode, &numOfOutputCols, COL_MATCH_FROM_SLOT_ID,
&pSortMergeInfo->matchInfo);
if (code != TSDB_CODE_SUCCESS) {
goto _error;
}
break;
}
case MERGE_TYPE_NON_SORT: {
SNonSortMergeInfo* pNonSortMerge = &pInfo->nsortMergeInfo;
break;
}
case MERGE_TYPE_COLUMNS: {
SColsMergeInfo* pColsMerge = &pInfo->colsMergeInfo;
pInfo->binfo.pRes = createDataBlockFromDescNode(pDescNode);
initResultSizeInfo(&pOperator->resultInfo, 1);
blockDataEnsureCapacity(pInfo->binfo.pRes, pOperator->resultInfo.capacity);
pColsMerge->pTargets = pMergePhyNode->pTargets;
pColsMerge->srcBlkIds[0] = getOperatorResultBlockId(downStreams[0], 0);
pColsMerge->srcBlkIds[1] = getOperatorResultBlockId(downStreams[1], 0);
break;
}
default:
qError("Invalid merge type: %d", pInfo->type);
code = TSDB_CODE_INVALID_PARA;
goto _error;
}
setOperatorInfo(pOperator, "MultiwayMergeOperator", QUERY_NODE_PHYSICAL_PLAN_MERGE, false, OP_NOT_OPENED, pInfo, pTaskInfo);
pOperator->fpSet = createOperatorFpSet(openMultiwayMergeOperator, doMultiwayMerge, NULL,
destroyMultiwayMergeOperatorInfo, optrDefaultBufFn, getMultiwayMergeExplainExecInfo, optrDefaultGetNextExtFn, NULL);
code = appendDownstream(pOperator, downStreams, numStreams);
if (code != TSDB_CODE_SUCCESS) {
goto _error;
}
return pOperator;
_error:
if (pInfo != NULL) {
destroyMultiwayMergeOperatorInfo(pInfo);
}
pTaskInfo->code = code;
taosMemoryFree(pOperator);
return NULL;
}

View File

@ -1035,7 +1035,7 @@ SOperatorInfo* createTableScanOperatorInfo(STableScanPhysiNode* pTableScanNode,
}
initLimitInfo(pScanNode->node.pLimit, pScanNode->node.pSlimit, &pInfo->base.limitInfo);
code = initQueryTableDataCond(&pInfo->base.cond, pTableScanNode);
code = initQueryTableDataCond(&pInfo->base.cond, pTableScanNode, readHandle);
if (code != TSDB_CODE_SUCCESS) {
goto _error;
}
@ -3533,7 +3533,7 @@ SOperatorInfo* createTableMergeScanOperatorInfo(STableScanPhysiNode* pTableScanN
goto _error;
}
code = initQueryTableDataCond(&pInfo->base.cond, pTableScanNode);
code = initQueryTableDataCond(&pInfo->base.cond, pTableScanNode, readHandle);
if (code != TSDB_CODE_SUCCESS) {
taosArrayDestroy(pInfo->base.matchInfo.pList);
goto _error;

View File

@ -675,293 +675,5 @@ _error:
return NULL;
}
//=====================================================================================
// Multiway Sort Merge operator
typedef struct SMultiwayMergeOperatorInfo {
SOptrBasicInfo binfo;
int32_t bufPageSize;
uint32_t sortBufSize; // max buffer size for in-memory sort
SLimitInfo limitInfo;
SArray* pSortInfo;
SSortHandle* pSortHandle;
SColMatchInfo matchInfo;
SSDataBlock* pInputBlock;
SSDataBlock* pIntermediateBlock; // to hold the intermediate result
int64_t startTs; // sort start time
bool groupSort;
bool ignoreGroupId;
uint64_t groupId;
STupleHandle* prefetchedTuple;
bool inputWithGroupId;
} SMultiwayMergeOperatorInfo;
int32_t openMultiwayMergeOperator(SOperatorInfo* pOperator) {
SMultiwayMergeOperatorInfo* pInfo = pOperator->info;
SExecTaskInfo* pTaskInfo = pOperator->pTaskInfo;
if (OPTR_IS_OPENED(pOperator)) {
return TSDB_CODE_SUCCESS;
}
pInfo->startTs = taosGetTimestampUs();
int32_t numOfBufPage = pInfo->sortBufSize / pInfo->bufPageSize;
pInfo->pSortHandle = tsortCreateSortHandle(pInfo->pSortInfo, SORT_MULTISOURCE_MERGE, pInfo->bufPageSize, numOfBufPage,
pInfo->pInputBlock, pTaskInfo->id.str, 0, 0, 0);
tsortSetFetchRawDataFp(pInfo->pSortHandle, loadNextDataBlock, NULL, NULL);
tsortSetCompareGroupId(pInfo->pSortHandle, pInfo->groupSort);
for (int32_t i = 0; i < pOperator->numOfDownstream; ++i) {
SOperatorInfo* pDownstream = pOperator->pDownstream[i];
if (pDownstream->operatorType == QUERY_NODE_PHYSICAL_PLAN_EXCHANGE) {
pDownstream->fpSet._openFn(pDownstream);
}
SSortSource* ps = taosMemoryCalloc(1, sizeof(SSortSource));
ps->param = pDownstream;
ps->onlyRef = true;
tsortAddSource(pInfo->pSortHandle, ps);
}
int32_t code = tsortOpen(pInfo->pSortHandle);
if (code != TSDB_CODE_SUCCESS) {
T_LONG_JMP(pTaskInfo->env, terrno);
}
pOperator->cost.openCost = (taosGetTimestampUs() - pInfo->startTs) / 1000.0;
pOperator->status = OP_RES_TO_RETURN;
OPTR_SET_OPENED(pOperator);
return TSDB_CODE_SUCCESS;
}
static void doGetSortedBlockData(SMultiwayMergeOperatorInfo* pInfo, SSortHandle* pHandle, int32_t capacity,
SSDataBlock* p, bool* newgroup) {
*newgroup = false;
while (1) {
STupleHandle* pTupleHandle = NULL;
if (pInfo->groupSort || pInfo->inputWithGroupId) {
if (pInfo->prefetchedTuple == NULL) {
pTupleHandle = tsortNextTuple(pHandle);
} else {
pTupleHandle = pInfo->prefetchedTuple;
pInfo->prefetchedTuple = NULL;
uint64_t gid = tsortGetGroupId(pTupleHandle);
if (gid != pInfo->groupId) {
*newgroup = true;
pInfo->groupId = gid;
}
}
} else {
pTupleHandle = tsortNextTuple(pHandle);
pInfo->groupId = 0;
}
if (pTupleHandle == NULL) {
break;
}
if (pInfo->groupSort || pInfo->inputWithGroupId) {
uint64_t tupleGroupId = tsortGetGroupId(pTupleHandle);
if (pInfo->groupId == 0 || pInfo->groupId == tupleGroupId) {
appendOneRowToDataBlock(p, pTupleHandle);
p->info.id.groupId = tupleGroupId;
pInfo->groupId = tupleGroupId;
} else {
if (p->info.rows == 0) {
appendOneRowToDataBlock(p, pTupleHandle);
p->info.id.groupId = pInfo->groupId = tupleGroupId;
} else {
pInfo->prefetchedTuple = pTupleHandle;
break;
}
}
} else {
appendOneRowToDataBlock(p, pTupleHandle);
}
if (p->info.rows >= capacity) {
break;
}
}
}
SSDataBlock* getMultiwaySortedBlockData(SSortHandle* pHandle, SSDataBlock* pDataBlock, SArray* pColMatchInfo,
SOperatorInfo* pOperator) {
SMultiwayMergeOperatorInfo* pInfo = pOperator->info;
int32_t capacity = pOperator->resultInfo.capacity;
SExecTaskInfo* pTaskInfo = pOperator->pTaskInfo;
blockDataCleanup(pDataBlock);
if (pInfo->pIntermediateBlock == NULL) {
pInfo->pIntermediateBlock = tsortGetSortedDataBlock(pHandle);
if (pInfo->pIntermediateBlock == NULL) {
terrno = TSDB_CODE_OUT_OF_MEMORY;
return NULL;
}
blockDataEnsureCapacity(pInfo->pIntermediateBlock, capacity);
} else {
blockDataCleanup(pInfo->pIntermediateBlock);
}
SSDataBlock* p = pInfo->pIntermediateBlock;
bool newgroup = false;
while (1) {
doGetSortedBlockData(pInfo, pHandle, capacity, p, &newgroup);
if (p->info.rows == 0) {
break;
}
if (newgroup) {
resetLimitInfoForNextGroup(&pInfo->limitInfo);
}
applyLimitOffset(&pInfo->limitInfo, p, pTaskInfo);
if (p->info.rows > 0) {
break;
}
}
if (p->info.rows > 0) {
int32_t numOfCols = taosArrayGetSize(pColMatchInfo);
for (int32_t i = 0; i < numOfCols; ++i) {
SColMatchItem* pmInfo = taosArrayGet(pColMatchInfo, i);
SColumnInfoData* pSrc = taosArrayGet(p->pDataBlock, pmInfo->srcSlotId);
SColumnInfoData* pDst = taosArrayGet(pDataBlock->pDataBlock, pmInfo->dstSlotId);
colDataAssign(pDst, pSrc, p->info.rows, &pDataBlock->info);
}
pDataBlock->info.rows = p->info.rows;
pDataBlock->info.scanFlag = p->info.scanFlag;
if (pInfo->ignoreGroupId) {
pDataBlock->info.id.groupId = 0;
} else {
pDataBlock->info.id.groupId = pInfo->groupId;
}
pDataBlock->info.dataLoad = 1;
}
qDebug("%s get sorted block, groupId:0x%" PRIx64 " rows:%" PRId64 , GET_TASKID(pTaskInfo), pDataBlock->info.id.groupId,
pDataBlock->info.rows);
return (pDataBlock->info.rows > 0) ? pDataBlock : NULL;
}
SSDataBlock* doMultiwayMerge(SOperatorInfo* pOperator) {
if (pOperator->status == OP_EXEC_DONE) {
return NULL;
}
SExecTaskInfo* pTaskInfo = pOperator->pTaskInfo;
SMultiwayMergeOperatorInfo* pInfo = pOperator->info;
int32_t code = pOperator->fpSet._openFn(pOperator);
if (code != TSDB_CODE_SUCCESS) {
T_LONG_JMP(pTaskInfo->env, code);
}
qDebug("start to merge final sorted rows, %s", GET_TASKID(pTaskInfo));
SSDataBlock* pBlock = getMultiwaySortedBlockData(pInfo->pSortHandle, pInfo->binfo.pRes, pInfo->matchInfo.pList, pOperator);
if (pBlock != NULL) {
pOperator->resultInfo.totalRows += pBlock->info.rows;
} else {
setOperatorCompleted(pOperator);
}
return pBlock;
}
void destroyMultiwayMergeOperatorInfo(void* param) {
SMultiwayMergeOperatorInfo* pInfo = (SMultiwayMergeOperatorInfo*)param;
pInfo->binfo.pRes = blockDataDestroy(pInfo->binfo.pRes);
pInfo->pInputBlock = blockDataDestroy(pInfo->pInputBlock);
pInfo->pIntermediateBlock = blockDataDestroy(pInfo->pIntermediateBlock);
tsortDestroySortHandle(pInfo->pSortHandle);
taosArrayDestroy(pInfo->pSortInfo);
taosArrayDestroy(pInfo->matchInfo.pList);
taosMemoryFreeClear(param);
}
int32_t getMultiwayMergeExplainExecInfo(SOperatorInfo* pOptr, void** pOptrExplain, uint32_t* len) {
SSortExecInfo* pSortExecInfo = taosMemoryCalloc(1, sizeof(SSortExecInfo));
SMultiwayMergeOperatorInfo* pInfo = (SMultiwayMergeOperatorInfo*)pOptr->info;
*pSortExecInfo = tsortGetSortExecInfo(pInfo->pSortHandle);
*pOptrExplain = pSortExecInfo;
*len = sizeof(SSortExecInfo);
return TSDB_CODE_SUCCESS;
}
SOperatorInfo* createMultiwayMergeOperatorInfo(SOperatorInfo** downStreams, size_t numStreams,
SMergePhysiNode* pMergePhyNode, SExecTaskInfo* pTaskInfo) {
SPhysiNode* pPhyNode = (SPhysiNode*)pMergePhyNode;
SMultiwayMergeOperatorInfo* pInfo = taosMemoryCalloc(1, sizeof(SMultiwayMergeOperatorInfo));
SOperatorInfo* pOperator = taosMemoryCalloc(1, sizeof(SOperatorInfo));
SDataBlockDescNode* pDescNode = pPhyNode->pOutputDataBlockDesc;
int32_t code = TSDB_CODE_SUCCESS;
if (pInfo == NULL || pOperator == NULL) {
code = TSDB_CODE_OUT_OF_MEMORY;
goto _error;
}
initLimitInfo(pMergePhyNode->node.pLimit, pMergePhyNode->node.pSlimit, &pInfo->limitInfo);
pInfo->binfo.pRes = createDataBlockFromDescNode(pDescNode);
int32_t rowSize = pInfo->binfo.pRes->info.rowSize;
int32_t numOfOutputCols = 0;
code = extractColMatchInfo(pMergePhyNode->pTargets, pDescNode, &numOfOutputCols, COL_MATCH_FROM_SLOT_ID,
&pInfo->matchInfo);
if (code != TSDB_CODE_SUCCESS) {
goto _error;
}
SPhysiNode* pChildNode = (SPhysiNode*)nodesListGetNode(pPhyNode->pChildren, 0);
SSDataBlock* pInputBlock = createDataBlockFromDescNode(pChildNode->pOutputDataBlockDesc);
initResultSizeInfo(&pOperator->resultInfo, 1024);
blockDataEnsureCapacity(pInfo->binfo.pRes, pOperator->resultInfo.capacity);
pInfo->groupSort = pMergePhyNode->groupSort;
pInfo->ignoreGroupId = pMergePhyNode->ignoreGroupId;
pInfo->pSortInfo = createSortInfo(pMergePhyNode->pMergeKeys);
pInfo->pInputBlock = pInputBlock;
size_t numOfCols = taosArrayGetSize(pInfo->binfo.pRes->pDataBlock);
pInfo->bufPageSize = getProperSortPageSize(rowSize, numOfCols);
pInfo->sortBufSize = pInfo->bufPageSize * (numStreams + 1); // one additional is reserved for merged result.
pInfo->binfo.inputTsOrder = pMergePhyNode->node.inputTsOrder;
pInfo->binfo.outputTsOrder = pMergePhyNode->node.outputTsOrder;
pInfo->inputWithGroupId = pMergePhyNode->inputWithGroupId;
setOperatorInfo(pOperator, "MultiwayMergeOperator", QUERY_NODE_PHYSICAL_PLAN_MERGE, false, OP_NOT_OPENED, pInfo, pTaskInfo);
pOperator->fpSet = createOperatorFpSet(openMultiwayMergeOperator, doMultiwayMerge, NULL,
destroyMultiwayMergeOperatorInfo, optrDefaultBufFn, getMultiwayMergeExplainExecInfo, optrDefaultGetNextExtFn, NULL);
code = appendDownstream(pOperator, downStreams, numStreams);
if (code != TSDB_CODE_SUCCESS) {
goto _error;
}
return pOperator;
_error:
if (pInfo != NULL) {
destroyMultiwayMergeOperatorInfo(pInfo);
}
pTaskInfo->code = code;
taosMemoryFree(pOperator);
return NULL;
}

View File

@ -2772,7 +2772,7 @@ const SBuiltinFuncDefinition funcMgtBuiltins[] = {
{
.name = "_cache_last",
.type = FUNCTION_TYPE_CACHE_LAST,
.classification = FUNC_MGT_AGG_FUNC | FUNC_MGT_MULTI_RES_FUNC | FUNC_MGT_FORBID_STREAM_FUNC | FUNC_MGT_FORBID_SYSTABLE_FUNC,
.classification = FUNC_MGT_AGG_FUNC | FUNC_MGT_MULTI_RES_FUNC | FUNC_MGT_SELECT_FUNC | FUNC_MGT_FORBID_STREAM_FUNC | FUNC_MGT_FORBID_SYSTABLE_FUNC,
.translateFunc = translateFirstLast,
.getEnvFunc = getFirstLastFuncEnv,
.initFunc = functionSetup,

View File

@ -565,7 +565,7 @@ int32_t avgFunction(SqlFunctionCtx* pCtx) {
numOfElem = pInput->numOfRows;
pAvgRes->count += pInput->numOfRows;
bool simdAvailable = tsAVXEnable && tsSIMDBuiltins && (numOfRows > THRESHOLD_SIZE);
bool simdAvailable = tsAVXEnable && tsSIMDEnable && (numOfRows > THRESHOLD_SIZE);
switch(type) {
case TSDB_DATA_TYPE_UTINYINT:

View File

@ -370,7 +370,7 @@ static int32_t findFirstValPosition(const SColumnInfoData* pCol, int32_t start,
static void handleInt8Col(const void* data, int32_t start, int32_t numOfRows, SMinmaxResInfo* pBuf, bool isMinFunc,
bool signVal) {
// AVX2 version to speedup the loop
if (tsAVX2Enable && tsSIMDBuiltins) {
if (tsAVX2Enable && tsSIMDEnable) {
pBuf->v = i8VectorCmpAVX2(data, numOfRows, isMinFunc, signVal);
} else {
if (!pBuf->assign) {
@ -404,7 +404,7 @@ static void handleInt8Col(const void* data, int32_t start, int32_t numOfRows, SM
static void handleInt16Col(const void* data, int32_t start, int32_t numOfRows, SMinmaxResInfo* pBuf, bool isMinFunc,
bool signVal) {
// AVX2 version to speedup the loop
if (tsAVX2Enable && tsSIMDBuiltins) {
if (tsAVX2Enable && tsSIMDEnable) {
pBuf->v = i16VectorCmpAVX2(data, numOfRows, isMinFunc, signVal);
} else {
if (!pBuf->assign) {
@ -438,7 +438,7 @@ static void handleInt16Col(const void* data, int32_t start, int32_t numOfRows, S
static void handleInt32Col(const void* data, int32_t start, int32_t numOfRows, SMinmaxResInfo* pBuf, bool isMinFunc,
bool signVal) {
// AVX2 version to speedup the loop
if (tsAVX2Enable && tsSIMDBuiltins) {
if (tsAVX2Enable && tsSIMDEnable) {
pBuf->v = i32VectorCmpAVX2(data, numOfRows, isMinFunc, signVal);
} else {
if (!pBuf->assign) {
@ -502,7 +502,7 @@ static void handleFloatCol(SColumnInfoData* pCol, int32_t start, int32_t numOfRo
float* val = (float*)&pBuf->v;
// AVX version to speedup the loop
if (tsAVXEnable && tsSIMDBuiltins) {
if (tsAVXEnable && tsSIMDEnable) {
*val = floatVectorCmpAVX(pData, numOfRows, isMinFunc);
} else {
if (!pBuf->assign) {
@ -533,7 +533,7 @@ static void handleDoubleCol(SColumnInfoData* pCol, int32_t start, int32_t numOfR
double* val = (double*)&pBuf->v;
// AVX version to speedup the loop
if (tsAVXEnable && tsSIMDBuiltins) {
if (tsAVXEnable && tsSIMDEnable) {
*val = (double)doubleVectorCmpAVX(pData, numOfRows, isMinFunc);
} else {
if (!pBuf->assign) {

View File

@ -419,6 +419,7 @@ static int32_t logicScanCopy(const SScanLogicNode* pSrc, SScanLogicNode* pDst) {
COPY_SCALAR_FIELD(groupSort);
CLONE_NODE_LIST_FIELD(pTags);
CLONE_NODE_FIELD(pSubtable);
COPY_SCALAR_FIELD(cacheLastMode);
COPY_SCALAR_FIELD(igLastNull);
COPY_SCALAR_FIELD(groupOrderScan);
COPY_SCALAR_FIELD(onlyMetaCtbIdx);
@ -443,8 +444,14 @@ static int32_t logicAggCopy(const SAggLogicNode* pSrc, SAggLogicNode* pDst) {
COPY_BASE_OBJECT_FIELD(node, logicNodeCopy);
CLONE_NODE_LIST_FIELD(pGroupKeys);
CLONE_NODE_LIST_FIELD(pAggFuncs);
COPY_SCALAR_FIELD(hasLastRow);
COPY_SCALAR_FIELD(hasLast);
COPY_SCALAR_FIELD(hasTimeLineFunc);
COPY_SCALAR_FIELD(onlyHasKeepOrderFunc);
COPY_SCALAR_FIELD(hasGroupKeyOptimized);
COPY_SCALAR_FIELD(isGroupTb);
COPY_SCALAR_FIELD(isPartTb);
COPY_SCALAR_FIELD(hasGroup);
return TSDB_CODE_SUCCESS;
}
@ -488,6 +495,8 @@ static int32_t logicMergeCopy(const SMergeLogicNode* pSrc, SMergeLogicNode* pDst
CLONE_NODE_LIST_FIELD(pInputs);
COPY_SCALAR_FIELD(numOfChannels);
COPY_SCALAR_FIELD(srcGroupId);
COPY_SCALAR_FIELD(colsMerge);
COPY_SCALAR_FIELD(needSort);
COPY_SCALAR_FIELD(groupSort);
COPY_SCALAR_FIELD(ignoreGroupId);
COPY_SCALAR_FIELD(inputWithGroupId);

View File

@ -1773,6 +1773,7 @@ static int32_t jsonToPhysiTagScanNode(const SJson* pJson, void* pObj) {
static const char* jkLastRowScanPhysiPlanGroupTags = "GroupTags";
static const char* jkLastRowScanPhysiPlanGroupSort = "GroupSort";
static const char* jkLastRowScanPhysiPlanTargets = "Targets";
static int32_t physiLastRowScanNodeToJson(const void* pObj, SJson* pJson) {
const SLastRowScanPhysiNode* pNode = (const SLastRowScanPhysiNode*)pObj;
@ -1784,6 +1785,9 @@ static int32_t physiLastRowScanNodeToJson(const void* pObj, SJson* pJson) {
if (TSDB_CODE_SUCCESS == code) {
code = tjsonAddBoolToObject(pJson, jkLastRowScanPhysiPlanGroupSort, pNode->groupSort);
}
if (TSDB_CODE_SUCCESS == code) {
code = nodeListToJson(pJson, jkLastRowScanPhysiPlanTargets, pNode->pTargets);
}
return code;
}
@ -1798,6 +1802,9 @@ static int32_t jsonToPhysiLastRowScanNode(const SJson* pJson, void* pObj) {
if (TSDB_CODE_SUCCESS == code) {
code = tjsonGetBoolValue(pJson, jkLastRowScanPhysiPlanGroupSort, &pNode->groupSort);
}
if (TSDB_CODE_SUCCESS == code) {
code = jsonToNodeList(pJson, jkLastRowScanPhysiPlanTargets, &pNode->pTargets);
}
return code;
}
@ -2278,6 +2285,7 @@ static const char* jkMergePhysiPlanSrcGroupId = "SrcGroupId";
static const char* jkMergePhysiPlanGroupSort = "GroupSort";
static const char* jkMergePhysiPlanIgnoreGroupID = "IgnoreGroupID";
static const char* jkMergePhysiPlanInputWithGroupId = "InputWithGroupId";
static const char* jkMergePhysiPlanType = "Type";
static int32_t physiMergeNodeToJson(const void* pObj, SJson* pJson) {
const SMergePhysiNode* pNode = (const SMergePhysiNode*)pObj;
@ -2304,6 +2312,9 @@ static int32_t physiMergeNodeToJson(const void* pObj, SJson* pJson) {
if (TSDB_CODE_SUCCESS == code) {
code = tjsonAddBoolToObject(pJson, jkMergePhysiPlanInputWithGroupId, pNode->inputWithGroupId);
}
if (TSDB_CODE_SUCCESS == code) {
code = tjsonAddIntegerToObject(pJson, jkMergePhysiPlanType, pNode->type);
}
return code;
}
@ -2330,6 +2341,9 @@ static int32_t jsonToPhysiMergeNode(const SJson* pJson, void* pObj) {
if (TSDB_CODE_SUCCESS == code) {
code = tjsonGetBoolValue(pJson, jkMergePhysiPlanIgnoreGroupID, &pNode->ignoreGroupId);
}
if (TSDB_CODE_SUCCESS == code) {
code = tjsonGetIntValue(pJson, jkMergePhysiPlanType, (int32_t*)&pNode->type);
}
return code;
}

View File

@ -2052,7 +2052,8 @@ enum {
PHY_LAST_ROW_SCAN_CODE_SCAN = 1,
PHY_LAST_ROW_SCAN_CODE_GROUP_TAGS,
PHY_LAST_ROW_SCAN_CODE_GROUP_SORT,
PHY_LAST_ROW_SCAN_CODE_IGNULL
PHY_LAST_ROW_SCAN_CODE_IGNULL,
PHY_LAST_ROW_SCAN_CODE_TARGETS
};
static int32_t physiLastRowScanNodeToMsg(const void* pObj, STlvEncoder* pEncoder) {
@ -2068,6 +2069,9 @@ static int32_t physiLastRowScanNodeToMsg(const void* pObj, STlvEncoder* pEncoder
if (TSDB_CODE_SUCCESS == code) {
code = tlvEncodeBool(pEncoder, PHY_LAST_ROW_SCAN_CODE_IGNULL, pNode->ignoreNull);
}
if (TSDB_CODE_SUCCESS == code) {
code = tlvEncodeObj(pEncoder, PHY_LAST_ROW_SCAN_CODE_TARGETS, nodeListToMsg, pNode->pTargets);
}
return code;
}
@ -2091,6 +2095,9 @@ static int32_t msgToPhysiLastRowScanNode(STlvDecoder* pDecoder, void* pObj) {
case PHY_LAST_ROW_SCAN_CODE_IGNULL:
code = tlvDecodeBool(pTlv, &pNode->ignoreNull);
break;
case PHY_LAST_ROW_SCAN_CODE_TARGETS:
code = msgToNodeListFromTlv(pTlv, (void**)&pNode->pTargets);
break;
default:
break;
}
@ -2683,6 +2690,7 @@ enum {
PHY_MERGE_CODE_GROUP_SORT,
PHY_MERGE_CODE_IGNORE_GROUP_ID,
PHY_MERGE_CODE_INPUT_WITH_GROUP_ID,
PHY_MERGE_CODE_TYPE,
};
static int32_t physiMergeNodeToMsg(const void* pObj, STlvEncoder* pEncoder) {
@ -2710,6 +2718,9 @@ static int32_t physiMergeNodeToMsg(const void* pObj, STlvEncoder* pEncoder) {
if (TSDB_CODE_SUCCESS == code) {
code = tlvEncodeBool(pEncoder, PHY_MERGE_CODE_INPUT_WITH_GROUP_ID, pNode->inputWithGroupId);
}
if (TSDB_CODE_SUCCESS == code) {
code = tlvEncodeI32(pEncoder, PHY_MERGE_CODE_TYPE, pNode->type);
}
return code;
}
@ -2745,6 +2756,9 @@ static int32_t msgToPhysiMergeNode(STlvDecoder* pDecoder, void* pObj) {
case PHY_MERGE_CODE_INPUT_WITH_GROUP_ID:
code = tlvDecodeBool(pTlv, &pNode->inputWithGroupId);
break;
case PHY_MERGE_CODE_TYPE:
code = tlvDecodeI32(pTlv, (int32_t*)&pNode->type);
break;
default:
break;
}

View File

@ -1285,6 +1285,7 @@ void nodesDestroyNode(SNode* pNode) {
SLastRowScanPhysiNode* pPhyNode = (SLastRowScanPhysiNode*)pNode;
destroyScanPhysiNode((SScanPhysiNode*)pNode);
nodesDestroyList(pPhyNode->pGroupTags);
nodesDestroyList(pPhyNode->pTargets);
break;
}
case QUERY_NODE_PHYSICAL_PLAN_TABLE_SCAN:
@ -1571,6 +1572,19 @@ int32_t nodesListStrictAppendList(SNodeList* pTarget, SNodeList* pSrc) {
return code;
}
int32_t nodesListMakeStrictAppendList(SNodeList** pTarget, SNodeList* pSrc) {
if (NULL == *pTarget) {
*pTarget = nodesMakeList();
if (NULL == *pTarget) {
terrno = TSDB_CODE_OUT_OF_MEMORY;
return TSDB_CODE_OUT_OF_MEMORY;
}
}
return nodesListStrictAppendList(*pTarget, pSrc);
}
int32_t nodesListPushFront(SNodeList* pList, SNode* pNode) {
if (NULL == pList || NULL == pNode) {
return TSDB_CODE_FAILED;

View File

@ -176,12 +176,15 @@ static int32_t calcConstStmtCondition(SCalcConstContext* pCxt, SNode** pCond, bo
static EDealRes doFindAndReplaceNode(SNode** pNode, void* pContext) {
SCalcConstContext* pCxt = pContext;
if (pCxt->replaceCxt.pTarget == *pNode) {
char aliasName[TSDB_COL_NAME_LEN] = {0};
strcpy(aliasName, ((SExprNode*)*pNode)->aliasName);
nodesDestroyNode(*pNode);
*pNode = nodesCloneNode(pCxt->replaceCxt.pNew);
if (NULL == *pNode) {
pCxt->code = TSDB_CODE_OUT_OF_MEMORY;
return DEAL_RES_ERROR;
}
strcpy(((SExprNode*)*pNode)->aliasName, aliasName);
pCxt->replaceCxt.replaced = true;
return DEAL_RES_END;
@ -211,7 +214,6 @@ static int32_t calcConstProject(SCalcConstContext* pCxt, SNode* pProject, bool d
}
char aliasName[TSDB_COL_NAME_LEN] = {0};
strcpy(aliasName, ((SExprNode*)pProject)->aliasName);
int32_t code = TSDB_CODE_SUCCESS;
if (dual) {
code = scalarCalculateConstantsFromDual(pProject, pNew);
@ -219,15 +221,20 @@ static int32_t calcConstProject(SCalcConstContext* pCxt, SNode* pProject, bool d
code = scalarCalculateConstants(pProject, pNew);
}
if (TSDB_CODE_SUCCESS == code) {
strcpy(((SExprNode*)*pNew)->aliasName, aliasName);
if (QUERY_NODE_VALUE == nodeType(*pNew) && NULL != pAssociation) {
int32_t size = taosArrayGetSize(pAssociation);
for (int32_t i = 0; i < size; ++i) {
SAssociationNode* pAssNode = taosArrayGet(pAssociation, i);
SNode** pCol = pAssNode->pPlace;
if (*pCol == pAssNode->pAssociationNode) {
strcpy(aliasName, ((SExprNode*)*pCol)->aliasName);
SArray* pOrigAss = NULL;
TSWAP(((SExprNode*)*pCol)->pAssociation, pOrigAss);
nodesDestroyNode(*pCol);
*pCol = nodesCloneNode(*pNew);
TSWAP(pOrigAss, ((SExprNode*)*pCol)->pAssociation);
taosArrayDestroy(pOrigAss);
strcpy(((SExprNode*)*pCol)->aliasName, aliasName);
if (NULL == *pCol) {
code = TSDB_CODE_OUT_OF_MEMORY;
break;

View File

@ -3925,6 +3925,267 @@ static int32_t translatePartitionBy(STranslateContext* pCxt, SSelectStmt* pSelec
return code;
}
typedef struct SEqCondTbNameTableInfo {
SRealTableNode* pRealTable;
SArray* aTbnames;
} SEqCondTbNameTableInfo;
//[tableAlias.]tbname = tbNamVal
static bool isOperatorEqTbnameCond(STranslateContext* pCxt, SOperatorNode* pOperator, char** ppTableAlias, SArray** ppTabNames) {
if (pOperator->opType != OP_TYPE_EQUAL) return false;
SFunctionNode* pTbnameFunc = NULL;
SValueNode* pValueNode = NULL;
if (nodeType(pOperator->pLeft) == QUERY_NODE_FUNCTION &&
((SFunctionNode*)(pOperator->pLeft))->funcType == FUNCTION_TYPE_TBNAME &&
nodeType(pOperator->pRight) == QUERY_NODE_VALUE) {
pTbnameFunc = (SFunctionNode*)pOperator->pLeft;
pValueNode = (SValueNode*)pOperator->pRight;
} else if (nodeType(pOperator->pRight) == QUERY_NODE_FUNCTION &&
((SFunctionNode*)(pOperator->pRight))->funcType == FUNCTION_TYPE_TBNAME &&
nodeType(pOperator->pLeft) == QUERY_NODE_VALUE) {
pTbnameFunc = (SFunctionNode*)pOperator->pRight;
pValueNode = (SValueNode*)pOperator->pLeft;
} else {
return false;
}
if (LIST_LENGTH(pTbnameFunc->pParameterList) == 0) {
*ppTableAlias = NULL;
} else if (LIST_LENGTH(pTbnameFunc->pParameterList) == 1) {
SNode* pQualNode = nodesListGetNode(pTbnameFunc->pParameterList, 0);
if (nodeType(pQualNode) != QUERY_NODE_VALUE) return false;
SValueNode* pQualValNode = (SValueNode*)pQualNode;
*ppTableAlias = pQualValNode->literal;
} else {
return false;
}
*ppTabNames = taosArrayInit(1, sizeof(void*));
taosArrayPush(*ppTabNames, &(pValueNode->literal));
return true;
}
//[tableAlias.]tbname in (value1, value2, ...)
static bool isOperatorTbnameInCond(STranslateContext* pCxt, SOperatorNode* pOperator, char** ppTableAlias, SArray** ppTbNames) {
if (pOperator->opType != OP_TYPE_IN) return false;
if (nodeType(pOperator->pLeft) != QUERY_NODE_FUNCTION ||
((SFunctionNode*)(pOperator->pLeft))->funcType != FUNCTION_TYPE_TBNAME ||
nodeType(pOperator->pRight) != QUERY_NODE_NODE_LIST) {
return false;
}
SFunctionNode* pTbnameFunc = (SFunctionNode*)pOperator->pLeft;
if (LIST_LENGTH(pTbnameFunc->pParameterList) == 0) {
*ppTableAlias = NULL;
} else if (LIST_LENGTH(pTbnameFunc->pParameterList) == 1) {
SNode* pQualNode = nodesListGetNode(pTbnameFunc->pParameterList, 0);
if (nodeType(pQualNode) != QUERY_NODE_VALUE) return false;
SValueNode* pQualValNode = (SValueNode*)pQualNode;
*ppTableAlias = pQualValNode->literal;
} else {
return false;
}
*ppTbNames = taosArrayInit(1, sizeof(void*));
SNodeListNode* pValueListNode = (SNodeListNode*)pOperator->pRight;
SNodeList* pValueNodeList = pValueListNode->pNodeList;
SNode* pValNode = NULL;
FOREACH(pValNode, pValueNodeList) {
if (nodeType(pValNode) != QUERY_NODE_VALUE) {
return false;
}
taosArrayPush(*ppTbNames, &((SValueNode*)pValNode)->literal);
}
return true;
}
static bool findEqCondTbNameInOperatorNode(STranslateContext* pCxt, SNode* pWhere, SEqCondTbNameTableInfo* pInfo) {
int32_t code = TSDB_CODE_SUCCESS;
char* pTableAlias = NULL;
char* pTbNameVal = NULL;
if (isOperatorEqTbnameCond(pCxt, (SOperatorNode*)pWhere, &pTableAlias, &pInfo->aTbnames) ||
isOperatorTbnameInCond(pCxt, (SOperatorNode*)pWhere, &pTableAlias, &pInfo->aTbnames)) {
STableNode* pTable;
if (pTableAlias == NULL) {
pTable = (STableNode*)((SSelectStmt*)(pCxt->pCurrStmt))->pFromTable;
} else {
code = findTable(pCxt, pTableAlias, &pTable);
}
if (code == TSDB_CODE_SUCCESS && nodeType(pTable) == QUERY_NODE_REAL_TABLE &&
((SRealTableNode*)pTable)->pMeta && ((SRealTableNode*)pTable)->pMeta->tableType == TSDB_SUPER_TABLE) {
pInfo->pRealTable = (SRealTableNode*)pTable;
return true;
}
taosArrayDestroy(pInfo->aTbnames);
pInfo->aTbnames = NULL;
}
return false;
}
static bool isTableExistInTableTbnames(SArray* aTableTbNames, SRealTableNode* pTable) {
for (int i = 0; i < taosArrayGetSize(aTableTbNames); ++i) {
SEqCondTbNameTableInfo* info = taosArrayGet(aTableTbNames, i);
if (info->pRealTable == pTable) {
return true;
}
}
return false;
}
static void findEqualCondTbnameInLogicCondAnd(STranslateContext* pCxt, SNode* pWhere, SArray* aTableTbnames) {
SNode* pTmpNode = NULL;
FOREACH(pTmpNode, ((SLogicConditionNode*)pWhere)->pParameterList) {
if (nodeType(pTmpNode) == QUERY_NODE_OPERATOR) {
SEqCondTbNameTableInfo info = {0};
bool bIsEqTbnameCond = findEqCondTbNameInOperatorNode(pCxt, pTmpNode, &info);
if (bIsEqTbnameCond) {
if (!isTableExistInTableTbnames(aTableTbnames, info.pRealTable)) {
//TODO: intersect tbNames of same table? speed
taosArrayPush(aTableTbnames, &info);
} else {
taosArrayDestroy(info.aTbnames);
}
}
}
//TODO: logic cond
}
}
static void unionEqualCondTbnamesOfSameTable(SArray* aTableTbnames, SEqCondTbNameTableInfo* pInfo) {
bool bFoundTable = false;
for (int i = 0; i < taosArrayGetSize(aTableTbnames); ++i) {
SEqCondTbNameTableInfo* info = taosArrayGet(aTableTbnames, i);
if (info->pRealTable == pInfo->pRealTable) {
taosArrayAddAll(info->aTbnames, pInfo->aTbnames);
taosArrayDestroy(pInfo->aTbnames);
pInfo->aTbnames = NULL;
bFoundTable = true;
break;
}
}
if (!bFoundTable) {
taosArrayPush(aTableTbnames, pInfo);
}
}
static void findEqualCondTbnameInLogicCondOr(STranslateContext* pCxt, SNode* pWhere, SArray* aTableTbnames) {
bool bAllTbName = true;
SNode* pTmpNode = NULL;
FOREACH(pTmpNode, ((SLogicConditionNode*)pWhere)->pParameterList) {
//TODO: logic cond
if (nodeType(pTmpNode) == QUERY_NODE_OPERATOR) {
SEqCondTbNameTableInfo info = {0};
bool bIsEqTbnameCond = findEqCondTbNameInOperatorNode(pCxt, pTmpNode, &info);
if (!bIsEqTbnameCond) {
bAllTbName = false;
break;
} else {
unionEqualCondTbnamesOfSameTable(aTableTbnames, &info);
}
} else {
bAllTbName = false;
break;
}
}
if (!bAllTbName) {
for (int i = 0; i < taosArrayGetSize(aTableTbnames); ++i) {
SEqCondTbNameTableInfo* pInfo = taosArrayGet(aTableTbnames, i);
taosArrayDestroy(pInfo->aTbnames);
pInfo->aTbnames = NULL;
}
taosArrayClear(aTableTbnames);
}
}
static int32_t findEqualCondTbname(STranslateContext* pCxt, SNode* pWhere, SArray* aTableTbnames) {
//TODO: optimize nested and/or condition. now only the fist level is processed.
if (nodeType(pWhere) == QUERY_NODE_OPERATOR) {
SEqCondTbNameTableInfo info = {0};
bool bIsEqTbnameCond = findEqCondTbNameInOperatorNode(pCxt, pWhere, &info);
if (bIsEqTbnameCond) {
taosArrayPush(aTableTbnames, &info);
}
} else if (nodeType(pWhere) == QUERY_NODE_LOGIC_CONDITION) {
if (((SLogicConditionNode*)pWhere)->condType == LOGIC_COND_TYPE_AND) {
findEqualCondTbnameInLogicCondAnd(pCxt, pWhere, aTableTbnames);
} else if (((SLogicConditionNode*)pWhere)->condType == LOGIC_COND_TYPE_OR) {
findEqualCondTbnameInLogicCondOr(pCxt, pWhere, aTableTbnames);
}
}
return TSDB_CODE_SUCCESS;
}
static int32_t findVgroupsFromEqualTbname(STranslateContext* pCxt, SEqCondTbNameTableInfo* pInfo, SVgroupsInfo* vgsInfo) {
int32_t nVgroups = 0;
int32_t nTbls = taosArrayGetSize(pInfo->aTbnames);
if (nTbls >= pInfo->pRealTable->pVgroupList->numOfVgroups) {
vgsInfo->numOfVgroups = 0;
return TSDB_CODE_SUCCESS;
}
for (int j = 0; j < nTbls; ++j) {
char* dbName = pInfo->pRealTable->table.dbName;
SName snameTb;
char* tbName = taosArrayGetP(pInfo->aTbnames, j);
toName(pCxt->pParseCxt->acctId, dbName, tbName, &snameTb);
SVgroupInfo vgInfo;
bool bExists;
int32_t code = catalogGetCachedTableHashVgroup(pCxt->pParseCxt->pCatalog, &snameTb, &vgInfo, &bExists);
if (code == TSDB_CODE_SUCCESS && bExists) {
bool bFoundVg = false;
for (int32_t k = 0; k < nVgroups; ++k) {
if (vgsInfo->vgroups[k].vgId == vgInfo.vgId) {
bFoundVg = true;
break;
}
}
if (!bFoundVg) {
vgsInfo->vgroups[nVgroups] = vgInfo;
++nVgroups;
vgsInfo->numOfVgroups = nVgroups;
}
} else {
vgsInfo->numOfVgroups = 0;
break;
}
}
return TSDB_CODE_SUCCESS;
}
static int32_t setEqualTbnameTableVgroups(STranslateContext* pCxt, SSelectStmt* pSelect, SArray* aTables) {
int32_t code = TSDB_CODE_SUCCESS;
for (int i = 0; i < taosArrayGetSize(aTables); ++i) {
SEqCondTbNameTableInfo* pInfo = taosArrayGet(aTables, i);
int32_t nTbls = taosArrayGetSize(pInfo->aTbnames);
SVgroupsInfo* vgsInfo = taosMemoryMalloc(sizeof(SVgroupsInfo) + nTbls * sizeof(SVgroupInfo));
int32_t nVgroups = 0;
findVgroupsFromEqualTbname(pCxt, pInfo, vgsInfo);
if (vgsInfo->numOfVgroups != 0) {
taosMemoryFree(pInfo->pRealTable->pVgroupList);
pInfo->pRealTable->pVgroupList = vgsInfo;
} else {
taosMemoryFree(vgsInfo);
}
}
return TSDB_CODE_SUCCESS;
}
static int32_t setTableVgroupsFromEqualTbnameCond(STranslateContext* pCxt, SSelectStmt* pSelect) {
int32_t code = TSDB_CODE_SUCCESS;
SArray* aTables = taosArrayInit(1, sizeof(SEqCondTbNameTableInfo));
code = findEqualCondTbname(pCxt, pSelect->pWhere, aTables);
if (code == TSDB_CODE_SUCCESS) {
code = setEqualTbnameTableVgroups(pCxt, pSelect, aTables);
}
for (int i = 0; i < taosArrayGetSize(aTables); ++i) {
SEqCondTbNameTableInfo* pInfo = taosArrayGet(aTables, i);
taosArrayDestroy(pInfo->aTbnames);
}
taosArrayDestroy(aTables);
return code;
}
static int32_t translateWhere(STranslateContext* pCxt, SSelectStmt* pSelect) {
pCxt->currClause = SQL_CLAUSE_WHERE;
int32_t code = translateExpr(pCxt, &pSelect->pWhere);
@ -3934,6 +4195,9 @@ static int32_t translateWhere(STranslateContext* pCxt, SSelectStmt* pSelect) {
if (TSDB_CODE_SUCCESS == code && pSelect->timeRange.skey > pSelect->timeRange.ekey) {
pSelect->isEmptyResult = true;
}
if (pSelect->pWhere != NULL) {
setTableVgroupsFromEqualTbnameCond(pCxt, pSelect);
}
return code;
}

View File

@ -43,6 +43,7 @@ int32_t optimizeLogicPlan(SPlanContext* pCxt, SLogicSubplan* pLogicSubplan);
int32_t splitLogicPlan(SPlanContext* pCxt, SLogicSubplan* pLogicSubplan);
int32_t scaleOutLogicPlan(SPlanContext* pCxt, SLogicSubplan* pLogicSubplan, SQueryLogicPlan** pLogicPlan);
int32_t createPhysiPlan(SPlanContext* pCxt, SQueryLogicPlan* pLogicPlan, SQueryPlan** pPlan, SArray* pExecNodeList);
int32_t validateQueryPlan(SPlanContext* pCxt, SQueryPlan* pPlan);
bool getBatchScanOptionFromHint(SNodeList* pList);
bool getSortForGroupOptHint(SNodeList* pList);

View File

@ -747,6 +747,7 @@ static int32_t createAggLogicNode(SLogicPlanContext* pCxt, SSelectStmt* pSelect,
pAgg->isGroupTb = pAgg->pGroupKeys ? keysHasTbname(pAgg->pGroupKeys) : 0;
pAgg->isPartTb = pSelect->pPartitionByList ? keysHasTbname(pSelect->pPartitionByList) : 0;
pAgg->hasGroup = pAgg->pGroupKeys || pSelect->pPartitionByList;
if (TSDB_CODE_SUCCESS == code) {
*pLogicNode = (SLogicNode*)pAgg;

View File

@ -26,6 +26,7 @@
#define OPTIMIZE_FLAG_PUSH_DOWN_CONDE OPTIMIZE_FLAG_MASK(1)
#define OPTIMIZE_FLAG_SET_MASK(val, mask) (val) |= (mask)
#define OPTIMIZE_FLAG_CLEAR_MASK(val, mask) (val) &= (~(mask))
#define OPTIMIZE_FLAG_TEST_MASK(val, mask) (((val) & (mask)) != 0)
typedef struct SOptimizeContext {
@ -2478,24 +2479,32 @@ static bool hasSuitableCache(int8_t cacheLastMode, bool hasLastRow, bool hasLast
return false;
}
static bool lastRowScanOptMayBeOptimized(SLogicNode* pNode) {
if (QUERY_NODE_LOGIC_PLAN_AGG != nodeType(pNode) || 1 != LIST_LENGTH(pNode->pChildren) ||
QUERY_NODE_LOGIC_PLAN_SCAN != nodeType(nodesListGetNode(pNode->pChildren, 0))) {
return false;
/// @brief check if we can apply last row scan optimization
/// @param lastColNum how many distinct last col specified
/// @param lastColId only used when lastColNum equals 1, the col id of the only one last col
/// @param selectNonPKColNum num of normal cols
/// @param selectNonPKColId only used when selectNonPKColNum equals 1, the col id of the only one select col
static bool lastRowScanOptCheckColNum(int32_t lastColNum, col_id_t lastColId,
int32_t selectNonPKColNum, col_id_t selectNonPKColId) {
// multi select non pk col + last func: select c1, c2, last(c1)
if (selectNonPKColNum > 1 && lastColNum > 0) return false;
if (selectNonPKColNum == 1) {
// select last(c1), last(c2), c1 ...
// which is not possible currently
if (lastColNum > 1) return false;
// select last(c1), c2 ...
if (lastColNum == 1 && lastColId != selectNonPKColId) return false;
}
return true;
}
SAggLogicNode* pAgg = (SAggLogicNode*)pNode;
SScanLogicNode* pScan = (SScanLogicNode*)nodesListGetNode(pNode->pChildren, 0);
// Only one of LAST and LASTROW can appear
if (pAgg->hasLastRow == pAgg->hasLast || NULL != pAgg->pGroupKeys || NULL != pScan->node.pConditions ||
!hasSuitableCache(pScan->cacheLastMode, pAgg->hasLastRow, pAgg->hasLast) ||
IS_TSWINDOW_SPECIFIED(pScan->scanRange)) {
return false;
}
bool hasLastFunc = false;
bool hasSelectFunc = false;
static bool lastRowScanOptCheckFuncList(SLogicNode* pNode, bool* hasOtherFunc) {
bool hasNonPKSelectFunc = false;
SNode* pFunc = NULL;
int32_t lastColNum = 0, selectNonPKColNum = 0;
col_id_t lastColId = -1, selectNonPKColId = -1;
FOREACH(pFunc, ((SAggLogicNode*)pNode)->pAggFuncs) {
SFunctionNode* pAggFunc = (SFunctionNode*)pFunc;
if (FUNCTION_TYPE_LAST == pAggFunc->funcType) {
@ -2505,23 +2514,75 @@ static bool lastRowScanOptMayBeOptimized(SLogicNode* pNode) {
if (pCol->colType != COLUMN_TYPE_COLUMN) {
return false;
}
if (lastColId != pCol->colId) {
lastColId = pCol->colId;
lastColNum++;
}
if (hasSelectFunc || QUERY_NODE_VALUE == nodeType(nodesListGetNode(pAggFunc->pParameterList, 0))) {
}
if (QUERY_NODE_VALUE == nodeType(nodesListGetNode(pAggFunc->pParameterList, 0))) {
return false;
}
hasLastFunc = true;
if (!lastRowScanOptCheckColNum(lastColNum, lastColId, selectNonPKColNum, selectNonPKColId))
return false;
} else if (FUNCTION_TYPE_SELECT_VALUE == pAggFunc->funcType) {
if (hasLastFunc) {
SNode* pParam = nodesListGetNode(pAggFunc->pParameterList, 0);
if (QUERY_NODE_COLUMN == nodeType(pParam)) {
SColumnNode* pCol = (SColumnNode*)pParam;
if (PRIMARYKEY_TIMESTAMP_COL_ID != pCol->colId) {
if (selectNonPKColId != pCol->colId) {
selectNonPKColId = pCol->colId;
selectNonPKColNum++;
}
} else {
continue;
}
} else if (lastColNum > 0) {
return false;
}
hasSelectFunc = true;
if (!lastRowScanOptCheckColNum(lastColNum, lastColId, selectNonPKColNum, selectNonPKColId))
return false;
} else if (FUNCTION_TYPE_GROUP_KEY == pAggFunc->funcType) {
if (!lastRowScanOptLastParaIsTag(nodesListGetNode(pAggFunc->pParameterList, 0))) {
return false;
}
} else if (FUNCTION_TYPE_LAST_ROW != pAggFunc->funcType) {
*hasOtherFunc = true;
}
}
return true;
}
static bool lastRowScanOptCheckLastCache(SAggLogicNode* pAgg, SScanLogicNode* pScan) {
// Only one of LAST and LASTROW can appear
if (pAgg->hasLastRow == pAgg->hasLast || (!pAgg->hasLast && !pAgg->hasLastRow) || NULL != pAgg->pGroupKeys || NULL != pScan->node.pConditions ||
!hasSuitableCache(pScan->cacheLastMode, pAgg->hasLastRow, pAgg->hasLast) ||
IS_TSWINDOW_SPECIFIED(pScan->scanRange)) {
return false;
}
return true;
}
static bool lastRowScanOptMayBeOptimized(SLogicNode* pNode) {
if (QUERY_NODE_LOGIC_PLAN_AGG != nodeType(pNode) || 1 != LIST_LENGTH(pNode->pChildren) ||
QUERY_NODE_LOGIC_PLAN_SCAN != nodeType(nodesListGetNode(pNode->pChildren, 0))) {
return false;
}
SAggLogicNode* pAgg = (SAggLogicNode*)pNode;
SScanLogicNode* pScan = (SScanLogicNode*)nodesListGetNode(pNode->pChildren, 0);
if (!lastRowScanOptCheckLastCache(pAgg, pScan)) {
return false;
}
bool hasOtherFunc = false;
if (!lastRowScanOptCheckFuncList(pNode, &hasOtherFunc)) {
return false;
}
if (hasOtherFunc) {
return false;
}
return true;
@ -2530,6 +2591,7 @@ static bool lastRowScanOptMayBeOptimized(SLogicNode* pNode) {
typedef struct SLastRowScanOptSetColDataTypeCxt {
bool doAgg;
SNodeList* pLastCols;
SNodeList* pOtherCols;
} SLastRowScanOptSetColDataTypeCxt;
static EDealRes lastRowScanOptSetColDataType(SNode* pNode, void* pContext) {
@ -2572,6 +2634,33 @@ static void lastRowScanOptSetLastTargets(SNodeList* pTargets, SNodeList* pLastCo
}
}
static void lastRowScanOptRemoveUslessTargets(SNodeList* pTargets, SNodeList* pList1, SNodeList* pList2) {
SNode* pTarget = NULL;
WHERE_EACH(pTarget, pTargets) {
bool found = false;
SNode* pCol = NULL;
FOREACH(pCol, pList1) {
if (nodesEqualNode(pCol, pTarget)) {
found = true;
break;
}
}
if (!found) {
FOREACH(pCol, pList2) {
if (nodesEqualNode(pCol, pTarget)) {
found = true;
break;
}
}
}
if (!found) {
ERASE_NODE(pTargets);
continue;
}
WHERE_NEXT;
}
}
static int32_t lastRowScanOptimize(SOptimizeContext* pCxt, SLogicSubplan* pLogicSubplan) {
SAggLogicNode* pAgg = (SAggLogicNode*)optFindPossibleNode(pLogicSubplan->pNode, lastRowScanOptMayBeOptimized);
@ -2579,8 +2668,11 @@ static int32_t lastRowScanOptimize(SOptimizeContext* pCxt, SLogicSubplan* pLogic
return TSDB_CODE_SUCCESS;
}
SLastRowScanOptSetColDataTypeCxt cxt = {.doAgg = true, .pLastCols = NULL};
SLastRowScanOptSetColDataTypeCxt cxt = {.doAgg = true, .pLastCols = NULL, .pOtherCols = NULL};
SNode* pNode = NULL;
SColumnNode* pPKTsCol = NULL;
SColumnNode* pNonPKCol = NULL;
FOREACH(pNode, pAgg->pAggFuncs) {
SFunctionNode* pFunc = (SFunctionNode*)pNode;
int32_t funcType = pFunc->funcType;
@ -2597,6 +2689,20 @@ static int32_t lastRowScanOptimize(SOptimizeContext* pCxt, SLogicSubplan* pLogic
nodesWalkExpr(nodesListGetNode(pFunc->pParameterList, 0), lastRowScanOptSetColDataType, &cxt);
nodesListErase(pFunc->pParameterList, nodesListGetCell(pFunc->pParameterList, 1));
}
} else {
pNode = nodesListGetNode(pFunc->pParameterList, 0);
nodesListMakeAppend(&cxt.pOtherCols, pNode);
if (FUNCTION_TYPE_SELECT_VALUE == funcType) {
if (nodeType(pNode) == QUERY_NODE_COLUMN) {
SColumnNode* pCol = (SColumnNode*)pNode;
if (pCol->colId == PRIMARYKEY_TIMESTAMP_COL_ID) {
pPKTsCol = pCol;
} else {
pNonPKCol = pCol;
}
}
}
}
}
@ -2608,6 +2714,17 @@ static int32_t lastRowScanOptimize(SOptimizeContext* pCxt, SLogicSubplan* pLogic
lastRowScanOptSetLastTargets(pScan->pScanCols, cxt.pLastCols, true);
nodesWalkExprs(pScan->pScanPseudoCols, lastRowScanOptSetColDataType, &cxt);
lastRowScanOptSetLastTargets(pScan->node.pTargets, cxt.pLastCols, false);
lastRowScanOptRemoveUslessTargets(pScan->node.pTargets, cxt.pLastCols, cxt.pOtherCols);
if (pPKTsCol && pScan->node.pTargets->length == 1) {
// when select last(ts),ts from ..., we add another ts to targets
sprintf(pPKTsCol->colName, "#sel_val.%p", pPKTsCol);
nodesListAppend(pScan->node.pTargets, nodesCloneNode((SNode*)pPKTsCol));
}
if (pNonPKCol && cxt.pLastCols->length == 1 && nodesEqualNode((SNode*)pNonPKCol, nodesListGetNode(cxt.pLastCols, 0))) {
// when select last(c1), c1 from ..., we add c1 to targets
sprintf(pNonPKCol->colName, "#sel_val.%p", pNonPKCol);
nodesListAppend(pScan->node.pTargets, nodesCloneNode((SNode*)pNonPKCol));
}
nodesClearList(cxt.pLastCols);
}
pAgg->hasLastRow = false;
@ -2617,6 +2734,208 @@ static int32_t lastRowScanOptimize(SOptimizeContext* pCxt, SLogicSubplan* pLogic
return TSDB_CODE_SUCCESS;
}
static bool splitCacheLastFuncOptMayBeOptimized(SLogicNode* pNode) {
if (QUERY_NODE_LOGIC_PLAN_AGG != nodeType(pNode) || 1 != LIST_LENGTH(pNode->pChildren) ||
QUERY_NODE_LOGIC_PLAN_SCAN != nodeType(nodesListGetNode(pNode->pChildren, 0))) {
return false;
}
SAggLogicNode* pAgg = (SAggLogicNode*)pNode;
SScanLogicNode* pScan = (SScanLogicNode*)nodesListGetNode(pNode->pChildren, 0);
if (!lastRowScanOptCheckLastCache(pAgg, pScan)) {
return false;
}
bool hasOtherFunc = false;
if (!lastRowScanOptCheckFuncList(pNode, &hasOtherFunc)) {
return false;
}
if (pAgg->hasGroup || !hasOtherFunc) {
return false;
}
return true;
}
static int32_t splitCacheLastFuncOptCreateAggLogicNode(SAggLogicNode** pNewAgg, SAggLogicNode* pAgg, SNodeList* pFunc, SNodeList* pTargets) {
SAggLogicNode* pNew = (SAggLogicNode*)nodesMakeNode(QUERY_NODE_LOGIC_PLAN_AGG);
if (NULL == pNew) {
nodesDestroyList(pFunc);
nodesDestroyList(pTargets);
return TSDB_CODE_OUT_OF_MEMORY;
}
pNew->hasLastRow = false;
pNew->hasLast = false;
pNew->hasTimeLineFunc = pAgg->hasTimeLineFunc;
pNew->hasGroupKeyOptimized = false;
pNew->onlyHasKeepOrderFunc = pAgg->onlyHasKeepOrderFunc;
pNew->node.groupAction = pAgg->node.groupAction;
pNew->node.requireDataOrder = pAgg->node.requireDataOrder;
pNew->node.resultDataOrder = pAgg->node.resultDataOrder;
pNew->node.pTargets = pTargets;
pNew->pAggFuncs = pFunc;
pNew->pGroupKeys = nodesCloneList(pAgg->pGroupKeys);
pNew->node.pConditions = nodesCloneNode(pAgg->node.pConditions);
pNew->isGroupTb = pAgg->isGroupTb;
pNew->isPartTb = pAgg->isPartTb;
pNew->hasGroup = pAgg->hasGroup;
pNew->node.pChildren = nodesCloneList(pAgg->node.pChildren);
SNode* pNode = NULL;
FOREACH(pNode, pNew->node.pChildren) {
if (QUERY_NODE_LOGIC_PLAN_SCAN == nodeType(pNode)) {
OPTIMIZE_FLAG_CLEAR_MASK(((SScanLogicNode*)pNode)->node.optimizedFlag, OPTIMIZE_FLAG_SCAN_PATH);
}
}
*pNewAgg = pNew;
return TSDB_CODE_SUCCESS;
}
static int32_t splitCacheLastFuncOptModifyAggLogicNode(SAggLogicNode* pAgg) {
pAgg->hasTimeLineFunc = false;
pAgg->onlyHasKeepOrderFunc = true;
return TSDB_CODE_SUCCESS;
}
static int32_t splitCacheLastFuncOptCreateMergeLogicNode(SMergeLogicNode** pNew, SAggLogicNode* pAgg1, SAggLogicNode* pAgg2) {
SMergeLogicNode* pMerge = (SMergeLogicNode*)nodesMakeNode(QUERY_NODE_LOGIC_PLAN_MERGE);
if (NULL == pMerge) {
return TSDB_CODE_OUT_OF_MEMORY;
}
pMerge->colsMerge = true;
pMerge->numOfChannels = 2;
pMerge->srcGroupId = -1;
pMerge->node.precision = pAgg1->node.precision;
SNode* pNewAgg1 = nodesCloneNode((SNode*)pAgg1);
SNode* pNewAgg2 = nodesCloneNode((SNode*)pAgg2);
if (NULL == pNewAgg1 || NULL == pNewAgg2) {
nodesDestroyNode(pNewAgg1);
nodesDestroyNode(pNewAgg2);
return TSDB_CODE_OUT_OF_MEMORY;
}
((SAggLogicNode*)pNewAgg1)->node.pParent = (SLogicNode*)pMerge;
((SAggLogicNode*)pNewAgg2)->node.pParent = (SLogicNode*)pMerge;
SNode* pNode = NULL;
FOREACH(pNode, ((SAggLogicNode*)pNewAgg1)->node.pChildren) {
((SLogicNode*)pNode)->pParent = (SLogicNode*)pNewAgg1;
}
FOREACH(pNode, ((SAggLogicNode*)pNewAgg2)->node.pChildren) {
((SLogicNode*)pNode)->pParent = (SLogicNode*)pNewAgg2;
}
int32_t code = nodesListMakeStrictAppendList(&pMerge->node.pTargets, nodesCloneList(pAgg1->node.pTargets));
if (TSDB_CODE_SUCCESS == code) {
code = nodesListMakeStrictAppendList(&pMerge->node.pTargets, nodesCloneList(pAgg2->node.pTargets));
}
if (TSDB_CODE_SUCCESS == code) {
code = nodesListMakeStrictAppend(&pMerge->node.pChildren, pNewAgg1);
}
if (TSDB_CODE_SUCCESS == code) {
code = nodesListMakeStrictAppend(&pMerge->node.pChildren, pNewAgg2);
}
if (TSDB_CODE_SUCCESS != code) {
nodesDestroyNode(pNewAgg1);
nodesDestroyNode(pNewAgg2);
nodesDestroyNode((SNode*)pMerge);
} else {
*pNew = pMerge;
}
return code;
}
static int32_t splitCacheLastFuncOptimize(SOptimizeContext* pCxt, SLogicSubplan* pLogicSubplan) {
SAggLogicNode* pAgg = (SAggLogicNode*)optFindPossibleNode(pLogicSubplan->pNode, splitCacheLastFuncOptMayBeOptimized);
if (NULL == pAgg) {
return TSDB_CODE_SUCCESS;
}
SNode* pNode = NULL;
SNodeList* pAggFuncList = NULL;
{
WHERE_EACH(pNode, pAgg->pAggFuncs) {
SFunctionNode* pFunc = (SFunctionNode*)pNode;
int32_t funcType = pFunc->funcType;
if (FUNCTION_TYPE_LAST_ROW != funcType && FUNCTION_TYPE_LAST != funcType &&
FUNCTION_TYPE_SELECT_VALUE != funcType && FUNCTION_TYPE_GROUP_KEY != funcType) {
nodesListMakeStrictAppend(&pAggFuncList, nodesCloneNode(pNode));
ERASE_NODE(pAgg->pAggFuncs);
continue;
}
WHERE_NEXT;
}
}
if (NULL == pAggFuncList) {
planError("empty agg func list while splite projections, funcNum:%d", pAgg->pAggFuncs->length);
return TSDB_CODE_PLAN_INTERNAL_ERROR;
}
SNodeList* pTargets = NULL;
{
WHERE_EACH(pNode, pAgg->node.pTargets) {
SColumnNode* pCol = (SColumnNode*)pNode;
SNode* pFuncNode = NULL;
bool found = false;
FOREACH(pFuncNode, pAggFuncList) {
SFunctionNode* pFunc = (SFunctionNode*)pFuncNode;
if (0 == strcmp(pFunc->node.aliasName, pCol->colName)) {
nodesListMakeStrictAppend(&pTargets, nodesCloneNode(pNode));
found = true;
break;
}
}
if (found) {
ERASE_NODE(pAgg->node.pTargets);
continue;
}
WHERE_NEXT;
}
}
if (NULL == pTargets) {
planError("empty target func list while splite projections, targetsNum:%d", pAgg->node.pTargets->length);
nodesDestroyList(pAggFuncList);
return TSDB_CODE_PLAN_INTERNAL_ERROR;
}
SMergeLogicNode* pMerge = NULL;
SAggLogicNode* pNewAgg = NULL;
int32_t code = splitCacheLastFuncOptCreateAggLogicNode(&pNewAgg, pAgg, pAggFuncList, pTargets);
if (TSDB_CODE_SUCCESS == code) {
code = splitCacheLastFuncOptModifyAggLogicNode(pAgg);
}
if (TSDB_CODE_SUCCESS == code) {
code = splitCacheLastFuncOptCreateMergeLogicNode(&pMerge, pNewAgg, pAgg);
}
if (TSDB_CODE_SUCCESS == code) {
code = replaceLogicNode(pLogicSubplan, (SLogicNode*)pAgg, (SLogicNode*)pMerge);
}
nodesDestroyNode((SNode *)pAgg);
nodesDestroyNode((SNode *)pNewAgg);
if (TSDB_CODE_SUCCESS != code) {
nodesDestroyNode((SNode *)pMerge);
}
pCxt->optimized = true;
return code;
}
// merge projects
static bool mergeProjectsMayBeOptimized(SLogicNode* pNode) {
if (QUERY_NODE_LOGIC_PLAN_PROJECT != nodeType(pNode) || 1 != LIST_LENGTH(pNode->pChildren)) {
@ -3700,6 +4019,7 @@ static const SOptimizeRule optimizeRuleSet[] = {
{.pName = "MergeProjects", .optimizeFunc = mergeProjectsOptimize},
{.pName = "RewriteTail", .optimizeFunc = rewriteTailOptimize},
{.pName = "RewriteUnique", .optimizeFunc = rewriteUniqueOptimize},
{.pName = "splitCacheLastFunc", .optimizeFunc = splitCacheLastFuncOptimize},
{.pName = "LastRowScan", .optimizeFunc = lastRowScanOptimize},
{.pName = "TagScan", .optimizeFunc = tagScanOptimize},
{.pName = "TableCountScan", .optimizeFunc = tableCountScanOptimize},

View File

@ -552,6 +552,7 @@ static int32_t createLastRowScanPhysiNode(SPhysiPlanContext* pCxt, SSubplan* pSu
if (NULL == pScan) {
return TSDB_CODE_OUT_OF_MEMORY;
}
pScan->pTargets = nodesCloneList(pScanLogicNode->node.pTargets);
pScan->pGroupTags = nodesCloneList(pScanLogicNode->pGroupTags);
if (NULL != pScanLogicNode->pGroupTags && NULL == pScan->pGroupTags) {
@ -1950,20 +1951,30 @@ static int32_t createExchangePhysiNodeByMerge(SMergePhysiNode* pMerge) {
return nodesListMakeStrictAppend(&pMerge->node.pChildren, (SNode*)pExchange);
}
static int32_t createMergePhysiNode(SPhysiPlanContext* pCxt, SMergeLogicNode* pMergeLogicNode, SPhysiNode** pPhyNode) {
static int32_t createMergePhysiNode(SPhysiPlanContext* pCxt, SNodeList* pChildren, SMergeLogicNode* pMergeLogicNode, SPhysiNode** pPhyNode) {
int32_t code = TSDB_CODE_SUCCESS;
SMergePhysiNode* pMerge =
(SMergePhysiNode*)makePhysiNode(pCxt, (SLogicNode*)pMergeLogicNode, QUERY_NODE_PHYSICAL_PLAN_MERGE);
if (NULL == pMerge) {
return TSDB_CODE_OUT_OF_MEMORY;
}
if (pMergeLogicNode->colsMerge) {
pMerge->type = MERGE_TYPE_COLUMNS;
} else if (pMergeLogicNode->needSort) {
pMerge->type = MERGE_TYPE_SORT;
} else {
pMerge->type = MERGE_TYPE_NON_SORT;
}
pMerge->numOfChannels = pMergeLogicNode->numOfChannels;
pMerge->srcGroupId = pMergeLogicNode->srcGroupId;
pMerge->groupSort = pMergeLogicNode->groupSort;
pMerge->ignoreGroupId = pMergeLogicNode->ignoreGroupId;
pMerge->inputWithGroupId = pMergeLogicNode->inputWithGroupId;
int32_t code = addDataBlockSlots(pCxt, pMergeLogicNode->pInputs, pMerge->node.pOutputDataBlockDesc);
if (!pMergeLogicNode->colsMerge) {
code = addDataBlockSlots(pCxt, pMergeLogicNode->pInputs, pMerge->node.pOutputDataBlockDesc);
if (TSDB_CODE_SUCCESS == code) {
for (int32_t i = 0; i < pMerge->numOfChannels; ++i) {
@ -1986,6 +1997,15 @@ static int32_t createMergePhysiNode(SPhysiPlanContext* pCxt, SMergeLogicNode* pM
if (TSDB_CODE_SUCCESS == code) {
code = addDataBlockSlots(pCxt, pMerge->pTargets, pMerge->node.pOutputDataBlockDesc);
}
} else {
SDataBlockDescNode* pLeftDesc = ((SPhysiNode*)nodesListGetNode(pChildren, 0))->pOutputDataBlockDesc;
SDataBlockDescNode* pRightDesc = ((SPhysiNode*)nodesListGetNode(pChildren, 1))->pOutputDataBlockDesc;
code = setListSlotId(pCxt, pLeftDesc->dataBlockId, pRightDesc->dataBlockId, pMergeLogicNode->node.pTargets, &pMerge->pTargets);
if (TSDB_CODE_SUCCESS == code) {
code = addDataBlockSlots(pCxt, pMerge->pTargets, pMerge->node.pOutputDataBlockDesc);
}
}
if (TSDB_CODE_SUCCESS == code) {
*pPhyNode = (SPhysiNode*)pMerge;
@ -2022,7 +2042,7 @@ static int32_t doCreatePhysiNode(SPhysiPlanContext* pCxt, SLogicNode* pLogicNode
case QUERY_NODE_LOGIC_PLAN_INTERP_FUNC:
return createInterpFuncPhysiNode(pCxt, pChildren, (SInterpFuncLogicNode*)pLogicNode, pPhyNode);
case QUERY_NODE_LOGIC_PLAN_MERGE:
return createMergePhysiNode(pCxt, (SMergeLogicNode*)pLogicNode, pPhyNode);
return createMergePhysiNode(pCxt, pChildren, (SMergeLogicNode*)pLogicNode, pPhyNode);
case QUERY_NODE_LOGIC_PLAN_GROUP_CACHE:
return createGroupCachePhysiNode(pCxt, pChildren, (SGroupCacheLogicNode*)pLogicNode, pPhyNode);
case QUERY_NODE_LOGIC_PLAN_DYN_QUERY_CTRL:

View File

@ -248,8 +248,6 @@ static bool stbSplHasMultiTbScan(bool streamQuery, SLogicNode* pNode) {
}
if (QUERY_NODE_LOGIC_PLAN_SCAN == nodeType(pChild) && stbSplIsMultiTbScan(streamQuery, (SScanLogicNode*)pChild)) {
return true;
} else if (QUERY_NODE_LOGIC_PLAN_SORT == nodeType(pChild)) {
return stbSplHasMultiTbScan(streamQuery, (SLogicNode*)pChild);
}
return false;
}
@ -540,11 +538,12 @@ static int32_t stbSplRewriteFromMergeNode(SMergeLogicNode* pMerge, SLogicNode* p
}
static int32_t stbSplCreateMergeNode(SSplitContext* pCxt, SLogicSubplan* pSubplan, SLogicNode* pSplitNode,
SNodeList* pMergeKeys, SLogicNode* pPartChild, bool groupSort) {
SNodeList* pMergeKeys, SLogicNode* pPartChild, bool groupSort, bool needSort) {
SMergeLogicNode* pMerge = (SMergeLogicNode*)nodesMakeNode(QUERY_NODE_LOGIC_PLAN_MERGE);
if (NULL == pMerge) {
return TSDB_CODE_OUT_OF_MEMORY;
}
pMerge->needSort = needSort;
pMerge->numOfChannels = stbSplGetNumOfVgroups(pPartChild);
pMerge->srcGroupId = pCxt->groupId;
pMerge->node.precision = pPartChild->precision;
@ -621,7 +620,7 @@ static int32_t stbSplSplitIntervalForBatch(SSplitContext* pCxt, SStableSplitInfo
code = stbSplCreateMergeKeysByPrimaryKey(((SWindowLogicNode*)pInfo->pSplitNode)->pTspk,
((SWindowLogicNode*)pInfo->pSplitNode)->node.outputTsOrder, &pMergeKeys);
if (TSDB_CODE_SUCCESS == code) {
code = stbSplCreateMergeNode(pCxt, NULL, pInfo->pSplitNode, pMergeKeys, pPartWindow, true);
code = stbSplCreateMergeNode(pCxt, NULL, pInfo->pSplitNode, pMergeKeys, pPartWindow, true, true);
}
if (TSDB_CODE_SUCCESS != code) {
nodesDestroyList(pMergeKeys);
@ -712,7 +711,7 @@ static int32_t stbSplSplitSessionOrStateForBatch(SSplitContext* pCxt, SStableSpl
((SWindowLogicNode*)pWindow)->node.inputTsOrder, &pMergeKeys);
if (TSDB_CODE_SUCCESS == code) {
code = stbSplCreateMergeNode(pCxt, pInfo->pSubplan, pChild, pMergeKeys, (SLogicNode*)pChild, true);
code = stbSplCreateMergeNode(pCxt, pInfo->pSubplan, pChild, pMergeKeys, (SLogicNode*)pChild, true, true);
}
if (TSDB_CODE_SUCCESS == code) {
@ -982,7 +981,7 @@ static int32_t stbSplAggNodeCreateMerge(SSplitContext* pCtx, SStableSplitInfo* p
}
}
}
code = stbSplCreateMergeNode(pCtx, NULL, pInfo->pSplitNode, pMergeKeys, pChildAgg, groupSort);
code = stbSplCreateMergeNode(pCtx, NULL, pInfo->pSplitNode, pMergeKeys, pChildAgg, groupSort, true);
if (TSDB_CODE_SUCCESS == code && sortForGroup) {
SMergeLogicNode* pMerge =
(SMergeLogicNode*)nodesListGetNode(pInfo->pSplitNode->pChildren, LIST_LENGTH(pInfo->pSplitNode->pChildren) - 1);
@ -1145,7 +1144,7 @@ static int32_t stbSplSplitSortNode(SSplitContext* pCxt, SStableSplitInfo* pInfo)
bool groupSort = ((SSortLogicNode*)pInfo->pSplitNode)->groupSort;
int32_t code = stbSplCreatePartSortNode((SSortLogicNode*)pInfo->pSplitNode, &pPartSort, &pMergeKeys);
if (TSDB_CODE_SUCCESS == code) {
code = stbSplCreateMergeNode(pCxt, pInfo->pSubplan, pInfo->pSplitNode, pMergeKeys, pPartSort, groupSort);
code = stbSplCreateMergeNode(pCxt, pInfo->pSubplan, pInfo->pSplitNode, pMergeKeys, pPartSort, groupSort, true);
}
if (TSDB_CODE_SUCCESS == code) {
nodesDestroyNode((SNode*)pInfo->pSplitNode);
@ -1195,7 +1194,7 @@ static int32_t stbSplSplitScanNodeWithPartTags(SSplitContext* pCxt, SStableSplit
SLogicNode* pSplitNode = NULL;
int32_t code = stbSplGetSplitNodeForScan(pInfo, &pSplitNode);
if (TSDB_CODE_SUCCESS == code) {
code = stbSplCreateMergeNode(pCxt, pInfo->pSubplan, pSplitNode, NULL, pSplitNode, true);
code = stbSplCreateMergeNode(pCxt, pInfo->pSubplan, pSplitNode, NULL, pSplitNode, true, true);
}
if (TSDB_CODE_SUCCESS == code) {
code = nodesListMakeStrictAppend(&pInfo->pSubplan->pChildren,
@ -1269,7 +1268,7 @@ static int32_t stbSplSplitMergeScanNode(SSplitContext* pCxt, SLogicSubplan* pSub
((SLimitNode*)pMergeScan->pLimit)->limit += ((SLimitNode*)pMergeScan->pLimit)->offset;
((SLimitNode*)pMergeScan->pLimit)->offset = 0;
}
code = stbSplCreateMergeNode(pCxt, pSubplan, (SLogicNode*)pScan, pMergeKeys, pMergeScan, groupSort);
code = stbSplCreateMergeNode(pCxt, pSubplan, (SLogicNode*)pScan, pMergeKeys, pMergeScan, groupSort, true);
}
if (TSDB_CODE_SUCCESS == code) {
nodesDestroyNode((SNode*)pScan);
@ -1345,7 +1344,7 @@ static int32_t stbSplSplitPartitionNode(SSplitContext* pCxt, SStableSplitInfo* p
code = stbSplCreateMergeKeysForPartitionNode(pInfo->pSplitNode, &pMergeKeys);
}
if (TSDB_CODE_SUCCESS == code) {
code = stbSplCreateMergeNode(pCxt, pInfo->pSubplan, pInfo->pSplitNode, pMergeKeys, pInfo->pSplitNode, true);
code = stbSplCreateMergeNode(pCxt, pInfo->pSubplan, pInfo->pSplitNode, pMergeKeys, pInfo->pSplitNode, true, true);
}
if (TSDB_CODE_SUCCESS == code) {
code = nodesListMakeStrictAppend(&pInfo->pSubplan->pChildren,
@ -1587,10 +1586,13 @@ typedef struct SSmaIndexSplitInfo {
static bool smaIdxSplFindSplitNode(SSplitContext* pCxt, SLogicSubplan* pSubplan, SLogicNode* pNode,
SSmaIndexSplitInfo* pInfo) {
if (QUERY_NODE_LOGIC_PLAN_MERGE == nodeType(pNode) && LIST_LENGTH(pNode->pChildren) > 1) {
int32_t nodeType = nodeType(nodesListGetNode(pNode->pChildren, 0));
if (nodeType == QUERY_NODE_LOGIC_PLAN_EXCHANGE || nodeType == QUERY_NODE_LOGIC_PLAN_MERGE) {
pInfo->pMerge = (SMergeLogicNode*)pNode;
pInfo->pSubplan = pSubplan;
return true;
}
}
return false;
}

View File

@ -0,0 +1,164 @@
/*
* Copyright (c) 2019 TAOS Data, Inc. <jhtao@taosdata.com>
*
* This program is free software: you can use, redistribute, and/or modify
* it under the terms of the GNU Affero General Public License, version 3
* or later ("AGPL"), as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include "planInt.h"
#include "catalog.h"
#include "functionMgt.h"
#include "systable.h"
#include "tglobal.h"
typedef struct SValidatePlanContext {
SPlanContext* pPlanCxt;
int32_t errCode;
} SValidatePlanContext;
int32_t doValidatePhysiNode(SValidatePlanContext* pCxt, SNode* pNode);
int32_t validateMergePhysiNode(SValidatePlanContext* pCxt, SMergePhysiNode* pMerge) {
if ((NULL != pMerge->node.pLimit || NULL != pMerge->node.pSlimit) && pMerge->type == MERGE_TYPE_NON_SORT) {
planError("no limit&slimit supported for non sort merge, pLimit:%p", pMerge->node.pLimit);
return TSDB_CODE_PLAN_INTERNAL_ERROR;
}
return TSDB_CODE_SUCCESS;
}
int32_t validateSubplanNode(SValidatePlanContext* pCxt, SSubplan* pSubPlan) {
if (SUBPLAN_TYPE_MODIFY == pSubPlan->subplanType) {
return TSDB_CODE_SUCCESS;
}
return doValidatePhysiNode(pCxt, (SNode*)pSubPlan->pNode);
}
int32_t validateQueryPlanNode(SValidatePlanContext* pCxt, SQueryPlan* pPlan) {
int32_t code = TSDB_CODE_SUCCESS;
SNode* pNode = NULL;
FOREACH(pNode, pPlan->pSubplans) {
if (QUERY_NODE_NODE_LIST != nodeType(pNode)) {
code = TSDB_CODE_PLAN_INTERNAL_ERROR;
break;
}
SNode* pSubNode = NULL;
SNodeListNode* pSubplans = (SNodeListNode*)pNode;
FOREACH(pSubNode, pSubplans->pNodeList) {
if (QUERY_NODE_PHYSICAL_SUBPLAN != nodeType(pNode)) {
code = TSDB_CODE_PLAN_INTERNAL_ERROR;
break;
}
code = doValidatePhysiNode(pCxt, pSubNode);
if (code) {
break;
}
}
}
return code;
}
int32_t doValidatePhysiNode(SValidatePlanContext* pCxt, SNode* pNode) {
switch (nodeType(pNode)) {
case QUERY_NODE_PHYSICAL_PLAN_TAG_SCAN:
case QUERY_NODE_PHYSICAL_PLAN_TABLE_SCAN:
case QUERY_NODE_PHYSICAL_PLAN_TABLE_SEQ_SCAN:
case QUERY_NODE_PHYSICAL_PLAN_TABLE_MERGE_SCAN:
case QUERY_NODE_PHYSICAL_PLAN_STREAM_SCAN:
case QUERY_NODE_PHYSICAL_PLAN_SYSTABLE_SCAN:
case QUERY_NODE_PHYSICAL_PLAN_BLOCK_DIST_SCAN:
case QUERY_NODE_PHYSICAL_PLAN_LAST_ROW_SCAN:
case QUERY_NODE_PHYSICAL_PLAN_PROJECT:
case QUERY_NODE_PHYSICAL_PLAN_MERGE_JOIN:
case QUERY_NODE_PHYSICAL_PLAN_HASH_AGG:
case QUERY_NODE_PHYSICAL_PLAN_EXCHANGE:
break;
case QUERY_NODE_PHYSICAL_PLAN_MERGE:
return validateMergePhysiNode(pCxt, (SMergePhysiNode*)pNode);
case QUERY_NODE_PHYSICAL_PLAN_SORT:
case QUERY_NODE_PHYSICAL_PLAN_GROUP_SORT:
case QUERY_NODE_PHYSICAL_PLAN_HASH_INTERVAL:
case QUERY_NODE_PHYSICAL_PLAN_MERGE_INTERVAL:
case QUERY_NODE_PHYSICAL_PLAN_MERGE_ALIGNED_INTERVAL:
case QUERY_NODE_PHYSICAL_PLAN_STREAM_INTERVAL:
case QUERY_NODE_PHYSICAL_PLAN_STREAM_FINAL_INTERVAL:
case QUERY_NODE_PHYSICAL_PLAN_STREAM_SEMI_INTERVAL:
case QUERY_NODE_PHYSICAL_PLAN_FILL:
case QUERY_NODE_PHYSICAL_PLAN_STREAM_FILL:
case QUERY_NODE_PHYSICAL_PLAN_MERGE_SESSION:
case QUERY_NODE_PHYSICAL_PLAN_STREAM_SESSION:
case QUERY_NODE_PHYSICAL_PLAN_STREAM_SEMI_SESSION:
case QUERY_NODE_PHYSICAL_PLAN_STREAM_FINAL_SESSION:
case QUERY_NODE_PHYSICAL_PLAN_MERGE_STATE:
case QUERY_NODE_PHYSICAL_PLAN_STREAM_STATE:
case QUERY_NODE_PHYSICAL_PLAN_PARTITION:
case QUERY_NODE_PHYSICAL_PLAN_STREAM_PARTITION:
case QUERY_NODE_PHYSICAL_PLAN_INDEF_ROWS_FUNC:
case QUERY_NODE_PHYSICAL_PLAN_INTERP_FUNC:
case QUERY_NODE_PHYSICAL_PLAN_DISPATCH:
case QUERY_NODE_PHYSICAL_PLAN_INSERT:
case QUERY_NODE_PHYSICAL_PLAN_QUERY_INSERT:
case QUERY_NODE_PHYSICAL_PLAN_DELETE:
case QUERY_NODE_PHYSICAL_PLAN_TABLE_COUNT_SCAN:
case QUERY_NODE_PHYSICAL_PLAN_MERGE_EVENT:
case QUERY_NODE_PHYSICAL_PLAN_STREAM_EVENT:
case QUERY_NODE_PHYSICAL_PLAN_HASH_JOIN:
case QUERY_NODE_PHYSICAL_PLAN_GROUP_CACHE:
case QUERY_NODE_PHYSICAL_PLAN_DYN_QUERY_CTRL:
break;
case QUERY_NODE_PHYSICAL_SUBPLAN:
return validateSubplanNode(pCxt, (SSubplan*)pNode);
case QUERY_NODE_PHYSICAL_PLAN:
return validateQueryPlanNode(pCxt, (SQueryPlan *)pNode);
default:
break;
}
return TSDB_CODE_SUCCESS;
}
static void destoryValidatePlanContext(SValidatePlanContext* pCxt) {
}
int32_t validateQueryPlan(SPlanContext* pCxt, SQueryPlan* pPlan) {
SValidatePlanContext cxt = {.pPlanCxt = pCxt,
.errCode = TSDB_CODE_SUCCESS
};
int32_t code = TSDB_CODE_SUCCESS;
SNode* pNode = NULL;
FOREACH(pNode, pPlan->pSubplans) {
if (QUERY_NODE_NODE_LIST != nodeType(pNode)) {
code = TSDB_CODE_PLAN_INTERNAL_ERROR;
break;
}
SNode* pSubNode = NULL;
SNodeListNode* pSubplans = (SNodeListNode*)pNode;
FOREACH(pSubNode, pSubplans->pNodeList) {
code = doValidatePhysiNode(&cxt, pSubNode);
if (code) {
break;
}
}
if (code) {
break;
}
}
destoryValidatePlanContext(&cxt);
return code;
}

View File

@ -57,6 +57,9 @@ int32_t qCreateQueryPlan(SPlanContext* pCxt, SQueryPlan** pPlan, SArray* pExecNo
if (TSDB_CODE_SUCCESS == code) {
code = createPhysiPlan(pCxt, pLogicPlan, pPlan, pExecNodeList);
}
if (TSDB_CODE_SUCCESS == code) {
code = validateQueryPlan(pCxt, *pPlan);
}
if (TSDB_CODE_SUCCESS == code) {
dumpQueryPlan(*pPlan);
}

View File

@ -1230,7 +1230,6 @@ int32_t toTimestampFunction(SScalarParam* pInput, int32_t inputNum, SScalarParam
code = taosChar2Ts(format, &formats, tsStr, &ts, precision, errMsg, 128);
if (code) {
qError("func to_timestamp failed %s", errMsg);
code = code == -1 ? TSDB_CODE_FUNC_TO_TIMESTAMP_FAILED_FORMAT_ERR : TSDB_CODE_FUNC_TO_TIMESTAMP_FAILED_TS_ERR;
break;
}
colDataSetVal(pOutput->columnData, i, (char *)&ts, false);

View File

@ -44,7 +44,7 @@ typedef struct {
int64_t defaultCfInit;
} SBackendWrapper;
void* streamBackendInit(const char* path, int64_t chkpId);
void* streamBackendInit(const char* path, int64_t chkpId, int32_t vgId);
void streamBackendCleanup(void* arg);
void streamBackendHandleCleanup(void* arg);
int32_t streamBackendLoadCheckpointInfo(void* pMeta);

View File

@ -106,7 +106,6 @@ int32_t streamBroadcastToChildren(SStreamTask* pTask, const SSDataBlock* pBlock)
int32_t tEncodeStreamRetrieveReq(SEncoder* pEncoder, const SStreamRetrieveReq* pReq);
int32_t streamSaveAllTaskStatus(SStreamMeta* pMeta, int64_t checkpointId);
int32_t streamTaskBuildCheckpoint(SStreamTask* pTask);
int32_t streamSendCheckMsg(SStreamTask* pTask, const SStreamTaskCheckReq* pReq, int32_t nodeId, SEpSet* pEpSet);
int32_t streamAddCheckpointReadyMsg(SStreamTask* pTask, int32_t srcTaskId, int32_t index, int64_t checkpointId);

View File

@ -469,11 +469,11 @@ int32_t rebuildDirFromCheckpoint(const char* path, int64_t chkpId, char** dst) {
return 0;
}
void* streamBackendInit(const char* streamPath, int64_t chkpId) {
void* streamBackendInit(const char* streamPath, int64_t chkpId, int32_t vgId) {
char* backendPath = NULL;
int32_t code = rebuildDirFromCheckpoint(streamPath, chkpId, &backendPath);
stDebug("start to init stream backend at %s, checkpointid: %" PRId64 "", backendPath, chkpId);
stDebug("start to init stream backend at %s, checkpointid: %" PRId64 " vgId:%d", backendPath, chkpId, vgId);
uint32_t dbMemLimit = nextPow2(tsMaxStreamBackendCache) << 20;
SBackendWrapper* pHandle = taosMemoryCalloc(1, sizeof(SBackendWrapper));
@ -534,7 +534,7 @@ void* streamBackendInit(const char* streamPath, int64_t chkpId) {
if (cfs != NULL) {
rocksdb_list_column_families_destroy(cfs, nCf);
}
stDebug("succ to init stream backend at %s, backend:%p", backendPath, pHandle);
stDebug("succ to init stream backend at %s, backend:%p, vgId:%d", backendPath, pHandle, vgId);
taosMemoryFreeClear(backendPath);
return (void*)pHandle;

View File

@ -299,9 +299,12 @@ int32_t streamSaveAllTaskStatus(SStreamMeta* pMeta, int64_t checkpointId) {
continue;
}
ASSERT(p->chkInfo.checkpointId < p->checkpointingId && p->checkpointingId == checkpointId);
ASSERT(p->chkInfo.checkpointId < p->checkpointingId && p->checkpointingId == checkpointId &&
p->chkInfo.checkpointVer <= p->chkInfo.processedVer);
p->chkInfo.checkpointId = p->checkpointingId;
p->chkInfo.checkpointVer = p->chkInfo.processedVer;
streamTaskClearCheckInfo(p);
char* str = NULL;

View File

@ -129,6 +129,7 @@ SStreamDataSubmit* streamDataSubmitNew(SPackedData* pData, int32_t type) {
void streamDataSubmitDestroy(SStreamDataSubmit* pDataSubmit) {
ASSERT(pDataSubmit->type == STREAM_INPUT__DATA_SUBMIT);
taosMemoryFree(pDataSubmit->submit.msgStr);
taosFreeQitem(pDataSubmit);
}
SStreamMergedSubmit* streamMergedSubmitNew() {
@ -208,12 +209,10 @@ void streamFreeQitem(SStreamQueueItem* data) {
if (type == STREAM_INPUT__GET_RES) {
blockDataDestroy(((SStreamTrigger*)data)->pBlock);
taosFreeQitem(data);
} else if (type == STREAM_INPUT__DATA_BLOCK || type == STREAM_INPUT__DATA_RETRIEVE || type == STREAM_INPUT__TRANS_STATE) {
taosArrayDestroyEx(((SStreamDataBlock*)data)->blocks, (FDelete)blockDataFreeRes);
taosFreeQitem(data);
} else if (type == STREAM_INPUT__DATA_BLOCK || type == STREAM_INPUT__DATA_RETRIEVE) {
destroyStreamDataBlock((SStreamDataBlock*)data);
} else if (type == STREAM_INPUT__DATA_SUBMIT) {
streamDataSubmitDestroy((SStreamDataSubmit*)data);
taosFreeQitem(data);
} else if (type == STREAM_INPUT__MERGED_SUBMIT) {
SStreamMergedSubmit* pMerge = (SStreamMergedSubmit*)data;
@ -228,7 +227,7 @@ void streamFreeQitem(SStreamQueueItem* data) {
SStreamRefDataBlock* pRefBlock = (SStreamRefDataBlock*)data;
blockDataDestroy(pRefBlock->pBlock);
taosFreeQitem(pRefBlock);
} else if (type == STREAM_INPUT__CHECKPOINT || type == STREAM_INPUT__CHECKPOINT_TRIGGER) {
} else if (type == STREAM_INPUT__CHECKPOINT || type == STREAM_INPUT__CHECKPOINT_TRIGGER || type == STREAM_INPUT__TRANS_STATE) {
SStreamDataBlock* pBlock = (SStreamDataBlock*) data;
taosArrayDestroyEx(pBlock->blocks, freeItems);
taosFreeQitem(pBlock);

View File

@ -593,7 +593,7 @@ int32_t streamExecForAll(SStreamTask* pTask) {
const SStreamQueueItem* pItem = pInput;
stDebug("s-task:%s start to process batch of blocks, num:%d, type:%d", id, numOfBlocks, pItem->type);
int64_t ver = pTask->chkInfo.checkpointVer;
int64_t ver = pTask->chkInfo.processedVer;
doSetStreamInputBlock(pTask, pInput, &ver, id);
int64_t resSize = 0;
@ -604,13 +604,16 @@ int32_t streamExecForAll(SStreamTask* pTask) {
stDebug("s-task:%s batch of input blocks exec end, elapsed time:%.2fs, result size:%.2fMiB, numOfBlocks:%d", id, el,
SIZE_IN_MiB(resSize), totalBlocks);
// update the currentVer if processing the submit blocks.
ASSERT(pTask->chkInfo.checkpointVer <= pTask->chkInfo.nextProcessVer && ver >= pTask->chkInfo.checkpointVer);
SCheckpointInfo* pInfo = &pTask->chkInfo;
if (ver != pTask->chkInfo.checkpointVer) {
stDebug("s-task:%s update checkpointVer(unsaved) from %" PRId64 " to %" PRId64 ", nextProcessVer:%" PRId64,
pTask->id.idStr, pTask->chkInfo.checkpointVer, ver, pTask->chkInfo.nextProcessVer);
pTask->chkInfo.checkpointVer = ver;
// update the currentVer if processing the submit blocks.
ASSERT(pInfo->checkpointVer <= pInfo->nextProcessVer && ver >= pInfo->checkpointVer);
if (ver != pInfo->processedVer) {
stDebug("s-task:%s update processedVer(unsaved) from %" PRId64 " to %" PRId64 " nextProcessVer:%" PRId64
" ckpt:%" PRId64,
pTask->id.idStr, pInfo->processedVer, ver, pInfo->nextProcessVer, pInfo->checkpointVer);
pInfo->processedVer = ver;
}
streamFreeQitem(pInput);

View File

@ -28,7 +28,6 @@ int32_t streamBackendId = 0;
int32_t streamBackendCfWrapperId = 0;
int32_t streamMetaId = 0;
static int64_t streamGetLatestCheckpointId(SStreamMeta* pMeta);
static void metaHbToMnode(void* param, void* tmrId);
static void streamMetaClear(SStreamMeta* pMeta);
static int32_t streamMetaBegin(SStreamMeta* pMeta);
@ -192,14 +191,14 @@ SStreamMeta* streamMetaOpen(const char* path, void* ahandle, FTaskExpand expandF
taosHashInit(64, taosGetDefaultHashFunction(TSDB_DATA_TYPE_BINARY), false, HASH_ENTRY_LOCK);
pMeta->chkpSaved = taosArrayInit(4, sizeof(int64_t));
pMeta->chkpInUse = taosArrayInit(4, sizeof(int64_t));
pMeta->chkpCap = 8;
pMeta->chkpCap = 2;
taosInitRWLatch(&pMeta->chkpDirLock);
pMeta->chkpId = streamGetLatestCheckpointId(pMeta);
pMeta->streamBackend = streamBackendInit(pMeta->path, pMeta->chkpId);
pMeta->chkpId = streamMetaGetLatestCheckpointId(pMeta);
pMeta->streamBackend = streamBackendInit(pMeta->path, pMeta->chkpId, pMeta->vgId);
while (pMeta->streamBackend == NULL) {
taosMsleep(100);
pMeta->streamBackend = streamBackendInit(pMeta->path, pMeta->chkpId);
pMeta->streamBackend = streamBackendInit(pMeta->path, pMeta->chkpId, vgId);
if (pMeta->streamBackend == NULL) {
stInfo("vgId:%d failed to init stream backend, retry in 100ms", pMeta->vgId);
}
@ -263,7 +262,8 @@ int32_t streamMetaReopen(SStreamMeta* pMeta) {
}
}
while ((pMeta->streamBackend = streamBackendInit(pMeta->path, pMeta->chkpId)) == NULL) {
// todo: not wait in a critical region
while ((pMeta->streamBackend = streamBackendInit(pMeta->path, pMeta->chkpId, pMeta->vgId)) == NULL) {
stInfo("vgId:%d failed to init stream backend, retry in 100ms", pMeta->vgId);
taosMsleep(100);
}
@ -602,7 +602,7 @@ int32_t streamMetaCommit(SStreamMeta* pMeta) {
return 0;
}
int64_t streamGetLatestCheckpointId(SStreamMeta* pMeta) {
int64_t streamMetaGetLatestCheckpointId(SStreamMeta* pMeta) {
int64_t chkpId = 0;
TBC* pCur = NULL;
@ -853,6 +853,37 @@ static void clearHbMsg(SStreamHbMsg* pMsg, SArray* pIdList) {
taosArrayDestroy(pIdList);
}
static bool existInHbMsg(SStreamHbMsg* pMsg, SDownstreamTaskEpset* pTaskEpset) {
int32_t numOfExisted = taosArrayGetSize(pMsg->pUpdateNodes);
for (int k = 0; k < numOfExisted; ++k) {
if (pTaskEpset->nodeId == *(int32_t*)taosArrayGet(pMsg->pUpdateNodes, k)) {
return true;
}
}
return false;
}
static void addUpdateNodeIntoHbMsg(SStreamTask* pTask, SStreamHbMsg* pMsg) {
SStreamMeta* pMeta = pTask->pMeta;
taosThreadMutexLock(&pTask->lock);
int32_t num = taosArrayGetSize(pTask->outputInfo.pDownstreamUpdateList);
for (int j = 0; j < num; ++j) {
SDownstreamTaskEpset* pTaskEpset = taosArrayGet(pTask->outputInfo.pDownstreamUpdateList, j);
bool exist = existInHbMsg(pMsg, pTaskEpset);
if (!exist) {
taosArrayPush(pMsg->pUpdateNodes, &pTaskEpset->nodeId);
stDebug("vgId:%d nodeId:%d added into hb update list, total:%d", pMeta->vgId, pTaskEpset->nodeId,
(int32_t)taosArrayGetSize(pMsg->pUpdateNodes));
}
}
taosArrayClear(pTask->outputInfo.pDownstreamUpdateList);
taosThreadMutexUnlock(&pTask->lock);
}
void metaHbToMnode(void* param, void* tmrId) {
int64_t rid = *(int64_t*)param;
@ -948,28 +979,7 @@ void metaHbToMnode(void* param, void* tmrId) {
walReaderValidVersionRange((*pTask)->exec.pWalReader, &entry.verStart, &entry.verEnd);
}
taosThreadMutexLock(&(*pTask)->lock);
int32_t num = taosArrayGetSize((*pTask)->outputInfo.pDownstreamUpdateList);
for (int j = 0; j < num; ++j) {
int32_t* pNodeId = taosArrayGet((*pTask)->outputInfo.pDownstreamUpdateList, j);
bool exist = false;
int32_t numOfExisted = taosArrayGetSize(hbMsg.pUpdateNodes);
for (int k = 0; k < numOfExisted; ++k) {
if (*pNodeId == *(int32_t*)taosArrayGet(hbMsg.pUpdateNodes, k)) {
exist = true;
break;
}
}
if (!exist) {
taosArrayPush(hbMsg.pUpdateNodes, pNodeId);
}
}
taosArrayClear((*pTask)->outputInfo.pDownstreamUpdateList);
taosThreadMutexUnlock(&(*pTask)->lock);
addUpdateNodeIntoHbMsg(*pTask, &hbMsg);
taosArrayPush(hbMsg.pTaskStatus, &entry);
if (!hasMnodeEpset) {
epsetAssign(&epset, &(*pTask)->info.mnodeEpset);
@ -1009,7 +1019,7 @@ void metaHbToMnode(void* param, void* tmrId) {
pMeta->pHbInfo->hbCount += 1;
stDebug("vgId:%d, build and send hb to mnode, numOfTasks:%d total:%d", pMeta->vgId, hbMsg.numOfTasks,
stDebug("vgId:%d build and send hb to mnode, numOfTasks:%d total:%d", pMeta->vgId, hbMsg.numOfTasks,
pMeta->pHbInfo->hbCount);
tmsgSendReq(&epset, &msg);
} else {

View File

@ -270,7 +270,6 @@ int32_t streamTaskPutDataIntoInputQ(SStreamTask* pTask, SStreamQueueItem* pItem)
"s-task:%s inputQ is full, capacity(size:%d num:%dMiB), current(blocks:%d, size:%.2fMiB) stop to push data",
pTask->id.idStr, STREAM_TASK_QUEUE_CAPACITY, STREAM_TASK_QUEUE_CAPACITY_IN_SIZE, total, size);
streamDataSubmitDestroy(px);
taosFreeQitem(pItem);
return -1;
}
@ -280,7 +279,6 @@ int32_t streamTaskPutDataIntoInputQ(SStreamTask* pTask, SStreamQueueItem* pItem)
int32_t code = taosWriteQitem(pQueue, pItem);
if (code != TSDB_CODE_SUCCESS) {
streamDataSubmitDestroy(px);
taosFreeQitem(pItem);
return code;
}
@ -296,13 +294,13 @@ int32_t streamTaskPutDataIntoInputQ(SStreamTask* pTask, SStreamQueueItem* pItem)
stTrace("s-task:%s input queue is full, capacity:%d size:%d MiB, current(blocks:%d, size:%.2fMiB) abort",
pTask->id.idStr, STREAM_TASK_QUEUE_CAPACITY, STREAM_TASK_QUEUE_CAPACITY_IN_SIZE, total, size);
destroyStreamDataBlock((SStreamDataBlock*)pItem);
streamFreeQitem(pItem);
return -1;
}
int32_t code = taosWriteQitem(pQueue, pItem);
if (code != TSDB_CODE_SUCCESS) {
destroyStreamDataBlock((SStreamDataBlock*)pItem);
streamFreeQitem(pItem);
return code;
}
@ -312,7 +310,7 @@ int32_t streamTaskPutDataIntoInputQ(SStreamTask* pTask, SStreamQueueItem* pItem)
type == STREAM_INPUT__TRANS_STATE) {
int32_t code = taosWriteQitem(pQueue, pItem);
if (code != TSDB_CODE_SUCCESS) {
taosFreeQitem(pItem);
streamFreeQitem(pItem);
return code;
}
@ -323,7 +321,7 @@ int32_t streamTaskPutDataIntoInputQ(SStreamTask* pTask, SStreamQueueItem* pItem)
// use the default memory limit, refactor later.
int32_t code = taosWriteQitem(pQueue, pItem);
if (code != TSDB_CODE_SUCCESS) {
taosFreeQitem(pItem);
streamFreeQitem(pItem);
return code;
}

View File

@ -19,7 +19,6 @@
#include "streamBackendRocksdb.h"
#include "streamInt.h"
#include "tcommon.h"
#include "streamInt.h"
enum SBackendFileType {
ROCKSDB_OPTIONS_TYPE = 1,
@ -52,6 +51,7 @@ struct SStreamSnapHandle {
int8_t filetype;
SArray* pFileList;
int32_t currFileIdx;
int8_t delFlag; // 0 : not del, 1: del
};
struct SStreamSnapBlockHdr {
int8_t type;
@ -148,6 +148,7 @@ int32_t streamSnapHandleInit(SStreamSnapHandle* pHandle, char* path, int64_t chk
taosMemoryFree(tdir);
return code;
}
pHandle->delFlag = 1;
chkpId = 0;
}
@ -193,8 +194,8 @@ int32_t streamSnapHandleInit(SStreamSnapHandle* pHandle, char* path, int64_t chk
taosArrayPush(pFile->pSst, &sst);
}
}
{
char* buf = taosMemoryCalloc(1, 512);
if (qDebugFlag & DEBUG_TRACE) {
char* buf = taosMemoryCalloc(1, 128 + taosArrayGetSize(pFile->pSst) * 16);
sprintf(buf, "[current: %s,", pFile->pCurrent);
sprintf(buf + strlen(buf), "MANIFEST: %s,", pFile->pMainfest);
sprintf(buf + strlen(buf), "options: %s,", pFile->pOptions);
@ -274,7 +275,7 @@ void streamSnapHandleDestroy(SStreamSnapHandle* handle) {
if (handle->checkpointId == 0) {
// del tmp dir
if (pFile && taosIsDir(pFile->path)) {
taosRemoveDir(pFile->path);
if (handle->delFlag) taosRemoveDir(pFile->path);
}
} else {
streamBackendDelInUseChkp(handle->handle, handle->checkpointId);
@ -423,6 +424,7 @@ int32_t streamSnapWriterOpen(void* pMeta, int64_t sver, int64_t ever, char* path
pHandle->pFileList = list;
pHandle->currFileIdx = 0;
pHandle->offset = 0;
pHandle->delFlag = 0;
*ppWriter = pWriter;
return 0;
@ -480,8 +482,8 @@ int32_t streamSnapWrite(SStreamSnapWriter* pWriter, uint8_t* pData, uint32_t nDa
}
int32_t streamSnapWriterClose(SStreamSnapWriter* pWriter, int8_t rollback) {
SStreamSnapHandle* handle = &pWriter->handle;
if (qDebugFlag & DEBUG_DEBUG) {
char* buf = (char*)taosMemoryMalloc(1024);
if (qDebugFlag & DEBUG_TRACE) {
char* buf = (char*)taosMemoryMalloc(128 + taosArrayGetSize(handle->pFileList) * 16);
int n = sprintf(buf, "[");
for (int i = 0; i < taosArrayGetSize(handle->pFileList); i++) {
SBackendFileItem* item = taosArrayGet(handle->pFileList, i);

View File

@ -562,7 +562,6 @@ int32_t streamTaskPutTranstateIntoInputQ(SStreamTask* pTask) {
taosMemoryFree(pBlock);
if (streamTaskPutDataIntoInputQ(pTask, (SStreamQueueItem*)pTranstate) < 0) {
taosFreeQitem(pTranstate);
return TSDB_CODE_OUT_OF_MEMORY;
}
@ -1084,7 +1083,7 @@ int32_t streamMetaUpdateTaskDownstreamStatus(SStreamTask* pTask, int64_t startTs
int32_t numOfTotal = streamMetaGetNumOfTasks(pMeta);
if (taosHashGetSize(pStartInfo->pReadyTaskSet) + taosHashGetSize(pStartInfo->pFailedTaskSet) == numOfTotal) {
pStartInfo->readyTs = pTask->execInfo.start;
pStartInfo->readyTs = taosGetTimestampMs();
pStartInfo->elapsedTime = (pStartInfo->startTs != 0) ? pStartInfo->readyTs - pStartInfo->startTs : 0;
stDebug("vgId:%d all %d task(s) check downstream completed, last completed task:%s level:%d, startTs:%" PRId64

View File

@ -221,7 +221,6 @@ SStreamState* streamStateOpen(char* path, void* pTask, bool specPath, int32_t sz
}
pState->pTdbState->pOwner = pTask;
pState->checkPointId = 0;
return pState;
@ -274,7 +273,6 @@ int32_t streamStateCommit(SStreamState* pState) {
SStreamSnapshot* pShot = getSnapshot(pState->pFileState);
flushSnapshot(pState->pFileState, pShot, true);
}
pState->checkPointId++;
return 0;
#else
if (tdbCommit(pState->pTdbState->db, pState->pTdbState->txn) < 0) {
@ -288,7 +286,6 @@ int32_t streamStateCommit(SStreamState* pState) {
TDB_TXN_WRITE | TDB_TXN_READ_UNCOMMITTED) < 0) {
return -1;
}
pState->checkPointId++;
return 0;
#endif
}

View File

@ -431,8 +431,10 @@ int32_t streamTaskInit(SStreamTask* pTask, SStreamMeta* pMeta, SMsgCb* pMsgCb, i
pTask->outputq.status = TASK_OUTPUT_STATUS__NORMAL;
pTask->pMeta = pMeta;
pTask->chkInfo.checkpointVer = ver - 1;
pTask->chkInfo.nextProcessVer = ver;
pTask->chkInfo.checkpointVer = ver - 1; // only update when generating checkpoint
pTask->chkInfo.processedVer = ver - 1; // already processed version
pTask->chkInfo.nextProcessVer = ver; // next processed version
pTask->dataRange.range.maxVer = ver;
pTask->dataRange.range.minVer = ver;
pTask->pMsgCb = pMsgCb;

View File

@ -27,6 +27,9 @@
#define DEFAULT_MAX_STREAM_BUFFER_SIZE (128 * 1024 * 1024)
#define MIN_NUM_OF_ROW_BUFF 10240
#define TASK_KEY "streamFileState"
#define STREAM_STATE_INFO_NAME "StreamStateCheckPoint"
struct SStreamFileState {
SList* usedBuffs;
SList* freeBuffs;
@ -113,6 +116,15 @@ void* sessionCreateStateKey(SRowBuffPos* pPos, int64_t num) {
return pStateKey;
}
static void streamFileStateDecode(TSKEY* pKey, void* pBuff, int32_t len) { pBuff = taosDecodeFixedI64(pBuff, pKey); }
static void streamFileStateEncode(TSKEY* pKey, void** pVal, int32_t* pLen) {
*pLen = sizeof(TSKEY);
(*pVal) = taosMemoryCalloc(1, *pLen);
void* buff = *pVal;
taosEncodeFixedI64(&buff, *pKey);
}
SStreamFileState* streamFileStateInit(int64_t memSize, uint32_t keySize, uint32_t rowSize, uint32_t selectRowSize,
GetTsFun fp, void* pFile, TSKEY delMark, const char* taskId, int64_t checkpointId,
int8_t type) {
@ -181,6 +193,15 @@ SStreamFileState* streamFileStateInit(int64_t memSize, uint32_t keySize, uint32_
recoverSesssion(pFileState, checkpointId);
}
void* valBuf = NULL;
int32_t len = 0;
int32_t code = streamDefaultGet_rocksdb(pFileState->pFileStore, STREAM_STATE_INFO_NAME, &valBuf, &len);
if (code == TSDB_CODE_SUCCESS) {
ASSERT(len == sizeof(TSKEY));
streamFileStateDecode(&pFileState->flushMark, valBuf, len);
qDebug("===stream===flushMark read:%" PRId64, pFileState->flushMark);
}
taosMemoryFreeClear(valBuf);
return pFileState;
_error:
@ -506,15 +527,6 @@ SStreamSnapshot* getSnapshot(SStreamFileState* pFileState) {
return pFileState->usedBuffs;
}
void streamFileStateDecode(TSKEY* pKey, void* pBuff, int32_t len) { pBuff = taosDecodeFixedI64(pBuff, pKey); }
void streamFileStateEncode(TSKEY* pKey, void** pVal, int32_t* pLen) {
*pLen = sizeof(TSKEY);
(*pVal) = taosMemoryCalloc(1, *pLen);
void* buff = *pVal;
taosEncodeFixedI64(&buff, *pKey);
}
int32_t flushSnapshot(SStreamFileState* pFileState, SStreamSnapshot* pSnapshot, bool flushState) {
int32_t code = TSDB_CODE_SUCCESS;
SListIter iter = {0};
@ -538,6 +550,7 @@ int32_t flushSnapshot(SStreamFileState* pFileState, SStreamSnapshot* pSnapshot,
continue;
}
pPos->beFlushed = true;
pFileState->flushMark = TMAX(pFileState->flushMark, pFileState->getTs(pPos->pKey));
qDebug("===stream===flushed start:%" PRId64, pFileState->getTs(pPos->pKey));
if (streamStateGetBatchSize(batch) >= BATCH_LIMIT) {
@ -565,24 +578,12 @@ int32_t flushSnapshot(SStreamFileState* pFileState, SStreamSnapshot* pSnapshot,
pFileState->id, numOfElems, BATCH_LIMIT, elapsed);
if (flushState) {
const char* taskKey = "streamFileState";
{
char keyBuf[128] = {0};
void* valBuf = NULL;
int32_t len = 0;
sprintf(keyBuf, "%s:%" PRId64 "", taskKey, ((SStreamState*)pFileState->pFileStore)->checkPointId);
streamFileStateEncode(&pFileState->flushMark, &valBuf, &len);
streamStatePutBatch(pFileState->pFileStore, "default", batch, keyBuf, valBuf, len, 0);
qDebug("===stream===flushMark write:%" PRId64, pFileState->flushMark);
streamStatePutBatch(pFileState->pFileStore, "default", batch, STREAM_STATE_INFO_NAME, valBuf, len, 0);
taosMemoryFree(valBuf);
}
{
char keyBuf[128] = {0};
char valBuf[64] = {0};
int32_t len = 0;
memcpy(keyBuf, taskKey, strlen(taskKey));
len = sprintf(valBuf, "%" PRId64 "", ((SStreamState*)pFileState->pFileStore)->checkPointId);
code = streamStatePutBatch(pFileState->pFileStore, "default", batch, keyBuf, valBuf, len, 0);
}
streamStatePutBatch_rocksdb(pFileState->pFileStore, batch);
}
@ -591,26 +592,23 @@ int32_t flushSnapshot(SStreamFileState* pFileState, SStreamSnapshot* pSnapshot,
}
int32_t forceRemoveCheckpoint(SStreamFileState* pFileState, int64_t checkpointId) {
const char* taskKey = "streamFileState";
char keyBuf[128] = {0};
sprintf(keyBuf, "%s:%" PRId64 "", taskKey, checkpointId);
sprintf(keyBuf, "%s:%" PRId64 "", TASK_KEY, checkpointId);
return streamDefaultDel_rocksdb(pFileState->pFileStore, keyBuf);
}
int32_t getSnapshotIdList(SStreamFileState* pFileState, SArray* list) {
const char* taskKey = "streamFileState";
return streamDefaultIterGet_rocksdb(pFileState->pFileStore, taskKey, NULL, list);
return streamDefaultIterGet_rocksdb(pFileState->pFileStore, TASK_KEY, NULL, list);
}
int32_t deleteExpiredCheckPoint(SStreamFileState* pFileState, TSKEY mark) {
int32_t code = TSDB_CODE_SUCCESS;
const char* taskKey = "streamFileState";
int64_t maxCheckPointId = 0;
{
char buf[128] = {0};
void* val = NULL;
int32_t len = 0;
memcpy(buf, taskKey, strlen(taskKey));
memcpy(buf, TASK_KEY, strlen(TASK_KEY));
code = streamDefaultGet_rocksdb(pFileState->pFileStore, buf, &val, &len);
if (code != 0 || len == 0 || val == NULL) {
return TSDB_CODE_FAILED;
@ -624,7 +622,7 @@ int32_t deleteExpiredCheckPoint(SStreamFileState* pFileState, TSKEY mark) {
char buf[128] = {0};
void* val = 0;
int32_t len = 0;
sprintf(buf, "%s:%" PRId64 "", taskKey, i);
sprintf(buf, "%s:%" PRId64 "", TASK_KEY, i);
code = streamDefaultGet_rocksdb(pFileState->pFileStore, buf, &val, &len);
if (code != 0) {
return TSDB_CODE_FAILED;

View File

@ -12,7 +12,7 @@ class StreamStateEnv : public ::testing::Test {
protected:
virtual void SetUp() {
streamMetaInit();
backend = streamBackendInit(path, 0);
backend = streamBackendInit(path, 0, 0);
}
virtual void TearDown() {
streamMetaCleanup();

Some files were not shown because too many files have changed in this diff Show More