From bdee8ab3c11b360c3d4f57d52e297acc906f0f36 Mon Sep 17 00:00:00 2001 From: shenglian zhou Date: Fri, 12 May 2023 19:00:26 +0800 Subject: [PATCH 01/49] enhance: save work and sync home/office --- source/libs/planner/src/planOptimizer.c | 6 +- source/libs/scalar/src/filter.c | 156 +++++++++++++++++++++++- 2 files changed, 159 insertions(+), 3 deletions(-) diff --git a/source/libs/planner/src/planOptimizer.c b/source/libs/planner/src/planOptimizer.c index 4f8b57de5f..a6d4898bc8 100644 --- a/source/libs/planner/src/planOptimizer.c +++ b/source/libs/planner/src/planOptimizer.c @@ -2630,11 +2630,13 @@ static bool tbCntScanOptIsEligibleConds(STbCntScanOptInfo* pInfo, SNode* pCondit } if (QUERY_NODE_LOGIC_CONDITION == nodeType(pConditions)) { - return tbCntScanOptIsEligibleLogicCond(pInfo, (SLogicConditionNode*)pConditions); + return tbCntScanOptIsEligibleLogicCond(pInfo, (SLogicConditionNode*)pConditions) && + LIST_LENGTH(pInfo->pAgg->pGroupKeys) == 0; } if (QUERY_NODE_OPERATOR == nodeType(pConditions)) { - return tbCntScanOptIsEligibleOpCond((SOperatorNode*)pConditions); + return tbCntScanOptIsEligibleOpCond((SOperatorNode*)pConditions) && + LIST_LENGTH(pInfo->pAgg->pGroupKeys) == 0; } return false; diff --git a/source/libs/scalar/src/filter.c b/source/libs/scalar/src/filter.c index 02a21b66ed..77534db411 100644 --- a/source/libs/scalar/src/filter.c +++ b/source/libs/scalar/src/filter.c @@ -3942,8 +3942,162 @@ _return: FLT_RET(code); } +// compare ranges, null < min < val < max. null=null, min=min, max=max +typedef enum { + FLT_SCL_DATUM_KIND_NULL, + FLT_SCL_DATUM_KIND_MIN, + FLT_SCL_DATUM_KIND_INT64, + FLT_SCL_DATUM_KIND_UINT64, + FLT_SCL_DATUM_KIND_FLOAT64, + FLT_SCL_DATUM_KIND_VARCHAR, + FLT_SCL_DATUM_KIND_NCHAR, + FLT_SCL_DATUM_KIND_MAX, +} SFltSclDatumKind; + +typedef struct { + SFltSclDatumKind kind; + union { + int64_t val; // may be int64, uint64 and double + struct { + uint32_t nData; + uint8_t *pData; + }; // maybe varchar, nchar + } datum; +} SFltSclDatum; + +typedef struct { + SFltSclDatum val; + bool excl; + bool start; +} SFltSclPoint; + +typedef struct { + SFltSclDatum low; + SFltSclDatum high; + bool lowExcl; + bool highExcl; +} SFltSclRange; + +int32_t fltSclCompareDatum(SFltSclDatum* val1, SFltSclDatum* val2) { +} + +bool fltSclLessPoint(SFltSclPoint* pt1, SFltSclPoint* pt2) { + // first value compare + int32_t cmp = fltSclCompareDatum(&pt1->val, &pt2->val); + if (cmp != 0) { + return cmp < 0 ; + } + + if (pt1->start && pt2->start) { + return !pt1->excl && pt2->excl; + } else if (pt1->start) { + return pt1->excl && !pt2->excl; + } else if (pt2->start) { + return pt1->excl || pt2->excl; + } + return pt1->excl && !pt2->excl; +} + +int32_t fltSclMergeSort(SArray* pts1, SArray* pts2, SArray* result) { + size_t len1 = taosArrayGetSize(pts1); + size_t len2 = taosArrayGetSize(pts2); + size_t i = 0; + size_t j = 0; + while (i < len1 && j < len2) { + SFltSclPoint* pt1 = taosArrayGet(pts1, i); + SFltSclPoint* pt2 = taosArrayGet(pts2, j); + bool less = fltSclLessPoint(pt1, pt2); + if (less) { + taosArrayPush(result, pt1); + ++i; + } else { + taosArrayPush(result, pt2); + ++j; + } + } + if (i < len1) { + for (; i < len1; ++i) { + SFltSclPoint* pt1 = taosArrayGet(pts1, i); + taosArrayPush(result, pt1); + } + } + if (j < len2) { + for (; j < len2; ++j) { + SFltSclPoint *pt2 = taosArrayGet(pts2, j); + taosArrayPush(result, pt2); + } + } + return 0; +} + +// pts1 and pts2 must be ordered and de-duplicated and each range can not be a range of another range +int32_t fltSclMerge(SArray* pts1, 
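/* (illustrative note) pts1 and pts2 are ordered lists of SFltSclPoint endpoint pairs.
   As a hedged sketch, a predicate like `c > 5 AND c <= 9` on an INT64 column would
   decompose into the pair
     {val = 5, start = true,  excl = true}    // open lower endpoint:  (5, ...
     {val = 9, start = false, excl = false}   // closed upper endpoint: ..., 9]
   and fltSclLessPoint above breaks value ties so that an inclusive start sorts before
   an exclusive start, and an exclusive end sorts before an inclusive end. */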
SArray* pts2, bool isUnion, SArray* merged) { + size_t len1 = taosArrayGetSize(pts1); + size_t len2 = taosArrayGetSize(pts2); + //first merge sort pts1 and pts2 + SArray* all = taosArrayInit(len1 + len2, sizeof(SFltSclPoint)); + fltSclMergeSort(pts1, pts2, all); + int32_t countRequired = (isUnion) ? 1 : 2; + int32_t count = 0; + for (int32_t i = 0; i < taosArrayGetSize(all); ++i) { + SFltSclPoint* pt = taosArrayGet(pts1, i); + if (pt->start) { + ++count; + if (count == countRequired) { + taosArrayPush(merged, pt); + } + } else { + if (count == countRequired) { + taosArrayPush(merged, pt); + } + --count; + } + } + taosArrayDestroy(all); + return 0; +} + + +int32_t fltSclIntersect(SArray* pts1, SArray* pts2, SArray* merged) { + return fltSclMerge(pts1, pts2, false, merged); +} + +int32_t fltSclUnion(SArray* pts1, SArray* pts2, SArray* merged) { + return fltSclMerge(pts1, pts2, true, merged); +} + + +typedef struct { + SColumnNode* colNode; + SValueNode* valNode; + EOperatorType type; +} SFltSclScalarFunc; + +typedef struct { + SColumnNode* colNode; + +} SFltSclConstColumn; + +typedef struct { + SValueNode* value; +} SFltSclConstant; + +typedef struct { + bool useRange; + SHashObj* colRanges; + +} SFltSclOptCtx; + +static EDealRes fltSclMayBeOptimed(SNode* pNode, void* pCtx) { + SFltSclOptCtx* ctx = (SFltSclOptCtx*)pCtx; + +} + + int32_t fltOptimizeNodes(SFilterInfo *pInfo, SNode **pNode, SFltTreeStat *pStat) { - // TODO + SFltSclOptCtx ctx; + nodesWalkExprPostOrder(*pNode, fltSclMayBeOptimed, &ctx); + return TSDB_CODE_SUCCESS; } From 47a1d64125b1bf4f79535e7d715c49e14c1c2b87 Mon Sep 17 00:00:00 2001 From: Benguang Zhao Date: Mon, 15 May 2023 11:32:08 +0800 Subject: [PATCH 02/49] enh: refactor some func names for syncLogReplProcessReply --- source/libs/sync/inc/syncPipeline.h | 7 +++---- source/libs/sync/src/syncPipeline.c | 10 +++++----- source/libs/sync/src/syncReplication.c | 2 +- 3 files changed, 9 insertions(+), 10 deletions(-) diff --git a/source/libs/sync/inc/syncPipeline.h b/source/libs/sync/inc/syncPipeline.h index 02790732a2..65e2cc22a0 100644 --- a/source/libs/sync/inc/syncPipeline.h +++ b/source/libs/sync/inc/syncPipeline.h @@ -79,17 +79,16 @@ static FORCE_INLINE int32_t syncLogReplGetNextRetryBackoff(SSyncLogReplMgr* pMgr SyncTerm syncLogReplGetPrevLogTerm(SSyncLogReplMgr* pMgr, SSyncNode* pNode, SyncIndex index); -int32_t syncLogReplDoOnce(SSyncLogReplMgr* pMgr, SSyncNode* pNode); +int32_t syncLogReplStart(SSyncLogReplMgr* pMgr, SSyncNode* pNode); int32_t syncLogReplAttempt(SSyncLogReplMgr* pMgr, SSyncNode* pNode); int32_t syncLogReplProbe(SSyncLogReplMgr* pMgr, SSyncNode* pNode, SyncIndex index); - int32_t syncLogReplRetryOnNeed(SSyncLogReplMgr* pMgr, SSyncNode* pNode); int32_t syncLogReplSendTo(SSyncLogReplMgr* pMgr, SSyncNode* pNode, SyncIndex index, SyncTerm* pTerm, SRaftId* pDestId, bool* pBarrier); int32_t syncLogReplProcessReply(SSyncLogReplMgr* pMgr, SSyncNode* pNode, SyncAppendEntriesReply* pMsg); -int32_t syncLogReplProcessReplyAsRecovery(SSyncLogReplMgr* pMgr, SSyncNode* pNode, SyncAppendEntriesReply* pMsg); -int32_t syncLogReplProcessReplyAsNormal(SSyncLogReplMgr* pMgr, SSyncNode* pNode, SyncAppendEntriesReply* pMsg); +int32_t syncLogReplRecover(SSyncLogReplMgr* pMgr, SSyncNode* pNode, SyncAppendEntriesReply* pMsg); +int32_t syncLogReplContinue(SSyncLogReplMgr* pMgr, SSyncNode* pNode, SyncAppendEntriesReply* pMsg); int32_t syncLogReplProcessHeartbeatReply(SSyncLogReplMgr* pMgr, SSyncNode* pNode, SyncHeartbeatReply* pMsg); diff --git 
a/source/libs/sync/src/syncPipeline.c b/source/libs/sync/src/syncPipeline.c index 8bb72de518..794d80bbfa 100644 --- a/source/libs/sync/src/syncPipeline.c +++ b/source/libs/sync/src/syncPipeline.c @@ -717,7 +717,7 @@ _out: return ret; } -int32_t syncLogReplProcessReplyAsRecovery(SSyncLogReplMgr* pMgr, SSyncNode* pNode, SyncAppendEntriesReply* pMsg) { +int32_t syncLogReplRecover(SSyncLogReplMgr* pMgr, SSyncNode* pNode, SyncAppendEntriesReply* pMsg) { SSyncLogBuffer* pBuf = pNode->pLogBuf; SRaftId destId = pMsg->srcId; ASSERT(pMgr->restored == false); @@ -820,15 +820,15 @@ int32_t syncLogReplProcessReply(SSyncLogReplMgr* pMgr, SSyncNode* pNode, SyncApp } if (pMgr->restored) { - (void)syncLogReplProcessReplyAsNormal(pMgr, pNode, pMsg); + (void)syncLogReplContinue(pMgr, pNode, pMsg); } else { - (void)syncLogReplProcessReplyAsRecovery(pMgr, pNode, pMsg); + (void)syncLogReplRecover(pMgr, pNode, pMsg); } taosThreadMutexUnlock(&pBuf->mutex); return 0; } -int32_t syncLogReplDoOnce(SSyncLogReplMgr* pMgr, SSyncNode* pNode) { +int32_t syncLogReplStart(SSyncLogReplMgr* pMgr, SSyncNode* pNode) { if (pMgr->restored) { (void)syncLogReplAttempt(pMgr, pNode); } else { @@ -931,7 +931,7 @@ int32_t syncLogReplAttempt(SSyncLogReplMgr* pMgr, SSyncNode* pNode) { return 0; } -int32_t syncLogReplProcessReplyAsNormal(SSyncLogReplMgr* pMgr, SSyncNode* pNode, SyncAppendEntriesReply* pMsg) { +int32_t syncLogReplContinue(SSyncLogReplMgr* pMgr, SSyncNode* pNode, SyncAppendEntriesReply* pMsg) { ASSERT(pMgr->restored == true); if (pMgr->startIndex <= pMsg->lastSendIndex && pMsg->lastSendIndex < pMgr->endIndex) { if (pMgr->startIndex < pMgr->matchIndex && pMgr->retryBackoff > 0) { diff --git a/source/libs/sync/src/syncReplication.c b/source/libs/sync/src/syncReplication.c index 77e73402fa..015593948b 100644 --- a/source/libs/sync/src/syncReplication.c +++ b/source/libs/sync/src/syncReplication.c @@ -74,7 +74,7 @@ int32_t syncNodeReplicateWithoutLock(SSyncNode* pNode) { continue; } SSyncLogReplMgr* pMgr = pNode->logReplMgrs[i]; - (void)syncLogReplDoOnce(pMgr, pNode); + (void)syncLogReplStart(pMgr, pNode); } return 0; } From 51fa363fd50e148ec730f4b1814e1957caa41a4b Mon Sep 17 00:00:00 2001 From: shenglian zhou Date: Mon, 15 May 2023 11:34:05 +0800 Subject: [PATCH 03/49] feature: save work --- source/libs/scalar/src/filter.c | 57 +++++++++++++++++++++++++++++---- 1 file changed, 50 insertions(+), 7 deletions(-) diff --git a/source/libs/scalar/src/filter.c b/source/libs/scalar/src/filter.c index 77534db411..8c3e97ccbc 100644 --- a/source/libs/scalar/src/filter.c +++ b/source/libs/scalar/src/filter.c @@ -3957,12 +3957,13 @@ typedef enum { typedef struct { SFltSclDatumKind kind; union { - int64_t val; // may be int64, uint64 and double + int64_t val; // for int64, uint64 and double and bool (1 true, 0 false) struct { - uint32_t nData; + uint32_t nData; // TODO maybe remove this and make pData len prefixed? uint8_t *pData; - }; // maybe varchar, nchar + }; // for varchar, nchar } datum; + SDataType type; // TODO: original data type, may not be used? 
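  /* A tagged union: `kind` says which member of `datum` is live. Hedged sketch of how
     a double constant 3.14 would be stored, assuming FLT_SCL_DATUM_KIND_FLOAT64:
       SFltSclDatum d;
       d.kind = FLT_SCL_DATUM_KIND_FLOAT64;
       *(double *)&d.datum.val = 3.14;  // the double is type-punned into the int64 slot
     The NULL/MIN/MAX kinds carry no payload; they exist only as ordering sentinels. */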
} SFltSclDatum; typedef struct { @@ -3979,6 +3980,42 @@ typedef struct { } SFltSclRange; int32_t fltSclCompareDatum(SFltSclDatum* val1, SFltSclDatum* val2) { + switch (val1->kind) { + case FLT_SCL_DATUM_KIND_NULL: { + if (val2->kind == FLT_SCL_DATUM_KIND_NULL) return 0 ; else return -1; + break; + } + case FLT_SCL_DATUM_KIND_MIN: { + if (val2->kind == FLT_SCL_DATUM_KIND_NULL) return 1; + else { + if (val2->kind == FLT_SCL_DATUM_KIND_MIN) return 0; else return -1; + } + break; + } + case FLT_SCL_DATUM_KIND_MAX: { + if (val2->kind == FLT_SCL_DATUM_KIND_MAX) return 0; else return 1; + } + case FLT_SCL_DATUM_KIND_UINT64: { + return compareUint64Val(&val1->datum.val, &val1->datum.val); + } + case FLT_SCL_DATUM_KIND_INT64: { + return compareInt64Val(&val1->datum.val, &val2->datum.val); + } + case FLT_SCL_DATUM_KIND_FLOAT64: { + return compareFloatDouble(&val1->datum.val, &val2->datum.val); + } + case FLT_SCL_DATUM_KIND_NCHAR: { + return compareLenPrefixedWStr(&val1->datum.pData, &val2->datum.pData); //TODO + } + case FLT_SCL_DATUM_KIND_VARCHAR: { + return compareLenPrefixedStr(&val1->datum.pData, &val2->datum.pData); //TODO + } + default: + fltError("not supported kind. just return 0"); + return 0; + break; + } + return 0; } bool fltSclLessPoint(SFltSclPoint* pt1, SFltSclPoint* pt2) { @@ -4071,26 +4108,32 @@ typedef struct { SColumnNode* colNode; SValueNode* valNode; EOperatorType type; -} SFltSclScalarFunc; +} SFltSclScalarOp; +// simple define it, TODO: implement later typedef struct { SColumnNode* colNode; - } SFltSclConstColumn; +// simple define it, TODO: implement later typedef struct { SValueNode* value; } SFltSclConstant; + +int32_t fltSclBuildRangeFromScalarFunc(SFltSclScalarOp* func, SFltSclPoint* points) { + +} + + typedef struct { bool useRange; SHashObj* colRanges; - } SFltSclOptCtx; static EDealRes fltSclMayBeOptimed(SNode* pNode, void* pCtx) { SFltSclOptCtx* ctx = (SFltSclOptCtx*)pCtx; - + return DEAL_RES_CONTINUE; } From e16f4c7c440ae02905e18b397eb985aa3b91f668 Mon Sep 17 00:00:00 2001 From: shenglian zhou Date: Wed, 17 May 2023 16:57:09 +0800 Subject: [PATCH 04/49] fix: refactor and save work --- source/libs/scalar/src/filter.c | 220 ++++++++++++++++++-------------- 1 file changed, 124 insertions(+), 96 deletions(-) diff --git a/source/libs/scalar/src/filter.c b/source/libs/scalar/src/filter.c index 8c3e97ccbc..85974bcd3a 100644 --- a/source/libs/scalar/src/filter.c +++ b/source/libs/scalar/src/filter.c @@ -22,6 +22,7 @@ #include "sclInt.h" #include "tcompare.h" #include "tdatablock.h" +#include "tsimplehash.h" #include "ttime.h" bool filterRangeCompGi(const void *minv, const void *maxv, const void *minr, const void *maxr, __compar_fn_t cfunc) { @@ -260,7 +261,7 @@ int8_t filterGetCompFuncIdx(int32_t type, int32_t optr) { comparFn = 19; } else if (optr == OP_TYPE_NMATCH) { comparFn = 20; - } else if (optr == OP_TYPE_LIKE) { /* wildcard query using like operator */ + } else if (optr == OP_TYPE_LIKE) { /* wildcard query using like operator */ comparFn = 7; } else if (optr == OP_TYPE_NOT_LIKE) { /* wildcard query using like operator */ comparFn = 26; @@ -1633,7 +1634,7 @@ void filterDumpInfoToString(SFilterInfo *info, const char *msg, int32_t options) SDataType *dType = &var->node.resType; qDebug("VAL%d => [type:%d][val:%" PRIx64 "]", i, dType->type, var->datum.i); // TODO } else if (field->data) { - qDebug("VAL%d => [type:NIL][val:NIL]", i); // TODO + qDebug("VAL%d => [type:NIL][val:NIL]", i); // TODO } } @@ -3957,59 +3958,96 @@ typedef enum { typedef struct { SFltSclDatumKind 
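  /* `kind` tags the active union member below: `val` for the numeric kinds (bool is
     folded into int64), `pData` for the still-TODO varchar/nchar kinds. */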
kind; union { - int64_t val; // for int64, uint64 and double and bool (1 true, 0 false) - struct { - uint32_t nData; // TODO maybe remove this and make pData len prefixed? - uint8_t *pData; - }; // for varchar, nchar + int64_t val; // for int64, uint64 and double and bool (1 true, 0 false) + uint8_t *pData; // for varchar, nchar } datum; - SDataType type; // TODO: original data type, may not be used? + SDataType type; // TODO: original data type, may not be used? } SFltSclDatum; typedef struct { SFltSclDatum val; - bool excl; - bool start; + bool excl; + bool start; } SFltSclPoint; typedef struct { SFltSclDatum low; SFltSclDatum high; - bool lowExcl; - bool highExcl; + bool lowExcl; + bool highExcl; } SFltSclRange; -int32_t fltSclCompareDatum(SFltSclDatum* val1, SFltSclDatum* val2) { +int32_t fltSclCompareWithFloat64(SFltSclDatum *val1, SFltSclDatum *val2) { + // val2->kind == int64 switch (val1->kind) { - case FLT_SCL_DATUM_KIND_NULL: { - if (val2->kind == FLT_SCL_DATUM_KIND_NULL) return 0 ; else return -1; - break; + case FLT_SCL_DATUM_KIND_UINT64: + return compareUint64Double(&val1->datum.val, &val2->datum.val); + case FLT_SCL_DATUM_KIND_INT64: + return compareInt64Double(&val1->datum.val, &val2->datum.val); + case FLT_SCL_DATUM_KIND_FLOAT64: { + return compareDoubleVal(&val1->datum.val, &val2->datum.val); } - case FLT_SCL_DATUM_KIND_MIN: { - if (val2->kind == FLT_SCL_DATUM_KIND_NULL) return 1; - else { - if (val2->kind == FLT_SCL_DATUM_KIND_MIN) return 0; else return -1; - } - break; + //TODO: varchar, nchar + default: + qError("not support comparsion. %d, %d", val1->kind, val2->kind); + return (val1->kind - val2->kind); + } +} + +int32_t fltSclCompareWithInt64(SFltSclDatum *val1, SFltSclDatum *val2) { + // val2->kind == int64 + switch (val1->kind) { + case FLT_SCL_DATUM_KIND_UINT64: + return compareUint64Int64(&val1->datum.val, &val2->datum.val); + case FLT_SCL_DATUM_KIND_INT64: + return compareInt64Val(&val1->datum.val, &val2->datum.val); + case FLT_SCL_DATUM_KIND_FLOAT64: { + return compareDoubleInt64(&val1->datum.val, &val2->datum.val); } - case FLT_SCL_DATUM_KIND_MAX: { - if (val2->kind == FLT_SCL_DATUM_KIND_MAX) return 0; else return 1; + //TODO: varchar, nchar + default: + qError("not support comparsion. %d, %d", val1->kind, val2->kind); + return (val1->kind - val2->kind); + } +} + +int32_t fltSclCompareWithUInt64(SFltSclDatum *val1, SFltSclDatum *val2) { + // val2 kind == uint64 + switch (val1->kind) { + case FLT_SCL_DATUM_KIND_UINT64: + return compareUint64Val(&val1->datum.val, &val2->datum.val); + case FLT_SCL_DATUM_KIND_INT64: + return compareInt64Uint64(&val1->datum.val, &val2->datum.val); + case FLT_SCL_DATUM_KIND_FLOAT64: { + return compareDoubleUint64(&val1->datum.val, &val2->datum.val); } + //TODO: varchar, nchar + default: + qError("not support comparsion. %d, %d", val1->kind, val2->kind); + return (val1->kind - val2->kind); + } +} + +int32_t fltSclCompareDatum(SFltSclDatum *val1, SFltSclDatum *val2) { + if (val2->kind == FLT_SCL_DATUM_KIND_NULL || val2->kind == FLT_SCL_DATUM_KIND_MIN || + val2->kind == FLT_SCL_DATUM_KIND_MAX) { + if (val1->kind == FLT_SCL_DATUM_KIND_NULL || val1->kind == FLT_SCL_DATUM_KIND_MIN || + val1->kind == FLT_SCL_DATUM_KIND_MAX) { + return (val1->kind < val2->kind) ? -1 : ((val1->kind > val2->kind) ? 
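  /* sentinel vs sentinel: the enum declares NULL < MIN < ... < MAX in that order, so
     comparing the raw kind values realizes the documented total order; e.g. NULL vs
     MIN yields -1, MAX vs MAX yields 0. */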
1 : 0); + } + } + + switch (val2->kind) { case FLT_SCL_DATUM_KIND_UINT64: { - return compareUint64Val(&val1->datum.val, &val1->datum.val); + return fltSclCompareWithUInt64(val1, val2); } case FLT_SCL_DATUM_KIND_INT64: { - return compareInt64Val(&val1->datum.val, &val2->datum.val); + return fltSclCompareWithInt64(val1, val2); } case FLT_SCL_DATUM_KIND_FLOAT64: { - return compareFloatDouble(&val1->datum.val, &val2->datum.val); - } - case FLT_SCL_DATUM_KIND_NCHAR: { - return compareLenPrefixedWStr(&val1->datum.pData, &val2->datum.pData); //TODO - } - case FLT_SCL_DATUM_KIND_VARCHAR: { - return compareLenPrefixedStr(&val1->datum.pData, &val2->datum.pData); //TODO + return fltSclCompareWithFloat64(val1, val2); } + //TODO: varchar/nchar default: fltError("not supported kind. just return 0"); return 0; @@ -4018,125 +4056,115 @@ int32_t fltSclCompareDatum(SFltSclDatum* val1, SFltSclDatum* val2) { return 0; } -bool fltSclLessPoint(SFltSclPoint* pt1, SFltSclPoint* pt2) { +bool fltSclLessPoint(SFltSclPoint *pt1, SFltSclPoint *pt2) { // first value compare int32_t cmp = fltSclCompareDatum(&pt1->val, &pt2->val); if (cmp != 0) { - return cmp < 0 ; + return cmp < 0; } if (pt1->start && pt2->start) { - return !pt1->excl && pt2->excl; - } else if (pt1->start) { - return pt1->excl && !pt2->excl; - } else if (pt2->start) { - return pt1->excl || pt2->excl; - } + return !pt1->excl && pt2->excl; + } else if (pt1->start) { + return pt1->excl && !pt2->excl; + } else if (pt2->start) { + return pt1->excl || pt2->excl; + } return pt1->excl && !pt2->excl; } -int32_t fltSclMergeSort(SArray* pts1, SArray* pts2, SArray* result) { +int32_t fltSclMergeSort(SArray *pts1, SArray *pts2, SArray *result) { size_t len1 = taosArrayGetSize(pts1); size_t len2 = taosArrayGetSize(pts2); size_t i = 0; size_t j = 0; while (i < len1 && j < len2) { - SFltSclPoint* pt1 = taosArrayGet(pts1, i); - SFltSclPoint* pt2 = taosArrayGet(pts2, j); - bool less = fltSclLessPoint(pt1, pt2); - if (less) { + SFltSclPoint *pt1 = taosArrayGet(pts1, i); + SFltSclPoint *pt2 = taosArrayGet(pts2, j); + bool less = fltSclLessPoint(pt1, pt2); + if (less) { taosArrayPush(result, pt1); ++i; - } else { + } else { taosArrayPush(result, pt2); ++j; - } + } } if (i < len1) { - for (; i < len1; ++i) { - SFltSclPoint* pt1 = taosArrayGet(pts1, i); + for (; i < len1; ++i) { + SFltSclPoint *pt1 = taosArrayGet(pts1, i); taosArrayPush(result, pt1); - } + } } if (j < len2) { - for (; j < len2; ++j) { + for (; j < len2; ++j) { SFltSclPoint *pt2 = taosArrayGet(pts2, j); taosArrayPush(result, pt2); - } + } } return 0; } // pts1 and pts2 must be ordered and de-duplicated and each range can not be a range of another range -int32_t fltSclMerge(SArray* pts1, SArray* pts2, bool isUnion, SArray* merged) { +int32_t fltSclMerge(SArray *pts1, SArray *pts2, bool isUnion, SArray *merged) { size_t len1 = taosArrayGetSize(pts1); size_t len2 = taosArrayGetSize(pts2); - //first merge sort pts1 and pts2 - SArray* all = taosArrayInit(len1 + len2, sizeof(SFltSclPoint)); + // first merge sort pts1 and pts2 + SArray *all = taosArrayInit(len1 + len2, sizeof(SFltSclPoint)); fltSclMergeSort(pts1, pts2, all); int32_t countRequired = (isUnion) ? 
1 : 2; int32_t count = 0; for (int32_t i = 0; i < taosArrayGetSize(all); ++i) { - SFltSclPoint* pt = taosArrayGet(pts1, i); - if (pt->start) { - ++count; - if (count == countRequired) { - taosArrayPush(merged, pt); - } - } else { - if (count == countRequired) { - taosArrayPush(merged, pt); - } - --count; + SFltSclPoint *pt = taosArrayGet(pts1, i); + if (pt->start) { + ++count; + if (count == countRequired) { + taosArrayPush(merged, pt); } + } else { + if (count == countRequired) { + taosArrayPush(merged, pt); + } + --count; + } } taosArrayDestroy(all); return 0; } +int32_t fltSclIntersect(SArray *pts1, SArray *pts2, SArray *merged) { return fltSclMerge(pts1, pts2, false, merged); } -int32_t fltSclIntersect(SArray* pts1, SArray* pts2, SArray* merged) { - return fltSclMerge(pts1, pts2, false, merged); -} - -int32_t fltSclUnion(SArray* pts1, SArray* pts2, SArray* merged) { - return fltSclMerge(pts1, pts2, true, merged); -} - +int32_t fltSclUnion(SArray *pts1, SArray *pts2, SArray *merged) { return fltSclMerge(pts1, pts2, true, merged); } typedef struct { - SColumnNode* colNode; - SValueNode* valNode; + SColumnNode *colNode; + SValueNode *valNode; EOperatorType type; -} SFltSclScalarOp; +} SFltSclOperator; -// simple define it, TODO: implement later -typedef struct { - SColumnNode* colNode; -} SFltSclConstColumn; - -// simple define it, TODO: implement later -typedef struct { - SValueNode* value; -} SFltSclConstant; - - -int32_t fltSclBuildRangeFromScalarFunc(SFltSclScalarOp* func, SFltSclPoint* points) { +//TODO: column, constant, operator, and/or/not +int32_t fltSclBuildRangeFromOperator(SFltSclOperator *sclOper, SSHashObj* obj) { + switch (sclOper->type) { + case OP_TYPE_GREATER_THAN: { + } + break; + default: + break; + } + return TSDB_CODE_SUCCESS; } - typedef struct { - bool useRange; - SHashObj* colRanges; + bool useRange; + SHashObj *colRanges; } SFltSclOptCtx; -static EDealRes fltSclMayBeOptimed(SNode* pNode, void* pCtx) { - SFltSclOptCtx* ctx = (SFltSclOptCtx*)pCtx; +static EDealRes fltSclMayBeOptimed(SNode *pNode, void *pCtx) { + SFltSclOptCtx *ctx = (SFltSclOptCtx *)pCtx; return DEAL_RES_CONTINUE; } - int32_t fltOptimizeNodes(SFilterInfo *pInfo, SNode **pNode, SFltTreeStat *pStat) { SFltSclOptCtx ctx; nodesWalkExprPostOrder(*pNode, fltSclMayBeOptimed, &ctx); From 09837b7639190340f0d9a339a6da7db163b56ddd Mon Sep 17 00:00:00 2001 From: shenglian zhou Date: Thu, 18 May 2023 19:00:10 +0800 Subject: [PATCH 05/49] fix: save work --- source/libs/scalar/src/filter.c | 124 ++++++++++++++++++++++++++++---- 1 file changed, 109 insertions(+), 15 deletions(-) diff --git a/source/libs/scalar/src/filter.c b/source/libs/scalar/src/filter.c index 85974bcd3a..01bd53008e 100644 --- a/source/libs/scalar/src/filter.c +++ b/source/libs/scalar/src/filter.c @@ -3960,7 +3960,7 @@ typedef struct { union { int64_t val; // for int64, uint64 and double and bool (1 true, 0 false) uint8_t *pData; // for varchar, nchar - } datum; + }; SDataType type; // TODO: original data type, may not be used? 
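  /* the union is anonymous from this patch on, so callers can write d->val or
     d->pData directly instead of d->datum.val / d->datum.pData */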
} SFltSclDatum; @@ -3981,11 +3981,11 @@ int32_t fltSclCompareWithFloat64(SFltSclDatum *val1, SFltSclDatum *val2) { // val2->kind == int64 switch (val1->kind) { case FLT_SCL_DATUM_KIND_UINT64: - return compareUint64Double(&val1->datum.val, &val2->datum.val); + return compareUint64Double(&val1->val, &val2->val); case FLT_SCL_DATUM_KIND_INT64: - return compareInt64Double(&val1->datum.val, &val2->datum.val); + return compareInt64Double(&val1->val, &val2->val); case FLT_SCL_DATUM_KIND_FLOAT64: { - return compareDoubleVal(&val1->datum.val, &val2->datum.val); + return compareDoubleVal(&val1->val, &val2->val); } //TODO: varchar, nchar default: @@ -3998,11 +3998,11 @@ int32_t fltSclCompareWithInt64(SFltSclDatum *val1, SFltSclDatum *val2) { // val2->kind == int64 switch (val1->kind) { case FLT_SCL_DATUM_KIND_UINT64: - return compareUint64Int64(&val1->datum.val, &val2->datum.val); + return compareUint64Int64(&val1->val, &val2->val); case FLT_SCL_DATUM_KIND_INT64: - return compareInt64Val(&val1->datum.val, &val2->datum.val); + return compareInt64Val(&val1->val, &val2->val); case FLT_SCL_DATUM_KIND_FLOAT64: { - return compareDoubleInt64(&val1->datum.val, &val2->datum.val); + return compareDoubleInt64(&val1->val, &val2->val); } //TODO: varchar, nchar default: @@ -4015,11 +4015,11 @@ int32_t fltSclCompareWithUInt64(SFltSclDatum *val1, SFltSclDatum *val2) { // val2 kind == uint64 switch (val1->kind) { case FLT_SCL_DATUM_KIND_UINT64: - return compareUint64Val(&val1->datum.val, &val2->datum.val); + return compareUint64Val(&val1->val, &val2->val); case FLT_SCL_DATUM_KIND_INT64: - return compareInt64Uint64(&val1->datum.val, &val2->datum.val); + return compareInt64Uint64(&val1->val, &val2->val); case FLT_SCL_DATUM_KIND_FLOAT64: { - return compareDoubleUint64(&val1->datum.val, &val2->datum.val); + return compareDoubleUint64(&val1->val, &val2->val); } //TODO: varchar, nchar default: @@ -4142,15 +4142,109 @@ typedef struct { EOperatorType type; } SFltSclOperator; -//TODO: column, constant, operator, and/or/not +//TODO: column, constant -int32_t fltSclBuildRangeFromOperator(SFltSclOperator *sclOper, SSHashObj* obj) { - switch (sclOper->type) { - case OP_TYPE_GREATER_THAN: { +typedef struct { + SColumnNode* colNode; + SArray* points; +} SFltSclColumnRange; + +SFltSclColumnRange* fltSclGetColumnRange(SColumnNode* colNode, SArray* colRangeList) { + for (int32_t i = 0; i < taosArrayGetSize(colRangeList); ++i) { + SFltSclColumnRange* colRange = taosArrayGet(colRangeList, i); + if (nodesEqualNode((SNode*)colRange->colNode, (SNode*)colNode)) { + return colRange; } + } + SFltSclColumnRange newColRange = {.colNode = colNode, .points = taosArrayInit(4, sizeof(SFltSclPoint))}; + taosArrayPush(colRangeList, &newColRange); + return taosArrayGetLast(colRangeList); +} + +int32_t fltSclBuildDatumFromValueNode(SFltSclDatum* datum, SValueNode* valNode) { + datum->type = valNode->node.resType; + + if (valNode->isNull) { + datum->kind = FLT_SCL_DATUM_KIND_NULL; + } else { + switch (valNode->node.resType.type) { + case TSDB_DATA_TYPE_NULL: { + datum->kind = FLT_SCL_DATUM_KIND_NULL; + break; + } + case TSDB_DATA_TYPE_BOOL: { + datum->kind = FLT_SCL_DATUM_KIND_INT64; + datum->val = (valNode->datum.b) ? 
0 : 1; + break; + } + case TSDB_DATA_TYPE_TINYINT: + case TSDB_DATA_TYPE_SMALLINT: + case TSDB_DATA_TYPE_INT: + case TSDB_DATA_TYPE_BIGINT: + case TSDB_DATA_TYPE_TIMESTAMP: { + datum->kind = FLT_SCL_DATUM_KIND_INT64; + datum->val = valNode->datum.i; + break; + } + case TSDB_DATA_TYPE_UTINYINT: + case TSDB_DATA_TYPE_USMALLINT: + case TSDB_DATA_TYPE_UINT: + case TSDB_DATA_TYPE_UBIGINT: { + datum->kind = FLT_SCL_DATUM_KIND_UINT64; + *(uint64_t *)&datum->val = valNode->datum.u; + break; + } + case TSDB_DATA_TYPE_FLOAT: + case TSDB_DATA_TYPE_DOUBLE: { + datum->kind = FLT_SCL_DATUM_KIND_FLOAT64; + *(double *)&datum->val = valNode->datum.d; + break; + } + //TODO:varchar/nchar/json + default: { + qError("not supported"); + break; + } + } + + } + return TSDB_CODE_SUCCESS; +} + +int32_t fltSclBuildRangePoints(SFltSclOperator* oper, SArray* points) { + switch (oper->type) { + case OP_TYPE_GREATER_THAN: { + SFltSclDatum start; + fltSclBuildDatumFromValueNode(&start, oper->valNode); + SFltSclPoint startPt = {.start = true, .excl = true, .val = start}; + SFltSclDatum end = {.kind = FLT_SCL_DATUM_KIND_MAX, .type = oper->colNode->node.resType}; + SFltSclPoint endPt = {.start = false, .excl = false, .val = end}; + taosArrayPush(points, &start); + taosArrayPush(points, &end); break; - default: + } + default: { + qError("not supported op"); break; + } + } + return TSDB_CODE_SUCCESS; +} + +//TODO: process DNF composed of CNF +int32_t fltSclProcessCNF(SArray* sclOpList, SArray* colRangeList) { + size_t sz = taosArrayGetSize(sclOpList); + for (int32_t i = 0; i < sz; ++i) { + SFltSclOperator* sclOper = taosArrayGet(sclOpList, i); + SFltSclColumnRange *colRange = fltSclGetColumnRange(sclOper->colNode, colRangeList); + SArray* points = taosArrayInit(4, sizeof(SFltSclPoint)); + fltSclBuildRangePoints(sclOper, points); + SArray* merged = taosArrayInit(4, sizeof(SFltSclPoint)); + int32_t code = fltSclIntersect(colRange->points, points, merged); + taosArrayDestroy(colRange->points); + taosArrayDestroy(points); + colRange->points = merged; + } return TSDB_CODE_SUCCESS; } From 3e92ec7a8a4c6f701c31a95a2c4d80e816e6817f Mon Sep 17 00:00:00 2001 From: shenglian zhou Date: Fri, 19 May 2023 14:09:37 +0800 Subject: [PATCH 06/49] fix: save work --- source/libs/scalar/src/filter.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/source/libs/scalar/src/filter.c b/source/libs/scalar/src/filter.c index 01bd53008e..ffc165cba9 100644 --- a/source/libs/scalar/src/filter.c +++ b/source/libs/scalar/src/filter.c @@ -4223,6 +4223,16 @@ int32_t fltSclBuildRangePoints(SFltSclOperator* oper, SArray* points) { taosArrayPush(points, &end); break; } + case OP_TYPE_GREATER_EQUAL: { + SFltSclDatum start; + fltSclBuildDatumFromValueNode(&start, oper->valNode); + SFltSclPoint startPt = {.start = true, .excl = false, .val = start}; + SFltSclDatum end = {.kind = FLT_SCL_DATUM_KIND_MAX, .type = oper->colNode->node.resType}; + SFltSclPoint endPt = {.start = false, .excl = false, .val = end}; + taosArrayPush(points, &start); + taosArrayPush(points, &end); + } + case default: { qError("not supported op"); break; From 1f6d894dfd12170afd9208e67fb0136faf30b420 Mon Sep 17 00:00:00 2001 From: shenglian zhou Date: Fri, 19 May 2023 17:06:03 +0800 Subject: [PATCH 07/49] fix: save work --- source/libs/scalar/src/filter.c | 121 ++++++++++++++++++++++++-------- 1 file changed, 91 insertions(+), 30 deletions(-) diff --git a/source/libs/scalar/src/filter.c b/source/libs/scalar/src/filter.c index ffc165cba9..fd6e79ed42 100644 --- 
a/source/libs/scalar/src/filter.c +++ b/source/libs/scalar/src/filter.c @@ -3958,10 +3958,10 @@ typedef enum { typedef struct { SFltSclDatumKind kind; union { - int64_t val; // for int64, uint64 and double and bool (1 true, 0 false) - uint8_t *pData; // for varchar, nchar + int64_t val; // for int64, uint64 and double and bool (1 true, 0 false) + uint8_t *pData; // for varchar, nchar }; - SDataType type; // TODO: original data type, may not be used? + SDataType type; // TODO: original data type, may not be used? } SFltSclDatum; typedef struct { @@ -3987,7 +3987,7 @@ int32_t fltSclCompareWithFloat64(SFltSclDatum *val1, SFltSclDatum *val2) { case FLT_SCL_DATUM_KIND_FLOAT64: { return compareDoubleVal(&val1->val, &val2->val); } - //TODO: varchar, nchar + // TODO: varchar, nchar default: qError("not support comparsion. %d, %d", val1->kind, val2->kind); return (val1->kind - val2->kind); @@ -4004,7 +4004,7 @@ int32_t fltSclCompareWithInt64(SFltSclDatum *val1, SFltSclDatum *val2) { case FLT_SCL_DATUM_KIND_FLOAT64: { return compareDoubleInt64(&val1->val, &val2->val); } - //TODO: varchar, nchar + // TODO: varchar, nchar default: qError("not support comparsion. %d, %d", val1->kind, val2->kind); return (val1->kind - val2->kind); @@ -4021,7 +4021,7 @@ int32_t fltSclCompareWithUInt64(SFltSclDatum *val1, SFltSclDatum *val2) { case FLT_SCL_DATUM_KIND_FLOAT64: { return compareDoubleUint64(&val1->val, &val2->val); } - //TODO: varchar, nchar + // TODO: varchar, nchar default: qError("not support comparsion. %d, %d", val1->kind, val2->kind); return (val1->kind - val2->kind); @@ -4047,7 +4047,7 @@ int32_t fltSclCompareDatum(SFltSclDatum *val1, SFltSclDatum *val2) { case FLT_SCL_DATUM_KIND_FLOAT64: { return fltSclCompareWithFloat64(val1, val2); } - //TODO: varchar/nchar + // TODO: varchar/nchar default: fltError("not supported kind. 
just return 0"); return 0; @@ -4142,17 +4142,17 @@ typedef struct { EOperatorType type; } SFltSclOperator; -//TODO: column, constant +// TODO: column, constant typedef struct { - SColumnNode* colNode; - SArray* points; + SColumnNode *colNode; + SArray *points; } SFltSclColumnRange; -SFltSclColumnRange* fltSclGetColumnRange(SColumnNode* colNode, SArray* colRangeList) { +SFltSclColumnRange *fltSclGetColumnRange(SColumnNode *colNode, SArray *colRangeList) { for (int32_t i = 0; i < taosArrayGetSize(colRangeList); ++i) { - SFltSclColumnRange* colRange = taosArrayGet(colRangeList, i); - if (nodesEqualNode((SNode*)colRange->colNode, (SNode*)colNode)) { + SFltSclColumnRange *colRange = taosArrayGet(colRangeList, i); + if (nodesEqualNode((SNode *)colRange->colNode, (SNode *)colNode)) { return colRange; } } @@ -4161,7 +4161,7 @@ SFltSclColumnRange* fltSclGetColumnRange(SColumnNode* colNode, SArray* colRangeL return taosArrayGetLast(colRangeList); } -int32_t fltSclBuildDatumFromValueNode(SFltSclDatum* datum, SValueNode* valNode) { +int32_t fltSclBuildDatumFromValueNode(SFltSclDatum *datum, SValueNode *valNode) { datum->type = valNode->node.resType; if (valNode->isNull) { @@ -4200,39 +4200,101 @@ int32_t fltSclBuildDatumFromValueNode(SFltSclDatum* datum, SValueNode* valNode) *(double *)&datum->val = valNode->datum.d; break; } - //TODO:varchar/nchar/json + // TODO:varchar/nchar/json default: { qError("not supported"); break; } } - } return TSDB_CODE_SUCCESS; } -int32_t fltSclBuildRangePoints(SFltSclOperator* oper, SArray* points) { +int32_t fltSclBuildRangePoints(SFltSclOperator *oper, SArray *points) { + if (IS_UNSIGNED_NUMERIC_TYPE(oper->colNode->node.resType.type) && + ((IS_SIGNED_NUMERIC_TYPE(oper->valNode->node.resType.type) && oper->valNode->datum.i < 0) || + (IS_FLOAT_TYPE(oper->valNode->node.resType.type) && oper->valNode->datum.d < 0))) { + if (oper->type == OP_TYPE_GREATER_THAN || oper->type == OP_TYPE_GREATER_EQUAL || oper->type == OP_TYPE_NOT_EQUAL) { + SFltSclDatum start; + fltSclBuildDatumFromValueNode(&start, oper->valNode); + start.val = 0; + SFltSclPoint startPt = {.start = true, .excl = false, .val = start}; + SFltSclDatum end = {.kind = FLT_SCL_DATUM_KIND_MAX, .type = oper->colNode->node.resType}; + SFltSclPoint endPt = {.start = false, .excl = false, .val = end}; + taosArrayPush(points, &startPt); + taosArrayPush(points, &endPt); + } + return TSDB_CODE_SUCCESS; + } switch (oper->type) { case OP_TYPE_GREATER_THAN: { SFltSclDatum start; fltSclBuildDatumFromValueNode(&start, oper->valNode); SFltSclPoint startPt = {.start = true, .excl = true, .val = start}; - SFltSclDatum end = {.kind = FLT_SCL_DATUM_KIND_MAX, .type = oper->colNode->node.resType}; + SFltSclDatum end = {.kind = FLT_SCL_DATUM_KIND_MAX, .type = oper->colNode->node.resType}; SFltSclPoint endPt = {.start = false, .excl = false, .val = end}; - taosArrayPush(points, &start); - taosArrayPush(points, &end); + taosArrayPush(points, &startPt); + taosArrayPush(points, &endPt); break; } case OP_TYPE_GREATER_EQUAL: { SFltSclDatum start; fltSclBuildDatumFromValueNode(&start, oper->valNode); SFltSclPoint startPt = {.start = true, .excl = false, .val = start}; - SFltSclDatum end = {.kind = FLT_SCL_DATUM_KIND_MAX, .type = oper->colNode->node.resType}; + SFltSclDatum end = {.kind = FLT_SCL_DATUM_KIND_MAX, .type = oper->colNode->node.resType}; SFltSclPoint endPt = {.start = false, .excl = false, .val = end}; - taosArrayPush(points, &start); - taosArrayPush(points, &end); + taosArrayPush(points, &startPt); + taosArrayPush(points, &endPt); 
+ break; + } + case OP_TYPE_LOWER_THAN: { + SFltSclDatum end; + fltSclBuildDatumFromValueNode(&end, oper->valNode); + SFltSclPoint endPt = {.start = false, .excl = true, .val = end}; + SFltSclDatum start = {.kind = FLT_SCL_DATUM_KIND_MIN, .type = oper->colNode->node.resType}; + SFltSclPoint startPt = {.start = true, .excl = false, .val = start}; + taosArrayPush(points, &startPt); + taosArrayPush(points, &endPt); + break; + } + case OP_TYPE_LOWER_EQUAL: { + SFltSclDatum end; + fltSclBuildDatumFromValueNode(&end, oper->valNode); + SFltSclPoint endPt = {.start = false, .excl = false, .val = end}; + SFltSclDatum start = {.kind = FLT_SCL_DATUM_KIND_MIN, .type = oper->colNode->node.resType}; + SFltSclPoint startPt = {.start = true, .excl = false, .val = start}; + taosArrayPush(points, &startPt); + taosArrayPush(points, &endPt); + break; + } + case OP_TYPE_EQUAL: { + SFltSclDatum valDatum; + fltSclBuildDatumFromValueNode(&valDatum, oper->valNode); + SFltSclPoint startPt = {.start = true, .excl = false, .val = valDatum}; + SFltSclPoint endPt = {.start = false, .excl = false, .val = valDatum}; + taosArrayPush(points, &startPt); + taosArrayPush(points, &endPt); + break; + } + case OP_TYPE_NOT_EQUAL: { + SFltSclDatum valDatum; + fltSclBuildDatumFromValueNode(&valDatum, oper->valNode); + { + SFltSclDatum start = {.kind = FLT_SCL_DATUM_KIND_MIN, .type = oper->colNode->node.resType}; + SFltSclPoint startPt = {.start = true, .excl = false, .val = start}; + SFltSclPoint endPt = {.start = false, .excl = true, .val = valDatum}; + taosArrayPush(points, &startPt); + taosArrayPush(points, &endPt); + } + { + SFltSclPoint startPt = {.start = true, .excl = true, .val = valDatum}; + SFltSclDatum end = {.kind = FLT_SCL_DATUM_KIND_MAX, .type = oper->colNode->node.resType}; + SFltSclPoint endPt = {.start = false, .excl = false, .val = end}; + taosArrayPush(points, &startPt); + taosArrayPush(points, &endPt); + } + break; } - case default: { qError("not supported op"); break; @@ -4241,20 +4303,19 @@ int32_t fltSclBuildRangePoints(SFltSclOperator* oper, SArray* points) { return TSDB_CODE_SUCCESS; } -//TODO: process DNF composed of CNF -int32_t fltSclProcessCNF(SArray* sclOpList, SArray* colRangeList) { +// TODO: process DNF composed of CNF +int32_t fltSclProcessCNF(SArray *sclOpList, SArray *colRangeList) { size_t sz = taosArrayGetSize(sclOpList); for (int32_t i = 0; i < sz; ++i) { - SFltSclOperator* sclOper = taosArrayGet(sclOpList, i); + SFltSclOperator *sclOper = taosArrayGet(sclOpList, i); SFltSclColumnRange *colRange = fltSclGetColumnRange(sclOper->colNode, colRangeList); - SArray* points = taosArrayInit(4, sizeof(SFltSclPoint)); + SArray *points = taosArrayInit(4, sizeof(SFltSclPoint)); fltSclBuildRangePoints(sclOper, points); - SArray* merged = taosArrayInit(4, sizeof(SFltSclPoint)); + SArray *merged = taosArrayInit(4, sizeof(SFltSclPoint)); int32_t code = fltSclIntersect(colRange->points, points, merged); taosArrayDestroy(colRange->points); taosArrayDestroy(points); colRange->points = merged; - } return TSDB_CODE_SUCCESS; } From 6456d5dccfd11afe518fc3a282494eb50a0e8a38 Mon Sep 17 00:00:00 2001 From: shenglian zhou Date: Mon, 22 May 2023 11:41:19 +0800 Subject: [PATCH 08/49] enhance: code finish --- source/libs/scalar/inc/filterInt.h | 7 + source/libs/scalar/src/filter.c | 700 ++++++++++++++++------------- 2 files changed, 407 insertions(+), 300 deletions(-) diff --git a/source/libs/scalar/inc/filterInt.h b/source/libs/scalar/inc/filterInt.h index 2023d38777..1ca8ac1d8c 100644 --- 
a/source/libs/scalar/inc/filterInt.h +++ b/source/libs/scalar/inc/filterInt.h
@@ -227,8 +227,10 @@ typedef struct SFltTreeStat { SFilterInfo *info; } SFltTreeStat; +
 typedef struct SFltScalarCtx { SNode *node; + SArray* fltSclRange; } SFltScalarCtx;
 typedef struct SFltBuildGroupCtx { @@ -237,6 +239,11 @@ typedef struct SFltBuildGroupCtx { int32_t code; } SFltBuildGroupCtx;
+typedef struct { + SColumnNode *colNode; + SArray *points; +} SFltSclColumnRange; +
 struct SFilterInfo { bool scalarMode; SFltScalarCtx sclCtx;
diff --git a/source/libs/scalar/src/filter.c b/source/libs/scalar/src/filter.c index fd6e79ed42..33f37c722e 100644 --- a/source/libs/scalar/src/filter.c +++ b/source/libs/scalar/src/filter.c
@@ -1841,6 +1841,13 @@ void filterFreeInfo(SFilterInfo *info) { return; }
+ for (int32_t i = 0; i < taosArrayGetSize(info->sclCtx.fltSclRange); ++i) { + SFltSclColumnRange* colRange = taosArrayGet(info->sclCtx.fltSclRange, i); + nodesDestroyNode((SNode*)colRange->colNode); + taosArrayDestroy(colRange->points); + } + taosArrayDestroy(info->sclCtx.fltSclRange); +
 taosMemoryFreeClear(info->cunits); taosMemoryFreeClear(info->blkUnitRes); taosMemoryFreeClear(info->blkUnits);
@@ -3424,8 +3431,335 @@ _return: return code; } +
+// compare ranges, null < min < val < max. null=null, min=min, max=max
+typedef enum { + FLT_SCL_DATUM_KIND_NULL, + FLT_SCL_DATUM_KIND_MIN, + FLT_SCL_DATUM_KIND_INT64, + FLT_SCL_DATUM_KIND_UINT64, + FLT_SCL_DATUM_KIND_FLOAT64, + FLT_SCL_DATUM_KIND_VARCHAR, + FLT_SCL_DATUM_KIND_NCHAR, + FLT_SCL_DATUM_KIND_MAX, +} SFltSclDatumKind; +
+typedef struct { + SFltSclDatumKind kind; + union { + int64_t i; // for int64, uint64 and double and bool (1 true, 0 false) + uint64_t u; + double d; + uint8_t *pData; // for varchar, nchar + }; + SDataType type; // TODO: original data type, may not be used? +} SFltSclDatum; +
+typedef struct { + SFltSclDatum val; + bool excl; + bool start; +} SFltSclPoint; +
+int32_t fltSclCompareWithFloat64(SFltSclDatum *val1, SFltSclDatum *val2) { + // val2->kind == float64 + switch (val1->kind) { + case FLT_SCL_DATUM_KIND_UINT64: + return compareUint64Double(&val1->u, &val2->d); + case FLT_SCL_DATUM_KIND_INT64: + return compareInt64Double(&val1->i, &val2->d); + case FLT_SCL_DATUM_KIND_FLOAT64: { + return compareDoubleVal(&val1->d, &val2->d); + } + // TODO: varchar, nchar + default: + qError("not support comparison. %d, %d", val1->kind, val2->kind); + return (val1->kind - val2->kind); + } +} +
+int32_t fltSclCompareWithInt64(SFltSclDatum *val1, SFltSclDatum *val2) { + // val2->kind == int64 + switch (val1->kind) { + case FLT_SCL_DATUM_KIND_UINT64: + return compareUint64Int64(&val1->u, &val2->i); + case FLT_SCL_DATUM_KIND_INT64: + return compareInt64Val(&val1->i, &val2->i); + case FLT_SCL_DATUM_KIND_FLOAT64: { + return compareDoubleInt64(&val1->d, &val2->i); + } + // TODO: varchar, nchar + default: + qError("not support comparison. %d, %d", val1->kind, val2->kind); + return (val1->kind - val2->kind); + } +} +
+int32_t fltSclCompareWithUInt64(SFltSclDatum *val1, SFltSclDatum *val2) { + // val2->kind == uint64 + switch (val1->kind) { + case FLT_SCL_DATUM_KIND_UINT64: + return compareUint64Val(&val1->u, &val2->u); + case FLT_SCL_DATUM_KIND_INT64: + return compareInt64Uint64(&val1->i, &val2->u); + case FLT_SCL_DATUM_KIND_FLOAT64: { + return compareDoubleUint64(&val1->d, &val2->u); + } + // TODO: varchar, nchar + default: + qError("not support comparison.
%d, %d", val1->kind, val2->kind); + return (val1->kind - val2->kind); + } +} + +int32_t fltSclCompareDatum(SFltSclDatum *val1, SFltSclDatum *val2) { + if (val2->kind == FLT_SCL_DATUM_KIND_NULL || val2->kind == FLT_SCL_DATUM_KIND_MIN || + val2->kind == FLT_SCL_DATUM_KIND_MAX) { + if (val1->kind == FLT_SCL_DATUM_KIND_NULL || val1->kind == FLT_SCL_DATUM_KIND_MIN || + val1->kind == FLT_SCL_DATUM_KIND_MAX) { + return (val1->kind < val2->kind) ? -1 : ((val1->kind > val2->kind) ? 1 : 0); + } + } + + switch (val2->kind) { + case FLT_SCL_DATUM_KIND_UINT64: { + return fltSclCompareWithUInt64(val1, val2); + } + case FLT_SCL_DATUM_KIND_INT64: { + return fltSclCompareWithInt64(val1, val2); + } + case FLT_SCL_DATUM_KIND_FLOAT64: { + return fltSclCompareWithFloat64(val1, val2); + } + // TODO: varchar/nchar + default: + fltError("not supported kind. just return 0"); + return 0; + break; + } + return 0; +} + +bool fltSclLessPoint(SFltSclPoint *pt1, SFltSclPoint *pt2) { + // first value compare + int32_t cmp = fltSclCompareDatum(&pt1->val, &pt2->val); + if (cmp != 0) { + return cmp < 0; + } + + if (pt1->start && pt2->start) { + return !pt1->excl && pt2->excl; + } else if (pt1->start) { + return pt1->excl && !pt2->excl; + } else if (pt2->start) { + return pt1->excl || pt2->excl; + } + return pt1->excl && !pt2->excl; +} + +int32_t fltSclMergeSort(SArray *pts1, SArray *pts2, SArray *result) { + size_t len1 = taosArrayGetSize(pts1); + size_t len2 = taosArrayGetSize(pts2); + size_t i = 0; + size_t j = 0; + while (i < len1 && j < len2) { + SFltSclPoint *pt1 = taosArrayGet(pts1, i); + SFltSclPoint *pt2 = taosArrayGet(pts2, j); + bool less = fltSclLessPoint(pt1, pt2); + if (less) { + taosArrayPush(result, pt1); + ++i; + } else { + taosArrayPush(result, pt2); + ++j; + } + } + if (i < len1) { + for (; i < len1; ++i) { + SFltSclPoint *pt1 = taosArrayGet(pts1, i); + taosArrayPush(result, pt1); + } + } + if (j < len2) { + for (; j < len2; ++j) { + SFltSclPoint *pt2 = taosArrayGet(pts2, j); + taosArrayPush(result, pt2); + } + } + return 0; +} + +// pts1 and pts2 must be ordered and de-duplicated and each range can not be a range of another range +int32_t fltSclMerge(SArray *pts1, SArray *pts2, bool isUnion, SArray *merged) { + size_t len1 = taosArrayGetSize(pts1); + size_t len2 = taosArrayGetSize(pts2); + // first merge sort pts1 and pts2 + SArray *all = taosArrayInit(len1 + len2, sizeof(SFltSclPoint)); + fltSclMergeSort(pts1, pts2, all); + int32_t countRequired = (isUnion) ? 
1 : 2; + int32_t count = 0;
+ for (int32_t i = 0; i < taosArrayGetSize(all); ++i) { + SFltSclPoint *pt = taosArrayGet(all, i); // walk the merge-sorted endpoints in `all`, not pts1 + if (pt->start) { + ++count; + if (count == countRequired) { + taosArrayPush(merged, pt); + } + } else { + if (count == countRequired) { + taosArrayPush(merged, pt); + } + --count; + } + } + taosArrayDestroy(all); + return 0; +} +
+int32_t fltSclIntersect(SArray *pts1, SArray *pts2, SArray *merged) { return fltSclMerge(pts1, pts2, false, merged); } +
+int32_t fltSclUnion(SArray *pts1, SArray *pts2, SArray *merged) { return fltSclMerge(pts1, pts2, true, merged); } +
+// TODO: column, constant
+typedef struct { + SColumnNode *colNode; + SValueNode *valNode; + EOperatorType type; +} SFltSclOperator; + +
+SFltSclColumnRange *fltSclGetOrCreateColumnRange(SColumnNode *colNode, SArray *colRangeList) { + for (int32_t i = 0; i < taosArrayGetSize(colRangeList); ++i) { + SFltSclColumnRange *colRange = taosArrayGet(colRangeList, i); + if (nodesEqualNode((SNode *)colRange->colNode, (SNode *)colNode)) { + return colRange; + } + } + SColumnNode* pColumnNode = (SColumnNode*)nodesCloneNode((SNode*)colNode); + SFltSclColumnRange newColRange = {.colNode = pColumnNode, .points = taosArrayInit(4, sizeof(SFltSclPoint))}; + taosArrayPush(colRangeList, &newColRange); + return taosArrayGetLast(colRangeList); +} +
+int32_t fltSclBuildDatumFromValueNode(SFltSclDatum *datum, SValueNode *valNode) { + datum->type = valNode->node.resType; + + if (valNode->isNull) { + datum->kind = FLT_SCL_DATUM_KIND_NULL; + } else { + switch (valNode->node.resType.type) { + case TSDB_DATA_TYPE_NULL: { + datum->kind = FLT_SCL_DATUM_KIND_NULL; + break; + } + case TSDB_DATA_TYPE_BOOL: { + datum->kind = FLT_SCL_DATUM_KIND_INT64; + datum->i = (valNode->datum.b) ? 1 : 0; // true maps to 1, false to 0 + break; + } + case TSDB_DATA_TYPE_TINYINT: + case TSDB_DATA_TYPE_SMALLINT: + case TSDB_DATA_TYPE_INT: + case TSDB_DATA_TYPE_BIGINT: + case TSDB_DATA_TYPE_TIMESTAMP: { + datum->kind = FLT_SCL_DATUM_KIND_INT64; + datum->i = valNode->datum.i; + break; + } + case TSDB_DATA_TYPE_UTINYINT: + case TSDB_DATA_TYPE_USMALLINT: + case TSDB_DATA_TYPE_UINT: + case TSDB_DATA_TYPE_UBIGINT: { + datum->kind = FLT_SCL_DATUM_KIND_UINT64; + datum->u = valNode->datum.u; + break; + } + case TSDB_DATA_TYPE_FLOAT: + case TSDB_DATA_TYPE_DOUBLE: { + datum->kind = FLT_SCL_DATUM_KIND_FLOAT64; + datum->d = valNode->datum.d; + break; + } + // TODO:varchar/nchar/json + default: { + qError("not supported"); + break; + } + } + } + return TSDB_CODE_SUCCESS; +} +
+int32_t fltSclBuildDatumFromBlockSmaValue(SFltSclDatum* datum, uint8_t type, int64_t val) { + switch (type) { + case TSDB_DATA_TYPE_BOOL: + case TSDB_DATA_TYPE_TINYINT: + case TSDB_DATA_TYPE_SMALLINT: + case TSDB_DATA_TYPE_INT: + case TSDB_DATA_TYPE_BIGINT: + case TSDB_DATA_TYPE_TIMESTAMP: { // signed ints and timestamps share the int64 kind + datum->kind = FLT_SCL_DATUM_KIND_INT64; + datum->i = val; + break; + } + case TSDB_DATA_TYPE_UTINYINT: + case TSDB_DATA_TYPE_USMALLINT: + case TSDB_DATA_TYPE_UINT: + case TSDB_DATA_TYPE_UBIGINT: { + datum->kind = FLT_SCL_DATUM_KIND_UINT64; + datum->u = *(uint64_t*)&val; + break; + } + case TSDB_DATA_TYPE_FLOAT: + case TSDB_DATA_TYPE_DOUBLE: { + datum->kind = FLT_SCL_DATUM_KIND_FLOAT64; + datum->d = *(double*)&val; + break; + } + // TODO:varchar/nchar/json + default: { + qError("not supported"); + break; + } + } + + return TSDB_CODE_SUCCESS; +} +
+int32_t fltSclBuildRangeFromBlockSma(SFltSclColumnRange* colRange, SColumnDataAgg* pAgg, SArray* points) { + SFltSclDatum min; + fltSclBuildDatumFromBlockSmaValue(&min, colRange->colNode->node.resType.type, pAgg->min); + SFltSclPoint minPt = {.excl = false, .start = true, .val = min}; + SFltSclDatum max;
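  /* The block SMA supplies a per-block [min, max]; filterRangeExecute below intersects
     it with the column's predicate range and skips the block when the result is empty.
     Sketch: predicate `c > 100` -> (100, MAX]; a block with min = 3, max = 50 -> [3, 50];
     the intersection is empty, so the block is pruned without reading any rows. */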
fltSclBuildDatumFromBlockSmaValue(&max, colRange->colNode->node.resType.type, pAgg->max); + SFltSclPoint maxPt = {.excl = false, .start = false, .val = max}; + taosArrayPush(points, &minPt); + taosArrayPush(points, &maxPt); + return TSDB_CODE_SUCCESS; +} +
 bool filterRangeExecute(SFilterInfo *info, SColumnDataAgg **pDataStatis, int32_t numOfCols, int32_t numOfRows) { if (info->scalarMode) {
+ SArray* colRanges = info->sclCtx.fltSclRange; + for (int32_t i = 0; i < taosArrayGetSize(colRanges); ++i) { + SFltSclColumnRange* colRange = taosArrayGet(colRanges, i); + bool foundCol = false; + int32_t j = 0; + for (; j < numOfCols; ++j) { + if (pDataStatis[j] != NULL && pDataStatis[j]->colId == colRange->colNode->colId) { + foundCol = true; + break; // stop here so that j still indexes the matching column + } + } + if (foundCol) { + SColumnDataAgg* pAgg = pDataStatis[j]; + SArray* points = taosArrayInit(2, sizeof(SFltSclPoint)); + fltSclBuildRangeFromBlockSma(colRange, pAgg, points); + SArray* merged = taosArrayInit(8, sizeof(SFltSclPoint)); + fltSclIntersect(points, colRange->points, merged); + bool isIntersect = taosArrayGetSize(merged) != 0; + taosArrayDestroy(merged); + taosArrayDestroy(points); + if (!isIntersect) { + return false; + } + } + } return true; }
@@ -3943,289 +4277,7 @@ _return: FLT_RET(code); }
-// compare ranges, null < min < val < max. null=null, min=min, max=max
-typedef enum { - FLT_SCL_DATUM_KIND_NULL, - FLT_SCL_DATUM_KIND_MIN, - FLT_SCL_DATUM_KIND_INT64, - FLT_SCL_DATUM_KIND_UINT64, - FLT_SCL_DATUM_KIND_FLOAT64, - FLT_SCL_DATUM_KIND_VARCHAR, - FLT_SCL_DATUM_KIND_NCHAR, - FLT_SCL_DATUM_KIND_MAX, -} SFltSclDatumKind; -
-typedef struct { - SFltSclDatumKind kind; - union { - int64_t val; // for int64, uint64 and double and bool (1 true, 0 false) - uint8_t *pData; // for varchar, nchar - }; - SDataType type; // TODO: original data type, may not be used? -} SFltSclDatum; -
-typedef struct { - SFltSclDatum val; - bool excl; - bool start; -} SFltSclPoint; -
-typedef struct { - SFltSclDatum low; - SFltSclDatum high; - bool lowExcl; - bool highExcl; -} SFltSclRange; -
-int32_t fltSclCompareWithFloat64(SFltSclDatum *val1, SFltSclDatum *val2) { - // val2->kind == int64 - switch (val1->kind) { - case FLT_SCL_DATUM_KIND_UINT64: - return compareUint64Double(&val1->val, &val2->val); - case FLT_SCL_DATUM_KIND_INT64: - return compareInt64Double(&val1->val, &val2->val); - case FLT_SCL_DATUM_KIND_FLOAT64: { - return compareDoubleVal(&val1->val, &val2->val); - } - // TODO: varchar, nchar - default: - qError("not support comparsion. %d, %d", val1->kind, val2->kind); - return (val1->kind - val2->kind); - } -} -
-int32_t fltSclCompareWithInt64(SFltSclDatum *val1, SFltSclDatum *val2) { - // val2->kind == int64 - switch (val1->kind) { - case FLT_SCL_DATUM_KIND_UINT64: - return compareUint64Int64(&val1->val, &val2->val); - case FLT_SCL_DATUM_KIND_INT64: - return compareInt64Val(&val1->val, &val2->val); - case FLT_SCL_DATUM_KIND_FLOAT64: { - return compareDoubleInt64(&val1->val, &val2->val); - } - // TODO: varchar, nchar - default: - qError("not support comparsion.
%d, %d", val1->kind, val2->kind); - return (val1->kind - val2->kind); - } -} - -int32_t fltSclCompareWithUInt64(SFltSclDatum *val1, SFltSclDatum *val2) { - // val2 kind == uint64 - switch (val1->kind) { - case FLT_SCL_DATUM_KIND_UINT64: - return compareUint64Val(&val1->val, &val2->val); - case FLT_SCL_DATUM_KIND_INT64: - return compareInt64Uint64(&val1->val, &val2->val); - case FLT_SCL_DATUM_KIND_FLOAT64: { - return compareDoubleUint64(&val1->val, &val2->val); - } - // TODO: varchar, nchar - default: - qError("not support comparsion. %d, %d", val1->kind, val2->kind); - return (val1->kind - val2->kind); - } -} - -int32_t fltSclCompareDatum(SFltSclDatum *val1, SFltSclDatum *val2) { - if (val2->kind == FLT_SCL_DATUM_KIND_NULL || val2->kind == FLT_SCL_DATUM_KIND_MIN || - val2->kind == FLT_SCL_DATUM_KIND_MAX) { - if (val1->kind == FLT_SCL_DATUM_KIND_NULL || val1->kind == FLT_SCL_DATUM_KIND_MIN || - val1->kind == FLT_SCL_DATUM_KIND_MAX) { - return (val1->kind < val2->kind) ? -1 : ((val1->kind > val2->kind) ? 1 : 0); - } - } - - switch (val2->kind) { - case FLT_SCL_DATUM_KIND_UINT64: { - return fltSclCompareWithUInt64(val1, val2); - } - case FLT_SCL_DATUM_KIND_INT64: { - return fltSclCompareWithInt64(val1, val2); - } - case FLT_SCL_DATUM_KIND_FLOAT64: { - return fltSclCompareWithFloat64(val1, val2); - } - // TODO: varchar/nchar - default: - fltError("not supported kind. just return 0"); - return 0; - break; - } - return 0; -} - -bool fltSclLessPoint(SFltSclPoint *pt1, SFltSclPoint *pt2) { - // first value compare - int32_t cmp = fltSclCompareDatum(&pt1->val, &pt2->val); - if (cmp != 0) { - return cmp < 0; - } - - if (pt1->start && pt2->start) { - return !pt1->excl && pt2->excl; - } else if (pt1->start) { - return pt1->excl && !pt2->excl; - } else if (pt2->start) { - return pt1->excl || pt2->excl; - } - return pt1->excl && !pt2->excl; -} - -int32_t fltSclMergeSort(SArray *pts1, SArray *pts2, SArray *result) { - size_t len1 = taosArrayGetSize(pts1); - size_t len2 = taosArrayGetSize(pts2); - size_t i = 0; - size_t j = 0; - while (i < len1 && j < len2) { - SFltSclPoint *pt1 = taosArrayGet(pts1, i); - SFltSclPoint *pt2 = taosArrayGet(pts2, j); - bool less = fltSclLessPoint(pt1, pt2); - if (less) { - taosArrayPush(result, pt1); - ++i; - } else { - taosArrayPush(result, pt2); - ++j; - } - } - if (i < len1) { - for (; i < len1; ++i) { - SFltSclPoint *pt1 = taosArrayGet(pts1, i); - taosArrayPush(result, pt1); - } - } - if (j < len2) { - for (; j < len2; ++j) { - SFltSclPoint *pt2 = taosArrayGet(pts2, j); - taosArrayPush(result, pt2); - } - } - return 0; -} - -// pts1 and pts2 must be ordered and de-duplicated and each range can not be a range of another range -int32_t fltSclMerge(SArray *pts1, SArray *pts2, bool isUnion, SArray *merged) { - size_t len1 = taosArrayGetSize(pts1); - size_t len2 = taosArrayGetSize(pts2); - // first merge sort pts1 and pts2 - SArray *all = taosArrayInit(len1 + len2, sizeof(SFltSclPoint)); - fltSclMergeSort(pts1, pts2, all); - int32_t countRequired = (isUnion) ? 
1 : 2; - int32_t count = 0; - for (int32_t i = 0; i < taosArrayGetSize(all); ++i) { - SFltSclPoint *pt = taosArrayGet(pts1, i); - if (pt->start) { - ++count; - if (count == countRequired) { - taosArrayPush(merged, pt); - } - } else { - if (count == countRequired) { - taosArrayPush(merged, pt); - } - --count; - } - } - taosArrayDestroy(all); - return 0; -} - -int32_t fltSclIntersect(SArray *pts1, SArray *pts2, SArray *merged) { return fltSclMerge(pts1, pts2, false, merged); } - -int32_t fltSclUnion(SArray *pts1, SArray *pts2, SArray *merged) { return fltSclMerge(pts1, pts2, true, merged); } - -typedef struct { - SColumnNode *colNode; - SValueNode *valNode; - EOperatorType type; -} SFltSclOperator; - -// TODO: column, constant - -typedef struct { - SColumnNode *colNode; - SArray *points; -} SFltSclColumnRange; - -SFltSclColumnRange *fltSclGetColumnRange(SColumnNode *colNode, SArray *colRangeList) { - for (int32_t i = 0; i < taosArrayGetSize(colRangeList); ++i) { - SFltSclColumnRange *colRange = taosArrayGet(colRangeList, i); - if (nodesEqualNode((SNode *)colRange->colNode, (SNode *)colNode)) { - return colRange; - } - } - SFltSclColumnRange newColRange = {.colNode = colNode, .points = taosArrayInit(4, sizeof(SFltSclPoint))}; - taosArrayPush(colRangeList, &newColRange); - return taosArrayGetLast(colRangeList); -} - -int32_t fltSclBuildDatumFromValueNode(SFltSclDatum *datum, SValueNode *valNode) { - datum->type = valNode->node.resType; - - if (valNode->isNull) { - datum->kind = FLT_SCL_DATUM_KIND_NULL; - } else { - switch (valNode->node.resType.type) { - case TSDB_DATA_TYPE_NULL: { - datum->kind = FLT_SCL_DATUM_KIND_NULL; - break; - } - case TSDB_DATA_TYPE_BOOL: { - datum->kind = FLT_SCL_DATUM_KIND_INT64; - datum->val = (valNode->datum.b) ? 0 : 1; - break; - } - case TSDB_DATA_TYPE_TINYINT: - case TSDB_DATA_TYPE_SMALLINT: - case TSDB_DATA_TYPE_INT: - case TSDB_DATA_TYPE_BIGINT: - case TSDB_DATA_TYPE_TIMESTAMP: { - datum->kind = FLT_SCL_DATUM_KIND_INT64; - datum->val = valNode->datum.i; - break; - } - case TSDB_DATA_TYPE_UTINYINT: - case TSDB_DATA_TYPE_USMALLINT: - case TSDB_DATA_TYPE_UINT: - case TSDB_DATA_TYPE_UBIGINT: { - datum->kind = FLT_SCL_DATUM_KIND_UINT64; - *(uint64_t *)&datum->val = valNode->datum.u; - break; - } - case TSDB_DATA_TYPE_FLOAT: - case TSDB_DATA_TYPE_DOUBLE: { - datum->kind = FLT_SCL_DATUM_KIND_FLOAT64; - *(double *)&datum->val = valNode->datum.d; - break; - } - // TODO:varchar/nchar/json - default: { - qError("not supported"); - break; - } - } - } - return TSDB_CODE_SUCCESS; -} - int32_t fltSclBuildRangePoints(SFltSclOperator *oper, SArray *points) { - if (IS_UNSIGNED_NUMERIC_TYPE(oper->colNode->node.resType.type) && - ((IS_SIGNED_NUMERIC_TYPE(oper->valNode->node.resType.type) && oper->valNode->datum.i < 0) || - (IS_FLOAT_TYPE(oper->valNode->node.resType.type) && oper->valNode->datum.d < 0))) { - if (oper->type == OP_TYPE_GREATER_THAN || oper->type == OP_TYPE_GREATER_EQUAL || oper->type == OP_TYPE_NOT_EQUAL) { - SFltSclDatum start; - fltSclBuildDatumFromValueNode(&start, oper->valNode); - start.val = 0; - SFltSclPoint startPt = {.start = true, .excl = false, .val = start}; - SFltSclDatum end = {.kind = FLT_SCL_DATUM_KIND_MAX, .type = oper->colNode->node.resType}; - SFltSclPoint endPt = {.start = false, .excl = false, .val = end}; - taosArrayPush(points, &startPt); - taosArrayPush(points, &endPt); - } - return TSDB_CODE_SUCCESS; - } switch (oper->type) { case OP_TYPE_GREATER_THAN: { SFltSclDatum start; @@ -4304,36 +4356,84 @@ int32_t 
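/* Pipeline sketch, as wired up in fltOptimizeNodes below: collect the AND-ed
   column-vs-constant comparisons from the filter tree, turn each one into a pair of
   range points here, intersect the point lists per column in fltSclProcessCNF, and
   store the result in info->sclCtx.fltSclRange for filterRangeExecute to prune data
   blocks against their SMAs. */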
fltSclBuildRangePoints(SFltSclOperator *oper, SArray *points) { } // TODO: process DNF composed of CNF -int32_t fltSclProcessCNF(SArray *sclOpList, SArray *colRangeList) { - size_t sz = taosArrayGetSize(sclOpList); +int32_t fltSclProcessCNF(SArray *sclOpListCNF, SArray *colRangeList) { + size_t sz = taosArrayGetSize(sclOpListCNF); for (int32_t i = 0; i < sz; ++i) { - SFltSclOperator *sclOper = taosArrayGet(sclOpList, i); - SFltSclColumnRange *colRange = fltSclGetColumnRange(sclOper->colNode, colRangeList); + SFltSclOperator *sclOper = taosArrayGet(sclOpListCNF, i); + SFltSclColumnRange *colRange = fltSclGetOrCreateColumnRange(sclOper->colNode, colRangeList); SArray *points = taosArrayInit(4, sizeof(SFltSclPoint)); fltSclBuildRangePoints(sclOper, points); - SArray *merged = taosArrayInit(4, sizeof(SFltSclPoint)); - int32_t code = fltSclIntersect(colRange->points, points, merged); - taosArrayDestroy(colRange->points); - taosArrayDestroy(points); - colRange->points = merged; + if (taosArrayGetSize(colRange->points) != 0) { + SArray *merged = taosArrayInit(4, sizeof(SFltSclPoint)); + int32_t code = fltSclIntersect(colRange->points, points, merged); + taosArrayDestroy(colRange->points); + taosArrayDestroy(points); + colRange->points = merged; + } else { + colRange->points = points; + } + } return TSDB_CODE_SUCCESS; } -typedef struct { - bool useRange; - SHashObj *colRanges; -} SFltSclOptCtx; +static int32_t fltSclCollectOperatorFromNode(SNode* pNode, SArray* sclOpList) { + if (nodeType(pNode) != QUERY_NODE_OPERATOR) { + return TSDB_CODE_SUCCESS; + } + SOperatorNode* pOper = (SOperatorNode*)pNode; + //TODO: left value node, right column node + //TODO: datatype + //TODO: operator + if (nodeType(pOper->pLeft) == QUERY_NODE_COLUMN && nodeType(pOper->pRight) == QUERY_NODE_VALUE && + (pOper->opType == OP_TYPE_GREATER_THAN || pOper->opType == OP_TYPE_GREATER_EQUAL || + pOper->opType == OP_TYPE_LOWER_THAN || pOper->opType == OP_TYPE_LOWER_EQUAL || + pOper->opType == OP_TYPE_NOT_EQUAL || pOper->opType == OP_TYPE_EQUAL)) { + SValueNode* valNode = (SValueNode*)pOper->pRight; + if (IS_NUMERIC_TYPE(valNode->node.resType.type) || valNode->node.resType.type == TSDB_DATA_TYPE_TIMESTAMP) { + SNode *p = nodesCloneNode(pNode); + taosArrayPush(sclOpList, &p); + } + } + return TSDB_CODE_SUCCESS; +} -static EDealRes fltSclMayBeOptimed(SNode *pNode, void *pCtx) { - SFltSclOptCtx *ctx = (SFltSclOptCtx *)pCtx; - return DEAL_RES_CONTINUE; +static int32_t fltSclCollectOperatorsFromLogicCond(SNode* pNode, SArray* sclOpList) { + if (nodeType(pNode) != QUERY_NODE_LOGIC_CONDITION) { + return TSDB_CODE_SUCCESS; + } + SLogicConditionNode* pLogicCond = (SLogicConditionNode*)pNode; + //TODO: support LOGIC_COND_TYPE_OR + if (pLogicCond->condType != LOGIC_COND_TYPE_AND) { + return TSDB_CODE_SUCCESS; + } + SNode* pExpr = NULL; + FOREACH(pExpr, pLogicCond->pParameterList) { fltSclCollectOperatorFromNode(pExpr, sclOpList); + } + return TSDB_CODE_SUCCESS; +} + +static int32_t fltSclCollectOperators(SNode *pNode, SArray* sclOpList) { + if (nodeType(pNode) == QUERY_NODE_OPERATOR) { + fltSclCollectOperatorFromNode(pNode, sclOpList); + } else if (nodeType(pNode) == QUERY_NODE_LOGIC_CONDITION) { + fltSclCollectOperatorsFromLogicCond(pNode, sclOpList); + } + return TSDB_CODE_SUCCESS; } int32_t fltOptimizeNodes(SFilterInfo *pInfo, SNode **pNode, SFltTreeStat *pStat) { - SFltSclOptCtx ctx; - nodesWalkExprPostOrder(*pNode, fltSclMayBeOptimed, &ctx); + SArray* sclOpList = taosArrayInit(16, POINTER_BYTES); + fltSclCollectOperators(*pNode, 
sclOpList); + SArray* colRangeList = taosArrayInit(16, sizeof(SFltSclColumnRange)); + fltSclProcessCNF(sclOpList, colRangeList); + pInfo->sclCtx.fltSclRange = colRangeList; + for (int32_t i = 0; i < taosArrayGetSize(sclOpList); ++i) { + SNode* p = taosArrayGetP(sclOpList, i); + nodesDestroyNode(p); + } + taosArrayDestroy(sclOpList); return TSDB_CODE_SUCCESS; } From 43ac6fb1fd532c96f5deba1b21e46abd5b2e0762 Mon Sep 17 00:00:00 2001 From: shenglian zhou Date: Tue, 23 May 2023 08:43:31 +0800 Subject: [PATCH 09/49] fix: all null from block sma --- source/libs/scalar/src/filter.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/source/libs/scalar/src/filter.c b/source/libs/scalar/src/filter.c index 33f37c722e..4475a07dcd 100644 --- a/source/libs/scalar/src/filter.c +++ b/source/libs/scalar/src/filter.c @@ -3722,7 +3722,15 @@ int32_t fltSclBuildDatumFromBlockSmaValue(SFltSclDatum* datum, uint8_t type, int return TSDB_CODE_SUCCESS; } -int32_t fltSclBuildRangeFromBlockSma(SFltSclColumnRange* colRange, SColumnDataAgg* pAgg, SArray* points) { +int32_t fltSclBuildRangeFromBlockSma(SFltSclColumnRange* colRange, SColumnDataAgg* pAgg, int32_t numOfRows, SArray* points) { + if (pAgg->numOfNull == numOfRows) { + SFltSclDatum datum = {.kind = FLT_SCL_DATUM_KIND_NULL}; + SFltSclPoint startPt = {.start = true, .excl = false, .val = datum}; + SFltSclPoint endPt = {.start = false, .excl = false, .val = datum}; + taosArrayPush(points, &startPt); + taosArrayPush(points, &endPt); + return TSDB_CODE_SUCCESS; + } SFltSclDatum min; fltSclBuildDatumFromBlockSmaValue(&min, colRange->colNode->node.resType.type, pAgg->min); SFltSclPoint minPt = {.excl = false, .start = true, .val = min}; @@ -3749,7 +3757,7 @@ bool filterRangeExecute(SFilterInfo *info, SColumnDataAgg **pDataStatis, int32_t if (foundCol) { SColumnDataAgg* pAgg = pDataStatis[j]; SArray* points = taosArrayInit(2, sizeof(SFltSclPoint)); - fltSclBuildRangeFromBlockSma(colRange, pAgg, points); + fltSclBuildRangeFromBlockSma(colRange, pAgg, numOfRows, points); SArray* merged = taosArrayInit(8, sizeof(SFltSclPoint)); fltSclIntersect(points, colRange->points, merged); bool isIntersect = taosArrayGetSize(merged) != 0; From 24ae8d7ecb870852c9979083eddb5686b32bb278 Mon Sep 17 00:00:00 2001 From: slzhou Date: Tue, 23 May 2023 09:49:42 +0800 Subject: [PATCH 10/49] fix: fix ci memory leak error --- source/libs/scalar/src/filter.c | 1 + 1 file changed, 1 insertion(+) diff --git a/source/libs/scalar/src/filter.c b/source/libs/scalar/src/filter.c index 4475a07dcd..c21abc1380 100644 --- a/source/libs/scalar/src/filter.c +++ b/source/libs/scalar/src/filter.c @@ -4378,6 +4378,7 @@ int32_t fltSclProcessCNF(SArray *sclOpListCNF, SArray *colRangeList) { taosArrayDestroy(points); colRange->points = merged; } else { + taosArrayDestroy(colRange->points); colRange->points = points; } From 77a1de444c24bd68d0db36c2d7820feb4b1a8c4b Mon Sep 17 00:00:00 2001 From: slzhou Date: Tue, 23 May 2023 10:47:58 +0800 Subject: [PATCH 11/49] fix: where c7 means c7 is true, the right child is null --- source/libs/scalar/src/filter.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/source/libs/scalar/src/filter.c b/source/libs/scalar/src/filter.c index c21abc1380..4a9ebb16c1 100644 --- a/source/libs/scalar/src/filter.c +++ b/source/libs/scalar/src/filter.c @@ -4394,6 +4394,9 @@ static int32_t fltSclCollectOperatorFromNode(SNode* pNode, SArray* sclOpList) { //TODO: left value node, right column node //TODO: datatype //TODO: operator + if (pOper->pLeft == NULL || 
pOper->pRight == NULL) { + return TSDB_CODE_SUCCESS; + } if (nodeType(pOper->pLeft) == QUERY_NODE_COLUMN && nodeType(pOper->pRight) == QUERY_NODE_VALUE && (pOper->opType == OP_TYPE_GREATER_THAN || pOper->opType == OP_TYPE_GREATER_EQUAL || pOper->opType == OP_TYPE_LOWER_THAN || pOper->opType == OP_TYPE_LOWER_EQUAL || From c9140e547e652e7e241abeee7b5fc175bcbeeb4c Mon Sep 17 00:00:00 2001 From: shenglian zhou Date: Tue, 23 May 2023 14:08:20 +0800 Subject: [PATCH 12/49] fix: fix bugs of wrongly use sfltscloperator --- source/libs/scalar/src/filter.c | 76 ++++++++++++++++----------------- 1 file changed, 37 insertions(+), 39 deletions(-) diff --git a/source/libs/scalar/src/filter.c b/source/libs/scalar/src/filter.c index 4a9ebb16c1..3b61e3a97a 100644 --- a/source/libs/scalar/src/filter.c +++ b/source/libs/scalar/src/filter.c @@ -1841,9 +1841,9 @@ void filterFreeInfo(SFilterInfo *info) { return; } - for (int32_t i = 0 ; i < taosArrayGetSize(info->sclCtx.fltSclRange); ++i) { - SFltSclColumnRange* colRange = taosArrayGet(info->sclCtx.fltSclRange, i); - nodesDestroyNode((SNode*)colRange->colNode); + for (int32_t i = 0; i < taosArrayGetSize(info->sclCtx.fltSclRange); ++i) { + SFltSclColumnRange *colRange = taosArrayGet(info->sclCtx.fltSclRange, i); + nodesDestroyNode((SNode *)colRange->colNode); taosArrayDestroy(colRange->points); } taosArrayDestroy(info->sclCtx.fltSclRange); @@ -3431,7 +3431,6 @@ _return: return code; } - // compare ranges, null < min < val < max. null=null, min=min, max=max typedef enum { FLT_SCL_DATUM_KIND_NULL, @@ -3447,7 +3446,7 @@ typedef enum { typedef struct { SFltSclDatumKind kind; union { - int64_t i; // for int64, uint64 and double and bool (1 true, 0 false) + int64_t i; // for int64, uint64 and double and bool (1 true, 0 false) uint64_t u; double d; uint8_t *pData; // for varchar, nchar @@ -3627,7 +3626,6 @@ typedef struct { EOperatorType type; } SFltSclOperator; - SFltSclColumnRange *fltSclGetOrCreateColumnRange(SColumnNode *colNode, SArray *colRangeList) { for (int32_t i = 0; i < taosArrayGetSize(colRangeList); ++i) { SFltSclColumnRange *colRange = taosArrayGet(colRangeList, i); @@ -3635,7 +3633,7 @@ SFltSclColumnRange *fltSclGetOrCreateColumnRange(SColumnNode *colNode, SArray *c return colRange; } } - SColumnNode* pColumnNode = (SColumnNode*)nodesCloneNode((SNode*)colNode); + SColumnNode *pColumnNode = (SColumnNode *)nodesCloneNode((SNode *)colNode); SFltSclColumnRange newColRange = {.colNode = pColumnNode, .points = taosArrayInit(4, sizeof(SFltSclPoint))}; taosArrayPush(colRangeList, &newColRange); return taosArrayGetLast(colRangeList); @@ -3690,7 +3688,7 @@ int32_t fltSclBuildDatumFromValueNode(SFltSclDatum *datum, SValueNode *valNode) return TSDB_CODE_SUCCESS; } -int32_t fltSclBuildDatumFromBlockSmaValue(SFltSclDatum* datum, uint8_t type, int64_t val) { +int32_t fltSclBuildDatumFromBlockSmaValue(SFltSclDatum *datum, uint8_t type, int64_t val) { switch (type) { case TSDB_DATA_TYPE_BOOL: { datum->kind = FLT_SCL_DATUM_KIND_INT64; @@ -3702,13 +3700,13 @@ int32_t fltSclBuildDatumFromBlockSmaValue(SFltSclDatum* datum, uint8_t type, int case TSDB_DATA_TYPE_UINT: case TSDB_DATA_TYPE_UBIGINT: { datum->kind = FLT_SCL_DATUM_KIND_UINT64; - datum->u = *(uint64_t*)&val; + datum->u = *(uint64_t *)&val; break; } case TSDB_DATA_TYPE_FLOAT: case TSDB_DATA_TYPE_DOUBLE: { datum->kind = FLT_SCL_DATUM_KIND_FLOAT64; - datum->d = *(double*)&val; + datum->d = *(double *)&val; break; } // TODO:varchar/nchar/json @@ -3718,11 +3716,11 @@ int32_t fltSclBuildDatumFromBlockSmaValue(SFltSclDatum* 
datum, uint8_t type, int } } - return TSDB_CODE_SUCCESS; } -int32_t fltSclBuildRangeFromBlockSma(SFltSclColumnRange* colRange, SColumnDataAgg* pAgg, int32_t numOfRows, SArray* points) { +int32_t fltSclBuildRangeFromBlockSma(SFltSclColumnRange *colRange, SColumnDataAgg *pAgg, int32_t numOfRows, + SArray *points) { if (pAgg->numOfNull == numOfRows) { SFltSclDatum datum = {.kind = FLT_SCL_DATUM_KIND_NULL}; SFltSclPoint startPt = {.start = true, .excl = false, .val = datum}; @@ -3744,21 +3742,21 @@ int32_t fltSclBuildRangeFromBlockSma(SFltSclColumnRange* colRange, SColumnDataAg bool filterRangeExecute(SFilterInfo *info, SColumnDataAgg **pDataStatis, int32_t numOfCols, int32_t numOfRows) { if (info->scalarMode) { - SArray* colRanges = info->sclCtx.fltSclRange; + SArray *colRanges = info->sclCtx.fltSclRange; for (int32_t i = 0; i < taosArrayGetSize(colRanges); ++i) { - SFltSclColumnRange* colRange = taosArrayGet(colRanges, i); - bool foundCol = false; - int32_t j = 0; + SFltSclColumnRange *colRange = taosArrayGet(colRanges, i); + bool foundCol = false; + int32_t j = 0; for (; j < numOfCols; ++j) { if (pDataStatis[j] != NULL && pDataStatis[j]->colId == colRange->colNode->colId) { foundCol = true; } } if (foundCol) { - SColumnDataAgg* pAgg = pDataStatis[j]; - SArray* points = taosArrayInit(2, sizeof(SFltSclPoint)); + SColumnDataAgg *pAgg = pDataStatis[j]; + SArray *points = taosArrayInit(2, sizeof(SFltSclPoint)); fltSclBuildRangeFromBlockSma(colRange, pAgg, numOfRows, points); - SArray* merged = taosArrayInit(8, sizeof(SFltSclPoint)); + SArray *merged = taosArrayInit(8, sizeof(SFltSclPoint)); fltSclIntersect(points, colRange->points, merged); bool isIntersect = taosArrayGetSize(merged) != 0; taosArrayDestroy(merged); @@ -4381,19 +4379,18 @@ int32_t fltSclProcessCNF(SArray *sclOpListCNF, SArray *colRangeList) { taosArrayDestroy(colRange->points); colRange->points = points; } - } return TSDB_CODE_SUCCESS; } -static int32_t fltSclCollectOperatorFromNode(SNode* pNode, SArray* sclOpList) { +static int32_t fltSclCollectOperatorFromNode(SNode *pNode, SArray *sclOpList) { if (nodeType(pNode) != QUERY_NODE_OPERATOR) { return TSDB_CODE_SUCCESS; } - SOperatorNode* pOper = (SOperatorNode*)pNode; - //TODO: left value node, right column node - //TODO: datatype - //TODO: operator + SOperatorNode *pOper = (SOperatorNode *)pNode; + // TODO: left value node, right column node + // TODO: datatype + // TODO: operator if (pOper->pLeft == NULL || pOper->pRight == NULL) { return TSDB_CODE_SUCCESS; } @@ -4401,31 +4398,31 @@ static int32_t fltSclCollectOperatorFromNode(SNode* pNode, SArray* sclOpList) { (pOper->opType == OP_TYPE_GREATER_THAN || pOper->opType == OP_TYPE_GREATER_EQUAL || pOper->opType == OP_TYPE_LOWER_THAN || pOper->opType == OP_TYPE_LOWER_EQUAL || pOper->opType == OP_TYPE_NOT_EQUAL || pOper->opType == OP_TYPE_EQUAL)) { - SValueNode* valNode = (SValueNode*)pOper->pRight; + SValueNode *valNode = (SValueNode *)pOper->pRight; if (IS_NUMERIC_TYPE(valNode->node.resType.type) || valNode->node.resType.type == TSDB_DATA_TYPE_TIMESTAMP) { - SNode *p = nodesCloneNode(pNode); - taosArrayPush(sclOpList, &p); + SFltSclOperator sclOp = { + .colNode = nodesCloneNode(pOper->pLeft), .valNode = nodesCloneNode(pOper->pRight), .type = pOper->opType}; + taosArrayPush(sclOpList, &sclOp); } } return TSDB_CODE_SUCCESS; } -static int32_t fltSclCollectOperatorsFromLogicCond(SNode* pNode, SArray* sclOpList) { +static int32_t fltSclCollectOperatorsFromLogicCond(SNode *pNode, SArray *sclOpList) { if (nodeType(pNode) != 
QUERY_NODE_LOGIC_CONDITION) { return TSDB_CODE_SUCCESS; } - SLogicConditionNode* pLogicCond = (SLogicConditionNode*)pNode; - //TODO: support LOGIC_COND_TYPE_OR + SLogicConditionNode *pLogicCond = (SLogicConditionNode *)pNode; + // TODO: support LOGIC_COND_TYPE_OR if (pLogicCond->condType != LOGIC_COND_TYPE_AND) { return TSDB_CODE_SUCCESS; } - SNode* pExpr = NULL; - FOREACH(pExpr, pLogicCond->pParameterList) { fltSclCollectOperatorFromNode(pExpr, sclOpList); - } + SNode *pExpr = NULL; + FOREACH(pExpr, pLogicCond->pParameterList) { fltSclCollectOperatorFromNode(pExpr, sclOpList); } return TSDB_CODE_SUCCESS; } -static int32_t fltSclCollectOperators(SNode *pNode, SArray* sclOpList) { +static int32_t fltSclCollectOperators(SNode *pNode, SArray *sclOpList) { if (nodeType(pNode) == QUERY_NODE_OPERATOR) { fltSclCollectOperatorFromNode(pNode, sclOpList); } else if (nodeType(pNode) == QUERY_NODE_LOGIC_CONDITION) { @@ -4435,15 +4432,16 @@ static int32_t fltSclCollectOperators(SNode *pNode, SArray* sclOpList) { } int32_t fltOptimizeNodes(SFilterInfo *pInfo, SNode **pNode, SFltTreeStat *pStat) { - SArray* sclOpList = taosArrayInit(16, POINTER_BYTES); + SArray *sclOpList = taosArrayInit(16, POINTER_BYTES); fltSclCollectOperators(*pNode, sclOpList); - SArray* colRangeList = taosArrayInit(16, sizeof(SFltSclColumnRange)); + SArray *colRangeList = taosArrayInit(16, sizeof(SFltSclColumnRange)); fltSclProcessCNF(sclOpList, colRangeList); pInfo->sclCtx.fltSclRange = colRangeList; for (int32_t i = 0; i < taosArrayGetSize(sclOpList); ++i) { - SNode* p = taosArrayGetP(sclOpList, i); - nodesDestroyNode(p); + SFltSclOperator *sclOp = taosArrayGet(sclOpList, i); + nodesDestroyNode(sclOp->colNode); + nodesDestroyNode(sclOp->valNode); } taosArrayDestroy(sclOpList); return TSDB_CODE_SUCCESS; From 99620d434da9fdf995a923380416e5138ce5a32a Mon Sep 17 00:00:00 2001 From: slzhou Date: Tue, 23 May 2023 14:11:52 +0800 Subject: [PATCH 13/49] fix: fix bugs --- source/libs/scalar/src/filter.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/source/libs/scalar/src/filter.c b/source/libs/scalar/src/filter.c index 3b61e3a97a..c1de62a407 100644 --- a/source/libs/scalar/src/filter.c +++ b/source/libs/scalar/src/filter.c @@ -4401,7 +4401,7 @@ static int32_t fltSclCollectOperatorFromNode(SNode *pNode, SArray *sclOpList) { SValueNode *valNode = (SValueNode *)pOper->pRight; if (IS_NUMERIC_TYPE(valNode->node.resType.type) || valNode->node.resType.type == TSDB_DATA_TYPE_TIMESTAMP) { SFltSclOperator sclOp = { - .colNode = nodesCloneNode(pOper->pLeft), .valNode = nodesCloneNode(pOper->pRight), .type = pOper->opType}; + .colNode = (SColumnNode*)nodesCloneNode(pOper->pLeft), .valNode = (SValueNode*)nodesCloneNode(pOper->pRight), .type = pOper->opType}; taosArrayPush(sclOpList, &sclOp); } } @@ -4440,8 +4440,8 @@ int32_t fltOptimizeNodes(SFilterInfo *pInfo, SNode **pNode, SFltTreeStat *pStat) for (int32_t i = 0; i < taosArrayGetSize(sclOpList); ++i) { SFltSclOperator *sclOp = taosArrayGet(sclOpList, i); - nodesDestroyNode(sclOp->colNode); - nodesDestroyNode(sclOp->valNode); + nodesDestroyNode((SNode*)sclOp->colNode); + nodesDestroyNode((SNode*)sclOp->valNode); } taosArrayDestroy(sclOpList); return TSDB_CODE_SUCCESS; From 857cf3d269952a1f4a3010f01f67c2a8d77b9af4 Mon Sep 17 00:00:00 2001 From: slzhou Date: Tue, 23 May 2023 14:29:33 +0800 Subject: [PATCH 14/49] fix: fix bugs --- source/libs/scalar/src/filter.c | 1 + 1 file changed, 1 insertion(+) diff --git a/source/libs/scalar/src/filter.c b/source/libs/scalar/src/filter.c 
index c1de62a407..2e0750359f 100644
--- a/source/libs/scalar/src/filter.c
+++ b/source/libs/scalar/src/filter.c
@@ -3750,6 +3750,7 @@ bool filterRangeExecute(SFilterInfo *info, SColumnDataAgg **pDataStatis, int32_t
       for (; j < numOfCols; ++j) {
         if (pDataStatis[j] != NULL && pDataStatis[j]->colId == colRange->colNode->colId) {
           foundCol = true;
+          break;
         }
       }
       if (foundCol) {

From 22616d235ae120362973ab67cdbe3da0fabf2a0b Mon Sep 17 00:00:00 2001
From: slzhou
Date: Tue, 23 May 2023 16:36:27 +0800
Subject: [PATCH 15/49] fix: fix minor bugs

---
 source/libs/scalar/src/filter.c | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/source/libs/scalar/src/filter.c b/source/libs/scalar/src/filter.c
index 2e0750359f..e7f88908e5 100644
--- a/source/libs/scalar/src/filter.c
+++ b/source/libs/scalar/src/filter.c
@@ -3598,7 +3598,7 @@ int32_t fltSclMerge(SArray *pts1, SArray *pts2, bool isUnion, SArray *merged) {
   int32_t countRequired = (isUnion) ? 1 : 2;
   int32_t count = 0;
   for (int32_t i = 0; i < taosArrayGetSize(all); ++i) {
-    SFltSclPoint *pt = taosArrayGet(pts1, i);
+    SFltSclPoint *pt = taosArrayGet(all, i);
     if (pt->start) {
       ++count;
       if (count == countRequired) {
@@ -4402,7 +4402,9 @@ static int32_t fltSclCollectOperatorFromNode(SNode *pNode, SArray *sclOpList) {
   SValueNode *valNode = (SValueNode *)pOper->pRight;
   if (IS_NUMERIC_TYPE(valNode->node.resType.type) || valNode->node.resType.type == TSDB_DATA_TYPE_TIMESTAMP) {
     SFltSclOperator sclOp = {
-        .colNode = (SColumnNode*)nodesCloneNode(pOper->pLeft), .valNode = (SValueNode*)nodesCloneNode(pOper->pRight), .type = pOper->opType};
+        .colNode = (SColumnNode*)nodesCloneNode(pOper->pLeft),
+        .valNode = (SValueNode*)nodesCloneNode(pOper->pRight),
+        .type = pOper->opType};
     taosArrayPush(sclOpList, &sclOp);
   }
 }
@@ -4433,7 +4435,7 @@ static int32_t fltSclCollectOperators(SNode *pNode, SArray *sclOpList) {
 }

 int32_t fltOptimizeNodes(SFilterInfo *pInfo, SNode **pNode, SFltTreeStat *pStat) {
-  SArray *sclOpList = taosArrayInit(16, POINTER_BYTES);
+  SArray *sclOpList = taosArrayInit(16, sizeof(SFltSclOperator));
   fltSclCollectOperators(*pNode, sclOpList);
   SArray *colRangeList = taosArrayInit(16, sizeof(SFltSclColumnRange));
   fltSclProcessCNF(sclOpList, colRangeList);

From 2e4c14c18ce9a87f38eabe8c249fa26d04f70dbe Mon Sep 17 00:00:00 2001
From: slzhou
Date: Tue, 23 May 2023 21:30:41 +0800
Subject: [PATCH 16/49] fix: fix bugs

---
 source/libs/scalar/src/filter.c | 13 ++++++++-----
 1 file changed, 8 insertions(+), 5 deletions(-)

diff --git a/source/libs/scalar/src/filter.c b/source/libs/scalar/src/filter.c
index e7f88908e5..264e41b006 100644
--- a/source/libs/scalar/src/filter.c
+++ b/source/libs/scalar/src/filter.c
@@ -3514,10 +3514,7 @@ int32_t fltSclCompareWithUInt64(SFltSclDatum *val1, SFltSclDatum *val2) {

 int32_t fltSclCompareDatum(SFltSclDatum *val1, SFltSclDatum *val2) {
   if (val2->kind == FLT_SCL_DATUM_KIND_NULL || val2->kind == FLT_SCL_DATUM_KIND_MIN ||
       val2->kind == FLT_SCL_DATUM_KIND_MAX) {
-    if (val1->kind == FLT_SCL_DATUM_KIND_NULL || val1->kind == FLT_SCL_DATUM_KIND_MIN ||
-        val1->kind == FLT_SCL_DATUM_KIND_MAX) {
       return (val1->kind < val2->kind) ? -1 : ((val1->kind > val2->kind) ?
1 : 0); - } } switch (val2->kind) { @@ -3690,7 +3687,12 @@ int32_t fltSclBuildDatumFromValueNode(SFltSclDatum *datum, SValueNode *valNode) int32_t fltSclBuildDatumFromBlockSmaValue(SFltSclDatum *datum, uint8_t type, int64_t val) { switch (type) { - case TSDB_DATA_TYPE_BOOL: { + case TSDB_DATA_TYPE_BOOL: + case TSDB_DATA_TYPE_TINYINT: + case TSDB_DATA_TYPE_SMALLINT: + case TSDB_DATA_TYPE_INT: + case TSDB_DATA_TYPE_BIGINT: + case TSDB_DATA_TYPE_TIMESTAMP: { datum->kind = FLT_SCL_DATUM_KIND_INT64; datum->i = val; break; @@ -3711,7 +3713,8 @@ int32_t fltSclBuildDatumFromBlockSmaValue(SFltSclDatum *datum, uint8_t type, int } // TODO:varchar/nchar/json default: { - qError("not supported"); + datum->kind = FLT_SCL_DATUM_KIND_NULL; + qError("not supported type when build datum from block sma value"); break; } } From 383c828c856df00a752b9843a615c468993f9ef3 Mon Sep 17 00:00:00 2001 From: slzhou Date: Wed, 24 May 2023 11:18:44 +0800 Subject: [PATCH 17/49] fix: before review --- source/libs/scalar/src/filter.c | 90 +++++++++++++++++++++------------ 1 file changed, 58 insertions(+), 32 deletions(-) diff --git a/source/libs/scalar/src/filter.c b/source/libs/scalar/src/filter.c index 264e41b006..ecf621a726 100644 --- a/source/libs/scalar/src/filter.c +++ b/source/libs/scalar/src/filter.c @@ -3472,7 +3472,7 @@ int32_t fltSclCompareWithFloat64(SFltSclDatum *val1, SFltSclDatum *val2) { } // TODO: varchar, nchar default: - qError("not support comparsion. %d, %d", val1->kind, val2->kind); + qError("not supported comparsion. kind1 %d, kind2 %d", val1->kind, val2->kind); return (val1->kind - val2->kind); } } @@ -3489,7 +3489,7 @@ int32_t fltSclCompareWithInt64(SFltSclDatum *val1, SFltSclDatum *val2) { } // TODO: varchar, nchar default: - qError("not support comparsion. %d, %d", val1->kind, val2->kind); + qError("not supported comparsion. kind1 %d, kind2 %d", val1->kind, val2->kind); return (val1->kind - val2->kind); } } @@ -3506,7 +3506,7 @@ int32_t fltSclCompareWithUInt64(SFltSclDatum *val1, SFltSclDatum *val2) { } // TODO: varchar, nchar default: - qError("not support comparsion. %d, %d", val1->kind, val2->kind); + qError("not supported comparsion. kind1 %d, kind2 %d", val1->kind, val2->kind); return (val1->kind - val2->kind); } } @@ -3514,7 +3514,7 @@ int32_t fltSclCompareWithUInt64(SFltSclDatum *val1, SFltSclDatum *val2) { int32_t fltSclCompareDatum(SFltSclDatum *val1, SFltSclDatum *val2) { if (val2->kind == FLT_SCL_DATUM_KIND_NULL || val2->kind == FLT_SCL_DATUM_KIND_MIN || val2->kind == FLT_SCL_DATUM_KIND_MAX) { - return (val1->kind < val2->kind) ? -1 : ((val1->kind > val2->kind) ? 1 : 0); + return (val1->kind < val2->kind) ? -1 : ((val1->kind > val2->kind) ? 1 : 0); } switch (val2->kind) { @@ -3529,7 +3529,7 @@ int32_t fltSclCompareDatum(SFltSclDatum *val1, SFltSclDatum *val2) { } // TODO: varchar/nchar default: - fltError("not supported kind. just return 0"); + qError("not supported kind when compare datum. 
kind2 : %d", val2->kind); return 0; break; } @@ -3585,7 +3585,6 @@ int32_t fltSclMergeSort(SArray *pts1, SArray *pts2, SArray *result) { return 0; } -// pts1 and pts2 must be ordered and de-duplicated and each range can not be a range of another range int32_t fltSclMerge(SArray *pts1, SArray *pts2, bool isUnion, SArray *merged) { size_t len1 = taosArrayGetSize(pts1); size_t len2 = taosArrayGetSize(pts2); @@ -3616,7 +3615,6 @@ int32_t fltSclIntersect(SArray *pts1, SArray *pts2, SArray *merged) { return flt int32_t fltSclUnion(SArray *pts1, SArray *pts2, SArray *merged) { return fltSclMerge(pts1, pts2, true, merged); } -// TODO: column, constant typedef struct { SColumnNode *colNode; SValueNode *valNode; @@ -3675,9 +3673,9 @@ int32_t fltSclBuildDatumFromValueNode(SFltSclDatum *datum, SValueNode *valNode) datum->d = valNode->datum.d; break; } - // TODO:varchar/nchar/json + // TODO:varchar/nchar/json default: { - qError("not supported"); + qError("not supported type %d when build datum from value node", valNode->node.resType.type); break; } } @@ -3687,11 +3685,11 @@ int32_t fltSclBuildDatumFromValueNode(SFltSclDatum *datum, SValueNode *valNode) int32_t fltSclBuildDatumFromBlockSmaValue(SFltSclDatum *datum, uint8_t type, int64_t val) { switch (type) { - case TSDB_DATA_TYPE_BOOL: - case TSDB_DATA_TYPE_TINYINT: + case TSDB_DATA_TYPE_BOOL: + case TSDB_DATA_TYPE_TINYINT: case TSDB_DATA_TYPE_SMALLINT: case TSDB_DATA_TYPE_INT: - case TSDB_DATA_TYPE_BIGINT: + case TSDB_DATA_TYPE_BIGINT: case TSDB_DATA_TYPE_TIMESTAMP: { datum->kind = FLT_SCL_DATUM_KIND_INT64; datum->i = val; @@ -3711,10 +3709,10 @@ int32_t fltSclBuildDatumFromBlockSmaValue(SFltSclDatum *datum, uint8_t type, int datum->d = *(double *)&val; break; } - // TODO:varchar/nchar/json + // TODO:varchar/nchar/json default: { datum->kind = FLT_SCL_DATUM_KIND_NULL; - qError("not supported type when build datum from block sma value"); + qError("not supported type %d when build datum from block sma value", type); break; } } @@ -3732,6 +3730,13 @@ int32_t fltSclBuildRangeFromBlockSma(SFltSclColumnRange *colRange, SColumnDataAg taosArrayPush(points, &endPt); return TSDB_CODE_SUCCESS; } + if (pAgg->numOfNull > 0) { + SFltSclDatum nullDatum = {.kind = FLT_SCL_DATUM_KIND_NULL}; + SFltSclPoint startPt = {.start = true, .excl = false, .val = nullDatum}; + SFltSclPoint endPt = {.start = false, .excl = false, .val = nullDatum}; + taosArrayPush(points, &startPt); + taosArrayPush(points, &endPt); + } SFltSclDatum min; fltSclBuildDatumFromBlockSmaValue(&min, colRange->colNode->node.resType.type, pAgg->min); SFltSclPoint minPt = {.excl = false, .start = true, .val = min}; @@ -4357,8 +4362,25 @@ int32_t fltSclBuildRangePoints(SFltSclOperator *oper, SArray *points) { } break; } + case OP_TYPE_IS_NULL: { + SFltSclDatum nullDatum = {.kind = FLT_SCL_DATUM_KIND_NULL}; + SFltSclPoint startPt = {.start = true, .excl = false, .val = nullDatum}; + SFltSclPoint endPt = {.start = false, .excl = false, .val = nullDatum}; + taosArrayPush(points, &startPt); + taosArrayPush(points, &endPt); + break; + } + case OP_TYPE_IS_NOT_NULL: { + SFltSclDatum minDatum = {.kind = FLT_SCL_DATUM_KIND_MIN, .type = oper->colNode->node.resType}; + SFltSclPoint startPt = {.start = true, .excl = false, .val = minDatum}; + SFltSclDatum maxDatum = {.kind = FLT_SCL_DATUM_KIND_MAX, .type = oper->colNode->node.resType}; + SFltSclPoint endPt = {.start = false, .excl = false, .val = maxDatum}; + taosArrayPush(points, &startPt); + taosArrayPush(points, &endPt); + break; + } default: { - qError("not 
supported op"); + qError("not supported operator type : %d when build range points", oper->type); break; } } @@ -4391,26 +4413,30 @@ static int32_t fltSclCollectOperatorFromNode(SNode *pNode, SArray *sclOpList) { if (nodeType(pNode) != QUERY_NODE_OPERATOR) { return TSDB_CODE_SUCCESS; } + SOperatorNode *pOper = (SOperatorNode *)pNode; - // TODO: left value node, right column node - // TODO: datatype - // TODO: operator if (pOper->pLeft == NULL || pOper->pRight == NULL) { return TSDB_CODE_SUCCESS; } - if (nodeType(pOper->pLeft) == QUERY_NODE_COLUMN && nodeType(pOper->pRight) == QUERY_NODE_VALUE && - (pOper->opType == OP_TYPE_GREATER_THAN || pOper->opType == OP_TYPE_GREATER_EQUAL || - pOper->opType == OP_TYPE_LOWER_THAN || pOper->opType == OP_TYPE_LOWER_EQUAL || - pOper->opType == OP_TYPE_NOT_EQUAL || pOper->opType == OP_TYPE_EQUAL)) { - SValueNode *valNode = (SValueNode *)pOper->pRight; - if (IS_NUMERIC_TYPE(valNode->node.resType.type) || valNode->node.resType.type == TSDB_DATA_TYPE_TIMESTAMP) { - SFltSclOperator sclOp = { - .colNode = (SColumnNode*)nodesCloneNode(pOper->pLeft), - .valNode = (SValueNode*)nodesCloneNode(pOper->pRight), - .type = pOper->opType}; - taosArrayPush(sclOpList, &sclOp); - } + + if (!(pOper->opType == OP_TYPE_GREATER_THAN || pOper->opType == OP_TYPE_GREATER_EQUAL || + pOper->opType == OP_TYPE_LOWER_THAN || pOper->opType == OP_TYPE_LOWER_EQUAL || + pOper->opType == OP_TYPE_NOT_EQUAL || pOper->opType == OP_TYPE_EQUAL)) { + return TSDB_CODE_SUCCESS; } + + if (!(nodeType(pOper->pLeft) == QUERY_NODE_COLUMN && nodeType(pOper->pRight) == QUERY_NODE_VALUE)) { + return TSDB_CODE_SUCCESS; + } + + SValueNode *valNode = (SValueNode *)pOper->pRight; + if (IS_NUMERIC_TYPE(valNode->node.resType.type) || valNode->node.resType.type == TSDB_DATA_TYPE_TIMESTAMP) { + SFltSclOperator sclOp = {.colNode = (SColumnNode *)nodesCloneNode(pOper->pLeft), + .valNode = (SValueNode *)nodesCloneNode(pOper->pRight), + .type = pOper->opType}; + taosArrayPush(sclOpList, &sclOp); + } + return TSDB_CODE_SUCCESS; } @@ -4446,8 +4472,8 @@ int32_t fltOptimizeNodes(SFilterInfo *pInfo, SNode **pNode, SFltTreeStat *pStat) for (int32_t i = 0; i < taosArrayGetSize(sclOpList); ++i) { SFltSclOperator *sclOp = taosArrayGet(sclOpList, i); - nodesDestroyNode((SNode*)sclOp->colNode); - nodesDestroyNode((SNode*)sclOp->valNode); + nodesDestroyNode((SNode *)sclOp->colNode); + nodesDestroyNode((SNode *)sclOp->valNode); } taosArrayDestroy(sclOpList); return TSDB_CODE_SUCCESS; From 5b1e0be7560ebc658aec864e6db2b7fabcce15bb Mon Sep 17 00:00:00 2001 From: slzhou Date: Wed, 24 May 2023 13:41:21 +0800 Subject: [PATCH 18/49] enhance: add log --- source/libs/scalar/src/filter.c | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/source/libs/scalar/src/filter.c b/source/libs/scalar/src/filter.c index ecf621a726..3c01edf857 100644 --- a/source/libs/scalar/src/filter.c +++ b/source/libs/scalar/src/filter.c @@ -3446,10 +3446,10 @@ typedef enum { typedef struct { SFltSclDatumKind kind; union { - int64_t i; // for int64, uint64 and double and bool (1 true, 0 false) - uint64_t u; - double d; - uint8_t *pData; // for varchar, nchar + int64_t i; // for int and bool (1 true, 0 false) and ts + uint64_t u; // for uint + double d; // for double + uint8_t *pData; // for varchar, nchar, len prefixed }; SDataType type; // TODO: original data type, may not be used? 
} SFltSclDatum; @@ -3765,9 +3765,15 @@ bool filterRangeExecute(SFilterInfo *info, SColumnDataAgg **pDataStatis, int32_t SColumnDataAgg *pAgg = pDataStatis[j]; SArray *points = taosArrayInit(2, sizeof(SFltSclPoint)); fltSclBuildRangeFromBlockSma(colRange, pAgg, numOfRows, points); + qDebug("column data agg: nulls %d, rows %d, max %" PRId64 " min " PRId64, pAgg->numOfNull, numOfRows, pAgg->max, + pAgg->min); + SArray *merged = taosArrayInit(8, sizeof(SFltSclPoint)); fltSclIntersect(points, colRange->points, merged); bool isIntersect = taosArrayGetSize(merged) != 0; + qDebug("filter range execute, scalar mode, column range found. colId: %d colName: %s has overlap: %d", + colRange->colNode->colId, colRange->colNode->colName, isIntersect); + taosArrayDestroy(merged); taosArrayDestroy(points); if (!isIntersect) { From e5f9d78b6d0df0c0fa9970a29ce9f1d2617ae52a Mon Sep 17 00:00:00 2001 From: slzhou Date: Wed, 24 May 2023 16:39:38 +0800 Subject: [PATCH 19/49] fix: rocksdb so --- cmake/cmake.options | 5 -- contrib/CMakeLists.txt | 56 --------------------- contrib/test/CMakeLists.txt | 3 -- source/dnode/mgmt/mgmt_dnode/CMakeLists.txt | 2 +- source/dnode/vnode/CMakeLists.txt | 2 - source/libs/stream/CMakeLists.txt | 2 - 6 files changed, 1 insertion(+), 69 deletions(-) diff --git a/cmake/cmake.options b/cmake/cmake.options index a7e62350df..064b1f7c6f 100644 --- a/cmake/cmake.options +++ b/cmake/cmake.options @@ -106,11 +106,6 @@ option( OFF ) -option( - BUILD_WITH_ROCKSDB - "If build with rocksdb" - ON -) option( BUILD_WITH_SQLITE diff --git a/contrib/CMakeLists.txt b/contrib/CMakeLists.txt index 536d4eae8e..ad3d8bc326 100644 --- a/contrib/CMakeLists.txt +++ b/contrib/CMakeLists.txt @@ -77,11 +77,6 @@ if(${BUILD_WITH_LEVELDB}) cat("${TD_SUPPORT_DIR}/leveldb_CMakeLists.txt.in" ${CONTRIB_TMP_FILE}) endif(${BUILD_WITH_LEVELDB}) -# rocksdb -if(${BUILD_WITH_ROCKSDB}) - cat("${TD_SUPPORT_DIR}/rocksdb_CMakeLists.txt.in" ${CONTRIB_TMP_FILE}) - add_definitions(-DUSE_ROCKSDB) -endif(${BUILD_WITH_ROCKSDB}) # canonical-raft if(${BUILD_WITH_CRAFT}) @@ -220,57 +215,6 @@ if(${BUILD_WITH_LEVELDB}) ) endif(${BUILD_WITH_LEVELDB}) -# rocksdb -# To support rocksdb build on ubuntu: sudo apt-get install libgflags-dev -if(${BUILD_WITH_ROCKSDB}) - if(${TD_LINUX}) - SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-error=maybe-uninitialized -Wno-error=unused-but-set-variable -Wno-error=unused-variable -Wno-error=unused-function -Wno-errno=unused-private-field -Wno-error=unused-result") - endif(${TD_LINUX}) - - if(${TD_DARWIN}) - SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-error=maybe-uninitialized") - endif(${TD_DARWIN}) - - if (${TD_WINDOWS}) - SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /wd4244 /wd4819") - endif(${TD_WINDOWS}) - - - if(${TD_DARWIN}) - option(HAVE_THREAD_LOCAL "" OFF) - option(WITH_IOSTATS_CONTEXT "" OFF) - option(WITH_PERF_CONTEXT "" OFF) - endif(${TD_DARWIN}) - - if(${TD_WINDOWS}) - option(WITH_JNI "" ON) - endif(${TD_WINDOWS}) - - if(${TD_WINDOWS}) - option(WITH_MD_LIBRARY "build with MD" OFF) - set(SYSTEM_LIBS ${SYSTEM_LIBS} shlwapi.lib rpcrt4.lib) - endif(${TD_WINDOWS}) - - - option(WITH_FALLOCATE "" OFF) - option(WITH_JEMALLOC "" OFF) - option(WITH_GFLAGS "" OFF) - option(PORTABLE "" ON) - option(WITH_LIBURING "" OFF) - option(FAIL_ON_WARNINGS OFF) - - option(WITH_TESTS "" OFF) - option(WITH_BENCHMARK_TOOLS "" OFF) - option(WITH_TOOLS "" OFF) - option(WITH_LIBURING "" OFF) - - option(ROCKSDB_BUILD_SHARED "Build shared versions of the RocksDB libraries" OFF) - add_subdirectory(rocksdb EXCLUDE_FROM_ALL) - 
target_include_directories( - rocksdb - PUBLIC $ - ) -endif(${BUILD_WITH_ROCKSDB}) # lucene # To support build on ubuntu: sudo apt-get install libboost-all-dev diff --git a/contrib/test/CMakeLists.txt b/contrib/test/CMakeLists.txt index f35cf0d13d..8d095518e2 100644 --- a/contrib/test/CMakeLists.txt +++ b/contrib/test/CMakeLists.txt @@ -1,7 +1,4 @@ # rocksdb -if(${BUILD_WITH_ROCKSDB}) - add_subdirectory(rocksdb) -endif(${BUILD_WITH_ROCKSDB}) if(${BUILD_WITH_LUCENE}) add_subdirectory(lucene) diff --git a/source/dnode/mgmt/mgmt_dnode/CMakeLists.txt b/source/dnode/mgmt/mgmt_dnode/CMakeLists.txt index fdd0830a58..7d0b6d617d 100644 --- a/source/dnode/mgmt/mgmt_dnode/CMakeLists.txt +++ b/source/dnode/mgmt/mgmt_dnode/CMakeLists.txt @@ -9,5 +9,5 @@ target_include_directories( PUBLIC "${GRANT_CFG_INCLUDE_DIR}" ) target_link_libraries( - mgmt_dnode node_util + mgmt_dnode node_util rocksdb ) \ No newline at end of file diff --git a/source/dnode/vnode/CMakeLists.txt b/source/dnode/vnode/CMakeLists.txt index e8660cd6ad..79a7d55198 100644 --- a/source/dnode/vnode/CMakeLists.txt +++ b/source/dnode/vnode/CMakeLists.txt @@ -117,9 +117,7 @@ if(${BUILD_WITH_INVERTEDINDEX}) add_definitions(-DUSE_INVERTED_INDEX) endif(${BUILD_WITH_INVERTEDINDEX}) -if(${BUILD_WITH_ROCKSDB}) add_definitions(-DUSE_ROCKSDB) -endif(${BUILD_WITH_ROCKSDB}) diff --git a/source/libs/stream/CMakeLists.txt b/source/libs/stream/CMakeLists.txt index 2edbc44aae..5952250b3e 100644 --- a/source/libs/stream/CMakeLists.txt +++ b/source/libs/stream/CMakeLists.txt @@ -6,7 +6,6 @@ target_include_directories( PRIVATE "${CMAKE_CURRENT_SOURCE_DIR}/inc" ) -if(${BUILD_WITH_ROCKSDB}) target_link_libraries( stream PUBLIC rocksdb tdb @@ -19,7 +18,6 @@ if(${BUILD_WITH_ROCKSDB}) ) add_definitions(-DUSE_ROCKSDB) -endif(${BUILD_WITH_ROCKSDB}) #target_link_libraries( From d7b1613d850eed2d65faf94c84f58a07320082b8 Mon Sep 17 00:00:00 2001 From: slzhou Date: Wed, 24 May 2023 16:42:50 +0800 Subject: [PATCH 20/49] Revert "fix: rocksdb so" This reverts commit e5f9d78b6d0df0c0fa9970a29ce9f1d2617ae52a. 
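The hunks below reinstate the BUILD_WITH_ROCKSDB option and guards that PATCH 19 removed. For reference, this is the shape of the guarded linkage being restored in source/libs/stream/CMakeLists.txt, condensed from the hunks themselves (the other linked libraries are elided here):

    if(${BUILD_WITH_ROCKSDB})
      target_link_libraries(
          stream
          PUBLIC rocksdb tdb
      )
      add_definitions(-DUSE_ROCKSDB)
    endif(${BUILD_WITH_ROCKSDB})

Keeping the guard means a build configured with -DBUILD_WITH_ROCKSDB=OFF skips both the rocksdb linkage and the USE_ROCKSDB define.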
--- cmake/cmake.options | 5 ++ contrib/CMakeLists.txt | 56 +++++++++++++++++++++ contrib/test/CMakeLists.txt | 3 ++ source/dnode/mgmt/mgmt_dnode/CMakeLists.txt | 2 +- source/dnode/vnode/CMakeLists.txt | 2 + source/libs/stream/CMakeLists.txt | 2 + 6 files changed, 69 insertions(+), 1 deletion(-) diff --git a/cmake/cmake.options b/cmake/cmake.options index 064b1f7c6f..a7e62350df 100644 --- a/cmake/cmake.options +++ b/cmake/cmake.options @@ -106,6 +106,11 @@ option( OFF ) +option( + BUILD_WITH_ROCKSDB + "If build with rocksdb" + ON +) option( BUILD_WITH_SQLITE diff --git a/contrib/CMakeLists.txt b/contrib/CMakeLists.txt index ad3d8bc326..536d4eae8e 100644 --- a/contrib/CMakeLists.txt +++ b/contrib/CMakeLists.txt @@ -77,6 +77,11 @@ if(${BUILD_WITH_LEVELDB}) cat("${TD_SUPPORT_DIR}/leveldb_CMakeLists.txt.in" ${CONTRIB_TMP_FILE}) endif(${BUILD_WITH_LEVELDB}) +# rocksdb +if(${BUILD_WITH_ROCKSDB}) + cat("${TD_SUPPORT_DIR}/rocksdb_CMakeLists.txt.in" ${CONTRIB_TMP_FILE}) + add_definitions(-DUSE_ROCKSDB) +endif(${BUILD_WITH_ROCKSDB}) # canonical-raft if(${BUILD_WITH_CRAFT}) @@ -215,6 +220,57 @@ if(${BUILD_WITH_LEVELDB}) ) endif(${BUILD_WITH_LEVELDB}) +# rocksdb +# To support rocksdb build on ubuntu: sudo apt-get install libgflags-dev +if(${BUILD_WITH_ROCKSDB}) + if(${TD_LINUX}) + SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-error=maybe-uninitialized -Wno-error=unused-but-set-variable -Wno-error=unused-variable -Wno-error=unused-function -Wno-errno=unused-private-field -Wno-error=unused-result") + endif(${TD_LINUX}) + + if(${TD_DARWIN}) + SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-error=maybe-uninitialized") + endif(${TD_DARWIN}) + + if (${TD_WINDOWS}) + SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /wd4244 /wd4819") + endif(${TD_WINDOWS}) + + + if(${TD_DARWIN}) + option(HAVE_THREAD_LOCAL "" OFF) + option(WITH_IOSTATS_CONTEXT "" OFF) + option(WITH_PERF_CONTEXT "" OFF) + endif(${TD_DARWIN}) + + if(${TD_WINDOWS}) + option(WITH_JNI "" ON) + endif(${TD_WINDOWS}) + + if(${TD_WINDOWS}) + option(WITH_MD_LIBRARY "build with MD" OFF) + set(SYSTEM_LIBS ${SYSTEM_LIBS} shlwapi.lib rpcrt4.lib) + endif(${TD_WINDOWS}) + + + option(WITH_FALLOCATE "" OFF) + option(WITH_JEMALLOC "" OFF) + option(WITH_GFLAGS "" OFF) + option(PORTABLE "" ON) + option(WITH_LIBURING "" OFF) + option(FAIL_ON_WARNINGS OFF) + + option(WITH_TESTS "" OFF) + option(WITH_BENCHMARK_TOOLS "" OFF) + option(WITH_TOOLS "" OFF) + option(WITH_LIBURING "" OFF) + + option(ROCKSDB_BUILD_SHARED "Build shared versions of the RocksDB libraries" OFF) + add_subdirectory(rocksdb EXCLUDE_FROM_ALL) + target_include_directories( + rocksdb + PUBLIC $ + ) +endif(${BUILD_WITH_ROCKSDB}) # lucene # To support build on ubuntu: sudo apt-get install libboost-all-dev diff --git a/contrib/test/CMakeLists.txt b/contrib/test/CMakeLists.txt index 8d095518e2..f35cf0d13d 100644 --- a/contrib/test/CMakeLists.txt +++ b/contrib/test/CMakeLists.txt @@ -1,4 +1,7 @@ # rocksdb +if(${BUILD_WITH_ROCKSDB}) + add_subdirectory(rocksdb) +endif(${BUILD_WITH_ROCKSDB}) if(${BUILD_WITH_LUCENE}) add_subdirectory(lucene) diff --git a/source/dnode/mgmt/mgmt_dnode/CMakeLists.txt b/source/dnode/mgmt/mgmt_dnode/CMakeLists.txt index 7d0b6d617d..fdd0830a58 100644 --- a/source/dnode/mgmt/mgmt_dnode/CMakeLists.txt +++ b/source/dnode/mgmt/mgmt_dnode/CMakeLists.txt @@ -9,5 +9,5 @@ target_include_directories( PUBLIC "${GRANT_CFG_INCLUDE_DIR}" ) target_link_libraries( - mgmt_dnode node_util rocksdb + mgmt_dnode node_util ) \ No newline at end of file diff --git a/source/dnode/vnode/CMakeLists.txt 
b/source/dnode/vnode/CMakeLists.txt index 79a7d55198..e8660cd6ad 100644 --- a/source/dnode/vnode/CMakeLists.txt +++ b/source/dnode/vnode/CMakeLists.txt @@ -117,7 +117,9 @@ if(${BUILD_WITH_INVERTEDINDEX}) add_definitions(-DUSE_INVERTED_INDEX) endif(${BUILD_WITH_INVERTEDINDEX}) +if(${BUILD_WITH_ROCKSDB}) add_definitions(-DUSE_ROCKSDB) +endif(${BUILD_WITH_ROCKSDB}) diff --git a/source/libs/stream/CMakeLists.txt b/source/libs/stream/CMakeLists.txt index 5952250b3e..2edbc44aae 100644 --- a/source/libs/stream/CMakeLists.txt +++ b/source/libs/stream/CMakeLists.txt @@ -6,6 +6,7 @@ target_include_directories( PRIVATE "${CMAKE_CURRENT_SOURCE_DIR}/inc" ) +if(${BUILD_WITH_ROCKSDB}) target_link_libraries( stream PUBLIC rocksdb tdb @@ -18,6 +19,7 @@ target_include_directories( ) add_definitions(-DUSE_ROCKSDB) +endif(${BUILD_WITH_ROCKSDB}) #target_link_libraries( From 768372fc7bca72b43b47a4dc29ebc3b96dab4c6d Mon Sep 17 00:00:00 2001 From: slzhou Date: Wed, 24 May 2023 16:45:06 +0800 Subject: [PATCH 21/49] fix: fix compilation warning --- source/libs/scalar/src/filter.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/source/libs/scalar/src/filter.c b/source/libs/scalar/src/filter.c index 3c01edf857..27cf9da1d5 100644 --- a/source/libs/scalar/src/filter.c +++ b/source/libs/scalar/src/filter.c @@ -3765,7 +3765,7 @@ bool filterRangeExecute(SFilterInfo *info, SColumnDataAgg **pDataStatis, int32_t SColumnDataAgg *pAgg = pDataStatis[j]; SArray *points = taosArrayInit(2, sizeof(SFltSclPoint)); fltSclBuildRangeFromBlockSma(colRange, pAgg, numOfRows, points); - qDebug("column data agg: nulls %d, rows %d, max %" PRId64 " min " PRId64, pAgg->numOfNull, numOfRows, pAgg->max, + qDebug("column data agg: nulls %d, rows %d, max %" PRId64 " min %" PRId64, pAgg->numOfNull, numOfRows, pAgg->max, pAgg->min); SArray *merged = taosArrayInit(8, sizeof(SFltSclPoint)); From de1848cb08726936958a51f6667ca33e49eb6995 Mon Sep 17 00:00:00 2001 From: slzhou Date: Thu, 25 May 2023 14:51:53 +0800 Subject: [PATCH 22/49] enhance: use scalar mode to run ci test --- source/libs/function/src/udfd.c | 2 +- source/libs/scalar/src/filter.c | 2 +- source/libs/scalar/src/sclvector.c | 17 ++++++++++++++++- 3 files changed, 18 insertions(+), 3 deletions(-) diff --git a/source/libs/function/src/udfd.c b/source/libs/function/src/udfd.c index b6ab7bd6f1..3b827a2f99 100644 --- a/source/libs/function/src/udfd.c +++ b/source/libs/function/src/udfd.c @@ -600,9 +600,9 @@ SUdf *udfdGetOrCreateUdf(const char *udfName) { return udf; } else { (*pUdfHash)->expired = true; - taosHashRemove(global.udfsHash, udfName, strlen(udfName)); fnInfo("udfd expired, check for new version. 
existing udf %s udf version %d, udf created time %" PRIx64, (*pUdfHash)->name, (*pUdfHash)->version, (*pUdfHash)->createdTime); + taosHashRemove(global.udfsHash, udfName, strlen(udfName)); } } diff --git a/source/libs/scalar/src/filter.c b/source/libs/scalar/src/filter.c index 27cf9da1d5..464d388db4 100644 --- a/source/libs/scalar/src/filter.c +++ b/source/libs/scalar/src/filter.c @@ -4556,7 +4556,7 @@ int32_t filterInitFromNode(SNode *pNode, SFilterInfo **pInfo, uint32_t options) FLT_ERR_JRET(fltReviseNodes(info, &pNode, &stat)); - info->scalarMode = stat.scalarMode; + info->scalarMode = true; fltDebug("scalar mode: %d", info->scalarMode); if (!info->scalarMode) { diff --git a/source/libs/scalar/src/sclvector.c b/source/libs/scalar/src/sclvector.c index 4d803cb638..6983f6313b 100644 --- a/source/libs/scalar/src/sclvector.c +++ b/source/libs/scalar/src/sclvector.c @@ -1784,6 +1784,9 @@ void vectorNotMatch(SScalarParam *pLeft, SScalarParam *pRight, SScalarParam *pOu void vectorIsNull(SScalarParam *pLeft, SScalarParam *pRight, SScalarParam *pOut, int32_t _ord) { for (int32_t i = 0; i < pLeft->numOfRows; ++i) { int8_t v = IS_HELPER_NULL(pLeft->columnData, i) ? 1 : 0; + if (v) { + ++pOut->numOfQualified; + } colDataSetInt8(pOut->columnData, i, &v); } pOut->numOfRows = pLeft->numOfRows; @@ -1792,6 +1795,9 @@ void vectorIsNull(SScalarParam *pLeft, SScalarParam *pRight, SScalarParam *pOut, void vectorNotNull(SScalarParam *pLeft, SScalarParam *pRight, SScalarParam *pOut, int32_t _ord) { for (int32_t i = 0; i < pLeft->numOfRows; ++i) { int8_t v = IS_HELPER_NULL(pLeft->columnData, i) ? 0 : 1; + if (v) { + ++pOut->numOfQualified; + } colDataSetInt8(pOut->columnData, i, &v); } pOut->numOfRows = pLeft->numOfRows; @@ -1805,6 +1811,13 @@ void vectorIsTrue(SScalarParam *pLeft, SScalarParam *pRight, SScalarParam *pOut, colDataSetInt8(pOut->columnData, i, &v); colDataClearNull_f(pOut->columnData->nullbitmap, i); } + { + bool v = false; + GET_TYPED_DATA(v, bool, pOut->columnData->info.type, colDataGetData(pOut->columnData, i)); + if (v) { + ++pOut->numOfQualified; + } + } } pOut->columnData->hasNull = false; } @@ -1844,7 +1857,9 @@ void vectorJsonContains(SScalarParam *pLeft, SScalarParam *pRight, SScalarParam char *pLeftData = colDataGetVarData(pLeft->columnData, i); getJsonValue(pLeftData, jsonKey, &isExist); } - + if (isExist) { + ++pOut->numOfQualified; + } colDataSetVal(pOutputCol, i, (const char *)(&isExist), false); } taosMemoryFree(jsonKey); From 61b063529384019a954f05fdcb247dc8623537ed Mon Sep 17 00:00:00 2001 From: slzhou Date: Thu, 25 May 2023 16:27:48 +0800 Subject: [PATCH 23/49] fix: get time range using new scalar mode range --- source/libs/scalar/src/filter.c | 46 +++++++++++++++++++++++++++++++-- 1 file changed, 44 insertions(+), 2 deletions(-) diff --git a/source/libs/scalar/src/filter.c b/source/libs/scalar/src/filter.c index 464d388db4..ae4712b9ba 100644 --- a/source/libs/scalar/src/filter.c +++ b/source/libs/scalar/src/filter.c @@ -3765,8 +3765,8 @@ bool filterRangeExecute(SFilterInfo *info, SColumnDataAgg **pDataStatis, int32_t SColumnDataAgg *pAgg = pDataStatis[j]; SArray *points = taosArrayInit(2, sizeof(SFltSclPoint)); fltSclBuildRangeFromBlockSma(colRange, pAgg, numOfRows, points); - qDebug("column data agg: nulls %d, rows %d, max %" PRId64 " min %" PRId64, pAgg->numOfNull, numOfRows, pAgg->max, - pAgg->min); + qDebug("column data agg: nulls %d, rows %d, max %" PRId64 " min %" PRId64, pAgg->numOfNull, numOfRows, + pAgg->max, pAgg->min); SArray *merged = taosArrayInit(8, 
sizeof(SFltSclPoint)); fltSclIntersect(points, colRange->points, merged); @@ -3960,6 +3960,31 @@ _return: return code; } +static int32_t fltSclGetDatumValueFromPoint(SFltSclPoint *point, SFltSclDatum *d) { + *d = point->val; + if (point->val.kind == FLT_SCL_DATUM_KIND_NULL) { + return TSDB_CODE_SUCCESS; + } + if (point->val.kind == FLT_SCL_DATUM_KIND_MAX) { + getDataMax(d->type.type, &(d->i)); + } else if (point->val.kind == FLT_SCL_DATUM_KIND_MIN) { + getDataMin(d->type.type, &(d->i)); + } + + if ((point->val.kind == FLT_SCL_DATUM_KIND_INT64) || (point->val.kind = FLT_SCL_DATUM_KIND_UINT64)) { + if (point->excl) { + if (point->start) { + ++d->i; + } else { + --d->i; + } + } + } else { + qError("not supported kind %d when get datum from point", point->val.kind); + } + return TSDB_CODE_SUCCESS; +} + int32_t filterGetTimeRange(SNode *pNode, STimeWindow *win, bool *isStrict) { SFilterInfo *info = NULL; int32_t code = 0; @@ -3969,6 +3994,23 @@ int32_t filterGetTimeRange(SNode *pNode, STimeWindow *win, bool *isStrict) { FLT_ERR_RET(filterInitFromNode(pNode, &info, FLT_OPTION_NO_REWRITE | FLT_OPTION_TIMESTAMP)); if (info->scalarMode) { + SArray *colRanges = info->sclCtx.fltSclRange; + if (taosArrayGetSize(colRanges) == 1) { + SFltSclColumnRange *colRange = taosArrayGet(colRanges, 0); + SArray *points = colRange->points; + if (taosArrayGetSize(points) == 2) { + SFltSclPoint *startPt = taosArrayGet(points, 0); + SFltSclPoint *endPt = taosArrayGet(points, 1); + SFltSclDatum start; + SFltSclDatum end; + fltSclGetDatumValueFromPoint(startPt, &start); + fltSclGetDatumValueFromPoint(endPt, &end); + win->skey = start.i; + win->ekey = end.i; + *isStrict = true; + goto _return; + } + } *win = TSWINDOW_INITIALIZER; *isStrict = false; goto _return; From 469b2b573c1ac673cbe54307d3613eafa133319c Mon Sep 17 00:00:00 2001 From: slzhou Date: Thu, 25 May 2023 21:49:28 +0800 Subject: [PATCH 24/49] fix: fix error during emptyTsRange.sim --- source/libs/scalar/src/filter.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/source/libs/scalar/src/filter.c b/source/libs/scalar/src/filter.c index ae4712b9ba..8a97f5fae2 100644 --- a/source/libs/scalar/src/filter.c +++ b/source/libs/scalar/src/filter.c @@ -3546,7 +3546,7 @@ bool fltSclLessPoint(SFltSclPoint *pt1, SFltSclPoint *pt2) { if (pt1->start && pt2->start) { return !pt1->excl && pt2->excl; } else if (pt1->start) { - return pt1->excl && !pt2->excl; + return !pt1->excl && !pt2->excl; } else if (pt2->start) { return pt1->excl || pt2->excl; } @@ -3971,7 +3971,7 @@ static int32_t fltSclGetDatumValueFromPoint(SFltSclPoint *point, SFltSclDatum *d getDataMin(d->type.type, &(d->i)); } - if ((point->val.kind == FLT_SCL_DATUM_KIND_INT64) || (point->val.kind = FLT_SCL_DATUM_KIND_UINT64)) { + if (IS_NUMERIC_TYPE(d->type.type)) { if (point->excl) { if (point->start) { ++d->i; @@ -3980,7 +3980,7 @@ static int32_t fltSclGetDatumValueFromPoint(SFltSclPoint *point, SFltSclDatum *d } } } else { - qError("not supported kind %d when get datum from point", point->val.kind); + qError("not supported type %d when get datum from point", d->type.type); } return TSDB_CODE_SUCCESS; } @@ -4001,14 +4001,17 @@ int32_t filterGetTimeRange(SNode *pNode, STimeWindow *win, bool *isStrict) { if (taosArrayGetSize(points) == 2) { SFltSclPoint *startPt = taosArrayGet(points, 0); SFltSclPoint *endPt = taosArrayGet(points, 1); - SFltSclDatum start; - SFltSclDatum end; + SFltSclDatum start; + SFltSclDatum end; fltSclGetDatumValueFromPoint(startPt, &start); 
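        // fltSclGetDatumValueFromPoint materializes each endpoint: MIN/MAX kinds
        // resolve to the type's getDataMin/getDataMax values, and exclusive integer
        // bounds shift inward by one, so [skey, ekey] below forms a closed window.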
fltSclGetDatumValueFromPoint(endPt, &end); win->skey = start.i; win->ekey = end.i; *isStrict = true; goto _return; + } else if (taosArrayGetSize(points) == 0) { + *win = TSWINDOW_DESC_INITIALIZER; + goto _return; } } *win = TSWINDOW_INITIALIZER; From cbbd09b85c0fbe7038cfbfd3402888b217638acf Mon Sep 17 00:00:00 2001 From: slzhou Date: Thu, 25 May 2023 22:37:49 +0800 Subject: [PATCH 25/49] fix: for logical and condition, each element shall be collectable operator --- source/libs/scalar/src/filter.c | 24 +++++++++++++++++++----- 1 file changed, 19 insertions(+), 5 deletions(-) diff --git a/source/libs/scalar/src/filter.c b/source/libs/scalar/src/filter.c index 8a97f5fae2..d45abb1b87 100644 --- a/source/libs/scalar/src/filter.c +++ b/source/libs/scalar/src/filter.c @@ -3971,7 +3971,7 @@ static int32_t fltSclGetDatumValueFromPoint(SFltSclPoint *point, SFltSclDatum *d getDataMin(d->type.type, &(d->i)); } - if (IS_NUMERIC_TYPE(d->type.type)) { + if (IS_INTEGER_TYPE(d->type.type) || IS_TIMESTAMP_TYPE(d->type.type)) { if (point->excl) { if (point->start) { ++d->i; @@ -4460,26 +4460,35 @@ int32_t fltSclProcessCNF(SArray *sclOpListCNF, SArray *colRangeList) { return TSDB_CODE_SUCCESS; } -static int32_t fltSclCollectOperatorFromNode(SNode *pNode, SArray *sclOpList) { +static bool fltSclIsCollectableNode(SNode *pNode) { if (nodeType(pNode) != QUERY_NODE_OPERATOR) { - return TSDB_CODE_SUCCESS; + return false; } SOperatorNode *pOper = (SOperatorNode *)pNode; if (pOper->pLeft == NULL || pOper->pRight == NULL) { - return TSDB_CODE_SUCCESS; + return false; } if (!(pOper->opType == OP_TYPE_GREATER_THAN || pOper->opType == OP_TYPE_GREATER_EQUAL || pOper->opType == OP_TYPE_LOWER_THAN || pOper->opType == OP_TYPE_LOWER_EQUAL || pOper->opType == OP_TYPE_NOT_EQUAL || pOper->opType == OP_TYPE_EQUAL)) { - return TSDB_CODE_SUCCESS; + return false; } if (!(nodeType(pOper->pLeft) == QUERY_NODE_COLUMN && nodeType(pOper->pRight) == QUERY_NODE_VALUE)) { + return false; + } + return true; +} + +static int32_t fltSclCollectOperatorFromNode(SNode *pNode, SArray *sclOpList) { + if (!fltSclIsCollectableNode(pNode)) { return TSDB_CODE_SUCCESS; } + SOperatorNode *pOper = (SOperatorNode *)pNode; + SValueNode *valNode = (SValueNode *)pOper->pRight; if (IS_NUMERIC_TYPE(valNode->node.resType.type) || valNode->node.resType.type == TSDB_DATA_TYPE_TIMESTAMP) { SFltSclOperator sclOp = {.colNode = (SColumnNode *)nodesCloneNode(pOper->pLeft), @@ -4501,6 +4510,11 @@ static int32_t fltSclCollectOperatorsFromLogicCond(SNode *pNode, SArray *sclOpLi return TSDB_CODE_SUCCESS; } SNode *pExpr = NULL; + FOREACH(pExpr, pLogicCond->pParameterList) { + if (!fltSclIsCollectableNode(pExpr)) { + return TSDB_CODE_SUCCESS; + } + } FOREACH(pExpr, pLogicCond->pParameterList) { fltSclCollectOperatorFromNode(pExpr, sclOpList); } return TSDB_CODE_SUCCESS; } From 81c796953e5233d1a2603eab13b71e7f25d12a0b Mon Sep 17 00:00:00 2001 From: slzhou Date: Fri, 26 May 2023 10:55:52 +0800 Subject: [PATCH 26/49] fix: address sanitizer error --- source/libs/executor/src/executorInt.c | 7 ++++++- source/libs/scalar/src/sclvector.c | 2 ++ 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/source/libs/executor/src/executorInt.c b/source/libs/executor/src/executorInt.c index d229f7d0ee..66ad3ca892 100644 --- a/source/libs/executor/src/executorInt.c +++ b/source/libs/executor/src/executorInt.c @@ -573,8 +573,13 @@ void extractQualifiedTupleByFilterResult(SSDataBlock* pBlock, const SColumnInfoD if (colDataIsNull_var(pDst, j)) { colDataSetNull_var(pDst, numOfRows); } 
else { + // fix address sanitizer error. p1 may point to memory that will change during realloc of colDataSetVal, first copy it to p2 char* p1 = colDataGetVarData(pDst, j); - colDataSetVal(pDst, numOfRows, p1, false); + int32_t len = varDataTLen(p1); + char* p2 = taosMemoryMalloc(len); + varDataCopy(p2, p1); + colDataSetVal(pDst, numOfRows, p2, false); + taosMemoryFree(p2); } numOfRows += 1; j += 1; diff --git a/source/libs/scalar/src/sclvector.c b/source/libs/scalar/src/sclvector.c index 6983f6313b..e2496e15c3 100644 --- a/source/libs/scalar/src/sclvector.c +++ b/source/libs/scalar/src/sclvector.c @@ -1788,6 +1788,7 @@ void vectorIsNull(SScalarParam *pLeft, SScalarParam *pRight, SScalarParam *pOut, ++pOut->numOfQualified; } colDataSetInt8(pOut->columnData, i, &v); + colDataClearNull_f(pOut->columnData->nullbitmap, i); } pOut->numOfRows = pLeft->numOfRows; } @@ -1799,6 +1800,7 @@ void vectorNotNull(SScalarParam *pLeft, SScalarParam *pRight, SScalarParam *pOut ++pOut->numOfQualified; } colDataSetInt8(pOut->columnData, i, &v); + colDataClearNull_f(pOut->columnData->nullbitmap, i); } pOut->numOfRows = pLeft->numOfRows; } From 70bcbafbb8b3ee3adde1bd1813857fec9c9732da Mon Sep 17 00:00:00 2001 From: slzhou Date: Fri, 26 May 2023 13:36:56 +0800 Subject: [PATCH 27/49] fix: json value length is not same as binary/nchar length --- source/libs/executor/src/executorInt.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/source/libs/executor/src/executorInt.c b/source/libs/executor/src/executorInt.c index 66ad3ca892..ecd1874fa5 100644 --- a/source/libs/executor/src/executorInt.c +++ b/source/libs/executor/src/executorInt.c @@ -575,9 +575,14 @@ void extractQualifiedTupleByFilterResult(SSDataBlock* pBlock, const SColumnInfoD } else { // fix address sanitizer error. p1 may point to memory that will change during realloc of colDataSetVal, first copy it to p2 char* p1 = colDataGetVarData(pDst, j); - int32_t len = varDataTLen(p1); + int32_t len = 0; + if (pDst->info.type == TSDB_DATA_TYPE_JSON) { + len = getJsonValueLen(p1); + } else { + len = varDataTLen(p1); + } char* p2 = taosMemoryMalloc(len); - varDataCopy(p2, p1); + memcpy(p2, p1, len); colDataSetVal(pDst, numOfRows, p2, false); taosMemoryFree(p2); } From a8599d78d1e0d0d5305b90375df15a5aef5da6bc Mon Sep 17 00:00:00 2001 From: Ganlin Zhao Date: Fri, 26 May 2023 14:02:46 +0800 Subject: [PATCH 28/49] docs: fix timeline function super table explaination --- docs/en/12-taos-sql/10-function.md | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/docs/en/12-taos-sql/10-function.md b/docs/en/12-taos-sql/10-function.md index a204415d65..b469da8f55 100644 --- a/docs/en/12-taos-sql/10-function.md +++ b/docs/en/12-taos-sql/10-function.md @@ -1008,8 +1008,7 @@ SAMPLE(expr, k) **More explanations**: -This function cannot be used in expression calculation. -- Must be used with `PARTITION BY tbname` when it's used on a STable to force the result on each single timeline +- This function cannot be used in expression calculation. ### TAIL @@ -1088,7 +1087,6 @@ CSUM(expr) - Arithmetic operation can't be performed on the result of `csum` function - Can only be used with aggregate functions This function can be used with supertables and standard tables. -- Must be used with `PARTITION BY tbname` when it's used on a STable to force the result on each single timeline ### DERIVATIVE @@ -1112,7 +1110,6 @@ ignore_negative: { **More explanation**: -- It can be used together with `PARTITION BY tbname` against a STable. 
- It can be used together with a selected column. For example: select \_rowts, DERIVATIVE() from. ### DIFF @@ -1175,7 +1172,6 @@ MAVG(expr, k) - Arithmetic operation can't be performed on the result of `MAVG`. - Can only be used with data columns, can't be used with tags. - Can't be used with aggregate functions. -- Must be used with `PARTITION BY tbname` when it's used on a STable to force the result on each single timeline ### STATECOUNT @@ -1201,7 +1197,6 @@ STATECOUNT(expr, oper, val) **More explanations**: -- Must be used together with `PARTITION BY tbname` when it's used on a STable to force the result into each single timeline] - Can't be used with window operation, like interval/state_window/session_window @@ -1229,7 +1224,6 @@ STATEDURATION(expr, oper, val, unit) **More explanations**: -- Must be used together with `PARTITION BY tbname` when it's used on a STable to force the result into each single timeline] - Can't be used with window operation, like interval/state_window/session_window @@ -1247,7 +1241,6 @@ TWA(expr) **Applicable table types**: standard tables and supertables -- Must be used together with `PARTITION BY tbname` to force the result into each single timeline. ## System Information Functions From 217efa830e8eb74e092b0ddd8c6bcbf80287497a Mon Sep 17 00:00:00 2001 From: Ganlin Zhao Date: Fri, 26 May 2023 14:02:46 +0800 Subject: [PATCH 29/49] docs: fix timeline function super table explaination --- docs/zh/12-taos-sql/10-function.md | 8 -------- 1 file changed, 8 deletions(-) diff --git a/docs/zh/12-taos-sql/10-function.md b/docs/zh/12-taos-sql/10-function.md index 248554f931..b4785dc5e6 100644 --- a/docs/zh/12-taos-sql/10-function.md +++ b/docs/zh/12-taos-sql/10-function.md @@ -1001,7 +1001,6 @@ SAMPLE(expr, k) **使用说明**: - 不能参与表达式计算;该函数可以应用在普通表和超级表上; -- 使用在超级表上的时候,需要搭配 PARTITION by tbname 使用,将结果强制规约到单个时间线。 ### TAIL @@ -1080,7 +1079,6 @@ CSUM(expr) - 不支持 +、-、*、/ 运算,如 csum(col1) + csum(col2)。 - 只能与聚合(Aggregation)函数一起使用。 该函数可以应用在普通表和超级表上。 -- 使用在超级表上的时候,需要搭配 PARTITION BY tbname使用,将结果强制规约到单个时间线。 ### DERIVATIVE @@ -1104,7 +1102,6 @@ ignore_negative: { **使用说明**: -- DERIVATIVE 函数可以在由 PARTITION BY 划分出单独时间线的情况下用于超级表(也即 PARTITION BY tbname)。 - 可以与选择相关联的列一起使用。 例如: select \_rowts, DERIVATIVE() from。 ### DIFF @@ -1167,7 +1164,6 @@ MAVG(expr, k) - 不支持 +、-、*、/ 运算,如 mavg(col1, k1) + mavg(col2, k1); - 只能与普通列,选择(Selection)、投影(Projection)函数一起使用,不能与聚合(Aggregation)函数一起使用; -- 使用在超级表上的时候,需要搭配 PARTITION BY tbname使用,将结果强制规约到单个时间线。 ### STATECOUNT @@ -1193,7 +1189,6 @@ STATECOUNT(expr, oper, val) **使用说明**: -- 该函数可以应用在普通表上,在由 PARTITION BY 划分出单独时间线的情况下用于超级表(也即 PARTITION BY tbname) - 不能和窗口操作一起使用,例如 interval/state_window/session_window。 @@ -1221,7 +1216,6 @@ STATEDURATION(expr, oper, val, unit) **使用说明**: -- 该函数可以应用在普通表上,在由 PARTITION BY 划分出单独时间线的情况下用于超级表(也即 PARTITION BY tbname) - 不能和窗口操作一起使用,例如 interval/state_window/session_window。 @@ -1239,8 +1233,6 @@ TWA(expr) **适用于**:表和超级表。 -**使用说明**: TWA 函数可以在由 PARTITION BY 划分出单独时间线的情况下用于超级表(也即 PARTITION BY tbname)。 - ## 系统信息函数 From b83bf9163ec854d2fb857666fd50693df16572fa Mon Sep 17 00:00:00 2001 From: slzhou Date: Fri, 26 May 2023 14:46:32 +0800 Subject: [PATCH 30/49] fix: restore scalar mode computation and add filterScalarMode config variable and add test case --- include/common/tglobal.h | 1 + source/common/src/tglobal.c | 5 + source/libs/scalar/src/filter.c | 10 +- tests/parallel_test/cases.task | 2 + tests/script/tsim/parser/condition_scl.sim | 136 +++++++++++++++++++ tests/script/tsim/query/emptyTsRange_scl.sim | 21 +++ 6 files changed, 172 
insertions(+), 3 deletions(-) create mode 100644 tests/script/tsim/parser/condition_scl.sim create mode 100644 tests/script/tsim/query/emptyTsRange_scl.sim diff --git a/include/common/tglobal.h b/include/common/tglobal.h index abee37d122..157e37f080 100644 --- a/include/common/tglobal.h +++ b/include/common/tglobal.h @@ -180,6 +180,7 @@ extern int32_t tsRpcRetryInterval; extern bool tsDisableStream; extern int64_t tsStreamBufferSize; extern int64_t tsCheckpointInterval; +extern bool tsFilterScalarMode; // #define NEEDTO_COMPRESSS_MSG(size) (tsCompressMsgSize != -1 && (size) > tsCompressMsgSize) diff --git a/source/common/src/tglobal.c b/source/common/src/tglobal.c index 9ff297c5b4..c648f8551a 100644 --- a/source/common/src/tglobal.c +++ b/source/common/src/tglobal.c @@ -208,6 +208,7 @@ char tsUdfdLdLibPath[512] = ""; bool tsDisableStream = false; int64_t tsStreamBufferSize = 128 * 1024 * 1024; int64_t tsCheckpointInterval = 3 * 60 * 60 * 1000; +bool tsFilterScalarMode = false; #ifndef _STORAGE int32_t taosSetTfsCfg(SConfig *pCfg) { @@ -522,6 +523,8 @@ static int32_t taosAddServerCfg(SConfig *pCfg) { if (cfgAddInt32(pCfg, "cacheLazyLoadThreshold", tsCacheLazyLoadThreshold, 0, 100000, 0) != 0) return -1; + if (cfgAddBool(pCfg, "filterScalarMode", tsFilterScalarMode, 0) != 0) return -1; + GRANT_CFG_ADD; return 0; } @@ -898,6 +901,8 @@ static int32_t taosSetServerCfg(SConfig *pCfg) { tsStreamBufferSize = cfgGetItem(pCfg, "streamBufferSize")->i64; tsCheckpointInterval = cfgGetItem(pCfg, "checkpointInterval")->i64; + tsFilterScalarMode = cfgGetItem(pCfg, "filterScalarMode")->bval; + GRANT_CFG_GET; return 0; } diff --git a/source/libs/scalar/src/filter.c b/source/libs/scalar/src/filter.c index d45abb1b87..9a488edd67 100644 --- a/source/libs/scalar/src/filter.c +++ b/source/libs/scalar/src/filter.c @@ -14,6 +14,7 @@ */ #include #include "os.h" +#include "tglobal.h" #include "thash.h" // #include "queryLog.h" #include "filter.h" @@ -4510,7 +4511,7 @@ static int32_t fltSclCollectOperatorsFromLogicCond(SNode *pNode, SArray *sclOpLi return TSDB_CODE_SUCCESS; } SNode *pExpr = NULL; - FOREACH(pExpr, pLogicCond->pParameterList) { + FOREACH(pExpr, pLogicCond->pParameterList) { if (!fltSclIsCollectableNode(pExpr)) { return TSDB_CODE_SUCCESS; } @@ -4614,8 +4615,11 @@ int32_t filterInitFromNode(SNode *pNode, SFilterInfo **pInfo, uint32_t options) stat.info = info; FLT_ERR_JRET(fltReviseNodes(info, &pNode, &stat)); - - info->scalarMode = true; + if (tsFilterScalarMode) { + info->scalarMode = true; + } else { + info->scalarMode = stat.scalarMode; + } fltDebug("scalar mode: %d", info->scalarMode); if (!info->scalarMode) { diff --git a/tests/parallel_test/cases.task b/tests/parallel_test/cases.task index ab4663e5b3..6037a14c8a 100644 --- a/tests/parallel_test/cases.task +++ b/tests/parallel_test/cases.task @@ -1274,6 +1274,7 @@ ,,y,script,./test.sh -f tsim/parser/columnValue_tinyint.sim ,,y,script,./test.sh -f tsim/parser/columnValue_unsign.sim ,,y,script,./test.sh -f tsim/parser/condition.sim +,,y,script,./test.sh -f tsim/parser/condition_scl.sim ,,y,script,./test.sh -f tsim/parser/constCol.sim ,,y,script,./test.sh -f tsim/parser/create_db.sim ,,y,script,./test.sh -f tsim/parser/create_mt.sim @@ -1353,6 +1354,7 @@ ,,y,script,./test.sh -f tsim/query/event.sim ,,y,script,./test.sh -f tsim/query/forceFill.sim ,,y,script,./test.sh -f tsim/query/emptyTsRange.sim +,,y,script,./test.sh -f tsim/query/emptyTsRange_scl.sim ,,y,script,./test.sh -f tsim/query/partitionby.sim ,,y,script,./test.sh -f 
tsim/query/tableCount.sim ,,y,script,./test.sh -f tsim/query/tag_scan.sim diff --git a/tests/script/tsim/parser/condition_scl.sim b/tests/script/tsim/parser/condition_scl.sim new file mode 100644 index 0000000000..f377988006 --- /dev/null +++ b/tests/script/tsim/parser/condition_scl.sim @@ -0,0 +1,136 @@ +system sh/stop_dnodes.sh +system sh/deploy.sh -n dnode1 -i 1 +system sh/cfg.sh -n dnode1 -c filterScalarMode -v 1 +system sh/exec.sh -n dnode1 -s start +sql connect + +sql drop database if exists cdb +sql create database if not exists cdb +sql use cdb +sql create table stb1 (ts timestamp, c1 int, c2 float, c3 bigint, c4 smallint, c5 tinyint, c6 double, c7 bool, c8 binary(10), c9 nchar(9)) TAGS(t1 int, t2 binary(10), t3 double) +sql create table tb1 using stb1 tags(1,'1',1.0) +sql create table tb2 using stb1 tags(2,'2',2.0) +sql create table tb3 using stb1 tags(3,'3',3.0) +sql create table tb4 using stb1 tags(4,'4',4.0) +sql create table tb5 using stb1 tags(5,'5',5.0) +sql create table tb6 using stb1 tags(6,'6',6.0) + +sql insert into tb1 values ('2021-05-05 18:19:00',1,1.0,1,1,1,1.0,true ,'1','1') +sql insert into tb1 values ('2021-05-05 18:19:01',2,2.0,2,2,2,2.0,true ,'2','2') +sql insert into tb1 values ('2021-05-05 18:19:02',3,3.0,3,3,3,3.0,false,'3','3') +sql insert into tb1 values ('2021-05-05 18:19:03',4,4.0,4,4,4,4.0,false,'4','4') +sql insert into tb1 values ('2021-05-05 18:19:04',11,11.0,11,11,11,11.0,true ,'11','11') +sql insert into tb1 values ('2021-05-05 18:19:05',12,12.0,12,12,12,12.0,true ,'12','12') +sql insert into tb1 values ('2021-05-05 18:19:06',13,13.0,13,13,13,13.0,false,'13','13') +sql insert into tb1 values ('2021-05-05 18:19:07',14,14.0,14,14,14,14.0,false,'14','14') +sql insert into tb2 values ('2021-05-05 18:19:08',21,21.0,21,21,21,21.0,true ,'21','21') +sql insert into tb2 values ('2021-05-05 18:19:09',22,22.0,22,22,22,22.0,true ,'22','22') +sql insert into tb2 values ('2021-05-05 18:19:10',23,23.0,23,23,23,23.0,false,'23','23') +sql insert into tb2 values ('2021-05-05 18:19:11',24,24.0,24,24,24,24.0,false,'24','24') +sql insert into tb3 values ('2021-05-05 18:19:12',31,31.0,31,31,31,31.0,true ,'31','31') +sql insert into tb3 values ('2021-05-05 18:19:13',32,32.0,32,32,32,32.0,true ,'32','32') +sql insert into tb3 values ('2021-05-05 18:19:14',33,33.0,33,33,33,33.0,false,'33','33') +sql insert into tb3 values ('2021-05-05 18:19:15',34,34.0,34,34,34,34.0,false,'34','34') +sql insert into tb4 values ('2021-05-05 18:19:16',41,41.0,41,41,41,41.0,true ,'41','41') +sql insert into tb4 values ('2021-05-05 18:19:17',42,42.0,42,42,42,42.0,true ,'42','42') +sql insert into tb4 values ('2021-05-05 18:19:18',43,43.0,43,43,43,43.0,false,'43','43') +sql insert into tb4 values ('2021-05-05 18:19:19',44,44.0,44,44,44,44.0,false,'44','44') +sql insert into tb5 values ('2021-05-05 18:19:20',51,51.0,51,51,51,51.0,true ,'51','51') +sql insert into tb5 values ('2021-05-05 18:19:21',52,52.0,52,52,52,52.0,true ,'52','52') +sql insert into tb5 values ('2021-05-05 18:19:22',53,53.0,53,53,53,53.0,false,'53','53') +sql insert into tb5 values ('2021-05-05 18:19:23',54,54.0,54,54,54,54.0,false,'54','54') +sql insert into tb6 values ('2021-05-05 18:19:24',61,61.0,61,61,61,61.0,true ,'61','61') +sql insert into tb6 values ('2021-05-05 18:19:25',62,62.0,62,62,62,62.0,true ,'62','62') +sql insert into tb6 values ('2021-05-05 18:19:26',63,63.0,63,63,63,63.0,false,'63','63') +sql insert into tb6 values ('2021-05-05 18:19:27',64,64.0,64,64,64,64.0,false,'64','64') +sql insert into tb6 values 
('2021-05-05 18:19:28',NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL) + +sql create table stb2 (ts timestamp, u1 int unsigned, u2 bigint unsigned, u3 smallint unsigned, u4 tinyint unsigned, ts2 timestamp) TAGS(t1 int unsigned, t2 bigint unsigned, t3 timestamp, t4 int) +sql create table tb2_1 using stb2 tags(1,1,'2021-05-05 18:38:38',1) +sql create table tb2_2 using stb2 tags(2,2,'2021-05-05 18:58:58',2) + +sql insert into tb2_1 values ('2021-05-05 18:19:00',1,2,3,4,'2021-05-05 18:28:01') +sql insert into tb2_1 values ('2021-05-05 18:19:01',5,6,7,8,'2021-05-05 18:28:02') +sql insert into tb2_1 values ('2021-05-05 18:19:02',2,2,3,4,'2021-05-05 18:28:03') +sql insert into tb2_1 values ('2021-05-05 18:19:03',5,6,7,8,'2021-05-05 18:28:04') +sql insert into tb2_1 values ('2021-05-05 18:19:04',3,2,3,4,'2021-05-05 18:28:05') +sql insert into tb2_1 values ('2021-05-05 18:19:05',5,6,7,8,'2021-05-05 18:28:06') +sql insert into tb2_1 values ('2021-05-05 18:19:06',4,2,3,4,'2021-05-05 18:28:07') +sql insert into tb2_1 values ('2021-05-05 18:19:07',5,6,7,8,'2021-05-05 18:28:08') +sql insert into tb2_1 values ('2021-05-05 18:19:08',5,2,3,4,'2021-05-05 18:28:09') +sql insert into tb2_1 values ('2021-05-05 18:19:09',5,6,7,8,'2021-05-05 18:28:10') +sql insert into tb2_1 values ('2021-05-05 18:19:10',6,2,3,4,'2021-05-05 18:28:11') +sql insert into tb2_2 values ('2021-05-05 18:19:11',5,6,7,8,'2021-05-05 18:28:12') +sql insert into tb2_2 values ('2021-05-05 18:19:12',7,2,3,4,'2021-05-05 18:28:13') +sql insert into tb2_2 values ('2021-05-05 18:19:13',5,6,7,8,'2021-05-05 18:28:14') +sql insert into tb2_2 values ('2021-05-05 18:19:14',8,2,3,4,'2021-05-05 18:28:15') +sql insert into tb2_2 values ('2021-05-05 18:19:15',5,6,7,8,'2021-05-05 18:28:16') + +sql create table stb3 (ts timestamp, c1 int, c2 float, c3 bigint, c4 smallint, c5 tinyint, c6 double, c7 bool, c8 binary(10), c9 nchar(9)) TAGS(t1 int, t2 binary(10), t3 double) +sql create table tb3_1 using stb3 tags(1,'1',1.0) +sql create table tb3_2 using stb3 tags(2,'2',2.0) + +sql insert into tb3_1 values ('2021-01-05 18:19:00',1,1.0,1,1,1,1.0,true ,'1','1') +sql insert into tb3_1 values ('2021-02-05 18:19:01',2,2.0,2,2,2,2.0,true ,'2','2') +sql insert into tb3_1 values ('2021-03-05 18:19:02',3,3.0,3,3,3,3.0,false,'3','3') +sql insert into tb3_1 values ('2021-04-05 18:19:03',4,4.0,4,4,4,4.0,false,'4','4') +sql insert into tb3_1 values ('2021-05-05 18:19:28',5,NULL,5,NULL,5,NULL,true,NULL,'5') +sql insert into tb3_1 values ('2021-06-05 18:19:28',NULL,6.0,NULL,6,NULL,6.0,NULL,'6',NULL) +sql insert into tb3_1 values ('2021-07-05 18:19:28',NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL) +sql insert into tb3_2 values ('2021-01-06 18:19:00',11,11.0,11,11,11,11.0,true ,'11','11') +sql insert into tb3_2 values ('2021-02-06 18:19:01',12,12.0,12,12,12,12.0,true ,'12','12') +sql insert into tb3_2 values ('2021-03-06 18:19:02',13,13.0,13,13,13,13.0,false,'13','13') +sql insert into tb3_2 values ('2021-04-06 18:19:03',14,14.0,14,14,14,14.0,false,'14','14') +sql insert into tb3_2 values ('2021-05-06 18:19:28',15,NULL,15,NULL,15,NULL,true,NULL,'15') +sql insert into tb3_2 values ('2021-06-06 18:19:28',NULL,16.0,NULL,16,NULL,16.0,NULL,'16',NULL) +sql insert into tb3_2 values ('2021-07-06 18:19:28',NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL) + +sql create table stb4 (ts timestamp, c1 int, c2 float, c3 bigint, c4 smallint, c5 tinyint, c6 double, c7 bool, c8 binary(10), c9 nchar(9),c10 binary(16300)) TAGS(t1 int, t2 binary(10), t3 double) +sql create table tb4_0 using stb4 
tags(0,'0',0.0) +sql create table tb4_1 using stb4 tags(1,'1',1.0) +sql create table tb4_2 using stb4 tags(2,'2',2.0) +sql create table tb4_3 using stb4 tags(3,'3',3.0) +sql create table tb4_4 using stb4 tags(4,'4',4.0) + +$i = 0 +$ts0 = 1625850000000 +$blockNum = 5 +$delta = 0 +$tbname0 = tb4_ +$a = 0 +$b = 200 +$c = 400 +while $i < $blockNum + $x = 0 + $rowNum = 1200 + while $x < $rowNum + $ts = $ts0 + $x + $a = $a + 1 + $b = $b + 1 + $c = $c + 1 + $d = $x / 10 + $tin = $rowNum + $binary = 'binary . $c + $binary = $binary . ' + $nchar = 'nchar . $c + $nchar = $nchar . ' + $tbname = 'tb4_ . $i + $tbname = $tbname . ' + sql insert into $tbname values ( $ts , $a , $b , $c , $d , $d , $c , true, $binary , $nchar , $binary ) + $x = $x + 1 + endw + + $i = $i + 1 + $ts0 = $ts0 + 259200000 +endw + +run tsim/parser/condition_query.sim + +print ================== restart server to commit data into disk +system sh/exec.sh -n dnode1 -s stop -x SIGINT +system sh/exec.sh -n dnode1 -s start +print ================== server restart completed +sql connect + +run tsim/parser/condition_query.sim + diff --git a/tests/script/tsim/query/emptyTsRange_scl.sim b/tests/script/tsim/query/emptyTsRange_scl.sim new file mode 100644 index 0000000000..43734b047d --- /dev/null +++ b/tests/script/tsim/query/emptyTsRange_scl.sim @@ -0,0 +1,21 @@ +system sh/stop_dnodes.sh +system sh/deploy.sh -n dnode1 -i 1 +system sh/cfg.sh -n dnode1 -c filterScalarMode -v 1 +system sh/exec.sh -n dnode1 -s start +sql connect + +sql drop database if exists db1; +sql create database if not exists db1; +sql use db1; +sql create stable sta (ts timestamp, f1 double, f2 binary(200)) tags(t1 int); +sql create table tba1 using sta tags(1); +sql insert into tba1 values ('2022-04-26 15:15:01', 1.0, "a"); +sql insert into tba1 values ('2022-04-26 15:15:02', 2.0, "b"); +sql insert into tba1 values ('2022-04-26 15:15:04', 4.0, "b"); +sql insert into tba1 values ('2022-04-26 15:15:05', 5.0, "b"); +sql select last_row(*) from sta where ts >= 1678901803783 and ts <= 1678901803783 and _c0 <= 1678901803782 interval(10d,8d) fill(linear) order by _wstart desc; +if $rows != 0 then + return -1 +endi + +system sh/exec.sh -n dnode1 -s stop -x SIGINT From b89f1652fcc16487de8e4fc5194c08316bc376dd Mon Sep 17 00:00:00 2001 From: slzhou Date: Fri, 26 May 2023 14:52:14 +0800 Subject: [PATCH 31/49] fix: remove tbl count scan modification from the PR --- source/libs/planner/src/planOptimizer.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/source/libs/planner/src/planOptimizer.c b/source/libs/planner/src/planOptimizer.c index 4a803065ed..72e5081ab9 100644 --- a/source/libs/planner/src/planOptimizer.c +++ b/source/libs/planner/src/planOptimizer.c @@ -2632,13 +2632,11 @@ static bool tbCntScanOptIsEligibleConds(STbCntScanOptInfo* pInfo, SNode* pCondit return false; } if (QUERY_NODE_LOGIC_CONDITION == nodeType(pConditions)) { - return tbCntScanOptIsEligibleLogicCond(pInfo, (SLogicConditionNode*)pConditions) && - LIST_LENGTH(pInfo->pAgg->pGroupKeys) == 0; + return tbCntScanOptIsEligibleLogicCond(pInfo, (SLogicConditionNode*)pConditions); } if (QUERY_NODE_OPERATOR == nodeType(pConditions)) { - return tbCntScanOptIsEligibleOpCond((SOperatorNode*)pConditions) && - LIST_LENGTH(pInfo->pAgg->pGroupKeys) == 0; + return tbCntScanOptIsEligibleOpCond((SOperatorNode*)pConditions); } return false; From 24937c3d5bb9baf6cb90472c7db55c4f03db3cda Mon Sep 17 00:00:00 2001 From: dmchen Date: Fri, 26 May 2023 19:01:13 +0800 Subject: [PATCH 32/49] check empty dnode --- 
source/dnode/mnode/impl/src/mndDnode.c | 28 +++++++++++++------------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/source/dnode/mnode/impl/src/mndDnode.c b/source/dnode/mnode/impl/src/mndDnode.c index 99694ae4b8..73dbb243a1 100644 --- a/source/dnode/mnode/impl/src/mndDnode.c +++ b/source/dnode/mnode/impl/src/mndDnode.c @@ -986,20 +986,20 @@ static int32_t mndProcessDropDnodeReq(SRpcMsg *pReq) { } int32_t numOfVnodes = mndGetVnodesNum(pMnode, pDnode->id); - if (numOfVnodes > 0 || pMObj != NULL || pSObj != NULL || pQObj != NULL) { - bool isonline = mndIsDnodeOnline(pDnode, taosGetTimestampMs()); - if (isonline && force) { - terrno = TSDB_CODE_DNODE_ONLY_USE_WHEN_OFFLINE; - mError("dnode:%d, failed to drop since %s, vnodes:%d mnode:%d qnode:%d snode:%d", pDnode->id, terrstr(), - numOfVnodes, pMObj != NULL, pQObj != NULL, pSObj != NULL); - goto _OVER; - } - if (!isonline && !force) { - terrno = TSDB_CODE_DNODE_OFFLINE; - mError("dnode:%d, failed to drop since %s, vnodes:%d mnode:%d qnode:%d snode:%d", pDnode->id, terrstr(), - numOfVnodes, pMObj != NULL, pQObj != NULL, pSObj != NULL); - goto _OVER; - } + bool isonline = mndIsDnodeOnline(pDnode, taosGetTimestampMs()); + + if (isonline && force) { + terrno = TSDB_CODE_DNODE_ONLY_USE_WHEN_OFFLINE; + mError("dnode:%d, failed to drop since %s, vnodes:%d mnode:%d qnode:%d snode:%d", pDnode->id, terrstr(), + numOfVnodes, pMObj != NULL, pQObj != NULL, pSObj != NULL); + goto _OVER; + } + + if (!isonline && !force) { + terrno = TSDB_CODE_DNODE_OFFLINE; + mError("dnode:%d, failed to drop since %s, vnodes:%d mnode:%d qnode:%d snode:%d", pDnode->id, terrstr(), + numOfVnodes, pMObj != NULL, pQObj != NULL, pSObj != NULL); + goto _OVER; } code = mndDropDnode(pMnode, pReq, pDnode, pMObj, pQObj, pSObj, numOfVnodes, force, dropReq.unsafe); From 436717008663a19655d67ddee463b13890cfb621 Mon Sep 17 00:00:00 2001 From: gccgdb1234 Date: Fri, 26 May 2023 19:16:23 +0800 Subject: [PATCH 33/49] doc: refine PDF user guide --- docs/en/07-develop/09-udf.md | 564 ++++++++++++++++++++++++++++++++--- docs/zh/07-develop/09-udf.md | 563 +++++++++++++++++++++++++++++++--- 2 files changed, 1046 insertions(+), 81 deletions(-) diff --git a/docs/en/07-develop/09-udf.md b/docs/en/07-develop/09-udf.md index b0124b9c3f..92bd4efd25 100644 --- a/docs/en/07-develop/09-udf.md +++ b/docs/en/07-develop/09-udf.md @@ -270,29 +270,92 @@ select max_vol(vol1,vol2,vol3,deviceid) from battery; ## Implement a UDF in Python +### Prepare Environment + +1. Prepare Python Environment + +Please follow standard procedure of python environment preparation. + +2. Install Python package `taospyudf` + +```shell +pip3 install taospyudf +``` + +During this process, some C++ code needs to be compiled. So it's required to have `cmake` and `gcc` on your system. The compiled `libtaospyudf.so` will be automatically copied to `/usr/local/lib` path. If you are not root user, please use `sudo`. After installation is done, please check using the command below. + +```shell +root@slave11 ~/udf $ ls -l /usr/local/lib/libtaos* +-rw-r--r-- 1 root root 671344 May 24 22:54 /usr/local/lib/libtaospyudf.so +``` + +Then execute the command below. + +```shell +ldconfig +``` + +3. If you want to utilize some 3rd party python packages using `PYTHONPATH`, please set configuration parameter `UdfdLdLibPath` to the value of `PYTHONPATH` before starting `taosd`. + +4. 
Launch `taosd` service
+
+Please refer to [Get Started](../get-started)
+
+### Interface definition
+
+#### Introduction to Interface
+
 Implement the specified interface functions when implementing a UDF in Python.
 - implement `process` function for the scalar UDF.
 - implement `start`, `reduce`, `finish` for the aggregate UDF.
 - implement `init` for initialization and `destroy` for termination.

-### Implement a Scalar UDF in Python
+#### Scalar UDF Interface

 The implementation of a scalar UDF is described as follows:

+```Python
+def process(input: datablock) -> tuple[output_type]:
+```
+
+Description: this function processes datablock, which is the input; you can use datablock.data(row, col) to access the python object at location(row,col); the output is a tuple object consisting of objects of type outputtype
+
+### Aggregate UDF Interface
+
+The implementation of an aggregate function is described as follows:
+
+```Python
+def start() -> bytes:
+def reduce(inputs: datablock, buf: bytes) -> bytes
+def finish(buf: bytes) -> output_type:
+```
+
+Description: first, start() is invoked to generate the initial result `buffer`; then the input data is divided into multiple row blocks, and reduce() is invoked for each block `inputs` together with the current intermediate result `buf`; finally finish() is invoked to generate the final result from the intermediate result `buf`; the final result may contain only 0 or 1 row of data.
+
+#### Initialization and Cleanup Interface
+
+```python
+def init()
+def destroy()
+```
+
+Description: init() does the work of initialization before processing any data; destroy() does the work of cleanup after the data is processed.
+
+### Python UDF Template
+
+#### Scalar Template
+
 ```Python
 def init():
     # initialization
 def destroy():
     # destroy
-def process(input: datablock) -> tuple[output_type]:
-    # process input datablock,
-    # datablock.data(row, col) is to access the python object in location(row,col)
-    # return tuple object consisted of object of type outputtype
+def process(input: datablock) -> tuple[output_type]:
 ```

-### Implement an Aggregate UDF in Python
+Note: process() must be implemented; init() and destroy() must be defined too, even if they do nothing.

-The implementation of an aggregate function is described as follows:
+#### Aggregate Template

 ```Python
 def init():
 ...
 def finish(buf: bytes) -> output_type:
     #return obj of type outputtype
 ```

-### Python UDF Interface Definition
-
-#### Scalar interface
-```Python
-def process(input: datablock) -> tuple[output_type]:
-```
-- `input` is a data block two-dimension matrix-like object, of which method `data(row, col)` returns the Python object located at location (`row`, `col`)
-- return a Python tuple object, of which each item is a Python object of type `output_type`
-
-#### Aggregate Interface
-```Python
-def start() -> bytes:
-def reduce(input: datablock, buf: bytes) -> bytes
-def finish(buf: bytes) -> output_type:
-```
-
-- first `start()` is called to return the initial result in type `bytes`
-- then the input data are divided into multiple data blocks and for each block `input`, `reduce` is called with the data block `input` and the current result `buf` bytes and generates a new intermediate result buffer.
-- finally, the `finish` function is called on the intermediate result `buf` and outputs 0 or 1 data of type `output_type`
-
-
-#### Initialization and Cleanup Interface
-```Python
-def init()
-def destroy()
-```
-Implement `init` for initialization and `destroy` for termination.
+Note: an aggregate UDF requires init(), destroy(), start(), reduce() and finish() to be implemented. start() generates the initial result buffer, then the input data is divided into multiple row data blocks, reduce() is invoked for each data block `inputs` together with the intermediate result `buf`, and finally finish() is invoked to generate the final result from the intermediate result `buf`.
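To make the aggregate template concrete, here is a minimal, illustrative sketch (our own example, not part of the original text) that counts the non-null values of its first argument. The function name `mycount` and the use of `pickle` for the intermediate buffer are assumptions of this sketch; only the start/reduce/finish calling convention comes from the interface described above.

```python
import pickle

def init():
    pass

def destroy():
    pass

def start():
    # serialized initial state: a counter starting at 0
    return pickle.dumps(0)

def reduce(block, buf):
    # fold one row block into the running count
    count = pickle.loads(buf)
    rows, _ = block.shape()
    for i in range(rows):
        if block.data(i, 0) is not None:
            count += 1
    return pickle.dumps(count)

def finish(buf):
    # the aggregate produces a single value
    return pickle.loads(buf)
```

Assuming the file is saved as /root/udf/mycount.py, it could be registered with `create aggregate function mycount as '/root/udf/mycount.py' outputtype bigint bufsize 64 language 'Python';`.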
### Data Mapping between TDengine SQL and Python UDF

The following table describes the mapping between TDengine SQL data type and Python UDF Data Type. The NULL value of any type is always mapped to the None value in Python.

|**TDengine SQL Data Type**|**Python Data Type**|
|:-------------:|--------------:|
|TINYINT / SMALLINT / INT / BIGINT | int |
|TINYINT UNSIGNED / SMALLINT UNSIGNED / INT UNSIGNED / BIGINT UNSIGNED | int |
|FLOAT / DOUBLE | float |
|BOOL | bool |
|BINARY / VARCHAR / NCHAR | bytes|
|TIMESTAMP | int |
|JSON and other types | Not Supported |
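As a quick illustration of this mapping (our own sketch, not part of the original text): a SQL NULL arrives in process() as None, BINARY/VARCHAR/NCHAR values arrive as bytes, and a TIMESTAMP arrives as an epoch integer. The function below, hypothetically registered with outputtype int, returns the byte length of a string column and propagates NULLs.

```python
def init():
    pass

def destroy():
    pass

def process(block):
    rows, _ = block.shape()
    out = []
    for i in range(rows):
        v = block.data(i, 0)
        if v is None:          # SQL NULL is mapped to None
            out.append(None)
        else:                  # BINARY/VARCHAR/NCHAR are mapped to bytes
            out.append(len(v))
    return out
```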
-### Installing Python UDF
-1. Install Python package `taospyudf` that executes Python UDF
-```bash
-sudo pip install taospyudf
-ldconfig
+### Development Guide
+
+In this section we will demonstrate 5 examples of developing UDF in Python language. In this guide, you will learn the development skills from easy cases to hard ones; the examples include:
+1. A scalar function which accepts only one integer as input and outputs ln(n^2 + 1).
+2. A scalar function which accepts n integers, like (x1, x2, ..., xn), and outputs the sum of the product of each input and its sequence number, i.e. x1 + 2 * x2 + ... + n * xn.
+3. A scalar function which accepts a timestamp and outputs the next closest Sunday of the timestamp. In this case, we will demonstrate how to use the 3rd party library `moment`.
+4. An aggregate function which calculates the difference between the maximum and the minimum of a specific column, i.e. the same functionality as the built-in spread().
+
+In the guide, some debugging skills of using Python UDF will be explained too.
+
+We assume you are using a Linux system and already have TDengine 3.0.4.0+ and Python 3.x.
+
+Note: **You can't use the print() function to output logs inside a UDF; you have to write the log to a specific file or use Python's logging module.**
+
+#### Sample 1: Simplest UDF
+
+This scalar UDF accepts an integer as input and outputs ln(n^2 + 1).
+
+First, compose a Python source code file in your system and save it, e.g. `/root/udf/myfun.py`, the code is like below.
+
+```python
+from math import log
+
+def init():
+    pass
+
+def destroy():
+    pass
+
+def process(block):
+    rows, _ = block.shape()
+    return [log(block.data(i, 0) ** 2 + 1) for i in range(rows)]
+```
+
+This program consists of 3 functions. init() and destroy() do nothing, but they have to be defined anyway because they are required parts of a Python UDF. The most important function is process(), which accepts a data block, and the data block object has two methods:
+1. shape() returns the number of rows and the number of columns of the data block
+2. data(i, j) returns the value at (i,j) in the block
+
+The process() function of a scalar UDF must return exactly as many values as there are input rows. We ignore the number of columns here because we just want to compute on the first column.
+
+Then, we create the UDF using the SQL command below.
+
+```sql
+create function myfun as '/root/udf/myfun.py' outputtype double language 'Python'
+```
+
+Here is the output example; it may change a little depending on the version being used.
+
+```shell
+ taos> create function myfun as '/root/udf/myfun.py' outputtype double language 'Python';
+Create OK, 0 row(s) affected (0.005202s)
+```
+
+Then, we use the `show` command to verify that the creation of the UDF succeeded.
+
+```text
+taos> show functions;
+              name              |
+=================================
+ myfun                          |
+Query OK, 1 row(s) in set (0.005767s)
+```
+
+Next, we can try to test the function. Before executing the UDF, we need to prepare some data using the command below in TDengine CLI.
+
+```sql
+create database test;
+create table t(ts timestamp, v1 int, v2 int, v3 int);
+insert into t values('2023-05-01 12:13:14', 1, 2, 3);
+insert into t values('2023-05-03 08:09:10', 2, 3, 4);
+insert into t values('2023-05-10 07:06:05', 3, 4, 5);
+```
+
+Execute the UDF to test it:
+
+```sql
+taos> select myfun(v1, v2) from t;
+
+DB error: udf function execution failure (0.011088s)
+```
+
+Unfortunately, the UDF execution failed. We need to check the log of the `udfd` daemon to find out why.
+
+```shell
+tail -10 /var/log/taos/udfd.log
+```
+
+Below is the output.
+
+```text
+05/24 22:46:28.733545 01665799 UDF ERROR can not load library libtaospyudf.so. error: operation not permitted
+05/24 22:46:28.733561 01665799 UDF ERROR can not load python plugin. lib path libtaospyudf.so
+```
+
+From the error message we can find out that `libtaospyudf.so` was not loaded successfully. Please refer to the [Prepare Environment] section.
+
+After correcting environment issues, execute the UDF:
+
+```sql
+taos> select myfun(v1) from t;
+         myfun(v1) |
+============================
+       0.693147181 |
+       1.609437912 |
+       2.302585093 |
+```
+
+Now, we have finished the first UDF in Python, and learned some basic debugging skills.
+
+#### Sample 2: Abnormal Processing
+
+The `myfun` UDF example in sample 1 has passed, but it has two drawbacks.
+
+1. It accepts only one column of data as input, and doesn't throw exception if you passes multiple columns.这
+
+```sql
+taos> select myfun(v1, v2) from t;
+     myfun(v1, v2) |
+============================
+       0.693147181 |
+       1.609437912 |
+       2.302585093 |
+```
+
+2. `null` value is not processed. We except the program to throw exception and terminate if `null` is passed as input.
+
+So, we try to optimize the process() function as below.
+
+```python
+def process(block):
+    rows, cols = block.shape()
+    if cols > 1:
+        raise Exception(f"require 1 parameter but given {cols}")
+    return [ None if block.data(i, 0) is None else log(block.data(i, 0) ** 2 + 1) for i in range(rows)]
+```
+
+Then update the UDF with the command below.
+
+```sql
+create or replace function myfun as '/root/udf/myfun.py' outputtype double language 'Python';
+```
+
+At this time, if we pass two arguments to `myfun`, the execution would fail.
+
+```sql
+taos> select myfun(v1, v2) from t;
+
+DB error: udf function execution failure (0.014643s)
+```
+
+However, the exception is not shown to the end user; it is written to the log file `/var/log/taos/taospyudf.log`
+
+```text
+2023-05-24 23:21:06.790 ERROR [1666188] [doPyUdfScalarProc@507] call pyUdfScalar proc function. context 0x7faade26d180. error: Exception: require 1 parameter but given 2
+
+At:
+  /var/lib/taos//.udf/myfun_3_1884e1281d9.py(12): process
+
+```
+
+Now, we have learned how to update a UDF and check the log of a UDF.
+
+Note: Prior to TDengine 3.0.5.0 (excluding), updating a UDF requires restarting the `taosd` service. After 3.0.5.0, restarting is not required.
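Since raw file handling can get verbose, another option (our own suggestion, not part of the original walkthrough) is Python's standard logging module; the log path below is an arbitrary choice and must be writable by the account running `taosd`.

```python
import logging

def init():
    # route our own diagnostics to a file; the path is a hypothetical choice
    logging.basicConfig(filename="/var/log/taos/myudf.log",
                        level=logging.DEBUG,
                        format="%(asctime)s %(levelname)s %(message)s")
    logging.info("UDF initialized")

def destroy():
    logging.info("UDF destroyed")

def process(block):
    rows, cols = block.shape()
    logging.debug("got a block of %d rows x %d cols", rows, cols)
    return [None] * rows
```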
#### Sample 3: UDF with n arguments
+
+A UDF which accepts n integers, like (x1, x2, ..., xn), and outputs the sum of the product of each value and its sequence number: 1 * x1 + 2 * x2 + ... + n * xn. If there is `null` in the input, then the result is `null`. The difference from sample 1 is that it can accept any number of columns as input and process each column. Assume the program is written in /root/udf/nsum.py:
+
+```python
+def init():
+    pass
+
+
+def destroy():
+    pass
+
+
+def process(block):
+    rows, cols = block.shape()
+    result = []
+    for i in range(rows):
+        total = 0
+        for j in range(cols):
+            v = block.data(i, j)
+            if v is None:
+                total = None
+                break
+            total += (j + 1) * block.data(i, j)
+        result.append(total)
+    return result
+```
+
+Create and test the UDF:
+
+```sql
+create function nsum as '/root/udf/nsum.py' outputtype double language 'Python';
+```
+
+```sql
+taos> insert into t values('2023-05-25 09:09:15', 6, null, 8);
+Insert OK, 1 row(s) affected (0.003675s)
+
+taos> select ts, v1, v2, v3, nsum(v1, v2, v3) from t;
+           ts | v1 | v2 | v3 | nsum(v1, v2, v3) |
+================================================================================================
+ 2023-05-01 12:13:14.000 | 1 | 2 | 3 | 14.000000000 |
+ 2023-05-03 08:09:10.000 | 2 | 3 | 4 | 20.000000000 |
+ 2023-05-10 07:06:05.000 | 3 | 4 | 5 | 26.000000000 |
+ 2023-05-25 09:09:15.000 | 6 | NULL | 8 | NULL |
+Query OK, 4 row(s) in set (0.010653s)
+```
+
Sample 4: Utilize 3rd party package
+
+A UDF which accepts a timestamp and outputs the next closest Sunday. This sample requires the third party package `moment`, which you need to install first.
+
+```shell
+pip3 install moment
+```
+
+Then compose the Python code in /root/udf/nextsunday.py
+
+```python
+import moment
+
+
+def init():
+    pass
+
+
+def destroy():
+    pass
+
+
+def process(block):
+    rows, cols = block.shape()
+    if cols > 1:
+        raise Exception("require only 1 parameter")
+    if not type(block.data(0, 0)) is int:
+        raise Exception("type error")
+    return [moment.unix(block.data(i, 0)).replace(weekday=7).format('YYYY-MM-DD')
+            for i in range(rows)]
+```
+
+The UDF framework maps the TDengine timestamp to the Python int type, so this function only accepts an integer representing milliseconds. process() first validates the parameters, then uses `moment` to replace the weekday with Sunday, formats the result, and outputs it.
+
+Create and test the UDF.
+
+```sql
+create function nextsunday as '/root/udf/nextsunday.py' outputtype binary(10) language 'Python';
+```
+
+If your `taosd` is started using `systemd`, you man encounter the error below. Next we will show how to debug.
+
+```sql
+taos> select ts, nextsunday(ts) from t;
+
+DB error: udf function execution failure (1.123615s)
+```
+
+```shell
+ tail -20 taospyudf.log
+2023-05-25 11:42:34.541 ERROR [1679419] [PyUdf::PyUdf@217] py udf load module failure. error ModuleNotFoundError: No module named 'moment'
+```
+
+This is because `moment` doesn't exist in the default library search path of the python UDF plugin; please check the log file `taospyudf.log`.
+
+```shell
+grep 'sys path' taospyudf.log | tail -1
+```
+
+```text
+2023-05-25 10:58:48.554 INFO [1679419] [doPyOpen@592] python sys path: ['', '/lib/python38.zip', '/lib/python3.8', '/lib/python3.8/lib-dynload', '/lib/python3/dist-packages', '/var/lib/taos//.udf']
+```
+
You may find that the default library search path is `/lib/python3/dist-packages` (just for example, it may be different in your system), but `moment` is installed to `/usr/local/lib/python3.8/dist-packages` (for example, it may be different in your system). Then we change the library search path of python UDF.
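As a side note (an addition of ours, not part of the original walkthrough), you can also locate an installed package directly from the Python REPL before touching any configuration:

```python
>>> import moment
>>> moment.__file__
'/usr/local/lib/python3.8/dist-packages/moment/__init__.py'
```

The printed path (shown here for illustration; it may differ on your system) is the directory that must become reachable from the UDF plugin's search path.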
Check `sys.path`
+
+```python
+>>> import sys
+>>> ":".join(sys.path)
+'/usr/lib/python3.8:/usr/lib/python3.8/lib-dynload:/usr/local/lib/python3.8/dist-packages:/usr/lib/python3/dist-packages'
+```
+
+Copy the output and edit /var/taos/taos.cfg to add the configuration parameter below.
+
+```shell
+UdfdLdLibPath /usr/lib/python3.8:/usr/lib/python3.8/lib-dynload:/usr/local/lib/python3.8/dist-packages:/usr/lib/python3/dist-packages
+```
+
+Save it, then restart `taosd`, using `systemctl restart taosd`, and test again, it will succeed this time.
+
+```sql
+taos> select ts, nextsunday(ts) from t;
+           ts | nextsunday(ts) |
+===========================================
+ 2023-05-01 12:13:14.000 | 2023-05-07 |
+ 2023-05-03 08:09:10.000 | 2023-05-07 |
+ 2023-05-10 07:06:05.000 | 2023-05-14 |
+ 2023-05-25 09:09:15.000 | 2023-05-28 |
+Query OK, 4 row(s) in set (1.011474s)
+```
+
+#### Sample 5: Aggregate Function
+
+An aggregate function which calculates the difference of the maximum and the minimum in a column. An aggregate function takes multiple rows as input and outputs a single value. The execution process of an aggregate UDF is like map-reduce: the framework divides the input into multiple parts, each mapper processes one block, and the reducer aggregates the results of the mappers. The reduce() of a Python UDF has the functionality of both map() and reduce(). The reduce() takes two arguments: the data to be processed, and the result of other tasks executing reduce(). For example, assume the code is in `/root/udf/myspread.py`.
+
+```python
+import io
+import math
+import pickle
+
+LOG_FILE: io.TextIOBase = None
+
+
+def init():
+    global LOG_FILE
+    LOG_FILE = open("/var/log/taos/spread.log", "wt")
+    log("init function myspead success")
+
+
+def log(o):
+    LOG_FILE.write(str(o) + '\n')
+
+
+def destroy():
+    log("close log file: spread.log")
+    LOG_FILE.close()
+
+
+def start():
+    return pickle.dumps((-math.inf, math.inf))
+
+
+def reduce(block, buf):
+    max_number, min_number = pickle.loads(buf)
+    log(f"initial max_number={max_number}, min_number={min_number}")
+    rows, _ = block.shape()
+    for i in range(rows):
+        v = block.data(i, 0)
+        if v > max_number:
+            log(f"max_number={v}")
+            max_number = v
+        if v < min_number:
+            log(f"min_number={v}")
+            min_number = v
+    return pickle.dumps((max_number, min_number))
+
+
+def finish(buf):
+    max_number, min_number = pickle.loads(buf)
+    return max_number - min_number
+```
+
+In this example, we implemented an aggregate function, and added some logging.
+1. init() opens a file for logging
+2. log() is the function for logging; it converts the input object to a string and outputs it with a newline
+3. destroy() closes the log file
+4. start() returns the initial buffer for storing the intermediate result
+5. reduce() processes each data block and aggregates the result
+6. finish() converts the final buffer to the final result
+
+Create the UDF.
+
+```sql
+create or replace aggregate function myspread as '/root/udf/myspread.py' outputtype double bufsize 128 language 'Python';
+```
+
+This SQL command differs from the one creating a scalar UDF in two important points.
+1. keyword `aggregate` is used
+2. keyword `bufsize` is used to specify the memory size for storing the intermediate result. In this example, the result is 32 bytes, but we specified 128 bytes for `bufsize`. You can use the `python` CLI to print the actual size.
```python
+>>> len(pickle.dumps((12345.6789, 23456789.9877)))
+32
+```
+
+Test this function; you can see the result is the same as that of the built-in spread() function.
+
+```sql
+taos> select myspread(v1) from t;
+      myspread(v1) |
+============================
+       5.000000000 |
+Query OK, 1 row(s) in set (0.013486s)
+
+taos> select spread(v1) from t;
+        spread(v1) |
+============================
+       5.000000000 |
+Query OK, 1 row(s) in set (0.005501s)
+```
+
+At last, check the log file; we can see that the reduce() function was executed 3 times, the max value was updated 4 times, and the min value was updated only once.
+
+```shell
+root@slave11 /var/log/taos $ cat spread.log
+init function myspead success
+initial max_number=-inf, min_number=inf
+max_number=1
+min_number=1
+initial max_number=1, min_number=1
+max_number=2
+max_number=3
+initial max_number=3, min_number=1
+max_number=6
+close log file: spread.log
+```
+
+### SQL Commands
+
+1. Create Scalar UDF
+
+```sql
+CREATE FUNCTION function_name AS library_path OUTPUTTYPE output_type LANGUAGE 'Python';
+```
+
+2. Create Aggregate UDF
+
+```sql
+CREATE AGGREGATE FUNCTION function_name AS library_path OUTPUTTYPE output_type LANGUAGE 'Python';
+```
+
+3. Update Scalar UDF
+
+```sql
+CREATE OR REPLACE FUNCTION function_name AS library_path OUTPUTTYPE output_type LANGUAGE 'Python';
+```
+
+4. Update Aggregate UDF
+
+```sql
+CREATE OR REPLACE AGGREGATE FUNCTION function_name AS library_path OUTPUTTYPE output_type BUFSIZE buf_size LANGUAGE 'Python';
+```
+
+Note: If keyword `AGGREGATE` used, the UDF will be treated as aggregate UDF despite what it was before; Similarly, if there is no keyword `aggregate`, the UDF will be treated as scalar function despite what it was before. 更
+
+5. Show the UDF
+
+The version of a UDF is increased by one every time it's updated.
+
+```sql
+select * from ins_functions \G;
+```
+
+6. Show and Drop existing UDF
+
+```sql
+SHOW functions;
+DROP FUNCTION function_name;
```
-2. If PYTHONPATH is needed to find Python packages when the Python UDF executes, include the PYTHONPATH contents into the udfdLdLibPath variable of the taos.cfg configuration file

-### Python UDF Sample Code
+### More Python UDF Samples
 #### Scalar Function [pybitand](https://github.com/taosdata/TDengine/blob/3.0/tests/script/sh/pybitand.py)

 The `pybitand` function implements bitwise AND for multiple columns. If there is only one column, the column is returned. The `pybitand` function ignores null values.

diff --git a/docs/zh/07-develop/09-udf.md b/docs/zh/07-develop/09-udf.md
index 92f5d2a857..61592b4ac5 100644
--- a/docs/zh/07-develop/09-udf.md
+++ b/docs/zh/07-develop/09-udf.md
@@ -271,26 +271,88 @@ select max_vol(vol1,vol2,vol3,deviceid) from battery;

 ## 用 Python 语言实现 UDF

+### 准备环境
+
+1. 准备好 Python 运行环境
+
+2. 安装 Python 包 `taospyudf`
+
+```shell
+pip3 install taospyudf
+```
+
+安装过程中会编译 C++ 源码,因此系统上要有 cmake 和 gcc。编译生成的 libtaospyudf.so 文件自动会被复制到 /usr/local/lib/ 目录,因此如果是非 root 用户,安装时需加 sudo。安装完可以检查这个目录是否有了这个文件:
+
+```shell
+root@slave11 ~/udf $ ls -l /usr/local/lib/libtaos*
+-rw-r--r-- 1 root root 671344 May 24 22:54 /usr/local/lib/libtaospyudf.so
+```
+
+然后执行命令
+```shell
+ldconfig
+```
+
+3. 如果 Python UDF 程序执行时,通过 PYTHONPATH 引用其它的包,可以设置 taos.cfg 的 UdfdLdLibPath 变量为PYTHONPATH的内容
+
+4.
启动 `taosd` 服务 +细节请参考 [快速开始](../get-started) + +### 接口定义 + +#### 接口概述 + 使用 Python 语言实现 UDF 时,需要实现规定的接口函数 - 标量函数需要实现标量接口函数 process 。 - 聚合函数需要实现聚合接口函数 start ,reduce ,finish。 - 如果需要初始化,实现 init;如果需要清理工作,实现 destroy。 -### 用 Python 实现标量函数 +#### 标量函数接口 +```Python +def process(input: datablock) -> tuple[output_type]: +``` + +说明: + - input:datablock 类似二维矩阵,通过成员方法 data(row,col)返回位于 row 行,col 列的 python 对象 + - 返回值是一个 Python 对象元组,每个元素类型为输出类型。 + +#### 聚合函数接口 +```Python +def start() -> bytes: +def reduce(inputs: datablock, buf: bytes) -> bytes +def finish(buf: bytes) -> output_type: +``` + +说明: + - 首先调用 start 生成最初结果 buffer + - 然后输入数据会被分为多个行数据块,对每个数据块 inputs 和当前中间结果 buf 调用 reduce,得到新的中间结果 + - 最后再调用 finish 从中间结果 buf 产生最终输出,最终输出只能含 0 或 1 条数据。 + +#### 初始化和销毁接口 +```Python +def init() +def destroy() +``` + +说明: + - init 完成初始化工作 + - destroy 完成清理工作 + +### 标量函数实现模板 标量函数实现模版如下 + ```Python def init(): # initialization def destroy(): # destroy -def process(input: datablock) -> tuple[output_type]: - # process input datablock, - # datablock.data(row, col) is to access the python object in location(row,col) - # return tuple object consisted of object of type outputtype +def process(input: datablock) -> tuple[output_type]: ``` -### 用 Python 实现聚合函数 +注意:定义标题函数最重要是要实现 process 函数,同时必须定义 init 和 destroy 函数即使什么都不做 + +### 聚合函数实现模板 聚合函数实现模版如下 ```Python @@ -310,34 +372,9 @@ def finish(buf: bytes) -> output_type: #return obj of type outputtype ``` -### Python UDF 接口函数定义 +注意:定义聚合函数最重要是要实现 start, reduce 和 finish,且必须定义 init 和 destroy 函数。start 生成最初结果 buffer,然后输入数据会被分为多个行数据块,对每个数据块 inputs 和当前中间结果 buf 调用 reduce,得到新的中间结果,最后再调用 finish 从中间结果 buf 产生最终输出。 -#### 标量函数接口 -```Python -def process(input: datablock) -> tuple[output_type]: -``` -- input:datablock 类似二维矩阵,通过成员方法 data(row,col)返回位于 row 行,col 列的 python 对象 -- 返回值是一个 Python 对象元组,每个元素类型为输出类型。 - -#### 聚合函数接口 -```Python -def start() -> bytes: -def reduce(inputs: datablock, buf: bytes) -> bytes -def finish(buf: bytes) -> output_type: -``` - -首先调用 start 生成最初结果 buffer,然后输入数据会被分为多个行数据块,对每个数据块 inputs 和当前中间结果 buf 调用 reduce,得到新的中间结果,最后再调用 finish 从中间结果 buf 产生最终输出,最终输出只能含 0 或 1 条数据。 - - -#### 初始化和销毁接口 -```Python -def init() -def destroy() -``` - -其中 init 完成初始化工作。 destroy 完成清理工作。 - -### Python 和 TDengine之间的数据类型映射 +### 数据类型映射 下表描述了TDengine SQL数据类型和Python数据类型的映射。任何类型的NULL值都映射成Python的None值。 @@ -351,15 +388,461 @@ def destroy() |TIMESTAMP | int | |JSON and other types | 不支持 | -### Python UDF 环境的安装 -1. 安装 taospyudf 包。此包执行Python UDF程序。 -```bash -sudo pip install taospyudf -ldconfig +### 开发指南 + +本文内容由浅入深包括 4 个示例程序: +1. 定义一个只接收一个整数的标量函数: 输入 n, 输出 ln(n^2 + 1)。 +2. 定义一个接收 n 个整数的标量函数, 输入 (x1, x2, ..., xn), 输出每个值和它们的序号的乘积的和: x1 + 2 * x2 + ... + n * xn。 +3. 定义一个标量函数,输入一个时间戳,输出距离这个时间最近的下一个周日。完成这个函数要用到第三方库 moment。我们在这个示例中讲解使用第三方库的注意事项。 +4. 定义一个聚合函数,计算某一列最大值和最小值的差, 也就是实现 TDengien 内置的 spread 函数。 +同时也包含大量实用的 debug 技巧。 +本文假设你用的是 Linux 系统,且已安装好了 TDengine 3.0.4.0+ 和 Python 3.x。 + +注意:**UDF 内无法通过 print 函数输出日志,需要自己写文件或用 python 内置的 logging 库写文件**。 + +#### 最简单的 UDF + +编写一个只接收一个整数的 UDF 函数: 输入 n, 输出 ln(n^2 + 1)。 +首先编写一个 Python 文件,存在系统某个目录,比如 /root/udf/myfun.py 内容如下 + +```python +from math import log + +def init(): + pass + +def destroy(): + pass + +def process(block): + rows, _ = block.shape() + return [log(block.data(i, 0) ** 2 + 1) for i in range(rows)] ``` -2. 如果 Python UDF 程序执行时,通过 PYTHONPATH 引用其它的包,可以设置 taos.cfg 的 UdfdLdLibPath 变量为PYTHONPATH的内容 + +这个文件包含 3 个函数, init 和 destroy 都是空函数,它们是 UDF 的生命周期函数,即使什么都不做也要定义。最关键的是 process 函数, 它接受一个数据块,这个数据块对象有两个方法: +1. shape() 返回数据块的行数和列数 +2. 
data(i, j) 返回 i 行 j 列的数据 +标量函数的 process 方法传人的数据块有多少行,就需要返回多少个数据。上述代码中我们忽略的列数,因为我们只想对每行的第一个数做计算。 +接下来我们创建对应的 UDF 函数,在 TDengine CLI 中执行下面语句: + +```sql +create function myfun as '/root/udf/myfun.py' outputtype double language 'Python' +``` +其输出如下 + +```shell + taos> create function myfun as '/root/udf/myfun.py' outputtype double language 'Python'; +Create OK, 0 row(s) affected (0.005202s) +``` + +看起来很顺利,接下来 show 一下系统中所有的自定义函数,确认创建成功: + +```text +taos> show functions; + name | +================================= + myfun | +Query OK, 1 row(s) in set (0.005767s) +``` + +接下来就来测试一下这个函数,测试之前先执行下面的 SQL 命令,制造些测试数据,在 TDengine CLI 中执行下述命令 + +```sql +create database test; +create table t(ts timestamp, v1 int, v2 int, v3 int); +insert into t values('2023-05-01 12:13:14', 1, 2, 3); +insert into t values('2023-05-03 08:09:10', 2, 3, 4); +insert into t values('2023-05-10 07:06:05', 3, 4, 5); +``` + +测试 myfun 函数: + +```sql +taos> select myfun(v1, v2) from t; + +DB error: udf function execution failure (0.011088s) +``` + +不幸的是执行失败了,什么原因呢? +查看 udfd 进程的日志 + +```shell +tail -10 /var/log/taos/udfd.log +``` + +发现以下错误信息: + +```text +05/24 22:46:28.733545 01665799 UDF ERROR can not load library libtaospyudf.so. error: operation not permitted +05/24 22:46:28.733561 01665799 UDF ERROR can not load python plugin. lib path libtaospyudf.so +``` + +错误很明确:没有加载到 Python 插件 libtaospyudf.so,如果遇到此错误,请参考前面的准备环境一节。 + +修复环境错误后再次执行,如下: + +```sql +taos> select myfun(v1) from t; + myfun(v1) | +============================ + 0.693147181 | + 1.609437912 | + 2.302585093 | +``` + +至此,我们完成了第一个 UDF 😊,并学会了简单的 debug 方法。 + +#### 示例二:异常处理 + +上面的 myfun 虽然测试测试通过了,但是有两个缺点: + +1. 这个标量函数只接受 1 列数据作为输入,如果用户传入了多列也不会抛异常。 + +```sql +taos> select myfun(v1, v2) from t; + myfun(v1, v2) | +============================ + 0.693147181 | + 1.609437912 | + 2.302585093 | +``` + +2. 没有处理 null 值。我们期望如果输入有 null,则会抛异常终止执行。 +因此 process 函数改进如下: + +```python +def process(block): + rows, cols = block.shape() + if cols > 1: + raise Exception(f"require 1 parameter but given {cols}") + return [ None if block.data(i, 0) is None else log(block.data(i, 0) ** 2 + 1) for i in range(rows)] +``` + +然后执行下面的语句更新已有的 UDF: + +```sql +create or replace function myfun as '/root/udf/myfun.py' outputtype double language 'Python'; +``` + +再传入 myfun 两个参数,就会执行失败了 + +```sql +taos> select myfun(v1, v2) from t; + +DB error: udf function execution failure (0.014643s) +``` + +但遗憾的是我们自定义的异常信息没有展示给用户,而是在插件的日志文件 /var/log/taos/taospyudf.log 中: + +```text +2023-05-24 23:21:06.790 ERROR [1666188] [doPyUdfScalarProc@507] call pyUdfScalar proc function. context 0x7faade26d180. error: Exception: require 1 parameter but given 2 + +At: + /var/lib/taos//.udf/myfun_3_1884e1281d9.py(12): process + +``` + +至此,我们学会了如何更新 UDF,并查看 UDF 输出的错误日志。 +(注:如果 UDF 更新后未生效,在 TDengine 3.0.5.0 以前(不含)的版本中需要重启 taosd,在 3.0.5.0 及之后的版本中不需要重启 taosd 即可生效。) + +#### 示例三: 接收 n 个参数的 UDF + +编写一个 UDF:输入(x1, x2, ..., xn), 输出每个值和它们的序号的乘积的和: 1 * x1 + 2 * x2 + ... 
+ n * xn。如果 x1 至 xn 中包含 null,则结果为 null。 +这个示例与示例一的区别是,可以接受任意多列作为输入,且要处理每一列的值。编写 UDF 文件 /root/udf/nsum.py: + +```python +def init(): + pass + + +def destroy(): + pass + + +def process(block): + rows, cols = block.shape() + result = [] + for i in range(rows): + total = 0 + for j in range(cols): + v = block.data(i, j) + if v is None: + total = None + break + total += (j + 1) * block.data(i, j) + result.append(total) + return result +``` + +创建 UDF: + +```sql +create function nsum as '/root/udf/nsum.py' outputtype double language 'Python'; +``` + +测试 UDF: + +```sql +taos> insert into t values('2023-05-25 09:09:15', 6, null, 8); +Insert OK, 1 row(s) affected (0.003675s) + +taos> select ts, v1, v2, v3, nsum(v1, v2, v3) from t; + ts | v1 | v2 | v3 | nsum(v1, v2, v3) | +================================================================================================ + 2023-05-01 12:13:14.000 | 1 | 2 | 3 | 14.000000000 | + 2023-05-03 08:09:10.000 | 2 | 3 | 4 | 20.000000000 | + 2023-05-10 07:06:05.000 | 3 | 4 | 5 | 26.000000000 | + 2023-05-25 09:09:15.000 | 6 | NULL | 8 | NULL | +Query OK, 4 row(s) in set (0.010653s) +``` + +示例三:使用第三方库 + +编写一个 UDF,输入一个时间戳,输出距离这个时间最近的下一个周日。比如今天是 2023-05-25, 则下一个周日是 2023-05-28。 +完成这个函数要用到第三方库 momen。先安装这个库: + +```shell +pip3 install moment +``` + +然后编写 UDF 文件 /root/udf/nextsunday.py + +```python +import moment + + +def init(): + pass + + +def destroy(): + pass + + +def process(block): + rows, cols = block.shape() + if cols > 1: + raise Exception("require only 1 parameter") + if not type(block.data(0, 0)) is int: + raise Exception("type error") + return [moment.unix(block.data(i, 0)).replace(weekday=7).format('YYYY-MM-DD') + for i in range(rows)] +``` + +UDF 框架会将 TDengine 的 timestamp 类型映射为 Python 的 int 类型,所以这个函数只接受一个表示毫秒数的整数。process 方法先做参数检查,然后用 moment 包替换时间的星期为星期日,最后格式化输出。输出的字符串长度是固定的10个字符长,因此可以这样创建 UDF 函数: + +```sql +create function nextsunday as '/root/udf/nextsunday.py' outputtype binary(10) language 'Python'; +``` + +此时测试函数,如果你是用 systemctl 启动的 taosd,肯定会遇到错误: + +```sql +taos> select ts, nextsunday(ts) from t; + +DB error: udf function execution failure (1.123615s) +``` + +```shell + tail -20 taospyudf.log +2023-05-25 11:42:34.541 ERROR [1679419] [PyUdf::PyUdf@217] py udf load module failure. 
error ModuleNotFoundError: No module named 'moment' +``` + +这是因为 “moment” 所在位置不在 python udf 插件默认的库搜索路径中。怎么确认这一点呢?通过以下命令搜索 taospyudf.log: + +```shell +grep 'sys path' taospyudf.log | tail -1 +``` + +输出如下 + +```text +2023-05-25 10:58:48.554 INFO [1679419] [doPyOpen@592] python sys path: ['', '/lib/python38.zip', '/lib/python3.8', '/lib/python3.8/lib-dynload', '/lib/python3/dist-packages', '/var/lib/taos//.udf'] +``` + +发现 python udf 插件默认搜索的第三方库安装路径是: /lib/python3/dist-packages,而 moment 默认安装到了 /usr/local/lib/python3.8/dist-packages。下面我们修改 python udf 插件默认的库搜索路径。 +先打开 python3 命令行,查看当前的 sys.path + +```python +>>> import sys +>>> ":".join(sys.path) +'/usr/lib/python3.8:/usr/lib/python3.8/lib-dynload:/usr/local/lib/python3.8/dist-packages:/usr/lib/python3/dist-packages' +``` + +复制上面脚本的输出的字符串,然后编辑 /var/taos/taos.cfg 加入以下配置: + +```shell +UdfdLdLibPath /usr/lib/python3.8:/usr/lib/python3.8/lib-dynload:/usr/local/lib/python3.8/dist-packages:/usr/lib/python3/dist-packages +``` + +保存后执行 systemctl restart taosd, 再测试就不报错了: + +```sql +taos> select ts, nextsunday(ts) from t; + ts | nextsunday(ts) | +=========================================== + 2023-05-01 12:13:14.000 | 2023-05-07 | + 2023-05-03 08:09:10.000 | 2023-05-07 | + 2023-05-10 07:06:05.000 | 2023-05-14 | + 2023-05-25 09:09:15.000 | 2023-05-28 | +Query OK, 4 row(s) in set (1.011474s) +``` + +#### 示例五:聚合函数 + +编写一个聚合函数,计算某一列最大值和最小值的差。 +聚合函数与标量函数的区别是:标量函数是多行输入对应多个输出,聚合函数是多行输入对应一个输出。聚合函数的执行过程有点像经典的 map-reduce 框架的执行过程,框架把数据分成若干块,每个 mapper 处理一个块,reducer 再把 mapper 的结果做聚合。不一样的地方在于,对于 TDengine Python UDF 中的 reduce 函数既有 map 的功能又有 reduce 的功能。reduce 函数接受两个参数:一个是自己要处理的数据,一个是别的任务执行 reduce 函数的处理结果。如下面的示例 /root/udf/myspread.py: + +```python +import io +import math +import pickle + +LOG_FILE: io.TextIOBase = None + + +def init(): + global LOG_FILE + LOG_FILE = open("/var/log/taos/spread.log", "wt") + log("init function myspead success") + + +def log(o): + LOG_FILE.write(str(o) + '\n') + + +def destroy(): + log("close log file: spread.log") + LOG_FILE.close() + + +def start(): + return pickle.dumps((-math.inf, math.inf)) + + +def reduce(block, buf): + max_number, min_number = pickle.loads(buf) + log(f"initial max_number={max_number}, min_number={min_number}") + rows, _ = block.shape() + for i in range(rows): + v = block.data(i, 0) + if v > max_number: + log(f"max_number={v}") + max_number = v + if v < min_number: + log(f"min_number={v}") + min_number = v + return pickle.dumps((max_number, min_number)) + + +def finish(buf): + max_number, min_number = pickle.loads(buf) + return max_number - min_number +``` + +在这个示例中我们不光定义了一个聚合函数,还添加记录执行日志的功能,讲解如下: +1. init 函数不再是空函数,而是打开了一个文件用于写执行日志 +2. log 函数是记录日志的工具,自动将传入的对象转成字符串,加换行符输出 +3. destroy 函数用来在执行结束关闭文件 +4. start 返回了初始的 buffer,用来存聚合函数的中间结果,我们把最大值初始化为负无穷大,最小值初始化为正无穷大 +5. reduce 处理每个数据块并聚合结果 +6. finish 函数将最终的 buffer 转换成最终的输出 +执行下面的 SQL语句创建对应的 UDF: + +```sql +create or replace aggregate function myspread as '/root/udf/myspread.py' outputtype double bufsize 128 language 'Python'; +``` + +这个 SQL 语句与创建标量函数的 SQL 语句有两个重要区别: +1. 增加了 aggregate 关键字 +2. 
增加了 bufsize 关键字,用来指定存储中间结果的内存大小,这个数值可以大于实际使用的数值。本例中间结果是两个浮点数组成的 tuple,序列化后实际占用大小只有 32 个字节,但指定的 bufsize 是128,可以用 python 命令行打印实际占用的字节数

```python
>>> len(pickle.dumps((12345.6789, 23456789.9877)))
32
```

测试这个函数,可以看到 myspread 的输出结果和内置的 spread 函数的输出结果是一致的。

```sql
taos> select myspread(v1) from t;
      myspread(v1) |
============================
       5.000000000 |
Query OK, 1 row(s) in set (0.013486s)

taos> select spread(v1) from t;
        spread(v1) |
============================
       5.000000000 |
Query OK, 1 row(s) in set (0.005501s)
```

最后,查看我们自己打印的执行日志,从日志可以看出,reduce 函数被执行了 3 次。执行过程中 max 值被更新了 4 次, min 值只被更新 1 次。

```shell
root@slave11 /var/log/taos $ cat spread.log
init function myspead success
initial max_number=-inf, min_number=inf
max_number=1
min_number=1
initial max_number=1, min_number=1
max_number=2
max_number=3
initial max_number=3, min_number=1
max_number=6
close log file: spread.log
```

通过这个示例,我们学会了如何定义聚合函数,并打印自定义的日志信息。

### SQL 命令

1. 创建标量函数的语法

```sql
CREATE FUNCTION function_name AS library_path OUTPUTTYPE output_type LANGUAGE 'Python';
```

2. 创建聚合函数的语法

```sql
CREATE AGGREGATE FUNCTION function_name AS library_path OUTPUTTYPE output_type LANGUAGE 'Python';
```

3. 更新标量函数

```sql
CREATE OR REPLACE FUNCTION function_name AS library_path OUTPUTTYPE output_type LANGUAGE 'Python';
```

4. 更新聚合函数

```sql
CREATE OR REPLACE AGGREGATE FUNCTION function_name AS library_path OUTPUTTYPE output_type BUFSIZE buf_size LANGUAGE 'Python';
```

注意:如果加了 “AGGREGATE” 关键字,更新之后函数将被当作聚合函数,无论之前是什么类型的函数。相反,如果没有加 “AGGREGATE” 关键字,更新之后的函数将被当作标量函数,无论之前是什么类型的函数。

5. 查看函数信息

 同名的 UDF 每更新一次,版本号会增加 1。

```sql
select * from ins_functions \G;
```

6. 查看和删除已有的 UDF

```sql
SHOW functions;
DROP FUNCTION function_name;
```


上面的命令可以查看 UDF 的完整信息

-### Python UDF 示例代码
+### 更多 Python UDF 示例代码
 #### 标量函数示例 [pybitand](https://github.com/taosdata/TDengine/blob/3.0/tests/script/sh/pybitand.py)

 pybitand 实现多列的按位与功能。如果只有一列,返回这一列。pybitand 忽略空值。

From 730e087d5e72d14aff0578e31d4eda3dd2f52c2e Mon Sep 17 00:00:00 2001
From: wade zhang <95411902+gccgdb1234@users.noreply.github.com>
Date: Fri, 26 May 2023 19:21:13 +0800
Subject: [PATCH 34/49] Update 09-udf.md

---
 docs/en/07-develop/09-udf.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/en/07-develop/09-udf.md b/docs/en/07-develop/09-udf.md
index 92bd4efd25..f5f1230026 100644
--- a/docs/en/07-develop/09-udf.md
+++ b/docs/en/07-develop/09-udf.md
@@ -320,7 +320,7 @@ def process(input: datablock) -> tuple[output_type]:

 Description: this function processes datablock, which is the input; you can use datablock.data(row, col) to access the python object at location(row,col); the output is a tuple object consisting of objects of type outputtype

-### Aggregate UDF Interface
+#### Aggregate UDF Interface

 The implementation of an aggregate function is described as follows:

From 290a14e2d31d074daaea7b9e801c19f9a40bef91 Mon Sep 17 00:00:00 2001
From: wade zhang <95411902+gccgdb1234@users.noreply.github.com>
Date: Fri, 26 May 2023 19:22:47 +0800
Subject: [PATCH 35/49] Update 09-udf.md

---
 docs/en/07-develop/09-udf.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/en/07-develop/09-udf.md b/docs/en/07-develop/09-udf.md
index f5f1230026..67109834ba 100644
--- a/docs/en/07-develop/09-udf.md
+++ b/docs/en/07-develop/09-udf.md
@@ -602,7 +602,7 @@ taos> select ts, v1, v2, v3, nsum(v1, v2, v3) from t;
 Query OK, 4 row(s) in set (0.010653s)
 ```

-Sample 4: Utilize 3rd party package
+#### Sample 4: Utilize 3rd party package

 A UDF
which accepts a timestamp and outputs the next closest Sunday. This sample requires the third party package `moment`, which you need to install first.

From 3b0cee7866c81e7a3edd97b6feeb849323245ad9 Mon Sep 17 00:00:00 2001
From: wade zhang <95411902+gccgdb1234@users.noreply.github.com>
Date: Fri, 26 May 2023 19:24:46 +0800
Subject: [PATCH 36/49] Update 09-udf.md

---
 docs/zh/07-develop/09-udf.md | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/docs/zh/07-develop/09-udf.md b/docs/zh/07-develop/09-udf.md
index 61592b4ac5..cce9c0b24c 100644
--- a/docs/zh/07-develop/09-udf.md
+++ b/docs/zh/07-develop/09-udf.md
@@ -338,7 +338,9 @@ def destroy()
 - init 完成初始化工作
 - destroy 完成清理工作

-### 标量函数实现模板
+### Python UDF 函数模板
+
+#### 标量函数实现模板

 标量函数实现模版如下

@@ -352,7 +352,7 @@ def process(input: datablock) -> tuple[output_type]:

 注意:定义标题函数最重要是要实现 process 函数,同时必须定义 init 和 destroy 函数即使什么都不做

-### 聚合函数实现模板
+#### 聚合函数实现模板

 聚合函数实现模版如下

 ```Python
@@ -596,7 +598,7 @@ taos> select ts, v1, v2, v3, nsum(v1, v2, v3) from t;
 Query OK, 4 row(s) in set (0.010653s)
 ```

-示例三:使用第三方库
+#### 示例四:使用第三方库

 编写一个 UDF,输入一个时间戳,输出距离这个时间最近的下一个周日。比如今天是 2023-05-25, 则下一个周日是 2023-05-28。
 完成这个函数要用到第三方库 momen。先安装这个库:

From 5abfff6b53718d35fe592361d3d40345f5148b26 Mon Sep 17 00:00:00 2001
From: wade zhang <95411902+gccgdb1234@users.noreply.github.com>
Date: Fri, 26 May 2023 19:30:34 +0800
Subject: [PATCH 37/49] Update 09-udf.md

---
 docs/zh/07-develop/09-udf.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/zh/07-develop/09-udf.md b/docs/zh/07-develop/09-udf.md
index cce9c0b24c..d964a2a1bd 100644
--- a/docs/zh/07-develop/09-udf.md
+++ b/docs/zh/07-develop/09-udf.md
@@ -871,7 +871,7 @@ pyl2norm 实现了输入列的所有数据的二阶范数,即对每个数据

-### 聚合函数示例 [pycumsum](https://github.com/taosdata/TDengine/blob/3.0/tests/script/sh/pycumsum.py)
+#### 聚合函数示例 [pycumsum](https://github.com/taosdata/TDengine/blob/3.0/tests/script/sh/pycumsum.py)

 pycumsum 使用 numpy 计算输入列所有数据的累积和。
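The pycumsum example referenced above is linked but not shown in these patches. For illustration only, here is a minimal sketch of what a numpy-based cumulative-sum aggregate could look like under the Python UDF interface described in this series; it is an assumption of this sketch, not necessarily the actual repository implementation.

```python
import pickle
import numpy as np

def init():
    pass

def destroy():
    pass

def start():
    # the running total starts at zero
    return pickle.dumps(0.0)

def reduce(block, buf):
    total = pickle.loads(buf)
    rows, _ = block.shape()
    values = [block.data(i, 0) for i in range(rows)]
    # skip NULLs; np.sum of an empty list is 0.0
    total += float(np.sum([v for v in values if v is not None]))
    return pickle.dumps(total)

def finish(buf):
    return pickle.loads(buf)
```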
From c7e2347dfdea75a813ee0ddf5a7cc7648cd4f677 Mon Sep 17 00:00:00 2001
From: wade zhang <95411902+gccgdb1234@users.noreply.github.com>
Date: Mon, 29 May 2023 08:14:36 +0800
Subject: [PATCH 38/49] Update 09-udf.md

---
 docs/en/07-develop/09-udf.md | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/docs/en/07-develop/09-udf.md b/docs/en/07-develop/09-udf.md
index 67109834ba..66e944d654 100644
--- a/docs/en/07-develop/09-udf.md
+++ b/docs/en/07-develop/09-udf.md
@@ -295,7 +295,7 @@ Then execute the command below.
 ldconfig
 ```

-3. If you want to utilize some 3rd party python packages using `PYTHONPATH`, please set configuration parameter `UdfdLdLibPath` to the value of `PYTHONPATH` before starting `taosd`.
+3. If you want to utilize some 3rd party python packages in your Python UDF, please set configuration parameter `UdfdLdLibPath` to the value of `PYTHONPATH` before starting `taosd`.

 4. Launch `taosd` service

@@ -503,7 +503,7 @@ Now, we have finished the first UDF in Python, and learned some basic debugging

 The `myfun` UDF example in sample 1 has passed, but it has two drawbacks.

-1. It accepts only one column of data as input, and doesn't throw exception if you passes multiple columns.这
+1. It accepts only one column of data as input, and doesn't throw exception if you passes multiple columns.

From cb82453a43ca42354eea7046ee9f64825c4566ea Mon Sep 17 00:00:00 2001
From: wade zhang <95411902+gccgdb1234@users.noreply.github.com>
Date: Mon, 29 May 2023 08:21:00 +0800
Subject: [PATCH 39/49] Update 09-udf.md

---
 docs/en/07-develop/09-udf.md | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/docs/en/07-develop/09-udf.md b/docs/en/07-develop/09-udf.md
index 66e944d654..d7a3fbd032 100644
--- a/docs/en/07-develop/09-udf.md
+++ b/docs/en/07-develop/09-udf.md
@@ -503,7 +503,7 @@ Now, we have finished the first UDF in Python, and learned some basic debugging

 The `myfun` UDF example in sample 1 has passed, but it has two drawbacks.

-1. It accepts only one column of data as input, and doesn't throw exception if you passes multiple columns.
更 +Note: If the keyword `AGGREGATE` is used, the UDF will be treated as an aggregate UDF despite what it was before; similarly, if there is no keyword `aggregate`, the UDF will be treated as a scalar function despite what it was before. 5. Show the UDF From 6ccefe9891302176f54228cfffe446ed156c1d8b Mon Sep 17 00:00:00 2001 From: wade zhang <95411902+gccgdb1234@users.noreply.github.com> Date: Mon, 29 May 2023 08:23:31 +0800 Subject: [PATCH 40/49] Update 09-udf.md --- docs/en/07-develop/09-udf.md | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/docs/en/07-develop/09-udf.md b/docs/en/07-develop/09-udf.md index d7a3fbd032..51c4c112c0 100644 --- a/docs/en/07-develop/09-udf.md +++ b/docs/en/07-develop/09-udf.md @@ -667,7 +667,7 @@ grep 'sys path' taospyudf.log | tail -1 You may find that the default library search path is `/lib/python3/dist-packages` (just for example, it may be different in your system), but `moment` is installed to `/usr/local/lib/python3.8/dist-packages` (for example, it may be different in your system). Then we change the library search path of python UDF. -Check `sys.path` +Check `sys.path`, which must include the packages you installed with the pip3 command previously, as shown below: ```python >>> import sys @@ -683,6 +683,8 @@ UdfdLdLibPath /usr/lib/python3.8:/usr/lib/python3.8/lib-dynload:/usr/local/lib/p Save it, then restart `taosd`, using `systemctl restart taosd`, and test again; it will succeed this time. +Note: If your cluster consists of multiple `taosd` instances, you have to repeat the same process for each of them. + ```sql taos> select ts, nextsunday(ts) from t; ts | nextsunday(ts) | From 6a741cc9014e039df68343329031a05f0e87452e Mon Sep 17 00:00:00 2001 From: wade zhang <95411902+gccgdb1234@users.noreply.github.com> Date: Mon, 29 May 2023 09:45:02 +0800 Subject: [PATCH 41/49] Update 09-udf.md --- docs/zh/07-develop/09-udf.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/zh/07-develop/09-udf.md b/docs/zh/07-develop/09-udf.md index 8e345277b3..ae11273a39 100644 --- a/docs/zh/07-develop/09-udf.md +++ b/docs/zh/07-develop/09-udf.md @@ -296,7 +296,7 @@ ldconfig 3. 如果 Python UDF 程序执行时，通过 PYTHONPATH 引用其它的包，可以设置 taos.cfg 的 UdfdLdLibPath 变量为 PYTHONPATH 的内容 4. 启动 `taosd` 服务 -细节请参考 [快速开始](../get-started) +细节请参考 [快速开始](../../get-started) ### 接口定义 From b2d2e35450132649cfa5d12fadd1a20e449dcfd3 Mon Sep 17 00:00:00 2001 From: wade zhang <95411902+gccgdb1234@users.noreply.github.com> Date: Mon, 29 May 2023 09:52:03 +0800 Subject: [PATCH 42/49] Update 09-udf.md --- docs/en/07-develop/09-udf.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/en/07-develop/09-udf.md b/docs/en/07-develop/09-udf.md index 62cdc68e84..825d3c6f8b 100644 --- a/docs/en/07-develop/09-udf.md +++ b/docs/en/07-develop/09-udf.md @@ -299,7 +299,7 @@ ldconfig 4. Launch `taosd` service -Please refer to [Get Started](../get-started) +Please refer to [Get Started](../../get-started) ### Interface definition From 953a69a829a4487946cd23fe701a33b0d0bef16c Mon Sep 17 00:00:00 2001 From: Haojun Liao Date: Mon, 29 May 2023 10:12:27 +0800 Subject: [PATCH 43/49] refactor: do some internal refactor. 
--- include/libs/executor/storageapi.h | 146 ++++------------------ source/dnode/vnode/inc/vnode.h | 108 +--------------- source/dnode/vnode/src/inc/vnodeInt.h | 14 +++ source/dnode/vnode/src/meta/metaCache.c | 6 +- source/dnode/vnode/src/meta/metaSma.c | 2 + source/dnode/vnode/src/tsdb/tsdbRead.c | 17 +-- source/dnode/vnode/src/vnd/vnodeInitApi.c | 6 +- source/libs/executor/src/executil.c | 7 +- 8 files changed, 69 insertions(+), 237 deletions(-) diff --git a/include/libs/executor/storageapi.h b/include/libs/executor/storageapi.h index 9ed47c94f8..b89293f8f0 100644 --- a/include/libs/executor/storageapi.h +++ b/include/libs/executor/storageapi.h @@ -103,63 +103,6 @@ typedef struct SRowBuffPos { bool beUsed; } SRowBuffPos; -// int32_t tsdbReuseCacherowsReader(void* pReader, void* pTableIdList, int32_t numOfTables); -// int32_t tsdbCacherowsReaderOpen(void *pVnode, int32_t type, void *pTableIdList, int32_t numOfTables, int32_t numOfCols, -// SArray *pCidList, int32_t *pSlotIds, uint64_t suid, void **pReader, const char *idstr); -// int32_t tsdbRetrieveCacheRows(void *pReader, SSDataBlock *pResBlock, const int32_t *slotIds, const int32_t *dstSlotIds, -// SArray *pTableUids); -// void *tsdbCacherowsReaderClose(void *pReader); -// int32_t tsdbGetTableSchema(void *pVnode, int64_t uid, STSchema **pSchema, int64_t *suid); - -// int32_t tsdbReaderOpen(void *pVnode, SQueryTableDataCond *pCond, void *pTableList, int32_t numOfTables, -// SSDataBlock *pResBlock, STsdbReader **ppReader, const char *idstr, bool countOnly, SHashObj** pIgnoreTables); -// int32_t tsdbSetTableList(STsdbReader *pReader, const void *pTableList, int32_t num); -// void tsdbReaderSetId(STsdbReader *pReader, const char *idstr); -// void tsdbReaderClose(STsdbReader *pReader); -// int32_t tsdbNextDataBlock(STsdbReader *pReader, bool *hasNext); -// int32_t tsdbRetrieveDatablockSMA(STsdbReader *pReader, SSDataBlock *pDataBlock, bool *allHave); -// void tsdbReleaseDataBlock(STsdbReader *pReader); -// SSDataBlock *tsdbRetrieveDataBlock(STsdbReader *pTsdbReadHandle, SArray *pColumnIdList); -// int32_t tsdbReaderReset(STsdbReader *pReader, SQueryTableDataCond *pCond); -// int32_t tsdbGetFileBlocksDistInfo(STsdbReader *pReader, STableBlockDistInfo *pTableBlockInfo); -// int64_t tsdbGetNumOfRowsInMemTable(STsdbReader *pHandle); -// void *tsdbGetIdx(void *pMeta); -// void *tsdbGetIvtIdx(void *pMeta); -// uint64_t tsdbGetReaderMaxVersion(STsdbReader *pReader); -// void tsdbReaderSetCloseFlag(STsdbReader *pReader); -// int64_t tsdbGetLastTimestamp(void* pVnode, void* pTableList, int32_t numOfTables, const char* pIdStr); - -// int32_t vnodeGetCtbIdList(void *pVnode, int64_t suid, SArray *list); -// int32_t vnodeGetCtbIdListByFilter(void *pVnode, int64_t suid, SArray *list, bool (*filter)(void *arg), void *arg); -// int32_t vnodeGetStbIdList(void *pVnode, int64_t suid, SArray *list); -// void *vnodeGetIdx(void *pVnode); -// void *vnodeGetIvtIdx(void *pVnode); - -// void metaReaderInit(SMetaReader *pReader, SMeta *pMeta, int32_t flags); -// void metaReaderReleaseLock(SMetaReader *pReader); -// void metaReaderClear(SMetaReader *pReader); -// int32_t metaReaderGetTableEntryByUid(SMetaReader *pReader, tb_uid_t uid); -// int32_t metaReaderGetTableEntryByUidCache(SMetaReader *pReader, tb_uid_t uid); -// int metaGetTableEntryByName(SMetaReader *pReader, const char *name); -// int32_t metaGetTableTags(SMeta *pMeta, uint64_t suid, SArray *uidList); -// int32_t metaGetTableTagsByUids(SMeta *pMeta, int64_t suid, SArray *uidList); -// const void 
*metaGetTableTagVal(void *tag, int16_t type, STagVal *tagVal); -// int metaGetTableNameByUid(void *meta, uint64_t uid, char *tbName); -// -// int metaGetTableSzNameByUid(void *meta, uint64_t uid, char *tbName); -// int metaGetTableUidByName(void *meta, char *tbName, uint64_t *uid); -// int metaGetTableTypeByName(void *meta, char *tbName, ETableType *tbType); -// bool metaIsTableExist(SMeta *pMeta, tb_uid_t uid); -// int32_t metaGetCachedTableUidList(SMeta *pMeta, tb_uid_t suid, const uint8_t *key, int32_t keyLen, SArray *pList, -// bool *acquired); -// int32_t metaUidFilterCachePut(SMeta *pMeta, uint64_t suid, const void *pKey, int32_t keyLen, void *pPayload, -// int32_t payloadLen, double selectivityRatio); -// int32_t metaUidCacheClear(SMeta *pMeta, uint64_t suid); -// tb_uid_t metaGetTableEntryUidByName(SMeta *pMeta, const char *name); -// int32_t metaGetCachedTbGroup(SMeta* pMeta, tb_uid_t suid, const uint8_t* pKey, int32_t keyLen, SArray** pList); -// int32_t metaPutTbGroupToCache(SMeta* pMeta, uint64_t suid, const void* pKey, int32_t keyLen, void* pPayload, -// int32_t payloadLen); - // tq typedef struct SMetaTableInfo { int64_t suid; @@ -202,29 +145,10 @@ typedef struct { // int32_t destroySnapContext(SSnapContext *ctx); /*-------------------------------------------------new api format---------------------------------------------------*/ - -// typedef int32_t (*__store_reader_(STsdbReader *pReader, const void *pTableList, int32_t num); -// typedef void (*tsdbReaderSetId(STsdbReader *pReader, const char *idstr); -// typedef void (*tsdbReaderClose(STsdbReader *pReader); -// typedef int32_t (*tsdbNextDataBlock(STsdbReader *pReader, bool *hasNext); -// typedef int32_t (*tsdbRetrieveDatablockSMA(STsdbReader *pReader, SSDataBlock *pDataBlock, bool *allHave); -// typedef void (*tsdbReleaseDataBlock(STsdbReader *pReader); -// typedef SSDataBlock * (*tsdbRetrieveDataBlock(STsdbReader *pTsdbReadHandle, SArray *pColumnIdList); -// typedef int32_t (*tsdbReaderReset(STsdbReader *pReader, SQueryTableDataCond *pCond); -// typedef int32_t (*tsdbGetFileBlocksDistInfo(STsdbReader *pReader, STableBlockDistInfo *pTableBlockInfo); -// typedef int64_t (*tsdbGetNumOfRowsInMemTable(STsdbReader *pHandle); -// typedef void * (*tsdbGetIdx(void *pMeta); -// typedef void * (*tsdbGetIvtIdx(void *pMeta); -// typedef uint64_t (*tsdbGetReaderMaxVersion(STsdbReader *pReader); -// typedef void (*tsdbReaderSetCloseFlag(STsdbReader *pReader); -// typedef int64_t (*tsdbGetLastTimestamp(void* pVnode, void* pTableList, int32_t numOfTables, const char* pIdStr); - -typedef int32_t (*__store_reader_open_fn_t)(void *pVnode, SQueryTableDataCond *pCond, void *pTableList, - int32_t numOfTables, SSDataBlock *pResBlock, void **ppReader, - const char *idstr, bool countOnly, SHashObj **pIgnoreTables); - typedef struct TsdReader { - __store_reader_open_fn_t tsdReaderOpen; + int32_t (*tsdReaderOpen)(void* pVnode, SQueryTableDataCond* pCond, void* pTableList, int32_t numOfTables, + SSDataBlock* pResBlock, void** ppReader, const char* idstr, bool countOnly, + SHashObj** pIgnoreTables); void (*tsdReaderClose)(); void (*tsdSetReaderTaskId)(void *pReader, const char *pId); int32_t (*tsdSetQueryTableList)(); @@ -241,22 +165,13 @@ typedef struct TsdReader { void (*tsdReaderNotifyClosing)(); } TsdReader; - -/** - * int32_t tsdbReuseCacherowsReader(void* pReader, void* pTableIdList, int32_t numOfTables); -int32_t tsdbCacherowsReaderOpen(void *pVnode, int32_t type, void *pTableIdList, int32_t numOfTables, int32_t numOfCols, - SArray *pCidList, 
int32_t *pSlotIds, uint64_t suid, void **pReader, const char *idstr); -int32_t tsdbRetrieveCacheRows(void *pReader, SSDataBlock *pResBlock, const int32_t *slotIds, const int32_t *dstSlotIds, - SArray *pTableUids); -void *tsdbCacherowsReaderClose(void *pReader); - */ typedef struct SStoreCacheReader { - int32_t (*openReader)(void *pVnode, int32_t type, void *pTableIdList, int32_t numOfTables, int32_t numOfCols, - SArray *pCidList, int32_t *pSlotIds, uint64_t suid, void **pReader, const char *idstr); - void *(*closeReader)(void *pReader); - int32_t (*retrieveRows)(void *pReader, SSDataBlock *pResBlock, const int32_t *slotIds, const int32_t *dstSlotIds, - SArray *pTableUidList); - int32_t (*reuseReader)(void *pReader, void *pTableIdList, int32_t numOfTables); + int32_t (*openReader)(void *pVnode, int32_t type, void *pTableIdList, int32_t numOfTables, int32_t numOfCols, + SArray *pCidList, int32_t *pSlotIds, uint64_t suid, void **pReader, const char *idstr); + void *(*closeReader)(void *pReader); + int32_t (*retrieveRows)(void *pReader, SSDataBlock *pResBlock, const int32_t *slotIds, const int32_t *dstSlotIds, + SArray *pTableUidList); + int32_t (*reuseReader)(void *pReader, void *pTableIdList, int32_t numOfTables); } SStoreCacheReader; /*------------------------------------------------------------------------------------------------------------------*/ @@ -309,16 +224,10 @@ typedef struct SStoreTqReader { } SStoreTqReader; typedef struct SStoreSnapshotFn { - /* - int32_t getTableInfoFromSnapshot(SSnapContext *ctx, void **pBuf, int32_t *contLen, int16_t *type, int64_t *uid); - SMetaTableInfo getMetaTableInfoFromSnapshot(SSnapContext *ctx); - int32_t setForSnapShot(SSnapContext *ctx, int64_t uid); - int32_t destroySnapContext(SSnapContext *ctx); - */ - int32_t (*createSnapshot)(); - int32_t (*destroySnapshot)(); - SMetaTableInfo (*getMetaTableInfoFromSnapshot)(); - int32_t (*getTableInfoFromSnapshot)(); + int32_t (*createSnapshot)(SSnapContext *ctx, int64_t uid); + int32_t (*destroySnapshot)(SSnapContext *ctx); + SMetaTableInfo (*getMetaTableInfoFromSnapshot)(SSnapContext* ctx); + int32_t (*getTableInfoFromSnapshot)(SSnapContext* ctx, void** pBuf, int32_t* contLen, int16_t* type, int64_t* uid); } SStoreSnapshotFn; /** @@ -327,7 +236,6 @@ void metaReaderReleaseLock(SMetaReader *pReader); void metaReaderClear(SMetaReader *pReader); int32_t metaReaderGetTableEntryByUid(SMetaReader *pReader, tb_uid_t uid); int32_t metaReaderGetTableEntryByUidCache(SMetaReader *pReader, tb_uid_t uid); -int metaGetTableEntryByName(SMetaReader *pReader, const char *name); int32_t metaGetTableTags(SMeta *pMeta, uint64_t suid, SArray *uidList); const void *metaGetTableTagVal(void *tag, int16_t type, STagVal *tagVal); int metaGetTableNameByUid(void *meta, uint64_t uid, char *tbName); @@ -340,7 +248,6 @@ int32_t metaGetCachedTableUidList(SMeta *pMeta, tb_uid_t suid, const uint8_t *k bool *acquired); int32_t metaUidFilterCachePut(SMeta *pMeta, uint64_t suid, const void *pKey, int32_t keyLen, void *pPayload, int32_t payloadLen, double selectivityRatio); -int32_t metaUidCacheClear(SMeta *pMeta, uint64_t suid); tb_uid_t metaGetTableEntryUidByName(SMeta *pMeta, const char *name); int32_t metaGetCachedTbGroup(SMeta* pMeta, tb_uid_t suid, const uint8_t* pKey, int32_t keyLen, SArray** pList); int32_t metaPutTbGroupToCache(SMeta* pMeta, uint64_t suid, const void* pKey, int32_t keyLen, void* pPayload, @@ -350,29 +257,28 @@ int32_t metaPutTbGroupToCache(SMeta* pMeta, uint64_t suid, const void* pKey, in typedef struct SStoreMeta { 
SMTbCursor *(*openTableMetaCursor)(void *pVnode); // metaOpenTbCursor void (*closeTableMetaCursor)(SMTbCursor *pTbCur); // metaCloseTbCursor - int32_t (*cursorNext)(); // metaTbCursorNext - int32_t (*cursorPrev)(); // metaTbCursorPrev + int32_t (*cursorNext)(SMTbCursor *pTbCur, ETableType jumpTableType); // metaTbCursorNext + int32_t (*cursorPrev)(SMTbCursor *pTbCur, ETableType jumpTableType); // metaTbCursorPrev - int32_t (*getTableTags)(void *pVnode, uint64_t suid, SArray *uidList); - int32_t (*getTableTagsByUid)(void *pVnode, int64_t suid, SArray *uidList); + int32_t (*getTableTags)(void *pVnode, uint64_t suid, SArray *uidList); + int32_t (*getTableTagsByUid)(void *pVnode, int64_t suid, SArray *uidList); const void *(*extractTagVal)(const void *tag, int16_t type, STagVal *tagVal); // todo remove it - int32_t (*getTableUidByName)(void *pVnode, char *tbName, uint64_t *uid); - int32_t (*getTableTypeByName)(void *pVnode, char *tbName, ETableType *tbType); - int32_t (*getTableNameByUid)(void *pVnode, uint64_t uid, char *tbName); - bool (*isTableExisted)(void *pVnode, tb_uid_t uid); + int32_t (*getTableUidByName)(void *pVnode, char *tbName, uint64_t *uid); + int32_t (*getTableTypeByName)(void *pVnode, char *tbName, ETableType *tbType); + int32_t (*getTableNameByUid)(void *pVnode, uint64_t uid, char *tbName); + bool (*isTableExisted)(void *pVnode, tb_uid_t uid); /** - * int32_t metaUidFilterCachePut(SMeta *pMeta, uint64_t suid, const void *pKey, int32_t keyLen, void *pPayload, - int32_t payloadLen, double selectivityRatio); -int32_t metaUidCacheClear(SMeta *pMeta, uint64_t suid); -tb_uid_t metaGetTableEntryUidByName(SMeta *pMeta, const char *name); int32_t metaGetCachedTbGroup(SMeta* pMeta, tb_uid_t suid, const uint8_t* pKey, int32_t keyLen, SArray** pList); int32_t metaPutTbGroupToCache(SMeta* pMeta, uint64_t suid, const void* pKey, int32_t keyLen, void* pPayload, int32_t payloadLen); */ - void (*getCachedTableList)(); - void (*putTableListIntoCache)(); + int32_t (*getCachedTableList)(void* pVnode, tb_uid_t suid, const uint8_t* pKey, int32_t keyLen, SArray* pList1, + bool* acquireRes); + int32_t (*putCachedTableList)(void* pVnode, uint64_t suid, const void* pKey, int32_t keyLen, void* pPayload, + int32_t payloadLen, double selectivityRatio); +// int32_t (*clearCachedTableList)(void* pVnode, uint64_t suid); /** * diff --git a/source/dnode/vnode/inc/vnode.h b/source/dnode/vnode/inc/vnode.h index 72c8dce52f..af92291456 100644 --- a/source/dnode/vnode/inc/vnode.h +++ b/source/dnode/vnode/inc/vnode.h @@ -100,18 +100,11 @@ void vnodeApplyWriteMsg(SQueueInfo *pInfo, STaosQall *qall, int32_t numOfMsgs void vnodeProposeCommitOnNeed(SVnode *pVnode, bool atExit); // meta -typedef struct SMeta SMeta; // todo: remove -typedef struct SMetaReader SMetaReader; -typedef struct SMetaEntry SMetaEntry; - -#define META_READER_NOLOCK 0x1 - void _metaReaderInit(SMetaReader *pReader, void *pVnode, int32_t flags, SStoreMeta* pAPI); void metaReaderReleaseLock(SMetaReader *pReader); void metaReaderClear(SMetaReader *pReader); int32_t metaReaderGetTableEntryByUid(SMetaReader *pReader, tb_uid_t uid); int32_t metaReaderGetTableEntryByUidCache(SMetaReader *pReader, tb_uid_t uid); -int metaGetTableEntryByName(SMetaReader *pReader, const char *name); int32_t metaGetTableTags(void *pVnode, uint64_t suid, SArray *uidList); int32_t metaGetTableTagsByUids(void* pVnode, int64_t suid, SArray *uidList); int32_t metaReadNext(SMetaReader *pReader); @@ -122,11 +115,10 @@ int metaGetTableSzNameByUid(void *meta, uint64_t uid, char 
*tbName); int metaGetTableUidByName(void *pVnode, char *tbName, uint64_t *uid); int metaGetTableTypeByName(void *meta, char *tbName, ETableType *tbType); bool metaIsTableExist(void* pVnode, tb_uid_t uid); -int32_t metaGetCachedTableUidList(SMeta *pMeta, tb_uid_t suid, const uint8_t *key, int32_t keyLen, SArray *pList, +int32_t metaGetCachedTableUidList(void *pVnode, tb_uid_t suid, const uint8_t *key, int32_t keyLen, SArray *pList, bool *acquired); -int32_t metaUidFilterCachePut(SMeta *pMeta, uint64_t suid, const void *pKey, int32_t keyLen, void *pPayload, +int32_t metaUidFilterCachePut(void *pVnode, uint64_t suid, const void *pKey, int32_t keyLen, void *pPayload, int32_t payloadLen, double selectivityRatio); -int32_t metaUidCacheClear(SMeta *pMeta, uint64_t suid); tb_uid_t metaGetTableEntryUidByName(SMeta *pMeta, const char *name); int32_t metaTbGroupCacheClear(SMeta *pMeta, uint64_t suid); int32_t metaGetCachedTbGroup(SMeta *pMeta, tb_uid_t suid, const uint8_t *pKey, int32_t keyLen, SArray **pList); @@ -135,25 +127,9 @@ int32_t metaPutTbGroupToCache(SMeta *pMeta, uint64_t suid, const void *pKey, in int64_t metaGetTbNum(SMeta *pMeta); int64_t metaGetNtbNum(SMeta *pMeta); -//typedef struct { -// int64_t uid; -// int64_t ctbNum; -//} SMetaStbStats; + int32_t metaGetStbStats(void *pVnode, int64_t uid, int64_t *numOfTables); -#if 1 // refact APIs below (TODO) -typedef SVCreateTbReq STbCfg; -typedef SVCreateTSmaReq SSmaCfg; - -typedef struct SMTbCursor SMTbCursor; - -SMTbCursor *metaOpenTbCursor(void *pVnode); -void metaCloseTbCursor(SMTbCursor *pTbCur); -int32_t metaTbCursorNext(SMTbCursor *pTbCur, ETableType jumpTableType); -int32_t metaTbCursorPrev(SMTbCursor *pTbCur, ETableType jumpTableType); - -#endif - // tsdb typedef struct STsdbReader STsdbReader; @@ -168,8 +144,8 @@ typedef struct STsdbReader STsdbReader; #define CACHESCAN_RETRIEVE_LAST_ROW 0x4 #define CACHESCAN_RETRIEVE_LAST 0x8 -int32_t tsdbReaderOpen(SVnode *pVnode, SQueryTableDataCond *pCond, void *pTableList, int32_t numOfTables, - SSDataBlock *pResBlock, STsdbReader **ppReader, const char *idstr, bool countOnly, +int32_t tsdbReaderOpen(void *pVnode, SQueryTableDataCond *pCond, void *pTableList, int32_t numOfTables, + SSDataBlock *pResBlock, void **ppReader, const char *idstr, bool countOnly, SHashObj **pIgnoreTables); int32_t tsdbSetTableList(STsdbReader *pReader, const void *pTableList, int32_t num); void tsdbReaderSetId(STsdbReader *pReader, const char *idstr); @@ -201,32 +177,11 @@ size_t tsdbCacheGetUsage(SVnode *pVnode); int32_t tsdbCacheGetElems(SVnode *pVnode); //// tq -//typedef struct SMetaTableInfo { -// int64_t suid; -// int64_t uid; -// SSchemaWrapper *schema; -// char tbName[TSDB_TABLE_NAME_LEN]; -//} SMetaTableInfo; - typedef struct SIdInfo { int64_t version; int32_t index; } SIdInfo; -//typedef struct SSnapContext { -// SMeta *pMeta; -// int64_t snapVersion; -// TBC *pCur; -// int64_t suid; -// int8_t subType; -// SHashObj *idVersion; -// SHashObj *suidInfo; -// SArray *idList; -// int32_t index; -// bool withMeta; -// bool queryMeta; // true-get meta, false-get data -//} SSnapContext; - typedef struct STqReader { SPackedData msg; SSubmitReq2 submit; @@ -345,59 +300,6 @@ struct SVnodeCfg { #define TABLE_ROLLUP_ON ((int8_t)0x1) #define TABLE_IS_ROLLUP(FLG) (((FLG) & (TABLE_ROLLUP_ON)) != 0) #define TABLE_SET_ROLLUP(FLG) ((FLG) |= TABLE_ROLLUP_ON) -//struct SMetaEntry { -// int64_t version; -// int8_t type; -// int8_t flags; // TODO: need refactor? 
-// tb_uid_t uid; -// char *name; -// union { -// struct { -// SSchemaWrapper schemaRow; -// SSchemaWrapper schemaTag; -// SRSmaParam rsmaParam; -// } stbEntry; -// struct { -// int64_t ctime; -// int32_t ttlDays; -// int32_t commentLen; -// char *comment; -// tb_uid_t suid; -// uint8_t *pTags; -// } ctbEntry; -// struct { -// int64_t ctime; -// int32_t ttlDays; -// int32_t commentLen; -// char *comment; -// int32_t ncid; // next column id -// SSchemaWrapper schemaRow; -// } ntbEntry; -// struct { -// STSma *tsma; -// } smaEntry; -// }; -// -// uint8_t *pBuf; -//}; - -//struct SMetaReader { -// int32_t flags; -// SMeta *pMeta; -// SDecoder coder; -// SMetaEntry me; -// void *pBuf; -// int32_t szBuf; -//}; - -//struct SMTbCursor { -// TBC *pDbc; -// void *pKey; -// void *pVal; -// int32_t kLen; -// int32_t vLen; -// SMetaReader mr; -//}; #ifdef __cplusplus } diff --git a/source/dnode/vnode/src/inc/vnodeInt.h b/source/dnode/vnode/src/inc/vnodeInt.h index 3f79fe5fd1..7ae05b9554 100644 --- a/source/dnode/vnode/src/inc/vnodeInt.h +++ b/source/dnode/vnode/src/inc/vnodeInt.h @@ -103,6 +103,17 @@ struct SQueryNode { _query_reseek_func_t reseek; }; +#if 1 // refact APIs below (TODO) +typedef SVCreateTbReq STbCfg; +typedef SVCreateTSmaReq SSmaCfg; + +SMTbCursor *metaOpenTbCursor(void *pVnode); +void metaCloseTbCursor(SMTbCursor *pTbCur); +int32_t metaTbCursorNext(SMTbCursor *pTbCur, ETableType jumpTableType); +int32_t metaTbCursorPrev(SMTbCursor *pTbCur, ETableType jumpTableType); + +#endif + void* vnodeBufPoolMalloc(SVBufPool* pPool, int size); void* vnodeBufPoolMallocAligned(SVBufPool* pPool, int size); void vnodeBufPoolFree(SVBufPool* pPool, void* p); @@ -143,6 +154,8 @@ int32_t metaGetTbTSchemaEx(SMeta* pMeta, tb_uid_t suid, tb_uid_t uid, in int metaGetTableEntryByName(SMetaReader* pReader, const char* name); int metaAlterCache(SMeta* pMeta, int32_t nPage); +int32_t metaUidCacheClear(SMeta* pMeta, uint64_t suid); + int metaAddIndexToSTable(SMeta* pMeta, int64_t version, SVCreateStbReq* pReq); int metaDropIndexFromSTable(SMeta* pMeta, int64_t version, SDropIndexReq* pReq); @@ -477,6 +490,7 @@ struct SCompactInfo { void initStorageAPI(SStorageAPI* pAPI); + #ifdef __cplusplus } #endif diff --git a/source/dnode/vnode/src/meta/metaCache.c b/source/dnode/vnode/src/meta/metaCache.c index 436ca1abd3..054b9f6f24 100644 --- a/source/dnode/vnode/src/meta/metaCache.c +++ b/source/dnode/vnode/src/meta/metaCache.c @@ -499,8 +499,9 @@ static void initCacheKey(uint64_t* buf, const SHashObj* pHashMap, uint64_t suid, ASSERT(keyLen == sizeof(uint64_t) * 2); } -int32_t metaGetCachedTableUidList(SMeta* pMeta, tb_uid_t suid, const uint8_t* pKey, int32_t keyLen, SArray* pList1, +int32_t metaGetCachedTableUidList(void* pVnode, tb_uid_t suid, const uint8_t* pKey, int32_t keyLen, SArray* pList1, bool* acquireRes) { + SMeta* pMeta = ((SVnode*)pVnode)->pMeta; int32_t vgId = TD_VID(pMeta->pVnode); // generate the composed key for LRU cache @@ -603,9 +604,10 @@ static int32_t addNewEntry(SHashObj* pTableEntry, const void* pKey, int32_t keyL } // check both the payload size and selectivity ratio -int32_t metaUidFilterCachePut(SMeta* pMeta, uint64_t suid, const void* pKey, int32_t keyLen, void* pPayload, +int32_t metaUidFilterCachePut(void* pVnode, uint64_t suid, const void* pKey, int32_t keyLen, void* pPayload, int32_t payloadLen, double selectivityRatio) { int32_t code = 0; + SMeta* pMeta = ((SVnode*)pVnode)->pMeta; int32_t vgId = TD_VID(pMeta->pVnode); if (selectivityRatio > tsSelectivityRatio) { diff --git 
a/source/dnode/vnode/src/meta/metaSma.c b/source/dnode/vnode/src/meta/metaSma.c index 3fb491848a..a49848f442 100644 --- a/source/dnode/vnode/src/meta/metaSma.c +++ b/source/dnode/vnode/src/meta/metaSma.c @@ -13,8 +13,10 @@ * along with this program. If not, see . */ +#include "vnodeInt.h" #include "meta.h" + static int metaHandleSmaEntry(SMeta *pMeta, const SMetaEntry *pME); static int metaSaveSmaToDB(SMeta *pMeta, const SMetaEntry *pME); diff --git a/source/dnode/vnode/src/tsdb/tsdbRead.c b/source/dnode/vnode/src/tsdb/tsdbRead.c index 268d17bacb..362934ec84 100644 --- a/source/dnode/vnode/src/tsdb/tsdbRead.c +++ b/source/dnode/vnode/src/tsdb/tsdbRead.c @@ -754,7 +754,7 @@ static int32_t initResBlockInfo(SResultBlockInfo* pResBlockInfo, int64_t capacit return terrno; } -static int32_t tsdbReaderCreate(SVnode* pVnode, SQueryTableDataCond* pCond, STsdbReader** ppReader, int32_t capacity, +static int32_t tsdbReaderCreate(SVnode* pVnode, SQueryTableDataCond* pCond, void** ppReader, int32_t capacity, SSDataBlock* pResBlock, const char* idstr) { int32_t code = 0; int8_t level = 0; @@ -4396,11 +4396,12 @@ static void freeSchemaFunc(void* param) { } // ====================================== EXPOSED APIs ====================================== -int32_t tsdbReaderOpen(SVnode* pVnode, SQueryTableDataCond* pCond, void* pTableList, int32_t numOfTables, - SSDataBlock* pResBlock, STsdbReader** ppReader, const char* idstr, bool countOnly, SHashObj** pIgnoreTables) { +int32_t tsdbReaderOpen(void* pVnode, SQueryTableDataCond* pCond, void* pTableList, int32_t numOfTables, + SSDataBlock* pResBlock, void** ppReader, const char* idstr, bool countOnly, SHashObj** pIgnoreTables) { STimeWindow window = pCond->twindows; + SVnodeCfg* pConf = &(((SVnode*)pVnode)->config); - int32_t capacity = pVnode->config.tsdbCfg.maxRows; + int32_t capacity = pConf->tsdbCfg.maxRows; if (pResBlock != NULL) { blockDataEnsureCapacity(pResBlock, capacity); } @@ -4431,7 +4432,7 @@ int32_t tsdbReaderOpen(SVnode* pVnode, SQueryTableDataCond* pCond, void* pTableL } // here we only need one more row, so the capacity is set to be ONE. 
- code = tsdbReaderCreate(pVnode, pCond, &pReader->innerReader[0], 1, pResBlock, idstr); + code = tsdbReaderCreate(pVnode, pCond, (void**)&((STsdbReader*)pReader)->innerReader[0], 1, pResBlock, idstr); if (code != TSDB_CODE_SUCCESS) { goto _err; } @@ -4445,7 +4446,7 @@ int32_t tsdbReaderOpen(SVnode* pVnode, SQueryTableDataCond* pCond, void* pTableL } pCond->order = order; - code = tsdbReaderCreate(pVnode, pCond, &pReader->innerReader[1], 1, pResBlock, idstr); + code = tsdbReaderCreate(pVnode, pCond, (void**)&((STsdbReader*)pReader)->innerReader[1], 1, pResBlock, idstr); if (code != TSDB_CODE_SUCCESS) { goto _err; } @@ -4495,7 +4496,7 @@ int32_t tsdbReaderOpen(SVnode* pVnode, SQueryTableDataCond* pCond, void* pTableL goto _err; } - pReader->status.pLDataIter = taosMemoryCalloc(pVnode->config.sttTrigger, sizeof(SLDataIter)); + pReader->status.pLDataIter = taosMemoryCalloc(pConf->sttTrigger, sizeof(SLDataIter)); if (pReader->status.pLDataIter == NULL) { terrno = TSDB_CODE_OUT_OF_MEMORY; goto _err; @@ -5546,7 +5547,7 @@ int64_t tsdbGetLastTimestamp(SVnode* pVnode, void* pTableList, int32_t numOfTabl int64_t key = INT64_MIN; for(int32_t i = 0; i < numOfTables; ++i) { - int32_t code = tsdbReaderOpen(pVnode, &cond, &pTableKeyInfo[i], 1, pBlock, &pReader, pIdStr, false, NULL); + int32_t code = tsdbReaderOpen(pVnode, &cond, &pTableKeyInfo[i], 1, pBlock, (void**)&pReader, pIdStr, false, NULL); if (code != TSDB_CODE_SUCCESS) { return code; } diff --git a/source/dnode/vnode/src/vnd/vnodeInitApi.c b/source/dnode/vnode/src/vnd/vnodeInitApi.c index 1cc985f7df..7ca93df39e 100644 --- a/source/dnode/vnode/src/vnd/vnodeInitApi.c +++ b/source/dnode/vnode/src/vnd/vnodeInitApi.c @@ -41,7 +41,8 @@ void initStorageAPI(SStorageAPI* pAPI) { } void initTsdbReaderAPI(TsdReader* pReader) { - pReader->tsdReaderOpen = (__store_reader_open_fn_t)tsdbReaderOpen; + pReader->tsdReaderOpen = (int32_t(*)(void*, SQueryTableDataCond*, void*, int32_t, SSDataBlock*, void**, const char*, + bool, SHashObj**))tsdbReaderOpen; pReader->tsdReaderClose = tsdbReaderClose; pReader->tsdNextDataBlock = tsdbNextDataBlock; @@ -87,6 +88,9 @@ void initMetadataAPI(SStoreMeta* pMeta) { pMeta->getTableSchema = tsdbGetTableSchema; // todo refactor pMeta->storeGetTableList = vnodeGetTableList; + + pMeta->getCachedTableList = metaGetCachedTableUidList; + pMeta->putCachedTableList = metaUidFilterCachePut; } void initTqAPI(SStoreTqReader* pTq) { diff --git a/source/libs/executor/src/executil.c b/source/libs/executor/src/executil.c index 562a6daf8a..f51527b944 100644 --- a/source/libs/executor/src/executil.c +++ b/source/libs/executor/src/executil.c @@ -494,8 +494,8 @@ int32_t getColInfoResultForGroupby(void* pVnode, SNodeList* group, STableListInf listNode->pNodeList = group; genTbGroupDigest((SNode*)listNode, digest, &context); nodesFree(listNode); - - pAPI->metaFn.getCachedTableList(pVnode, pTableListInfo->idInfo.suid, context.digest, tListLen(context.digest), &tableList); + ASSERT(0); +// pAPI->metaFn.getCachedTableList(pVnode, pTableListInfo->idInfo.suid, context.digest, tListLen(context.digest), &tableList, true); if (tableList) { taosArrayDestroy(pTableListInfo->pTableList); pTableListInfo->pTableList = tableList; @@ -630,9 +630,10 @@ int32_t getColInfoResultForGroupby(void* pVnode, SNodeList* group, STableListInf info->groupId = calcGroupId(keyBuf, len); } + ASSERT(0); if (tsTagFilterCache) { tableList = taosArrayDup(pTableListInfo->pTableList, NULL); - pAPI->metaFn.putTableListIntoCache(pVnode, pTableListInfo->idInfo.suid, context.digest, 
tListLen(context.digest), tableList, taosArrayGetSize(tableList) * sizeof(STableKeyInfo)); +// pAPI->metaFn.putCachedTableList(pVnode, pTableListInfo->idInfo.suid, context.digest, tListLen(context.digest), tableList, taosArrayGetSize(tableList) * sizeof(STableKeyInfo)); } // int64_t st2 = taosGetTimestampUs(); From 8f61e2d6efc80e5ff0a06c06916b8beb5e93a565 Mon Sep 17 00:00:00 2001 From: Haojun Liao Date: Mon, 29 May 2023 10:50:50 +0800 Subject: [PATCH 44/49] refactor: do some internal refactor. --- include/libs/executor/storageapi.h | 65 ++++++++++------------- source/dnode/vnode/inc/vnode.h | 8 ++- source/dnode/vnode/src/inc/vnodeInt.h | 1 + source/dnode/vnode/src/meta/metaCache.c | 6 ++- source/dnode/vnode/src/meta/metaQuery.c | 2 - source/dnode/vnode/src/vnd/vnodeInitApi.c | 5 +- source/libs/executor/src/executil.c | 7 ++- source/libs/executor/src/scanoperator.c | 2 +- 8 files changed, 45 insertions(+), 51 deletions(-) diff --git a/include/libs/executor/storageapi.h b/include/libs/executor/storageapi.h index b89293f8f0..4cfbf2c2fa 100644 --- a/include/libs/executor/storageapi.h +++ b/include/libs/executor/storageapi.h @@ -144,25 +144,26 @@ typedef struct { // int32_t setForSnapShot(SSnapContext *ctx, int64_t uid); // int32_t destroySnapContext(SSnapContext *ctx); +// clang-format off /*-------------------------------------------------new api format---------------------------------------------------*/ typedef struct TsdReader { - int32_t (*tsdReaderOpen)(void* pVnode, SQueryTableDataCond* pCond, void* pTableList, int32_t numOfTables, + int32_t (*tsdReaderOpen)(void* pVnode, SQueryTableDataCond* pCond, void* pTableList, int32_t numOfTables, SSDataBlock* pResBlock, void** ppReader, const char* idstr, bool countOnly, SHashObj** pIgnoreTables); - void (*tsdReaderClose)(); - void (*tsdSetReaderTaskId)(void *pReader, const char *pId); - int32_t (*tsdSetQueryTableList)(); - int32_t (*tsdNextDataBlock)(); + void (*tsdReaderClose)(); + void (*tsdSetReaderTaskId)(void *pReader, const char *pId); + int32_t (*tsdSetQueryTableList)(); + int32_t (*tsdNextDataBlock)(); - int32_t (*tsdReaderRetrieveBlockSMAInfo)(); + int32_t (*tsdReaderRetrieveBlockSMAInfo)(); SSDataBlock *(*tsdReaderRetrieveDataBlock)(); - void (*tsdReaderReleaseDataBlock)(); + void (*tsdReaderReleaseDataBlock)(); - int32_t (*tsdReaderResetStatus)(); - int32_t (*tsdReaderGetDataBlockDistInfo)(); - int64_t (*tsdReaderGetNumOfInMemRows)(); - void (*tsdReaderNotifyClosing)(); + int32_t (*tsdReaderResetStatus)(); + int32_t (*tsdReaderGetDataBlockDistInfo)(); + int64_t (*tsdReaderGetNumOfInMemRows)(); + void (*tsdReaderNotifyClosing)(); } TsdReader; typedef struct SStoreCacheReader { @@ -174,6 +175,8 @@ typedef struct SStoreCacheReader { int32_t (*reuseReader)(void *pReader, void *pTableIdList, int32_t numOfTables); } SStoreCacheReader; +// clang-format on + /*------------------------------------------------------------------------------------------------------------------*/ /* void tqReaderSetColIdList(STqReader *pReader, SArray *pColIdList); @@ -240,7 +243,6 @@ int32_t metaGetTableTags(SMeta *pMeta, uint64_t suid, SArray *uidList); const void *metaGetTableTagVal(void *tag, int16_t type, STagVal *tagVal); int metaGetTableNameByUid(void *meta, uint64_t uid, char *tbName); -int metaGetTableSzNameByUid(void *meta, uint64_t uid, char *tbName); int metaGetTableUidByName(void *meta, char *tbName, uint64_t *uid); int metaGetTableTypeByName(void *meta, char *tbName, ETableType *tbType); bool metaIsTableExist(SMeta *pMeta, tb_uid_t uid); @@ -250,13 
+252,12 @@ int32_t metaUidFilterCachePut(SMeta *pMeta, uint64_t suid, const void *pKey, in int32_t payloadLen, double selectivityRatio); tb_uid_t metaGetTableEntryUidByName(SMeta *pMeta, const char *name); int32_t metaGetCachedTbGroup(SMeta* pMeta, tb_uid_t suid, const uint8_t* pKey, int32_t keyLen, SArray** pList); -int32_t metaPutTbGroupToCache(SMeta* pMeta, uint64_t suid, const void* pKey, int32_t keyLen, void* pPayload, - int32_t payloadLen); +int32_t metaPutTbGroupToCache(SMeta* pMeta, uint64_t suid, const void* pKey, int32_t keyLen, void* pPayload, int32_t payloadLen); */ typedef struct SStoreMeta { - SMTbCursor *(*openTableMetaCursor)(void *pVnode); // metaOpenTbCursor - void (*closeTableMetaCursor)(SMTbCursor *pTbCur); // metaCloseTbCursor + SMTbCursor *(*openTableMetaCursor)(void *pVnode); // metaOpenTbCursor + void (*closeTableMetaCursor)(SMTbCursor *pTbCur); // metaCloseTbCursor int32_t (*cursorNext)(SMTbCursor *pTbCur, ETableType jumpTableType); // metaTbCursorNext int32_t (*cursorPrev)(SMTbCursor *pTbCur, ETableType jumpTableType); // metaTbCursorPrev @@ -269,28 +270,20 @@ typedef struct SStoreMeta { int32_t (*getTableNameByUid)(void *pVnode, uint64_t uid, char *tbName); bool (*isTableExisted)(void *pVnode, tb_uid_t uid); - /** -int32_t metaGetCachedTbGroup(SMeta* pMeta, tb_uid_t suid, const uint8_t* pKey, int32_t keyLen, SArray** pList); -int32_t metaPutTbGroupToCache(SMeta* pMeta, uint64_t suid, const void* pKey, int32_t keyLen, void* pPayload, - int32_t payloadLen); - */ - int32_t (*getCachedTableList)(void* pVnode, tb_uid_t suid, const uint8_t* pKey, int32_t keyLen, SArray* pList1, - bool* acquireRes); - int32_t (*putCachedTableList)(void* pVnode, uint64_t suid, const void* pKey, int32_t keyLen, void* pPayload, - int32_t payloadLen, double selectivityRatio); -// int32_t (*clearCachedTableList)(void* pVnode, uint64_t suid); + int32_t (*metaGetCachedTbGroup)(void* pVnode, tb_uid_t suid, const uint8_t* pKey, int32_t keyLen, SArray** pList); + int32_t (*metaPutTbGroupToCache)(void* pVnode, uint64_t suid, const void* pKey, int32_t keyLen, void* pPayload, int32_t payloadLen); - /** - * - */ - void *(*storeGetIndexInfo)(); - void *(*getInvertIndex)(void* pVnode); - int32_t (*getChildTableList)(void *pVnode, int64_t suid, SArray *list); // support filter and non-filter cases. [vnodeGetCtbIdList & vnodeGetCtbIdListByFilter] - int32_t (*storeGetTableList)(void* pVnode, int8_t type, SArray* pList); // vnodeGetStbIdList & vnodeGetAllTableList - void *storeGetVersionRange; - void *storeGetLastTimestamp; + int32_t (*getCachedTableList)(void* pVnode, tb_uid_t suid, const uint8_t* pKey, int32_t keyLen, SArray* pList1, bool* acquireRes); + int32_t (*putCachedTableList)(void* pVnode, uint64_t suid, const void* pKey, int32_t keyLen, void* pPayload, int32_t payloadLen, double selectivityRatio); - int32_t (*getTableSchema)(void *pVnode, int64_t uid, STSchema **pSchema, int64_t *suid); // tsdbGetTableSchema + void *(*storeGetIndexInfo)(); + void *(*getInvertIndex)(void* pVnode); + int32_t (*getChildTableList)(void *pVnode, int64_t suid, SArray *list); // support filter and non-filter cases. 
[vnodeGetCtbIdList & vnodeGetCtbIdListByFilter] + int32_t (*storeGetTableList)(void* pVnode, int8_t type, SArray* pList); // vnodeGetStbIdList & vnodeGetAllTableList + void *storeGetVersionRange; + void *storeGetLastTimestamp; + + int32_t (*getTableSchema)(void *pVnode, int64_t uid, STSchema **pSchema, int64_t *suid); // tsdbGetTableSchema // db name, vgId, numOfTables, numOfSTables int32_t (*getNumOfChildTables)(void* pVnode, int64_t uid, int64_t* numOfTables); // int32_t metaGetStbStats(SMeta *pMeta, int64_t uid, SMetaStbStats *pInfo); diff --git a/source/dnode/vnode/inc/vnode.h b/source/dnode/vnode/inc/vnode.h index af92291456..fbe2f4809c 100644 --- a/source/dnode/vnode/inc/vnode.h +++ b/source/dnode/vnode/inc/vnode.h @@ -120,15 +120,13 @@ int32_t metaGetCachedTableUidList(void *pVnode, tb_uid_t suid, const uint8_t *k int32_t metaUidFilterCachePut(void *pVnode, uint64_t suid, const void *pKey, int32_t keyLen, void *pPayload, int32_t payloadLen, double selectivityRatio); tb_uid_t metaGetTableEntryUidByName(SMeta *pMeta, const char *name); -int32_t metaTbGroupCacheClear(SMeta *pMeta, uint64_t suid); -int32_t metaGetCachedTbGroup(SMeta *pMeta, tb_uid_t suid, const uint8_t *pKey, int32_t keyLen, SArray **pList); -int32_t metaPutTbGroupToCache(SMeta *pMeta, uint64_t suid, const void *pKey, int32_t keyLen, void *pPayload, +int32_t metaGetCachedTbGroup(void *pVnode, tb_uid_t suid, const uint8_t *pKey, int32_t keyLen, SArray **pList); +int32_t metaPutTbGroupToCache(void* pVnode, uint64_t suid, const void *pKey, int32_t keyLen, void *pPayload, int32_t payloadLen); int64_t metaGetTbNum(SMeta *pMeta); -int64_t metaGetNtbNum(SMeta *pMeta); -int32_t metaGetStbStats(void *pVnode, int64_t uid, int64_t *numOfTables); +int32_t metaGetStbStats(void *pVnode, int64_t uid, int64_t *numOfTables); // tsdb typedef struct STsdbReader STsdbReader; diff --git a/source/dnode/vnode/src/inc/vnodeInt.h b/source/dnode/vnode/src/inc/vnodeInt.h index 7ae05b9554..db285dc124 100644 --- a/source/dnode/vnode/src/inc/vnodeInt.h +++ b/source/dnode/vnode/src/inc/vnodeInt.h @@ -155,6 +155,7 @@ int metaGetTableEntryByName(SMetaReader* pReader, const char* name); int metaAlterCache(SMeta* pMeta, int32_t nPage); int32_t metaUidCacheClear(SMeta* pMeta, uint64_t suid); +int32_t metaTbGroupCacheClear(SMeta *pMeta, uint64_t suid); int metaAddIndexToSTable(SMeta* pMeta, int64_t version, SVCreateStbReq* pReq); int metaDropIndexFromSTable(SMeta* pMeta, int64_t version, SDropIndexReq* pReq); diff --git a/source/dnode/vnode/src/meta/metaCache.c b/source/dnode/vnode/src/meta/metaCache.c index 054b9f6f24..8749b3ac94 100644 --- a/source/dnode/vnode/src/meta/metaCache.c +++ b/source/dnode/vnode/src/meta/metaCache.c @@ -704,7 +704,8 @@ int32_t metaUidCacheClear(SMeta* pMeta, uint64_t suid) { return TSDB_CODE_SUCCESS; } -int32_t metaGetCachedTbGroup(SMeta* pMeta, tb_uid_t suid, const uint8_t* pKey, int32_t keyLen, SArray** pList) { +int32_t metaGetCachedTbGroup(void* pVnode, tb_uid_t suid, const uint8_t* pKey, int32_t keyLen, SArray** pList) { + SMeta* pMeta = ((SVnode*)pVnode)->pMeta; int32_t vgId = TD_VID(pMeta->pVnode); // generate the composed key for LRU cache @@ -788,9 +789,10 @@ static void freeTbGroupCachePayload(const void* key, size_t keyLen, void* value) } -int32_t metaPutTbGroupToCache(SMeta* pMeta, uint64_t suid, const void* pKey, int32_t keyLen, void* pPayload, +int32_t metaPutTbGroupToCache(void* pVnode, uint64_t suid, const void* pKey, int32_t keyLen, void* pPayload, int32_t payloadLen) { int32_t code = 0; + SMeta* pMeta = 
((SVnode*)pVnode)->pMeta; int32_t vgId = TD_VID(pMeta->pVnode); if (payloadLen > tsTagFilterResCacheSize) { diff --git a/source/dnode/vnode/src/meta/metaQuery.c b/source/dnode/vnode/src/meta/metaQuery.c index 26182baf7b..726ed6a4c8 100644 --- a/source/dnode/vnode/src/meta/metaQuery.c +++ b/source/dnode/vnode/src/meta/metaQuery.c @@ -700,8 +700,6 @@ int64_t metaGetTimeSeriesNum(SMeta *pMeta) { return pMeta->pVnode->config.vndStats.numOfTimeSeries + pMeta->pVnode->config.vndStats.numOfNTimeSeries; } -int64_t metaGetNtbNum(SMeta *pMeta) { return pMeta->pVnode->config.vndStats.numOfNTables; } - typedef struct { SMeta *pMeta; TBC *pCur; diff --git a/source/dnode/vnode/src/vnd/vnodeInitApi.c b/source/dnode/vnode/src/vnd/vnodeInitApi.c index 7ca93df39e..526b9b4e2d 100644 --- a/source/dnode/vnode/src/vnd/vnodeInitApi.c +++ b/source/dnode/vnode/src/vnd/vnodeInitApi.c @@ -31,10 +31,10 @@ static void initSnapshotFn(SStoreSnapshotFn* pSnapshot); void initStorageAPI(SStorageAPI* pAPI) { initTsdbReaderAPI(&pAPI->tsdReader); initMetadataAPI(&pAPI->metaFn); - initTqAPI(&pAPI->tqReaderFn); initStateStoreAPI(&pAPI->stateStore); initMetaReaderAPI(&pAPI->metaReaderFn); initMetaFilterAPI(&pAPI->metaFilter); + initTqAPI(&pAPI->tqReaderFn); initFunctionStateStore(&pAPI->functionStore); initCacheFn(&pAPI->cacheFn); initSnapshotFn(&pAPI->snapshotFn); @@ -91,6 +91,9 @@ void initMetadataAPI(SStoreMeta* pMeta) { pMeta->getCachedTableList = metaGetCachedTableUidList; pMeta->putCachedTableList = metaUidFilterCachePut; + + pMeta->metaGetCachedTbGroup = metaGetCachedTbGroup; + pMeta->metaPutTbGroupToCache = metaPutTbGroupToCache; } void initTqAPI(SStoreTqReader* pTq) { diff --git a/source/libs/executor/src/executil.c b/source/libs/executor/src/executil.c index f51527b944..53b100da22 100644 --- a/source/libs/executor/src/executil.c +++ b/source/libs/executor/src/executil.c @@ -494,8 +494,8 @@ int32_t getColInfoResultForGroupby(void* pVnode, SNodeList* group, STableListInf listNode->pNodeList = group; genTbGroupDigest((SNode*)listNode, digest, &context); nodesFree(listNode); - ASSERT(0); -// pAPI->metaFn.getCachedTableList(pVnode, pTableListInfo->idInfo.suid, context.digest, tListLen(context.digest), &tableList, true); + + pAPI->metaFn.metaGetCachedTbGroup(pVnode, pTableListInfo->idInfo.suid, context.digest, tListLen(context.digest), &tableList); if (tableList) { taosArrayDestroy(pTableListInfo->pTableList); pTableListInfo->pTableList = tableList; @@ -630,10 +630,9 @@ int32_t getColInfoResultForGroupby(void* pVnode, SNodeList* group, STableListInf info->groupId = calcGroupId(keyBuf, len); } - ASSERT(0); if (tsTagFilterCache) { tableList = taosArrayDup(pTableListInfo->pTableList, NULL); -// pAPI->metaFn.putCachedTableList(pVnode, pTableListInfo->idInfo.suid, context.digest, tListLen(context.digest), tableList, taosArrayGetSize(tableList) * sizeof(STableKeyInfo)); + pAPI->metaFn.metaPutTbGroupToCache(pVnode, pTableListInfo->idInfo.suid, context.digest, tListLen(context.digest), tableList, taosArrayGetSize(tableList) * sizeof(STableKeyInfo)); } // int64_t st2 = taosGetTimestampUs(); diff --git a/source/libs/executor/src/scanoperator.c b/source/libs/executor/src/scanoperator.c index 3984e9a792..2d2f377041 100644 --- a/source/libs/executor/src/scanoperator.c +++ b/source/libs/executor/src/scanoperator.c @@ -3446,7 +3446,7 @@ static void buildVnodeGroupedNtbTableCount(STableCountScanOperatorInfo* pInfo, S uint64_t groupId = calcGroupId(fullStbName, strlen(fullStbName)); pRes->info.id.groupId = groupId; - int64_t numOfTables = 
0;//metaGetNtbNum(pInfo->readHandle.vnode); + int64_t numOfTables = 0; pAPI->metaFn.getBasicInfo(pInfo->readHandle.vnode, NULL, NULL, NULL, &numOfTables); if (numOfTables != 0) { From e21a2f9cce3a21ed0ea803f27cc339c7262c91ba Mon Sep 17 00:00:00 2001 From: tangfangzhi Date: Mon, 29 May 2023 14:02:36 +0800 Subject: [PATCH 45/49] check empty --- source/dnode/mnode/impl/src/mndDnode.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/source/dnode/mnode/impl/src/mndDnode.c b/source/dnode/mnode/impl/src/mndDnode.c index 73dbb243a1..c4b0e00c76 100644 --- a/source/dnode/mnode/impl/src/mndDnode.c +++ b/source/dnode/mnode/impl/src/mndDnode.c @@ -995,11 +995,13 @@ static int32_t mndProcessDropDnodeReq(SRpcMsg *pReq) { goto _OVER; } - if (!isonline && !force) { - terrno = TSDB_CODE_DNODE_OFFLINE; - mError("dnode:%d, failed to drop since %s, vnodes:%d mnode:%d qnode:%d snode:%d", pDnode->id, terrstr(), - numOfVnodes, pMObj != NULL, pQObj != NULL, pSObj != NULL); - goto _OVER; + if (numOfVnodes > 0 || pMObj != NULL || pSObj != NULL || pQObj != NULL) { + if (!isonline && !force) { + terrno = TSDB_CODE_DNODE_OFFLINE; + mError("dnode:%d, failed to drop since %s, vnodes:%d mnode:%d qnode:%d snode:%d", pDnode->id, terrstr(), + numOfVnodes, pMObj != NULL, pQObj != NULL, pSObj != NULL); + goto _OVER; + } } code = mndDropDnode(pMnode, pReq, pDnode, pMObj, pQObj, pSObj, numOfVnodes, force, dropReq.unsafe); From 5e2d3939fef5b48fa50b9dd401a5c6a1989f48e5 Mon Sep 17 00:00:00 2001 From: wangjiaming0909 <604227650@qq.com> Date: Mon, 29 May 2023 11:33:08 +0800 Subject: [PATCH 46/49] fix: page buffer cache flush wrong bytes of data into disk --- source/util/src/tpagedbuf.c | 2 +- source/util/test/CMakeLists.txt | 10 ++++- source/util/test/pageBufferTest.cpp | 65 ++++++++++++++++++++++++++++- 3 files changed, 74 insertions(+), 3 deletions(-) diff --git a/source/util/src/tpagedbuf.c b/source/util/src/tpagedbuf.c index fa8b5d33b7..acbf0bb0d3 100644 --- a/source/util/src/tpagedbuf.c +++ b/source/util/src/tpagedbuf.c @@ -166,7 +166,7 @@ static char* doFlushBufPage(SDiskbasedBuf* pBuf, SPageInfo* pg) { char* t = NULL; if ((!HAS_DATA_IN_DISK(pg)) || pg->dirty) { void* payload = GET_PAYLOAD_DATA(pg); - t = doCompressData(payload, pBuf->pageSize, &size, pBuf); + t = doCompressData(payload, pBuf->pageSize + sizeof(SFilePage), &size, pBuf); if (size < 0) { uError("failed to compress data when flushing data to disk, %s", pBuf->id); terrno = TSDB_CODE_INVALID_PARA; diff --git a/source/util/test/CMakeLists.txt b/source/util/test/CMakeLists.txt index 2e307771b7..0bf06e6f44 100644 --- a/source/util/test/CMakeLists.txt +++ b/source/util/test/CMakeLists.txt @@ -75,4 +75,12 @@ target_link_libraries(rbtreeTest os util gtest_main) add_test( NAME rbtreeTest COMMAND rbtreeTest -) \ No newline at end of file +) + +# pageBufferTest +add_executable(pageBufferTest "pageBufferTest.cpp") +target_link_libraries(pageBufferTest os util gtest_main) +add_test( + NAME pageBufferTest + COMMAND pageBufferTest +) diff --git a/source/util/test/pageBufferTest.cpp b/source/util/test/pageBufferTest.cpp index 00ed804930..50d3656ccd 100644 --- a/source/util/test/pageBufferTest.cpp +++ b/source/util/test/pageBufferTest.cpp @@ -157,6 +157,68 @@ void recyclePageTest() { destroyDiskbasedBuf(pBuf); } + +int saveDataToPage(SFilePage* pPg, const char* data, uint32_t len) { + memcpy(pPg->data + pPg->num, data, len); + pPg->num += len; + setBufPageDirty(pPg, true); + return 0; +} + +bool checkBufVarData(SFilePage* pPg, const char* varData, 
uint32_t varLen) { + const char* start = pPg->data + sizeof(SFilePage); + for (uint32_t i = 0; i < (pPg->num - sizeof(SFilePage)) / varLen; ++i) { + if (0 != strncmp(start + 6 * i + 3, varData, varLen - 3)) { + using namespace std; + cout << "pos: " << sizeof(SFilePage) + 6 * i + 3 << " should be " << varData << " but is: " << start + 6 * i + 3 + << endl; + return false; + } + } + return true; +} + +// SPageInfo.pData: | sizeof(void*) 8 bytes | sizeof(SFilePage) 4 bytes| 4096 bytes | +// ^ +// | +// SFilePage: flush to disk from here +void testFlushAndReadBackBuffer() { + SDiskbasedBuf* pBuf = NULL; + uint32_t totalLen = 4096; + auto code = createDiskbasedBuf(&pBuf, totalLen, totalLen * 2, "1", TD_TMP_DIR_PATH); + int32_t pageId = -1; + auto* pPg = (SFilePage*)getNewBufPage(pBuf, &pageId); + ASSERT_TRUE(pPg != nullptr); + pPg->num = sizeof(SFilePage); + + // save data into page + uint32_t len = 6; // sizeof(SFilePage) + 6 * 682 = 4096 + // nullbitmap(1) + len(2) + AA\0(3) + char* rowData = (char*)taosMemoryCalloc(1, len); + *(uint16_t*)(rowData + 2) = (uint16_t)2; + rowData[3] = 'A'; + rowData[4] = 'A'; + + while (pPg->num + len <= getBufPageSize(pBuf)) { + saveDataToPage(pPg, rowData, len); + } + ASSERT_EQ(pPg->num, totalLen); + ASSERT_TRUE(checkBufVarData(pPg, rowData + 3, len)); + releaseBufPage(pBuf, pPg); + + // flush to disk + int32_t newPgId = -1; + pPg = (SFilePage*)getNewBufPage(pBuf, &newPgId); + releaseBufPage(pBuf, pPg); + pPg = (SFilePage*)getNewBufPage(pBuf, &newPgId); + releaseBufPage(pBuf, pPg); + + // reload it from disk + pPg = (SFilePage*)getBufPage(pBuf, pageId); + ASSERT_TRUE(checkBufVarData(pPg, rowData + 3, len)); + destroyDiskbasedBuf(pBuf); +} + } // namespace TEST(testCase, resultBufferTest) { @@ -164,6 +226,7 @@ TEST(testCase, resultBufferTest) { simpleTest(); writeDownTest(); recyclePageTest(); + testFlushAndReadBackBuffer(); } -#pragma GCC diagnostic pop \ No newline at end of file +#pragma GCC diagnostic pop From 7b8f55c4d6dc9a662e23210cce4556adbd65fa3f Mon Sep 17 00:00:00 2001 From: Benguang Zhao Date: Mon, 29 May 2023 16:10:02 +0800 Subject: [PATCH 47/49] docs: update alter database options for WAL_RETENTION_PERIOD etc. 
--- docs/zh/12-taos-sql/02-database.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/docs/zh/12-taos-sql/02-database.md b/docs/zh/12-taos-sql/02-database.md index a2a0914120..b329413aa8 100644 --- a/docs/zh/12-taos-sql/02-database.md +++ b/docs/zh/12-taos-sql/02-database.md @@ -121,6 +121,8 @@ alter_database_option: { | WAL_LEVEL value | WAL_FSYNC_PERIOD value | KEEP value + | WAL_RETENTION_PERIOD value + | WAL_RETENTION_SIZE value } ``` From 23983e506fae3c7ff7a2c168296aed3a01050d94 Mon Sep 17 00:00:00 2001 From: dmchen Date: Mon, 29 May 2023 16:12:26 +0800 Subject: [PATCH 48/49] fix test case --- source/dnode/mnode/impl/src/mndDnode.c | 12 +++++------- tests/script/tsim/dnode/drop_dnode_has_mnode.sim | 2 +- tests/script/tsim/dnode/offline_reason.sim | 2 +- tests/script/tsim/valgrind/checkError1.sim | 2 +- 4 files changed, 8 insertions(+), 10 deletions(-) diff --git a/source/dnode/mnode/impl/src/mndDnode.c b/source/dnode/mnode/impl/src/mndDnode.c index c4b0e00c76..73dbb243a1 100644 --- a/source/dnode/mnode/impl/src/mndDnode.c +++ b/source/dnode/mnode/impl/src/mndDnode.c @@ -995,13 +995,11 @@ static int32_t mndProcessDropDnodeReq(SRpcMsg *pReq) { goto _OVER; } - if (numOfVnodes > 0 || pMObj != NULL || pSObj != NULL || pQObj != NULL) { - if (!isonline && !force) { - terrno = TSDB_CODE_DNODE_OFFLINE; - mError("dnode:%d, failed to drop since %s, vnodes:%d mnode:%d qnode:%d snode:%d", pDnode->id, terrstr(), - numOfVnodes, pMObj != NULL, pQObj != NULL, pSObj != NULL); - goto _OVER; - } + if (!isonline && !force) { + terrno = TSDB_CODE_DNODE_OFFLINE; + mError("dnode:%d, failed to drop since %s, vnodes:%d mnode:%d qnode:%d snode:%d", pDnode->id, terrstr(), + numOfVnodes, pMObj != NULL, pQObj != NULL, pSObj != NULL); + goto _OVER; } code = mndDropDnode(pMnode, pReq, pDnode, pMObj, pQObj, pSObj, numOfVnodes, force, dropReq.unsafe); diff --git a/tests/script/tsim/dnode/drop_dnode_has_mnode.sim b/tests/script/tsim/dnode/drop_dnode_has_mnode.sim index 054f978607..8a74367726 100644 --- a/tests/script/tsim/dnode/drop_dnode_has_mnode.sim +++ b/tests/script/tsim/dnode/drop_dnode_has_mnode.sim @@ -35,7 +35,7 @@ endi print =============== step2 drop dnode 3 sql_error drop dnode 1 -sql drop dnode 3 +sql drop dnode 3 force sql select * from information_schema.ins_dnodes print ===> $data00 $data01 $data02 $data03 $data04 $data05 diff --git a/tests/script/tsim/dnode/offline_reason.sim b/tests/script/tsim/dnode/offline_reason.sim index 8c4d8b47f7..23f015392b 100644 --- a/tests/script/tsim/dnode/offline_reason.sim +++ b/tests/script/tsim/dnode/offline_reason.sim @@ -57,7 +57,7 @@ if $data(2)[7] != @status msg timeout@ then endi print ========== step4 -sql drop dnode 2 +sql drop dnode 2 force sql select * from information_schema.ins_dnodes if $rows != 1 then return -1 diff --git a/tests/script/tsim/valgrind/checkError1.sim b/tests/script/tsim/valgrind/checkError1.sim index 5f82d2d935..debe633f06 100644 --- a/tests/script/tsim/valgrind/checkError1.sim +++ b/tests/script/tsim/valgrind/checkError1.sim @@ -20,7 +20,7 @@ sql_error alter user u2 sysinfo 0 print =============== step2 create drop dnode sql create dnode $hostname port 7200 sql create dnode $hostname port 7300 -sql drop dnode 3 +sql drop dnode 3 force sql alter dnode 1 'debugflag 131' print =============== step3: select * from information_schema.ins_dnodes From c2bb44b6877a9953735a5f990366badc8ad551c8 Mon Sep 17 00:00:00 2001 From: Xuefeng Tan <1172915550@qq.com> Date: Mon, 29 May 2023 16:48:57 +0800 Subject: [PATCH 49/49] fix: TDengine build on macOS 
arm64 (#21513) --- cmake/cmake.platform | 12 ++++++++++-- contrib/CMakeLists.txt | 4 ++++ 2 files changed, 14 insertions(+), 2 deletions(-) diff --git a/cmake/cmake.platform b/cmake/cmake.platform index cb09bf2085..f9faf7316c 100644 --- a/cmake/cmake.platform +++ b/cmake/cmake.platform @@ -56,9 +56,17 @@ IF (${CMAKE_SYSTEM_NAME} MATCHES "Linux" OR ${CMAKE_SYSTEM_NAME} MATCHES "Darwin SET(TD_DARWIN TRUE) SET(OSTYPE "macOS") + execute_process(COMMAND geos-config --cflags OUTPUT_VARIABLE GEOS_CFLAGS) + execute_process(COMMAND geos-config --ldflags OUTPUT_VARIABLE GEOS_LDFLAGS) + string(SUBSTRING ${GEOS_CFLAGS} 2 -1 GEOS_CFLAGS) + string(REGEX REPLACE "\n" "" GEOS_CFLAGS ${GEOS_CFLAGS}) + string(SUBSTRING ${GEOS_LDFLAGS} 2 -1 GEOS_LDFLAGS) + string(REGEX REPLACE "\n" "" GEOS_LDFLAGS ${GEOS_LDFLAGS}) + MESSAGE("GEOS_CFLAGS "${GEOS_CFLAGS}) + MESSAGE("GEOS_LDFLAGS "${GEOS_LDFLAGS}) ADD_DEFINITIONS("-DDARWIN -Wno-tautological-pointer-compare") - INCLUDE_DIRECTORIES(/usr/local/include) - LINK_DIRECTORIES(/usr/local/lib) + INCLUDE_DIRECTORIES(${GEOS_CFLAGS}) + LINK_DIRECTORIES(${GEOS_LDFLAGS}) IF (${CMAKE_SYSTEM_PROCESSOR} MATCHES "arm64") MESSAGE("Current system arch is arm64") diff --git a/contrib/CMakeLists.txt b/contrib/CMakeLists.txt index b3333dee99..eeb20ee884 100644 --- a/contrib/CMakeLists.txt +++ b/contrib/CMakeLists.txt @@ -236,6 +236,10 @@ if(${BUILD_WITH_ROCKSDB}) SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-error=maybe-uninitialized") endif(${TD_DARWIN}) + if (${TD_DARWIN_ARM64}) + set(HAS_ARMV8_CRC true) + endif(${TD_DARWIN_ARM64}) + if (${TD_WINDOWS}) SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /wd4244 /wd4819") endif(${TD_WINDOWS})
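The cmake.platform change above derives the GEOS include and library directories by running `geos-config` and stripping the two-character `-I`/`-L` prefix plus the trailing newline from its output. A rough Python sketch of that same parsing, assuming `geos-config` is on `PATH` and prints a single flag per query (the exact paths vary by system):

```Python
import subprocess

def geos_dir(flag: str) -> str:
    # geos-config prints e.g. "-I/usr/local/include\n" for --cflags and
    # "-L/usr/local/lib\n" for --ldflags (paths vary by system).
    out = subprocess.check_output(["geos-config", flag], text=True)
    # Mirror string(SUBSTRING ... 2 -1) and string(REGEX REPLACE "\n" "" ...):
    # drop the two-character "-I"/"-L" prefix and the trailing newline.
    return out[2:].rstrip("\n")

if __name__ == "__main__":
    print(geos_dir("--cflags"))   # include directory fed to INCLUDE_DIRECTORIES
    print(geos_dir("--ldflags"))  # library directory fed to LINK_DIRECTORIES
```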