From fc32f716287910a0e7f3e91dd190a5fd0425adb4 Mon Sep 17 00:00:00 2001 From: wangmm0220 Date: Mon, 13 Jun 2022 19:59:30 +0800 Subject: [PATCH 1/8] opt: optimize group by tag --- source/libs/executor/inc/executorInt.h | 3 - source/libs/executor/inc/executorimpl.h | 8 +-- source/libs/executor/src/executorimpl.c | 92 +++++++++++++++++++++++- source/libs/executor/src/groupoperator.c | 6 +- source/libs/executor/src/scanoperator.c | 40 ++--------- 5 files changed, 100 insertions(+), 49 deletions(-) diff --git a/source/libs/executor/inc/executorInt.h b/source/libs/executor/inc/executorInt.h index 88f308710e..df54161720 100644 --- a/source/libs/executor/inc/executorInt.h +++ b/source/libs/executor/inc/executorInt.h @@ -27,10 +27,7 @@ typedef struct { int32_t bytes; } SGroupKeys, SStateKeys; -int32_t initGroupOptrInfo(SArray** pGroupColVals, int32_t* keyLen, char** keyBuf, const SArray* pGroupColList); uint64_t calcGroupId(char* pData, int32_t len); -void recordNewGroupKeys(SArray* pGroupCols, SArray* pGroupColVals, SSDataBlock* pBlock, int32_t rowIndex); -int32_t buildGroupKeys(void* pKey, const SArray* pGroupColVals); #ifdef __cplusplus } #endif diff --git a/source/libs/executor/inc/executorimpl.h b/source/libs/executor/inc/executorimpl.h index 94c6512e77..3e24e43233 100644 --- a/source/libs/executor/inc/executorimpl.h +++ b/source/libs/executor/inc/executorimpl.h @@ -336,12 +336,6 @@ typedef struct STableScanInfo { int32_t dataBlockLoadFlag; SInterval interval; // if the upstream is an interval operator, the interval info is also kept here to get the time window to check if current data block needs to be loaded. 
- SArray* pGroupCols; - SArray* pGroupColVals; // current group column values, SArray - char* keyBuf; // group by keys for hash - int32_t groupKeyLen; // total group by column width - SHashObj* pGroupSet; // quick locate the window object for each result - SSampleExecInfo sample; // sample execution info int32_t curTWinIdx; } STableScanInfo; @@ -789,7 +783,7 @@ SResultRow* doSetResultOutBufByKey(SDiskbasedBuf* pResultBuf, SResultRowInfo* pR SOperatorInfo* createExchangeOperatorInfo(void* pTransporter, SExchangePhysiNode* pExNode, SExecTaskInfo* pTaskInfo); -SOperatorInfo* createTableScanOperatorInfo(STableScanPhysiNode* pTableScanNode, tsdbReaderT pDataReader, SReadHandle* pHandle, SArray* groupKyes, SExecTaskInfo* pTaskInfo); +SOperatorInfo* createTableScanOperatorInfo(STableScanPhysiNode* pTableScanNode, tsdbReaderT pDataReader, SReadHandle* pHandle, SExecTaskInfo* pTaskInfo); SOperatorInfo* createTagScanOperatorInfo(SReadHandle* pReadHandle, STagScanPhysiNode* pPhyNode, STableListInfo* pTableListInfo, SExecTaskInfo* pTaskInfo); SOperatorInfo* createSysTableScanOperatorInfo(void* readHandle, SSystemTableScanPhysiNode *pScanPhyNode, SExecTaskInfo* pTaskInfo); diff --git a/source/libs/executor/src/executorimpl.c b/source/libs/executor/src/executorimpl.c index 2c067bf488..13ac74199e 100644 --- a/source/libs/executor/src/executorimpl.c +++ b/source/libs/executor/src/executorimpl.c @@ -4592,6 +4592,85 @@ int32_t extractTableSchemaVersion(SReadHandle* pHandle, uint64_t uid, SExecTaskI return TSDB_CODE_SUCCESS; } +int32_t generateGroupIdMap(STableListInfo* pTableListInfo, SReadHandle* pHandle, SArray* groupKey){ + if(groupKey == NULL) { + return TDB_CODE_SUCCESS; + } + + pTableListInfo->map = taosHashInit(32, taosGetDefaultHashFunction(TSDB_DATA_TYPE_BINARY), false, HASH_NO_LOCK); + if (pTableListInfo->map == NULL) { + return TSDB_CODE_OUT_OF_MEMORY; + } + int32_t keyLen = 0; + void *keyBuf = NULL; + int32_t numOfGroupCols = taosArrayGetSize(groupKey); + for (int32_t j 
= 0; j < numOfGroupCols; ++j) { + SColumn* pCol = taosArrayGet(groupKey, j); + keyLen += pCol->bytes; // actual data + null_flag + } + + int32_t nullFlagSize = sizeof(int8_t) * numOfGroupCols; + keyLen += nullFlagSize; + + keyBuf = taosMemoryCalloc(1, keyLen); + if (keyBuf == NULL) { + return TSDB_CODE_OUT_OF_MEMORY; + } + + for(int32_t i = 0; i < taosArrayGetSize(pTableListInfo->pTableList); i++){ + STableKeyInfo *info = taosArrayGet(pTableListInfo->pTableList, i); + SMetaReader mr = {0}; + metaReaderInit(&mr, pHandle->meta, 0); + metaGetTableEntryByUid(&mr, info->uid); + + char* isNull = (char*)keyBuf; + char* pStart = (char*)keyBuf + sizeof(int8_t) * numOfGroupCols; + for (int32_t j = 0; j < numOfGroupCols; ++j) { + SColumn* pCol = taosArrayGet(groupKey, j); + + if(strcmp(pCol->name, "tbname") == 0){ + isNull[j] = 0; // null-flag slot is per group column (j), not per table (i) + memcpy(pStart, mr.me.name, strlen(mr.me.name)); + pStart += strlen(mr.me.name); + }else{ + STagVal tagVal = {0}; + tagVal.cid = pCol->colId; + const char* p = metaGetTableTagVal(&mr.me, pCol->type, &tagVal); + if(p == NULL){ + isNull[j] = 1; + continue; + } + isNull[j] = 0; + if (pCol->type == TSDB_DATA_TYPE_JSON) { +// int32_t dataLen = getJsonValueLen(pkey->pData); +// memcpy(pStart, (pkey->pData), dataLen); +// pStart += dataLen; + } else if (IS_VAR_DATA_TYPE(pCol->type)) { + memcpy(pStart, tagVal.pData, tagVal.nData); + pStart += tagVal.nData; + ASSERT(tagVal.nData <= pCol->bytes); + } else { + memcpy(pStart, &(tagVal.i64), pCol->bytes); + pStart += pCol->bytes; + } + } + } + + int32_t len = (int32_t) (pStart - (char*)keyBuf); + uint64_t* groupId = taosHashGet(pTableListInfo->map, keyBuf, len); + if (groupId) { + taosHashPut(pTableListInfo->map, &(info->uid), sizeof(uint64_t), groupId, sizeof(uint64_t)); + } else { + uint64_t tmpId = calcGroupId(keyBuf, len); + taosHashPut(pTableListInfo->map, &(info->uid), sizeof(uint64_t), &tmpId, sizeof(uint64_t)); + } + + metaReaderClear(&mr); + } + taosMemoryFree(keyBuf); + return TDB_CODE_SUCCESS; +} + 
SOperatorInfo* createOperatorTree(SPhysiNode* pPhyNode, SExecTaskInfo* pTaskInfo, SReadHandle* pHandle, uint64_t queryId, uint64_t taskId, STableListInfo* pTableListInfo, SNode* pTagCond) { int32_t type = nodeType(pPhyNode); @@ -4605,15 +4684,22 @@ SOperatorInfo* createOperatorTree(SPhysiNode* pPhyNode, SExecTaskInfo* pTaskInfo if (pDataReader == NULL && terrno != 0) { return NULL; } - SArray* groupKyes = extractPartitionColInfo(pTableScanNode->pPartitionKeys); + int32_t code = extractTableSchemaVersion(pHandle, pTableScanNode->scan.uid, pTaskInfo); if (code) { tsdbCleanupReadHandle(pDataReader); return NULL; } - + + SArray* groupKyes = extractPartitionColInfo(pTableScanNode->pPartitionKeys); + code = generateGroupIdMap(pTableListInfo, pHandle, groupKyes); //todo for json + if (code){ + tsdbCleanupReadHandle(pDataReader); + return NULL; + } + SOperatorInfo* pOperator = - createTableScanOperatorInfo(pTableScanNode, pDataReader, pHandle, groupKyes, pTaskInfo); + createTableScanOperatorInfo(pTableScanNode, pDataReader, pHandle, pTaskInfo); STableScanInfo* pScanInfo = pOperator->info; pTaskInfo->cost.pRecoder = &pScanInfo->readRecorder; diff --git a/source/libs/executor/src/groupoperator.c b/source/libs/executor/src/groupoperator.c index 132f93a6a5..0f9879ecc9 100644 --- a/source/libs/executor/src/groupoperator.c +++ b/source/libs/executor/src/groupoperator.c @@ -37,7 +37,7 @@ static void destroyGroupOperatorInfo(void* param, int32_t numOfOutput) { taosArrayDestroy(pInfo->pGroupColVals); } -int32_t initGroupOptrInfo(SArray** pGroupColVals, int32_t* keyLen, char** keyBuf, const SArray* pGroupColList) { +static int32_t initGroupOptrInfo(SArray** pGroupColVals, int32_t* keyLen, char** keyBuf, const SArray* pGroupColList) { *pGroupColVals = taosArrayInit(4, sizeof(SGroupKeys)); if ((*pGroupColVals) == NULL) { return TSDB_CODE_OUT_OF_MEMORY; @@ -118,7 +118,7 @@ static bool groupKeyCompare(SArray* pGroupCols, SArray* pGroupColVals, SSDataBlo return true; } -void 
recordNewGroupKeys(SArray* pGroupCols, SArray* pGroupColVals, SSDataBlock* pBlock, int32_t rowIndex) { +static void recordNewGroupKeys(SArray* pGroupCols, SArray* pGroupColVals, SSDataBlock* pBlock, int32_t rowIndex) { SColumnDataAgg* pColAgg = NULL; size_t numOfGroupCols = taosArrayGetSize(pGroupCols); @@ -150,7 +150,7 @@ void recordNewGroupKeys(SArray* pGroupCols, SArray* pGroupColVals, SSDataBlock* } } -int32_t buildGroupKeys(void* pKey, const SArray* pGroupColVals) { +static int32_t buildGroupKeys(void* pKey, const SArray* pGroupColVals) { ASSERT(pKey != NULL); size_t numOfGroupCols = taosArrayGetSize(pGroupColVals); diff --git a/source/libs/executor/src/scanoperator.c b/source/libs/executor/src/scanoperator.c index 8fb4878cd9..b6ee9843b4 100644 --- a/source/libs/executor/src/scanoperator.c +++ b/source/libs/executor/src/scanoperator.c @@ -391,22 +391,16 @@ static SSDataBlock* doTableScanImpl(SOperatorInfo* pOperator) { longjmp(pOperator->pTaskInfo->env, code); } - recordNewGroupKeys(pTableScanInfo->pGroupCols, pTableScanInfo->pGroupColVals, pBlock, 0); - int32_t len = buildGroupKeys(pTableScanInfo->keyBuf, pTableScanInfo->pGroupColVals); - - uint64_t* groupId = taosHashGet(pTableScanInfo->pGroupSet, pTableScanInfo->keyBuf, len); - if (groupId) { - pBlock->info.groupId = *groupId; - } else if (len != 0) { - pBlock->info.groupId = calcGroupId(pTableScanInfo->keyBuf, len); - taosHashPut(pTableScanInfo->pGroupSet, pTableScanInfo->keyBuf, len, &pBlock->info.groupId, sizeof(uint64_t)); - } - // current block is filter out according to filter condition, continue load the next block if (status == FUNC_DATA_REQUIRED_FILTEROUT || pBlock->info.rows == 0) { continue; } + uint64_t* groupId = taosHashGet(pOperator->pTaskInfo->tableqinfoList.map, &pBlock->info.uid, sizeof(int64_t)); + if (groupId) { + pBlock->info.groupId = *groupId; + } + pOperator->resultInfo.totalRows = pTableScanInfo->readRecorder.totalRows; pTableScanInfo->readRecorder.elapsedTime += 
(taosGetTimestampUs() - st) / 1000.0; @@ -530,21 +524,13 @@ static void destroyTableScanOperatorInfo(void* param, int32_t numOfOutput) { tsdbCleanupReadHandle(pTableScanInfo->dataReader); - taosArrayDestroy(pTableScanInfo->pGroupCols); - for (int i = 0; i < taosArrayGetSize(pTableScanInfo->pGroupColVals); i++) { - SGroupKeys key = *(SGroupKeys*)taosArrayGet(pTableScanInfo->pGroupColVals, i); - taosMemoryFree(key.pData); - } - taosArrayDestroy(pTableScanInfo->pGroupColVals); - taosMemoryFree(pTableScanInfo->keyBuf); - taosHashCleanup(pTableScanInfo->pGroupSet); if (pTableScanInfo->pColMatchInfo != NULL) { taosArrayDestroy(pTableScanInfo->pColMatchInfo); } } SOperatorInfo* createTableScanOperatorInfo(STableScanPhysiNode* pTableScanNode, tsdbReaderT pDataReader, - SReadHandle* readHandle, SArray* groupKyes, SExecTaskInfo* pTaskInfo) { + SReadHandle* readHandle, SExecTaskInfo* pTaskInfo) { STableScanInfo* pInfo = taosMemoryCalloc(1, sizeof(STableScanInfo)); SOperatorInfo* pOperator = taosMemoryCalloc(1, sizeof(SOperatorInfo)); if (pInfo == NULL || pOperator == NULL) { @@ -591,18 +577,6 @@ SOperatorInfo* createTableScanOperatorInfo(STableScanPhysiNode* pTableScanNode, pOperator->numOfExprs = numOfCols; pOperator->pTaskInfo = pTaskInfo; - // for table group - pInfo->pGroupCols = groupKyes; - _hash_fn_t hashFn = taosGetDefaultHashFunction(TSDB_DATA_TYPE_BINARY); - pInfo->pGroupSet = taosHashInit(100, hashFn, false, HASH_NO_LOCK); - if (pInfo->pGroupSet == NULL) { - goto _error; - } - code = initGroupOptrInfo(&pInfo->pGroupColVals, &pInfo->groupKeyLen, &pInfo->keyBuf, groupKyes); - if (code != TSDB_CODE_SUCCESS) { - goto _error; - } - pOperator->fpSet = createOperatorFpSet(operatorDummyOpenFn, doTableScan, NULL, NULL, destroyTableScanOperatorInfo, NULL, NULL, getTableScannerExecInfo); @@ -992,7 +966,7 @@ SOperatorInfo* createStreamScanOperatorInfo(void* pDataReader, SReadHandle* pHan SScanPhysiNode* pScanPhyNode = &pTableScanNode->scan; SDataBlockDescNode* pDescNode = 
pScanPhyNode->node.pOutputDataBlockDesc; - SOperatorInfo* pTableScanDummy = createTableScanOperatorInfo(pTableScanNode, pDataReader, pHandle, NULL, pTaskInfo); + SOperatorInfo* pTableScanDummy = createTableScanOperatorInfo(pTableScanNode, pDataReader, pHandle, pTaskInfo); STableScanInfo* pSTInfo = (STableScanInfo*)pTableScanDummy->info; From cb85ee6d3c944148f0616757dbf304e95098eff0 Mon Sep 17 00:00:00 2001 From: wangmm0220 Date: Tue, 14 Jun 2022 13:36:54 +0800 Subject: [PATCH 2/8] opt: optimize generating groupid in partition/group by tag --- source/libs/executor/inc/executorimpl.h | 2 +- source/libs/executor/src/executorimpl.c | 24 ++++++++---------------- source/libs/executor/src/scanoperator.c | 25 +++++++++++++++++++++++-- 3 files changed, 32 insertions(+), 19 deletions(-) diff --git a/source/libs/executor/inc/executorimpl.h b/source/libs/executor/inc/executorimpl.h index 3e24e43233..ab60acab53 100644 --- a/source/libs/executor/inc/executorimpl.h +++ b/source/libs/executor/inc/executorimpl.h @@ -821,7 +821,7 @@ SOperatorInfo* createGroupOperatorInfo(SOperatorInfo* downstream, SExprInfo* pEx SExprInfo* pScalarExprInfo, int32_t numOfScalarExpr, SExecTaskInfo* pTaskInfo); SOperatorInfo* createDataBlockInfoScanOperator(void* dataReader, SExecTaskInfo* pTaskInfo); -SOperatorInfo* createStreamScanOperatorInfo(void* pDataReader, SReadHandle* pHandle, SArray* pTableIdList, +SOperatorInfo* createStreamScanOperatorInfo(void* pDataReader, SReadHandle* pHandle, STableScanPhysiNode* pTableScanNode, SExecTaskInfo* pTaskInfo, STimeWindowAggSupp* pTwSup); diff --git a/source/libs/executor/src/executorimpl.c b/source/libs/executor/src/executorimpl.c index 13ac74199e..e4c072a396 100644 --- a/source/libs/executor/src/executorimpl.c +++ b/source/libs/executor/src/executorimpl.c @@ -4558,7 +4558,6 @@ static tsdbReaderT doCreateDataReader(STableScanPhysiNode* pTableScanNode, SRead static int32_t getTableList(void* metaHandle, int32_t tableType, uint64_t tableUid, STableListInfo* 
pListInfo, SNode* pTagCond); -static SArray* extractTableIdList(const STableListInfo* pTableGroupInfo); static SArray* extractColumnInfo(SNodeList* pNodeList); static SArray* createSortInfo(SNodeList* pNodeList); @@ -4725,12 +4724,17 @@ SOperatorInfo* createOperatorTree(SPhysiNode* pPhyNode, SExecTaskInfo* pTaskInfo } else { qDebug("%s pDataReader is not NULL", GET_TASKID(pTaskInfo)); } - SArray* tableIdList = extractTableIdList(pTableListInfo); + + SArray* groupKyes = extractPartitionColInfo(pTableScanNode->pPartitionKeys); + int32_t code = generateGroupIdMap(pTableListInfo, pHandle, groupKyes); //todo for json + if (code){ + tsdbCleanupReadHandle(pDataReader); + return NULL; + } SOperatorInfo* pOperator = - createStreamScanOperatorInfo(pDataReader, pHandle, tableIdList, pTableScanNode, pTaskInfo, &twSup); + createStreamScanOperatorInfo(pDataReader, pHandle, pTableScanNode, pTaskInfo, &twSup); - taosArrayDestroy(tableIdList); return pOperator; } else if (QUERY_NODE_PHYSICAL_PLAN_SYSTABLE_SCAN == type) { SSystemTableScanPhysiNode* pSysScanPhyNode = (SSystemTableScanPhysiNode*)pPhyNode; @@ -5183,18 +5187,6 @@ int32_t getTableList(void* metaHandle, int32_t tableType, uint64_t tableUid, STa return code; } -SArray* extractTableIdList(const STableListInfo* pTableGroupInfo) { - SArray* tableIdList = taosArrayInit(4, sizeof(uint64_t)); - - // Transfer the Array of STableKeyInfo into uid list. 
- for (int32_t i = 0; i < taosArrayGetSize(pTableGroupInfo->pTableList); ++i) { - STableKeyInfo* pkeyInfo = taosArrayGet(pTableGroupInfo->pTableList, i); - taosArrayPush(tableIdList, &pkeyInfo->uid); - } - - return tableIdList; -} - tsdbReaderT doCreateDataReader(STableScanPhysiNode* pTableScanNode, SReadHandle* pHandle, STableListInfo* pTableListInfo, uint64_t queryId, uint64_t taskId, SNode* pTagCond) { int32_t code = diff --git a/source/libs/executor/src/scanoperator.c b/source/libs/executor/src/scanoperator.c index b6ee9843b4..d30e4ef6db 100644 --- a/source/libs/executor/src/scanoperator.c +++ b/source/libs/executor/src/scanoperator.c @@ -886,6 +886,11 @@ static SSDataBlock* doStreamBlockScan(SOperatorInfo* pOperator) { pInfo->pRes->info.groupId = groupId; } + uint64_t* groupIdPre = taosHashGet(pOperator->pTaskInfo->tableqinfoList.map, &uid, sizeof(int64_t)); + if (groupIdPre) { + pInfo->pRes->info.groupId = *groupIdPre; + } + for (int32_t i = 0; i < taosArrayGetSize(pInfo->pColMatchInfo); ++i) { SColMatchInfo* pColMatchInfo = taosArrayGet(pInfo->pColMatchInfo, i); if (!pColMatchInfo->output) { @@ -953,11 +958,24 @@ static SSDataBlock* doStreamBlockScan(SOperatorInfo* pOperator) { } } -SOperatorInfo* createStreamScanOperatorInfo(void* pDataReader, SReadHandle* pHandle, SArray* pTableIdList, +static SArray* extractTableIdList(const STableListInfo* pTableGroupInfo) { + SArray* tableIdList = taosArrayInit(4, sizeof(uint64_t)); + + // Transfer the Array of STableKeyInfo into uid list. 
+ for (int32_t i = 0; i < taosArrayGetSize(pTableGroupInfo->pTableList); ++i) { + STableKeyInfo* pkeyInfo = taosArrayGet(pTableGroupInfo->pTableList, i); + taosArrayPush(tableIdList, &pkeyInfo->uid); + } + + return tableIdList; +} + +SOperatorInfo* createStreamScanOperatorInfo(void* pDataReader, SReadHandle* pHandle, STableScanPhysiNode* pTableScanNode, SExecTaskInfo* pTaskInfo, STimeWindowAggSupp* pTwSup) { SStreamBlockScanInfo* pInfo = taosMemoryCalloc(1, sizeof(SStreamBlockScanInfo)); SOperatorInfo* pOperator = taosMemoryCalloc(1, sizeof(SOperatorInfo)); + if (pInfo == NULL || pOperator == NULL) { terrno = TSDB_CODE_QRY_OUT_OF_MEMORY; goto _error; @@ -988,10 +1006,13 @@ SOperatorInfo* createStreamScanOperatorInfo(void* pDataReader, SReadHandle* pHan // set the extract column id to streamHandle tqReadHandleSetColIdList((STqReadHandle*)pHandle->reader, pColIds); - int32_t code = tqReadHandleSetTbUidList(pHandle->reader, pTableIdList); + SArray* tableIdList = extractTableIdList(&pTaskInfo->tableqinfoList); + int32_t code = tqReadHandleSetTbUidList(pHandle->reader, tableIdList); if (code != 0) { + taosArrayDestroy(tableIdList); goto _error; } + taosArrayDestroy(tableIdList); pInfo->pBlockLists = taosArrayInit(4, POINTER_BYTES); if (pInfo->pBlockLists == NULL) { From 601e454a242bd511f2e2914f3f946490715cb45f Mon Sep 17 00:00:00 2001 From: Liu Jicong Date: Tue, 14 Jun 2022 13:58:40 +0800 Subject: [PATCH 3/8] enh(tmq): put offset store into vnode --- include/common/tmsg.h | 29 +++- include/common/tmsgdef.h | 3 +- source/client/src/clientImpl.c | 54 +++--- source/client/src/tmq.c | 172 +++++++++++++++++++- source/common/src/tdatablock.c | 4 +- source/common/src/tmsg.c | 29 ++++ source/dnode/mgmt/mgmt_vnode/src/vmHandle.c | 1 + source/dnode/vnode/src/inc/tq.h | 36 ++-- source/dnode/vnode/src/inc/vnodeInt.h | 1 + source/dnode/vnode/src/tq/tq.c | 35 +++- source/dnode/vnode/src/tq/tqOffset.c | 115 +++++++++++-- source/dnode/vnode/src/vnd/vnodeSvr.c | 15 +- 12 files changed, 
415 insertions(+), 79 deletions(-) diff --git a/include/common/tmsg.h b/include/common/tmsg.h index d69849349c..be9eda3825 100644 --- a/include/common/tmsg.h +++ b/include/common/tmsg.h @@ -1494,9 +1494,9 @@ typedef struct { int32_t code; } STaskDropRsp; -#define STREAM_TRIGGER_AT_ONCE 1 -#define STREAM_TRIGGER_WINDOW_CLOSE 2 -#define STREAM_TRIGGER_MAX_DELAY 3 +#define STREAM_TRIGGER_AT_ONCE 1 +#define STREAM_TRIGGER_WINDOW_CLOSE 2 +#define STREAM_TRIGGER_MAX_DELAY 3 typedef struct { char name[TSDB_TABLE_FNAME_LEN]; @@ -2297,6 +2297,29 @@ int32_t tDecodeSMqOffset(SDecoder* decoder, SMqOffset* pOffset); int32_t tEncodeSMqCMCommitOffsetReq(SEncoder* encoder, const SMqCMCommitOffsetReq* pReq); int32_t tDecodeSMqCMCommitOffsetReq(SDecoder* decoder, SMqCMCommitOffsetReq* pReq); +// tqOffset +enum { + TMQ_OFFSET__SNAPSHOT = 1, + TMQ_OFFSET__LOG, +}; + +typedef struct { + int8_t type; + union { + struct { + int64_t uid; + int64_t ts; + }; + struct { + int64_t version; + }; + }; + char subKey[TSDB_SUBSCRIBE_KEY_LEN]; +} STqOffset; + +int32_t tEncodeSTqOffset(SEncoder* pEncoder, const STqOffset* pOffset); +int32_t tDecodeSTqOffset(SDecoder* pDecoder, STqOffset* pOffset); + typedef struct { char name[TSDB_TABLE_FNAME_LEN]; char stb[TSDB_TABLE_FNAME_LEN]; diff --git a/include/common/tmsgdef.h b/include/common/tmsgdef.h index 743f10bd55..534194e1cf 100644 --- a/include/common/tmsgdef.h +++ b/include/common/tmsgdef.h @@ -140,7 +140,7 @@ enum { TD_DEF_MSG_TYPE(TDMT_MND_MQ_CONSUMER_RECOVER, "consumer-recover", SMqConsumerRecoverMsg, NULL) TD_DEF_MSG_TYPE(TDMT_MND_MQ_DO_REBALANCE, "do-rebalance", SMqDoRebalanceMsg, NULL) TD_DEF_MSG_TYPE(TDMT_MND_MQ_DROP_CGROUP, "drop-cgroup", SMqDropCGroupReq, SMqDropCGroupRsp) - TD_DEF_MSG_TYPE(TDMT_MND_MQ_COMMIT_OFFSET, "commit-offset", SMqCMCommitOffsetReq, SMqCMCommitOffsetRsp) + TD_DEF_MSG_TYPE(TDMT_MND_MQ_COMMIT_OFFSET, "mnode-commit-offset", SMqCMCommitOffsetReq, SMqCMCommitOffsetRsp) TD_DEF_MSG_TYPE(TDMT_MND_MQ_TIMER, "mq-tmr", 
SMTimerReq, NULL) TD_DEF_MSG_TYPE(TDMT_MND_TELEM_TIMER, "telem-tmr", SMTimerReq, SMTimerReq) TD_DEF_MSG_TYPE(TDMT_MND_TRANS_TIMER, "trans-tmr", NULL, NULL) @@ -176,6 +176,7 @@ enum { TD_DEF_MSG_TYPE(TDMT_VND_DROP_STB, "vnode-drop-stb", SVDropStbReq, NULL) TD_DEF_MSG_TYPE(TDMT_VND_MQ_VG_CHANGE, "vnode-mq-vg-change", SMqRebVgReq, SMqRebVgRsp) TD_DEF_MSG_TYPE(TDMT_VND_MQ_VG_DELETE, "vnode-mq-vg-delete", SMqVDeleteReq, SMqVDeleteRsp) + TD_DEF_MSG_TYPE(TDMT_VND_MQ_COMMIT_OFFSET, "vnode-commit-offset", STqOffset, STqOffset) TD_DEF_MSG_TYPE(TDMT_VND_CANCEL_TASK, "vnode-cancel-task", NULL, NULL) TD_DEF_MSG_TYPE(TDMT_VND_DROP_TASK, "vnode-drop-task", NULL, NULL) TD_DEF_MSG_TYPE(TDMT_VND_CREATE_TOPIC, "vnode-create-topic", NULL, NULL) diff --git a/source/client/src/clientImpl.c b/source/client/src/clientImpl.c index dc83f7bc43..f48383c43a 100644 --- a/source/client/src/clientImpl.c +++ b/source/client/src/clientImpl.c @@ -205,7 +205,7 @@ int32_t execLocalCmd(SRequestObj* pRequest, SQuery* pQuery) { SRetrieveTableRsp* pRsp = NULL; int32_t code = qExecCommand(pQuery->pRoot, &pRsp); if (TSDB_CODE_SUCCESS == code && NULL != pRsp) { - code = setQueryResultFromRsp(&pRequest->body.resInfo, pRsp, false, false); + code = setQueryResultFromRsp(&pRequest->body.resInfo, pRsp, false, true); } return code; @@ -232,9 +232,7 @@ int32_t execDdlQuery(SRequestObj* pRequest, SQuery* pQuery) { return TSDB_CODE_SUCCESS; } -static SAppInstInfo* getAppInfo(SRequestObj* pRequest) { - return pRequest->pTscObj->pAppInfo; -} +static SAppInstInfo* getAppInfo(SRequestObj* pRequest) { return pRequest->pTscObj->pAppInfo; } void asyncExecLocalCmd(SRequestObj* pRequest, SQuery* pQuery) { SRetrieveTableRsp* pRsp = NULL; @@ -258,7 +256,7 @@ void asyncExecLocalCmd(SRequestObj* pRequest, SQuery* pQuery) { } pRequest->body.queryFp(pRequest->body.param, pRequest, 0); -// pRequest->body.fetchFp(pRequest->body.param, pRequest, pResultInfo->numOfRows); + // pRequest->body.fetchFp(pRequest->body.param, pRequest, 
pResultInfo->numOfRows); } int32_t asyncExecDdlQuery(SRequestObj* pRequest, SQuery* pQuery) { @@ -401,7 +399,7 @@ int32_t scheduleAsyncQuery(SRequestObj* pRequest, SQueryPlan* pDag, SArray* pNod SQueryResult res = {.code = 0, .numOfRows = 0}; int32_t code = schedulerAsyncExecJob(pTransporter, pNodeList, pDag, &pRequest->body.queryJob, pRequest->sqlstr, - pRequest->metric.start, schdExecCallback, &res); + pRequest->metric.start, schdExecCallback, &res); pRequest->body.resInfo.execRes = res.res; @@ -457,7 +455,8 @@ int32_t scheduleQuery(SRequestObj* pRequest, SQueryPlan* pDag, SArray* pNodeList return pRequest->code; } - if (TDMT_VND_SUBMIT == pRequest->type || TDMT_VND_DELETE == pRequest->type || TDMT_VND_CREATE_TABLE == pRequest->type) { + if (TDMT_VND_SUBMIT == pRequest->type || TDMT_VND_DELETE == pRequest->type || + TDMT_VND_CREATE_TABLE == pRequest->type) { pRequest->body.resInfo.numOfRows = res.numOfRows; if (pRequest->body.queryJob != 0) { @@ -470,9 +469,9 @@ int32_t scheduleQuery(SRequestObj* pRequest, SQueryPlan* pDag, SArray* pNodeList return pRequest->code; } -int32_t handleSubmitExecRes(SRequestObj* pRequest, void* res, SCatalog* pCatalog, SEpSet *epset) { - int32_t code = 0; - SArray* pArray = NULL; +int32_t handleSubmitExecRes(SRequestObj* pRequest, void* res, SCatalog* pCatalog, SEpSet* epset) { + int32_t code = 0; + SArray* pArray = NULL; SSubmitRsp* pRsp = (SSubmitRsp*)res; if (pRsp->nBlocks <= 0) { return TSDB_CODE_SUCCESS; @@ -502,7 +501,7 @@ _return: return code; } -int32_t handleQueryExecRes(SRequestObj* pRequest, void* res, SCatalog* pCatalog, SEpSet *epset) { +int32_t handleQueryExecRes(SRequestObj* pRequest, void* res, SCatalog* pCatalog, SEpSet* epset) { int32_t code = 0; SArray* pArray = NULL; SArray* pTbArray = (SArray*)res; @@ -540,15 +539,15 @@ int32_t handleQueryExecRsp(SRequestObj* pRequest) { return TSDB_CODE_SUCCESS; } - SCatalog* pCatalog = NULL; - SAppInstInfo* pAppInfo = getAppInfo(pRequest); + SCatalog* pCatalog = NULL; + 
SAppInstInfo* pAppInfo = getAppInfo(pRequest); int32_t code = catalogGetHandle(pAppInfo->clusterId, &pCatalog); if (code) { return code; } - SEpSet epset = getEpSet_s(&pAppInfo->mgmtEp); + SEpSet epset = getEpSet_s(&pAppInfo->mgmtEp); SQueryExecRes* pRes = &pRequest->body.resInfo.execRes; switch (pRes->msgType) { @@ -566,8 +565,8 @@ int32_t handleQueryExecRsp(SRequestObj* pRequest) { break; } default: - tscError("0x%"PRIx64", invalid exec result for request type %d, reqId:0x%"PRIx64, pRequest->self, - pRequest->type, pRequest->requestId); + tscError("0x%" PRIx64 ", invalid exec result for request type %d, reqId:0x%" PRIx64, pRequest->self, + pRequest->type, pRequest->requestId); code = TSDB_CODE_APP_ERROR; } @@ -575,13 +574,13 @@ int32_t handleQueryExecRsp(SRequestObj* pRequest) { } void schedulerExecCb(SQueryResult* pResult, void* param, int32_t code) { - SRequestObj* pRequest = (SRequestObj*) param; + SRequestObj* pRequest = (SRequestObj*)param; pRequest->code = code; STscObj* pTscObj = pRequest->pTscObj; if (code != TSDB_CODE_SUCCESS && NEED_CLIENT_HANDLE_ERROR(code)) { - tscDebug("0x%"PRIx64" client retry to handle the error, code:%d - %s, tryCount:%d, reqId:0x%"PRIx64, pRequest->self, code, tstrerror(code), - pRequest->retry, pRequest->requestId); + tscDebug("0x%" PRIx64 " client retry to handle the error, code:%d - %s, tryCount:%d, reqId:0x%" PRIx64, + pRequest->self, code, tstrerror(code), pRequest->retry, pRequest->requestId); pRequest->prevCode = code; doAsyncQuery(pRequest, true); return; @@ -589,7 +588,7 @@ void schedulerExecCb(SQueryResult* pResult, void* param, int32_t code) { if (code == TSDB_CODE_SUCCESS) { code = handleQueryExecRsp(pRequest); - ASSERT(pRequest->code == TSDB_CODE_SUCCESS); + ASSERT(pRequest->code == TSDB_CODE_SUCCESS); pRequest->code = code; } @@ -697,16 +696,17 @@ void launchAsyncQuery(SRequestObj* pRequest, SQuery* pQuery) { schedulerAsyncExecJob(pAppInfo->pTransporter, pNodeList, pRequest->body.pDag, &pRequest->body.queryJob, 
pRequest->sqlstr, pRequest->metric.start, schedulerExecCb, pRequest); } else { - tscError("0x%"PRIx64" failed to create query plan, code:%s 0x%"PRIx64, pRequest->self, tstrerror(code), pRequest->requestId); + tscError("0x%" PRIx64 " failed to create query plan, code:%s 0x%" PRIx64, pRequest->self, tstrerror(code), + pRequest->requestId); pRequest->body.queryFp(pRequest->body.param, pRequest, code); } - //todo not to be released here + // todo not to be released here taosArrayDestroy(pNodeList); break; } case QUERY_EXEC_MODE_EMPTY_RESULT: - pRequest->type = TSDB_SQL_RETRIEVE_EMPTY_RESULT; + pRequest->type = TSDB_SQL_RETRIEVE_EMPTY_RESULT; pRequest->body.queryFp(pRequest->body.param, pRequest, 0); break; default: @@ -1349,14 +1349,14 @@ int32_t setResultDataPtr(SReqResultInfo* pResultInfo, TAOS_FIELD* pFields, int32 p += sizeof(uint64_t); // check fields - for(int32_t i = 0; i < numOfCols; ++i) { - int16_t type = *(int16_t*) p; + for (int32_t i = 0; i < numOfCols; ++i) { + int16_t type = *(int16_t*)p; p += sizeof(int16_t); - int32_t bytes = *(int32_t*) p; + int32_t bytes = *(int32_t*)p; p += sizeof(int32_t); -// ASSERT(type == pFields[i].type && bytes == pFields[i].bytes); + ASSERT(type == pFields[i].type && bytes == pFields[i].bytes); } int32_t* colLength = (int32_t*)p; diff --git a/source/client/src/tmq.c b/source/client/src/tmq.c index 7d49c4206f..48c85cf265 100644 --- a/source/client/src/tmq.c +++ b/source/client/src/tmq.c @@ -132,6 +132,7 @@ typedef struct { // statistics int64_t pollCnt; // offset + int64_t committedOffset; int64_t currentOffset; // connection info int32_t vgId; @@ -193,6 +194,26 @@ typedef struct { void* userParam; } SMqCommitCbParam; +typedef struct { + tmq_t* tmq; + int8_t automatic; + int8_t async; + int8_t freeOffsets; + int8_t waitingRspNum; + int8_t totalRspNum; + tmq_resp_err_t rspErr; + tmq_commit_cb* userCb; + SArray* successfulOffsets; + SArray* failedOffsets; + void* userParam; + tsem_t rspSem; +} SMqCommitCbParamSet; + +typedef 
struct { + SMqCommitCbParamSet* params; + STqOffset* pOffset; +} SMqCommitCbParam2; + tmq_conf_t* tmq_conf_new() { tmq_conf_t* conf = taosMemoryCalloc(1, sizeof(tmq_conf_t)); conf->withTbName = false; @@ -343,6 +364,135 @@ int32_t tmqCommitCb(void* param, const SDataBuf* pMsg, int32_t code) { return 0; } +int32_t tmqCommitCb2(void* param, const SDataBuf* pBuf, int32_t code) { + SMqCommitCbParam2* pParam = (SMqCommitCbParam2*)param; + SMqCommitCbParamSet* pParamSet = (SMqCommitCbParamSet*)pParam->params; + // push into array + if (code == 0) { + taosArrayPush(pParamSet->failedOffsets, &pParam->pOffset); + } else { + taosArrayPush(pParamSet->successfulOffsets, &pParam->pOffset); + } + // count down waiting rsp + int8_t waitingRspNum = atomic_sub_fetch_8(&pParam->params->waitingRspNum, 1); + ASSERT(waitingRspNum >= 0); + + if (waitingRspNum == 0) { + // if no more waiting rsp + if (pParamSet->async) { + // call async cb func + if (pParamSet->automatic && pParamSet->tmq->commitCb) { + pParamSet->tmq->commitCb(pParamSet->tmq, pParamSet->rspErr, NULL, pParamSet->tmq->commitCbUserParam); + } else if (!pParamSet->automatic && pParamSet->userCb) { + // sem post + pParamSet->userCb(pParamSet->tmq, pParamSet->rspErr, NULL, pParamSet->userParam); + } + } + + taosArrayDestroyP(pParamSet->successfulOffsets, taosMemoryFree); + taosArrayDestroyP(pParamSet->failedOffsets, taosMemoryFree); + } + return 0; +} + +int32_t tmqComitInner2(tmq_t* tmq, int8_t automatic, int8_t async, tmq_commit_cb* userCb, void* userParam) { + int32_t code = -1; + + SMqCommitCbParamSet* pParamSet = taosMemoryCalloc(1, sizeof(SMqCommitCbParamSet)); + if (pParamSet == NULL) { + terrno = TSDB_CODE_OUT_OF_MEMORY; + return -1; + } + pParamSet->tmq = tmq; + pParamSet->automatic = automatic; + pParamSet->async = async; + pParamSet->freeOffsets = 1; + pParamSet->userCb = userCb; + pParamSet->userParam = userParam; + tsem_init(&pParamSet->rspSem, 0, 0); + + for (int32_t i = 0; i < 
taosArrayGetSize(tmq->clientTopics); i++) { + SMqClientTopic* pTopic = taosArrayGet(tmq->clientTopics, i); + for (int32_t j = 0; j < taosArrayGetSize(pTopic->vgs); j++) { + SMqClientVg* pVg = taosArrayGet(pTopic->vgs, i); + STqOffset* pOffset = taosMemoryCalloc(1, sizeof(STqOffset)); + if (pOffset == NULL) { + terrno = TSDB_CODE_OUT_OF_MEMORY; + return -1; + } + int32_t tlen = strlen(tmq->groupId); + memcpy(pOffset->subKey, tmq->groupId, tlen); + pOffset->subKey[tlen] = TMQ_SEPARATOR; + strcpy(pOffset->subKey + tlen + 1, pTopic->topicName); + int32_t len; + int32_t code; + tEncodeSize(tEncodeSTqOffset, pOffset, len, code); + if (code < 0) { + ASSERT(0); + } + void* buf = taosMemoryCalloc(1, sizeof(SMsgHead) + len); + ((SMsgHead*)buf)->vgId = htonl(pVg->vgId); + + void* abuf = POINTER_SHIFT(buf, sizeof(SMsgHead)); + + SEncoder encoder; + tEncoderInit(&encoder, abuf, len); + tEncodeSTqOffset(&encoder, pOffset); + + // build param + SMqCommitCbParam2* pParam = taosMemoryCalloc(1, sizeof(SMqCommitCbParam2)); + pParam->params = pParamSet; + pParam->pOffset = pOffset; + + // build send info + SMsgSendInfo* pMsgSendInfo = taosMemoryCalloc(1, sizeof(SMsgSendInfo)); + if (pMsgSendInfo == NULL) { + // TODO + continue; + } + pMsgSendInfo->msgInfo = (SDataBuf){ + .pData = buf, + .len = len, + .handle = NULL, + }; + + pMsgSendInfo->requestId = generateRequestId(); + pMsgSendInfo->requestObjRefId = 0; + pMsgSendInfo->param = pParam; + pMsgSendInfo->fp = tmqCommitCb2; + pMsgSendInfo->msgType = TDMT_MND_MQ_COMMIT_OFFSET; + // send msg + + SEpSet epSet = getEpSet_s(&tmq->pTscObj->pAppInfo->mgmtEp); + int64_t transporterId = 0; + asyncSendMsgToServer(tmq->pTscObj->pAppInfo->pTransporter, &epSet, &transporterId, pMsgSendInfo); + } + } + + if (!async) { + tsem_wait(&pParamSet->rspSem); + code = pParamSet->rspErr; + tsem_destroy(&pParamSet->rspSem); + } else { + code = 0; + } + + if (code != 0 && async) { + if (automatic) { + tmq->commitCb(tmq, code, NULL, tmq->commitCbUserParam); + } 
else { + userCb(tmq, code, NULL, userParam); + } + } + + if (!async) { + taosArrayDestroyP(pParamSet->successfulOffsets, taosMemoryFree); + taosArrayDestroyP(pParamSet->failedOffsets, taosMemoryFree); + } + + return 0; +} + int32_t tmqCommitInner(tmq_t* tmq, const tmq_topic_vgroup_list_t* offsets, int8_t automatic, int8_t async, tmq_commit_cb* userCb, void* userParam) { SMqCMCommitOffsetReq req; @@ -890,12 +1040,13 @@ bool tmqUpdateEp(tmq_t* tmq, int32_t epoch, SMqAskEpRsp* pRsp) { sprintf(vgKey, "%s:%d", topic.topicName, pVgEp->vgId); int64_t* pOffset = taosHashGet(pHash, vgKey, strlen(vgKey)); int64_t offset = pVgEp->offset; - tscDebug("consumer %ld epoch %d vg %d offset og to %ld", tmq->consumerId, epoch, pVgEp->vgId, offset); + tscDebug("consumer %ld(epoch %d) original offset of vg %d is %ld", tmq->consumerId, epoch, pVgEp->vgId, offset); if (pOffset != NULL) { offset = *pOffset; - tscDebug("consumer %ld epoch %d vg %d found %s", tmq->consumerId, epoch, pVgEp->vgId, vgKey); + tscDebug("consumer %ld(epoch %d) receive offset of vg %d, full key is %s", tmq->consumerId, epoch, pVgEp->vgId, + vgKey); } - tscDebug("consumer %ld epoch %d vg %d offset set to %ld", tmq->consumerId, epoch, pVgEp->vgId, offset); + tscDebug("consumer %ld(epoch %d) offset of vg %d updated to %ld", tmq->consumerId, epoch, pVgEp->vgId, offset); SMqClientVg clientVg = { .pollCnt = 0, .currentOffset = offset, @@ -1226,9 +1377,8 @@ SMqRspObj* tmqHandleAllRsp(tmq_t* tmq, int64_t timeout, bool pollIfReset) { if (rspWrapper->tmqRspType == TMQ_MSG_TYPE__POLL_RSP) { SMqPollRspWrapper* pollRspWrapper = (SMqPollRspWrapper*)rspWrapper; /*atomic_sub_fetch_32(&tmq->readyRequest, 1);*/ - /*printf("handle poll rsp %d\n", rspMsg->head.mqMsgType);*/ - if (pollRspWrapper->msg.head.epoch == atomic_load_32(&tmq->epoch)) { - /*printf("epoch match\n");*/ + int32_t consumerEpoch = atomic_load_32(&tmq->epoch); + if (pollRspWrapper->msg.head.epoch == consumerEpoch) { SMqClientVg* pVg = pollRspWrapper->vgHandle; 
/*printf("vg %d offset %ld up to %ld\n", pVg->vgId, pVg->currentOffset, rspMsg->msg.rspOffset);*/ pVg->currentOffset = pollRspWrapper->msg.rspOffset; @@ -1243,7 +1393,8 @@ SMqRspObj* tmqHandleAllRsp(tmq_t* tmq, int64_t timeout, bool pollIfReset) { taosFreeQitem(pollRspWrapper); return pRsp; } else { - /*printf("epoch mismatch\n");*/ + tscDebug("msg discard since epoch mismatch: msg epoch %d, consumer epoch %d\n", pollRspWrapper->msg.head.epoch, + consumerEpoch); taosFreeQitem(pollRspWrapper); } } else { @@ -1263,10 +1414,14 @@ TAOS_RES* tmq_consumer_poll(tmq_t* tmq, int64_t timeout) { SMqRspObj* rspObj; int64_t startTime = taosGetTimestampMs(); +#if 0 + tmqHandleAllDelayedTask(tmq); + tmqPollImpl(tmq, timeout); rspObj = tmqHandleAllRsp(tmq, timeout, false); if (rspObj) { return (TAOS_RES*)rspObj; } +#endif // in no topic status also need process delayed task if (atomic_load_8(&tmq->status) == TMQ_CONSUMER_STATUS__INIT) { @@ -1359,8 +1514,7 @@ const char* tmq_get_table_name(TAOS_RES* res) { pRspObj->resIter >= pRspObj->rsp.blockNum) { return NULL; } - const char* name = taosArrayGetP(pRspObj->rsp.blockTbName, pRspObj->resIter); - return name; + return (const char*)taosArrayGetP(pRspObj->rsp.blockTbName, pRspObj->resIter); } return NULL; } diff --git a/source/common/src/tdatablock.c b/source/common/src/tdatablock.c index b7a9ef88b6..4d1f5a34b1 100644 --- a/source/common/src/tdatablock.c +++ b/source/common/src/tdatablock.c @@ -1756,7 +1756,7 @@ SSubmitReq* tdBlockToSubmit(const SArray* pBlocks, const STSchema* pTSchema, boo createTbReq.type = TSDB_CHILD_TABLE; createTbReq.ctb.suid = suid; - STagVal tagVal = {.cid = 1, + STagVal tagVal = {.cid = pDataBlock->info.numOfCols + 1, .type = TSDB_DATA_TYPE_UBIGINT, .pData = (uint8_t*)&pDataBlock->info.groupId, .nData = sizeof(uint64_t)}; @@ -1821,7 +1821,7 @@ SSubmitReq* tdBlockToSubmit(const SArray* pBlocks, const STSchema* pTSchema, boo createTbReq.type = TSDB_CHILD_TABLE; createTbReq.ctb.suid = suid; - STagVal tagVal = 
{.cid = 1, + STagVal tagVal = {.cid = pDataBlock->info.numOfCols + 1, .type = TSDB_DATA_TYPE_UBIGINT, .pData = (uint8_t*)&pDataBlock->info.groupId, .nData = sizeof(uint64_t)}; diff --git a/source/common/src/tmsg.c b/source/common/src/tmsg.c index d16ab57ea9..4ebd91b0da 100644 --- a/source/common/src/tmsg.c +++ b/source/common/src/tmsg.c @@ -4746,3 +4746,32 @@ void tFreeSMAlterStbRsp(SMAlterStbRsp *pRsp) { taosMemoryFree(pRsp->pMeta); } } + +int32_t tEncodeSTqOffset(SEncoder *pEncoder, const STqOffset *pOffset) { + if (tEncodeI8(pEncoder, pOffset->type) < 0) return -1; + if (pOffset->type == TMQ_OFFSET__SNAPSHOT) { + if (tEncodeI64(pEncoder, pOffset->uid) < 0) return -1; + if (tEncodeI64(pEncoder, pOffset->ts) < 0) return -1; + } else if (pOffset->type == TMQ_OFFSET__LOG) { + if (tEncodeI64(pEncoder, pOffset->version) < 0) return -1; + } else { + ASSERT(0); + } + if (tEncodeCStr(pEncoder, pOffset->subKey) < 0) return -1; + return 0; +} + +int32_t tDecodeSTqOffset(SDecoder *pDecoder, STqOffset *pOffset) { + if (tDecodeI8(pDecoder, &pOffset->type) < 0) return -1; + if (pOffset->type == TMQ_OFFSET__SNAPSHOT) { + if (tDecodeI64(pDecoder, &pOffset->uid) < 0) return -1; + if (tDecodeI64(pDecoder, &pOffset->ts) < 0) return -1; + } else if (pOffset->type == TMQ_OFFSET__LOG) { + if (tDecodeI64(pDecoder, &pOffset->version) < 0) return -1; + } else { + ASSERT(0); + } + if (tDecodeCStrTo(pDecoder, pOffset->subKey) < 0) return -1; + return 0; +} + diff --git a/source/dnode/mgmt/mgmt_vnode/src/vmHandle.c b/source/dnode/mgmt/mgmt_vnode/src/vmHandle.c index ee120576c3..750006d05f 100644 --- a/source/dnode/mgmt/mgmt_vnode/src/vmHandle.c +++ b/source/dnode/mgmt/mgmt_vnode/src/vmHandle.c @@ -344,6 +344,7 @@ SArray *vmGetMsgHandles() { if (dmSetMgmtHandle(pArray, TDMT_VND_SUBMIT_RSMA, vmPutMsgToWriteQueue, 0) == NULL) goto _OVER; if (dmSetMgmtHandle(pArray, TDMT_VND_MQ_VG_CHANGE, vmPutMsgToWriteQueue, 0) == NULL) goto _OVER; if (dmSetMgmtHandle(pArray, TDMT_VND_MQ_VG_DELETE, 
vmPutMsgToWriteQueue, 0) == NULL) goto _OVER; + if (dmSetMgmtHandle(pArray, TDMT_VND_MQ_COMMIT_OFFSET, vmPutMsgToWriteQueue, 0) == NULL) goto _OVER; if (dmSetMgmtHandle(pArray, TDMT_VND_CONSUME, vmPutMsgToFetchQueue, 0) == NULL) goto _OVER; if (dmSetMgmtHandle(pArray, TDMT_VND_DELETE, vmPutMsgToWriteQueue, 0) == NULL) goto _OVER; if (dmSetMgmtHandle(pArray, TDMT_VND_QUERY_HEARTBEAT, vmPutMsgToFetchQueue, 0) == NULL) goto _OVER; diff --git a/source/dnode/vnode/src/inc/tq.h b/source/dnode/vnode/src/inc/tq.h index 5a8564bfd1..2ee0673ce5 100644 --- a/source/dnode/vnode/src/inc/tq.h +++ b/source/dnode/vnode/src/inc/tq.h @@ -41,7 +41,6 @@ extern "C" { #define tqTrace(...) do { if (tqDebugFlag & DEBUG_TRACE) { taosPrintLog("TQ ", DEBUG_TRACE, tqDebugFlag, __VA_ARGS__); }} while(0) // clang-format on -typedef struct STqOffsetCfg STqOffsetCfg; typedef struct STqOffsetStore STqOffsetStore; // tqRead @@ -127,14 +126,15 @@ typedef struct { } STqHandle; struct STQ { - char* path; - SHashObj* pushMgr; // consumerId -> STqHandle* - SHashObj* handles; // subKey -> STqHandle - SHashObj* pStreamTasks; // taksId -> SStreamTask - SVnode* pVnode; - SWal* pWal; - TDB* pMetaStore; - TTB* pExecStore; + char* path; + SHashObj* pushMgr; // consumerId -> STqHandle* + SHashObj* handles; // subKey -> STqHandle + SHashObj* pStreamTasks; // taksId -> SStreamTask + STqOffsetStore* pOffsetStore; + SVnode* pVnode; + SWal* pWal; + TDB* pMetaStore; + TTB* pExecStore; }; typedef struct { @@ -157,17 +157,19 @@ int32_t tqMetaClose(STQ* pTq); int32_t tqMetaSaveHandle(STQ* pTq, const char* key, const STqHandle* pHandle); int32_t tqMetaDeleteHandle(STQ* pTq, const char* key); +typedef struct { + int32_t size; +} STqOffsetHead; + +STqOffsetStore* tqOffsetOpen(); +void tqOffsetClose(STqOffsetStore*); +STqOffset* tqOffsetRead(STqOffsetStore* pStore, const char* subscribeKey); +int32_t tqOffsetWrite(STqOffsetStore* pStore, const STqOffset* pOffset); +int32_t tqOffsetSnapshot(STqOffsetStore* pStore); + // 
tqSink void tqTableSink(SStreamTask* pTask, void* vnode, int64_t ver, void* data); -// tqOffset -STqOffsetStore* tqOffsetOpen(STqOffsetCfg*); -void tqOffsetClose(STqOffsetStore*); -int64_t tqOffsetFetch(STqOffsetStore* pStore, const char* subscribeKey); -int32_t tqOffsetCommit(STqOffsetStore* pStore, const char* subscribeKey, int64_t offset); -int32_t tqOffsetPersist(STqOffsetStore* pStore, const char* subscribeKey); -int32_t tqOffsetPersistAll(STqOffsetStore* pStore); - #ifdef __cplusplus } #endif diff --git a/source/dnode/vnode/src/inc/vnodeInt.h b/source/dnode/vnode/src/inc/vnodeInt.h index 0c2b09a493..300a5f890e 100644 --- a/source/dnode/vnode/src/inc/vnodeInt.h +++ b/source/dnode/vnode/src/inc/vnodeInt.h @@ -137,6 +137,7 @@ int tqCommit(STQ*); int32_t tqUpdateTbUidList(STQ* pTq, const SArray* tbUidList, bool isAdd); int32_t tqProcessVgChangeReq(STQ* pTq, char* msg, int32_t msgLen); int32_t tqProcessVgDeleteReq(STQ* pTq, char* msg, int32_t msgLen); +int32_t tqProcessOffsetCommitReq(STQ* pTq, char* msg, int32_t msgLen); int32_t tqProcessPollReq(STQ* pTq, SRpcMsg* pMsg, int32_t workerId); int32_t tqProcessTaskDeploy(STQ* pTq, char* msg, int32_t msgLen); int32_t tqProcessStreamTrigger(STQ* pTq, SSubmitReq* data); diff --git a/source/dnode/vnode/src/tq/tq.c b/source/dnode/vnode/src/tq/tq.c index 4a5ea49d79..22685c1e19 100644 --- a/source/dnode/vnode/src/tq/tq.c +++ b/source/dnode/vnode/src/tq/tq.c @@ -66,19 +66,23 @@ STQ* tqOpen(const char* path, SVnode* pVnode, SWal* pWal) { ASSERT(0); } + if (tqOffsetOpen(pTq) < 0) { + ASSERT(0); + } + return pTq; } void tqClose(STQ* pTq) { if (pTq) { - taosMemoryFreeClear(pTq->path); + tqOffsetClose(pTq->pOffsetStore); taosHashCleanup(pTq->handles); taosHashCleanup(pTq->pStreamTasks); taosHashCleanup(pTq->pushMgr); + taosMemoryFree(pTq->path); tqMetaClose(pTq); taosMemoryFree(pTq); } - // TODO } int32_t tqSendPollRsp(STQ* pTq, const SRpcMsg* pMsg, const SMqPollReq* pReq, const SMqDataBlkRsp* pRsp) { @@ -109,6 +113,33 @@ int32_t 
tqSendPollRsp(STQ* pTq, const SRpcMsg* pMsg, const SMqPollReq* pReq, con return 0; } +int32_t tqProcessOffsetCommitReq(STQ* pTq, char* msg, int32_t msgLen) { + STqOffset offset = {0}; + SDecoder decoder; + tDecoderInit(&decoder, msg, msgLen); + if (tDecodeSTqOffset(&decoder, &offset) < 0) { + ASSERT(0); + return -1; + } + tDecoderClear(&decoder); + + if (offset.type == TMQ_OFFSET__SNAPSHOT) { + tqDebug("receive offset commit msg to %s, offset(type:snapshot) uid: %ld, ts: %ld", offset.subKey, offset.uid, + offset.ts); + } else if (offset.type == TMQ_OFFSET__LOG) { + tqDebug("receive offset commit msg to %s, offset(type:log) version: %ld", offset.subKey, offset.version); + } else { + ASSERT(0); + } + + if (tqOffsetWrite(pTq->pOffsetStore, &offset) < 0) { + ASSERT(0); + return -1; + } + + return 0; +} + int32_t tqProcessPollReq(STQ* pTq, SRpcMsg* pMsg, int32_t workerId) { SMqPollReq* pReq = pMsg->pCont; int64_t consumerId = pReq->consumerId; diff --git a/source/dnode/vnode/src/tq/tqOffset.c b/source/dnode/vnode/src/tq/tqOffset.c index 4d83a67579..2db49d7cf5 100644 --- a/source/dnode/vnode/src/tq/tqOffset.c +++ b/source/dnode/vnode/src/tq/tqOffset.c @@ -16,26 +16,113 @@ #include "tq.h" -enum ETqOffsetPersist { - TQ_OFFSET_PERSIST__LAZY = 1, - TQ_OFFSET_PERSIST__EAGER, -}; - -struct STqOffsetCfg { - int8_t persistPolicy; -}; - struct STqOffsetStore { - STqOffsetCfg cfg; - SHashObj* pHash; // SHashObj + STQ* pTq; + SHashObj* pHash; // SHashObj }; -STqOffsetStore* tqOffsetOpen(STqOffsetCfg* pCfg) { - STqOffsetStore* pStore = taosMemoryMalloc(sizeof(STqOffsetStore)); +STqOffsetStore* tqOffsetOpen(STQ* pTq) { + STqOffsetStore* pStore = taosMemoryCalloc(1, sizeof(STqOffsetStore)); if (pStore == NULL) { return NULL; } - memcpy(&pStore->cfg, pCfg, sizeof(STqOffsetCfg)); pStore->pHash = taosHashInit(64, MurmurHash3_32, true, HASH_NO_LOCK); + if (pStore->pHash == NULL) { + if (pStore->pHash) taosHashCleanup(pStore->pHash); + return NULL; + } + TdFilePtr pFile = 
taosOpenFile(pStore->pTq->path, TD_FILE_READ); + if (pFile != NULL) { + STqOffsetHead head = {0}; + int64_t code; + + while (1) { + if ((code = taosReadFile(pFile, &head, sizeof(STqOffsetHead))) != sizeof(STqOffsetHead)) { + if (code < 0) { + break; + } else { + ASSERT(0); + // TODO handle error + } + } + int32_t size = htonl(head.size); + void* memBuf = taosMemoryCalloc(1, size); + if ((code = taosReadFile(pFile, memBuf, size)) != size) { + ASSERT(0); + // TODO handle error + } + STqOffset offset; + SDecoder decoder; + tDecoderInit(&decoder, memBuf, size); + if (tDecodeSTqOffset(&decoder, &offset) < 0) { + ASSERT(0); + } + tDecoderClear(&decoder); + if (taosHashPut(pStore->pHash, offset.subKey, strlen(offset.subKey), &offset, sizeof(STqOffset)) < 0) { + ASSERT(0); + // TODO + } + } + + taosCloseFile(&pFile); + } return pStore; } + +void tqOffsetClose(STqOffsetStore* pStore) { + tqOffsetSnapshot(pStore); + taosHashCleanup(pStore->pHash); +} + +STqOffset* tqOffsetRead(STqOffsetStore* pStore, const char* subscribeKey) { + return (STqOffset*)taosHashGet(pStore->pHash, subscribeKey, strlen(subscribeKey)); +} + +int32_t tqOffsetWrite(STqOffsetStore* pStore, const STqOffset* pOffset) { + return taosHashPut(pStore->pHash, pOffset->subKey, strlen(pOffset->subKey), pOffset, sizeof(STqOffset)); +} + +int32_t tqOffsetSnapshot(STqOffsetStore* pStore) { + // open file + // TODO file name should be with a version + TdFilePtr pFile = taosOpenFile(pStore->pTq->path, TD_FILE_CREATE | TD_FILE_WRITE | TD_FILE_APPEND); + if (pFile == NULL) { + ASSERT(0); + return -1; + } + void* pIter = NULL; + while (1) { + pIter = taosHashIterate(pStore->pHash, pIter); + if (pIter == NULL) break; + STqOffset* pOffset = (STqOffset*)pIter; + int32_t bodyLen; + int32_t code; + tEncodeSize(tEncodeSTqOffset, pOffset, bodyLen, code); + ASSERT(code == 0); + if (code < 0) { + ASSERT(0); + taosHashCancelIterate(pStore->pHash, pIter); + return -1; + } + + int32_t totLen = sizeof(STqOffsetHead) + bodyLen; + 
void* buf = taosMemoryCalloc(1, totLen); + void* abuf = POINTER_SHIFT(buf, sizeof(STqOffsetHead)); + + ((STqOffsetHead*)buf)->size = htonl(bodyLen); + SEncoder encoder; + tEncoderInit(&encoder, abuf, bodyLen); + tEncodeSTqOffset(&encoder, pOffset); + // write file + int64_t writeLen; + if ((writeLen = taosWriteFile(pFile, buf, totLen)) != bodyLen) { + ASSERT(0); + tqError("write offset incomplete, len %d, write len %ld", bodyLen, writeLen); + taosHashCancelIterate(pStore->pHash, pIter); + return -1; + } + } + // close and rename file + taosCloseFile(&pFile); + return 0; +} diff --git a/source/dnode/vnode/src/vnd/vnodeSvr.c b/source/dnode/vnode/src/vnd/vnodeSvr.c index ab2efa4791..186ab4b528 100644 --- a/source/dnode/vnode/src/vnd/vnodeSvr.c +++ b/source/dnode/vnode/src/vnd/vnodeSvr.c @@ -148,17 +148,24 @@ int32_t vnodeProcessWriteReq(SVnode *pVnode, SRpcMsg *pMsg, int64_t version, SRp case TDMT_VND_MQ_VG_CHANGE: if (tqProcessVgChangeReq(pVnode->pTq, POINTER_SHIFT(pMsg->pCont, sizeof(SMsgHead)), pMsg->contLen - sizeof(SMsgHead)) < 0) { - // TODO: handle error + goto _err; } break; case TDMT_VND_MQ_VG_DELETE: if (tqProcessVgDeleteReq(pVnode->pTq, pMsg->pCont, pMsg->contLen) < 0) { - // TODO: handle error + goto _err; + } + break; + case TDMT_VND_MQ_COMMIT_OFFSET: + if (tqProcessOffsetCommitReq(pVnode->pTq, POINTER_SHIFT(pMsg->pCont, sizeof(SMsgHead)), + pMsg->contLen - sizeof(SMsgHead)) < 0) { + goto _err; } break; case TDMT_STREAM_TASK_DEPLOY: { if (tqProcessTaskDeploy(pVnode->pTq, POINTER_SHIFT(pMsg->pCont, sizeof(SMsgHead)), pMsg->contLen - sizeof(SMsgHead)) < 0) { + goto _err; } } break; case TDMT_VND_ALTER_CONFIRM: @@ -901,8 +908,8 @@ static int32_t vnodeProcessAlterConfirmReq(SVnode *pVnode, int64_t version, void static int32_t vnodeProcessAlterHasnRangeReq(SVnode *pVnode, int64_t version, void *pReq, int32_t len, SRpcMsg *pRsp) { vInfo("vgId:%d, alter hashrange msg will be processed", TD_VID(pVnode)); - // todo - // 1. stop work + // todo + // 1. 
stop work // 2. adjust hash range / compact / remove wals / rename vgroups // 3. reload sync return 0; From 7435221e3280de9b723bc8108b289e18ad6b2298 Mon Sep 17 00:00:00 2001 From: wangmm0220 Date: Tue, 14 Jun 2022 14:40:01 +0800 Subject: [PATCH 4/8] opt: optimize generating groupid in partition/group by tag --- include/util/taoserror.h | 2 ++ source/libs/executor/src/executorimpl.c | 16 +++++++++++----- source/util/src/terror.c | 1 + 3 files changed, 14 insertions(+), 5 deletions(-) diff --git a/include/util/taoserror.h b/include/util/taoserror.h index 03308e395f..8d9951f9e3 100644 --- a/include/util/taoserror.h +++ b/include/util/taoserror.h @@ -695,6 +695,8 @@ int32_t* taosGetErrno(); #define TSDB_CODE_RSMA_INVALID_ENV TAOS_DEF_ERROR_CODE(0, 0x3150) #define TSDB_CODE_RSMA_INVALID_STAT TAOS_DEF_ERROR_CODE(0, 0x3151) +//index +#define TSDB_CODE_INDEX_REBUILDING TAOS_DEF_ERROR_CODE(0, 0x3200) #ifdef __cplusplus } diff --git a/source/libs/executor/src/executorimpl.c b/source/libs/executor/src/executorimpl.c index e4c072a396..f878e9e027 100644 --- a/source/libs/executor/src/executorimpl.c +++ b/source/libs/executor/src/executorimpl.c @@ -4690,8 +4690,9 @@ SOperatorInfo* createOperatorTree(SPhysiNode* pPhyNode, SExecTaskInfo* pTaskInfo return NULL; } - SArray* groupKyes = extractPartitionColInfo(pTableScanNode->pPartitionKeys); - code = generateGroupIdMap(pTableListInfo, pHandle, groupKyes); //todo for json + SArray* groupKeys = extractPartitionColInfo(pTableScanNode->pPartitionKeys); + code = generateGroupIdMap(pTableListInfo, pHandle, groupKeys); //todo for json + taosArrayDestroy(groupKeys); if (code){ tsdbCleanupReadHandle(pDataReader); return NULL; @@ -4725,8 +4726,9 @@ SOperatorInfo* createOperatorTree(SPhysiNode* pPhyNode, SExecTaskInfo* pTaskInfo qDebug("%s pDataReader is not NULL", GET_TASKID(pTaskInfo)); } - SArray* groupKyes = extractPartitionColInfo(pTableScanNode->pPartitionKeys); - int32_t code = generateGroupIdMap(pTableListInfo, pHandle, groupKyes); 
//todo for json + SArray* groupKeys = extractPartitionColInfo(pTableScanNode->pPartitionKeys); + int32_t code = generateGroupIdMap(pTableListInfo, pHandle, groupKeys); //todo for json + taosArrayDestroy(groupKeys); if (code){ tsdbCleanupReadHandle(pDataReader); return NULL; @@ -5059,6 +5061,7 @@ SArray* extractColumnInfo(SNodeList* pNodeList) { } SArray* extractPartitionColInfo(SNodeList* pNodeList) { + if(!pNodeList) return NULL; size_t numOfCols = LIST_LENGTH(pNodeList); SArray* pList = taosArrayInit(numOfCols, sizeof(SColumn)); if (pList == NULL) { @@ -5163,7 +5166,9 @@ int32_t getTableList(void* metaHandle, int32_t tableType, uint64_t tableUid, STa SArray* res = taosArrayInit(8, sizeof(uint64_t)); code = doFilterTag(pTagCond, &metaArg, res); - if (code != TSDB_CODE_SUCCESS) { + if (code == TSDB_CODE_INDEX_REBUILDING){ // todo + // doFilter(); + } else if (code != TSDB_CODE_SUCCESS) { qError("failed to get tableIds, reason: %s, suid: %" PRIu64 "", tstrerror(code), tableUid); taosArrayDestroy(res); terrno = code; @@ -5171,6 +5176,7 @@ int32_t getTableList(void* metaHandle, int32_t tableType, uint64_t tableUid, STa } else { qDebug("sucess to get tableIds, size: %d, suid: %" PRIu64 "", (int)taosArrayGetSize(res), tableUid); } + for (int i = 0; i < taosArrayGetSize(res); i++) { STableKeyInfo info = {.lastKey = TSKEY_INITIAL_VAL, .uid = *(uint64_t*)taosArrayGet(res, i)}; taosArrayPush(pListInfo->pTableList, &info); diff --git a/source/util/src/terror.c b/source/util/src/terror.c index e122ad0ab6..079d5ef590 100644 --- a/source/util/src/terror.c +++ b/source/util/src/terror.c @@ -566,6 +566,7 @@ TAOS_DEFINE_ERROR(TSDB_CODE_TSMA_RM_SKEY_IN_HASH, "Rm tsma skey in cac TAOS_DEFINE_ERROR(TSDB_CODE_RSMA_INVALID_ENV, "Invalid rsma env") TAOS_DEFINE_ERROR(TSDB_CODE_RSMA_INVALID_STAT, "Invalid rsma state") +TAOS_DEFINE_ERROR(TSDB_CODE_INDEX_REBUILDING, "Index is rebuilding") #ifdef TAOS_ERROR_C }; From 5881ef26905b46e36cd2f054a3d837f47948a487 Mon Sep 17 00:00:00 2001 From: Liu 
Jicong Date: Tue, 14 Jun 2022 14:28:41 +0800 Subject: [PATCH 5/8] refactor: convert datablock to submit block --- include/common/tdatablock.h | 3 - source/common/src/tdatablock.c | 167 -------------------------- source/dnode/vnode/src/tq/tq.c | 2 +- source/dnode/vnode/src/tq/tqOffset.c | 19 ++- source/dnode/vnode/src/tq/tqSink.c | 169 +++++++++++++++++++++++++++ 5 files changed, 187 insertions(+), 173 deletions(-) diff --git a/include/common/tdatablock.h b/include/common/tdatablock.h index af46535c94..709462a744 100644 --- a/include/common/tdatablock.h +++ b/include/common/tdatablock.h @@ -234,9 +234,6 @@ int32_t buildSubmitReqFromDataBlock(SSubmitReq** pReq, const SArray* pDataBlocks char* buildCtbNameByGroupId(const char* stbName, uint64_t groupId); -SSubmitReq* tdBlockToSubmit(const SArray* pBlocks, const STSchema* pSchema, bool createTb, int64_t suid, - const char* stbFullName, int32_t vgId); - static FORCE_INLINE int32_t blockGetEncodeSize(const SSDataBlock* pBlock) { return blockDataGetSerialMetaSize(pBlock->info.numOfCols) + blockDataGetSize(pBlock); } diff --git a/source/common/src/tdatablock.c b/source/common/src/tdatablock.c index 4d1f5a34b1..b4ece426b3 100644 --- a/source/common/src/tdatablock.c +++ b/source/common/src/tdatablock.c @@ -1728,173 +1728,6 @@ char* buildCtbNameByGroupId(const char* stbName, uint64_t groupId) { return rname.childTableName; } -SSubmitReq* tdBlockToSubmit(const SArray* pBlocks, const STSchema* pTSchema, bool createTb, int64_t suid, - const char* stbFullName, int32_t vgId) { - SSubmitReq* ret = NULL; - SArray* tagArray = taosArrayInit(1, sizeof(STagVal)); - if (!tagArray) { - terrno = TSDB_CODE_OUT_OF_MEMORY; - return NULL; - } - - // cal size - int32_t cap = sizeof(SSubmitReq); - int32_t sz = taosArrayGetSize(pBlocks); - for (int32_t i = 0; i < sz; i++) { - SSDataBlock* pDataBlock = taosArrayGet(pBlocks, i); - int32_t rows = pDataBlock->info.rows; - // TODO min - int32_t rowSize = pDataBlock->info.rowSize; - int32_t maxLen = 
TD_ROW_MAX_BYTES_FROM_SCHEMA(pTSchema); - int32_t schemaLen = 0; - - if (createTb) { - SVCreateTbReq createTbReq = {0}; - char* cname = buildCtbNameByGroupId(stbFullName, pDataBlock->info.groupId); - createTbReq.name = cname; - createTbReq.flags = 0; - createTbReq.type = TSDB_CHILD_TABLE; - createTbReq.ctb.suid = suid; - - STagVal tagVal = {.cid = pDataBlock->info.numOfCols + 1, - .type = TSDB_DATA_TYPE_UBIGINT, - .pData = (uint8_t*)&pDataBlock->info.groupId, - .nData = sizeof(uint64_t)}; - STag* pTag = NULL; - taosArrayClear(tagArray); - taosArrayPush(tagArray, &tagVal); - tTagNew(tagArray, 1, false, &pTag); - if (pTag == NULL) { - tdDestroySVCreateTbReq(&createTbReq); - taosArrayDestroy(tagArray); - return NULL; - } - createTbReq.ctb.pTag = (uint8_t*)pTag; - - int32_t code; - tEncodeSize(tEncodeSVCreateTbReq, &createTbReq, schemaLen, code); - - tdDestroySVCreateTbReq(&createTbReq); - if (code < 0) { - taosArrayDestroy(tagArray); - return NULL; - } - } - - cap += sizeof(SSubmitBlk) + schemaLen + rows * maxLen; - } - - // assign data - // TODO - ret = taosMemoryCalloc(1, cap + 46); - ret = POINTER_SHIFT(ret, 46); - ret->header.vgId = vgId; - ret->version = htonl(1); - ret->length = sizeof(SSubmitReq); - ret->numOfBlocks = htonl(sz); - - void* submitBlk = POINTER_SHIFT(ret, sizeof(SSubmitReq)); - for (int32_t i = 0; i < sz; i++) { - SSDataBlock* pDataBlock = taosArrayGet(pBlocks, i); - - SSubmitBlk* blkHead = submitBlk; - blkHead->numOfRows = htons(pDataBlock->info.rows); - blkHead->sversion = htonl(pTSchema->version); - // TODO - blkHead->suid = htobe64(suid); - // uid is assigned by vnode - blkHead->uid = 0; - - int32_t rows = pDataBlock->info.rows; - /*int32_t maxLen = TD_ROW_MAX_BYTES_FROM_SCHEMA(pTSchema);*/ - /*blkHead->dataLen = htonl(rows * maxLen);*/ - blkHead->dataLen = 0; - - void* blockData = POINTER_SHIFT(submitBlk, sizeof(SSubmitBlk)); - - int32_t schemaLen = 0; - if (createTb) { - SVCreateTbReq createTbReq = {0}; - char* cname = 
buildCtbNameByGroupId(stbFullName, pDataBlock->info.groupId); - createTbReq.name = cname; - createTbReq.flags = 0; - createTbReq.type = TSDB_CHILD_TABLE; - createTbReq.ctb.suid = suid; - - STagVal tagVal = {.cid = pDataBlock->info.numOfCols + 1, - .type = TSDB_DATA_TYPE_UBIGINT, - .pData = (uint8_t*)&pDataBlock->info.groupId, - .nData = sizeof(uint64_t)}; - taosArrayClear(tagArray); - taosArrayPush(tagArray, &tagVal); - STag* pTag = NULL; - tTagNew(tagArray, 1, false, &pTag); - if (pTag == NULL) { - tdDestroySVCreateTbReq(&createTbReq); - taosArrayDestroy(tagArray); - taosMemoryFreeClear(ret); - return NULL; - } - createTbReq.ctb.pTag = (uint8_t*)pTag; - - int32_t code; - tEncodeSize(tEncodeSVCreateTbReq, &createTbReq, schemaLen, code); - if (code < 0) { - tdDestroySVCreateTbReq(&createTbReq); - taosArrayDestroy(tagArray); - taosMemoryFreeClear(ret); - return NULL; - } - - SEncoder encoder = {0}; - tEncoderInit(&encoder, blockData, schemaLen); - code = tEncodeSVCreateTbReq(&encoder, &createTbReq); - tEncoderClear(&encoder); - tdDestroySVCreateTbReq(&createTbReq); - - if (code < 0) { - taosArrayDestroy(tagArray); - taosMemoryFreeClear(ret); - return NULL; - } - } - blkHead->schemaLen = htonl(schemaLen); - - STSRow* rowData = POINTER_SHIFT(blockData, schemaLen); - - for (int32_t j = 0; j < rows; j++) { - SRowBuilder rb = {0}; - tdSRowInit(&rb, pTSchema->version); - tdSRowSetTpInfo(&rb, pTSchema->numOfCols, pTSchema->flen); - tdSRowResetBuf(&rb, rowData); - - for (int32_t k = 0; k < pTSchema->numOfCols; k++) { - const STColumn* pColumn = &pTSchema->columns[k]; - SColumnInfoData* pColData = taosArrayGet(pDataBlock->pDataBlock, k); - if (colDataIsNull_s(pColData, j)) { - tdAppendColValToRow(&rb, pColumn->colId, pColumn->type, TD_VTYPE_NONE, NULL, false, pColumn->offset, k); - } else { - void* data = colDataGetData(pColData, j); - tdAppendColValToRow(&rb, pColumn->colId, pColumn->type, TD_VTYPE_NORM, data, true, pColumn->offset, k); - } - } - int32_t rowLen = 
TD_ROW_LEN(rowData); - rowData = POINTER_SHIFT(rowData, rowLen); - blkHead->dataLen += rowLen; - } - int32_t dataLen = blkHead->dataLen; - blkHead->dataLen = htonl(dataLen); - - ret->length += sizeof(SSubmitBlk) + schemaLen + dataLen; - blkHead = POINTER_SHIFT(blkHead, schemaLen + dataLen); - /*submitBlk = blkHead;*/ - } - - ret->length = htonl(ret->length); - taosArrayDestroy(tagArray); - return ret; -} - void blockCompressEncode(const SSDataBlock* pBlock, char* data, int32_t* dataLen, int32_t numOfCols, int8_t needCompress) { // todo extract method diff --git a/source/dnode/vnode/src/tq/tq.c b/source/dnode/vnode/src/tq/tq.c index 22685c1e19..9f34ae39c0 100644 --- a/source/dnode/vnode/src/tq/tq.c +++ b/source/dnode/vnode/src/tq/tq.c @@ -47,7 +47,7 @@ void tqCleanUp() { } STQ* tqOpen(const char* path, SVnode* pVnode, SWal* pWal) { - STQ* pTq = taosMemoryMalloc(sizeof(STQ)); + STQ* pTq = taosMemoryCalloc(1, sizeof(STQ)); if (pTq == NULL) { terrno = TSDB_CODE_TQ_OUT_OF_MEMORY; return NULL; diff --git a/source/dnode/vnode/src/tq/tqOffset.c b/source/dnode/vnode/src/tq/tqOffset.c index 2db49d7cf5..41444b7288 100644 --- a/source/dnode/vnode/src/tq/tqOffset.c +++ b/source/dnode/vnode/src/tq/tqOffset.c @@ -17,21 +17,33 @@ #include "tq.h" struct STqOffsetStore { + char* fname; STQ* pTq; SHashObj* pHash; // SHashObj }; +static char* buildFileName(const char* path) { + int32_t len = strlen(path); + char* fname = taosMemoryCalloc(1, len + 20); + snprintf(fname, len + 20, "%s/offset", path); + return fname; +} + STqOffsetStore* tqOffsetOpen(STQ* pTq) { STqOffsetStore* pStore = taosMemoryCalloc(1, sizeof(STqOffsetStore)); if (pStore == NULL) { return NULL; } + pStore->pTq = pTq; + pTq->pOffsetStore = pStore; + pStore->pHash = taosHashInit(64, MurmurHash3_32, true, HASH_NO_LOCK); if (pStore->pHash == NULL) { if (pStore->pHash) taosHashCleanup(pStore->pHash); return NULL; } - TdFilePtr pFile = taosOpenFile(pStore->pTq->path, TD_FILE_READ); + char* fname = 
buildFileName(pStore->pTq->path); + TdFilePtr pFile = taosOpenFile(fname, TD_FILE_READ); if (pFile != NULL) { STqOffsetHead head = {0}; int64_t code; @@ -65,6 +77,7 @@ STqOffsetStore* tqOffsetOpen(STQ* pTq) { } taosCloseFile(&pFile); + taosMemoryFree(fname); } return pStore; } @@ -85,7 +98,8 @@ int32_t tqOffsetWrite(STqOffsetStore* pStore, const STqOffset* pOffset) { int32_t tqOffsetSnapshot(STqOffsetStore* pStore) { // open file // TODO file name should be with a version - TdFilePtr pFile = taosOpenFile(pStore->pTq->path, TD_FILE_CREATE | TD_FILE_WRITE | TD_FILE_APPEND); + char* fname = buildFileName(pStore->pTq->path); + TdFilePtr pFile = taosOpenFile(fname, TD_FILE_CREATE | TD_FILE_WRITE | TD_FILE_APPEND); if (pFile == NULL) { ASSERT(0); return -1; @@ -124,5 +138,6 @@ int32_t tqOffsetSnapshot(STqOffsetStore* pStore) { } // close and rename file taosCloseFile(&pFile); + taosMemoryFree(fname); return 0; } diff --git a/source/dnode/vnode/src/tq/tqSink.c b/source/dnode/vnode/src/tq/tqSink.c index 5c0bf971fb..391a008440 100644 --- a/source/dnode/vnode/src/tq/tqSink.c +++ b/source/dnode/vnode/src/tq/tqSink.c @@ -15,6 +15,175 @@ #include "tq.h" +static SSubmitReq* tdBlockToSubmit(const SArray* pBlocks, const STSchema* pSchema, bool createTb, int64_t suid, + const char* stbFullName, int32_t vgId); + +static SSubmitReq* tdBlockToSubmit(const SArray* pBlocks, const STSchema* pTSchema, bool createTb, int64_t suid, + const char* stbFullName, int32_t vgId) { + SSubmitReq* ret = NULL; + SArray* tagArray = taosArrayInit(1, sizeof(STagVal)); + if (!tagArray) { + terrno = TSDB_CODE_OUT_OF_MEMORY; + return NULL; + } + + // cal size + int32_t cap = sizeof(SSubmitReq); + int32_t sz = taosArrayGetSize(pBlocks); + for (int32_t i = 0; i < sz; i++) { + SSDataBlock* pDataBlock = taosArrayGet(pBlocks, i); + int32_t rows = pDataBlock->info.rows; + // TODO min + int32_t rowSize = pDataBlock->info.rowSize; + int32_t maxLen = TD_ROW_MAX_BYTES_FROM_SCHEMA(pTSchema); + int32_t schemaLen = 0; + 
+ if (createTb) { + SVCreateTbReq createTbReq = {0}; + char* cname = buildCtbNameByGroupId(stbFullName, pDataBlock->info.groupId); + createTbReq.name = cname; + createTbReq.flags = 0; + createTbReq.type = TSDB_CHILD_TABLE; + createTbReq.ctb.suid = suid; + + STagVal tagVal = {.cid = pDataBlock->info.numOfCols + 1, + .type = TSDB_DATA_TYPE_UBIGINT, + .pData = (uint8_t*)&pDataBlock->info.groupId, + .nData = sizeof(uint64_t)}; + STag* pTag = NULL; + taosArrayClear(tagArray); + taosArrayPush(tagArray, &tagVal); + tTagNew(tagArray, 1, false, &pTag); + if (pTag == NULL) { + tdDestroySVCreateTbReq(&createTbReq); + taosArrayDestroy(tagArray); + return NULL; + } + createTbReq.ctb.pTag = (uint8_t*)pTag; + + int32_t code; + tEncodeSize(tEncodeSVCreateTbReq, &createTbReq, schemaLen, code); + + tdDestroySVCreateTbReq(&createTbReq); + if (code < 0) { + taosArrayDestroy(tagArray); + return NULL; + } + } + + cap += sizeof(SSubmitBlk) + schemaLen + rows * maxLen; + } + + // assign data + // TODO + ret = rpcMallocCont(cap); + ret->header.vgId = vgId; + ret->version = htonl(1); + ret->length = sizeof(SSubmitReq); + ret->numOfBlocks = htonl(sz); + + void* submitBlk = POINTER_SHIFT(ret, sizeof(SSubmitReq)); + for (int32_t i = 0; i < sz; i++) { + SSDataBlock* pDataBlock = taosArrayGet(pBlocks, i); + + SSubmitBlk* blkHead = submitBlk; + blkHead->numOfRows = htons(pDataBlock->info.rows); + blkHead->sversion = htonl(pTSchema->version); + // TODO + blkHead->suid = htobe64(suid); + // uid is assigned by vnode + blkHead->uid = 0; + + int32_t rows = pDataBlock->info.rows; + /*int32_t maxLen = TD_ROW_MAX_BYTES_FROM_SCHEMA(pTSchema);*/ + /*blkHead->dataLen = htonl(rows * maxLen);*/ + blkHead->dataLen = 0; + + void* blockData = POINTER_SHIFT(submitBlk, sizeof(SSubmitBlk)); + + int32_t schemaLen = 0; + if (createTb) { + SVCreateTbReq createTbReq = {0}; + char* cname = buildCtbNameByGroupId(stbFullName, pDataBlock->info.groupId); + createTbReq.name = cname; + createTbReq.flags = 0; + 
createTbReq.type = TSDB_CHILD_TABLE; + createTbReq.ctb.suid = suid; + + STagVal tagVal = {.cid = pDataBlock->info.numOfCols + 1, + .type = TSDB_DATA_TYPE_UBIGINT, + .pData = (uint8_t*)&pDataBlock->info.groupId, + .nData = sizeof(uint64_t)}; + taosArrayClear(tagArray); + taosArrayPush(tagArray, &tagVal); + STag* pTag = NULL; + tTagNew(tagArray, 1, false, &pTag); + if (pTag == NULL) { + tdDestroySVCreateTbReq(&createTbReq); + taosArrayDestroy(tagArray); + taosMemoryFreeClear(ret); + return NULL; + } + createTbReq.ctb.pTag = (uint8_t*)pTag; + + int32_t code; + tEncodeSize(tEncodeSVCreateTbReq, &createTbReq, schemaLen, code); + if (code < 0) { + tdDestroySVCreateTbReq(&createTbReq); + taosArrayDestroy(tagArray); + taosMemoryFreeClear(ret); + return NULL; + } + + SEncoder encoder = {0}; + tEncoderInit(&encoder, blockData, schemaLen); + code = tEncodeSVCreateTbReq(&encoder, &createTbReq); + tEncoderClear(&encoder); + tdDestroySVCreateTbReq(&createTbReq); + + if (code < 0) { + taosArrayDestroy(tagArray); + taosMemoryFreeClear(ret); + return NULL; + } + } + blkHead->schemaLen = htonl(schemaLen); + + STSRow* rowData = POINTER_SHIFT(blockData, schemaLen); + + for (int32_t j = 0; j < rows; j++) { + SRowBuilder rb = {0}; + tdSRowInit(&rb, pTSchema->version); + tdSRowSetTpInfo(&rb, pTSchema->numOfCols, pTSchema->flen); + tdSRowResetBuf(&rb, rowData); + + for (int32_t k = 0; k < pTSchema->numOfCols; k++) { + const STColumn* pColumn = &pTSchema->columns[k]; + SColumnInfoData* pColData = taosArrayGet(pDataBlock->pDataBlock, k); + if (colDataIsNull_s(pColData, j)) { + tdAppendColValToRow(&rb, pColumn->colId, pColumn->type, TD_VTYPE_NONE, NULL, false, pColumn->offset, k); + } else { + void* data = colDataGetData(pColData, j); + tdAppendColValToRow(&rb, pColumn->colId, pColumn->type, TD_VTYPE_NORM, data, true, pColumn->offset, k); + } + } + int32_t rowLen = TD_ROW_LEN(rowData); + rowData = POINTER_SHIFT(rowData, rowLen); + blkHead->dataLen += rowLen; + } + int32_t dataLen = 
blkHead->dataLen; + blkHead->dataLen = htonl(dataLen); + + ret->length += sizeof(SSubmitBlk) + schemaLen + dataLen; + blkHead = POINTER_SHIFT(blkHead, schemaLen + dataLen); + /*submitBlk = blkHead;*/ + } + + ret->length = htonl(ret->length); + taosArrayDestroy(tagArray); + return ret; +} + void tqTableSink(SStreamTask* pTask, void* vnode, int64_t ver, void* data) { const SArray* pRes = (const SArray*)data; SVnode* pVnode = (SVnode*)vnode; From 79d9c04a4c6118069c3065434d7bc71815f73685 Mon Sep 17 00:00:00 2001 From: Liu Jicong Date: Tue, 14 Jun 2022 15:22:39 +0800 Subject: [PATCH 6/8] fix: check error code --- source/dnode/vnode/src/tq/tqOffset.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/source/dnode/vnode/src/tq/tqOffset.c b/source/dnode/vnode/src/tq/tqOffset.c index 41444b7288..8d6cb28065 100644 --- a/source/dnode/vnode/src/tq/tqOffset.c +++ b/source/dnode/vnode/src/tq/tqOffset.c @@ -50,7 +50,7 @@ STqOffsetStore* tqOffsetOpen(STQ* pTq) { while (1) { if ((code = taosReadFile(pFile, &head, sizeof(STqOffsetHead))) != sizeof(STqOffsetHead)) { - if (code < 0) { + if (code == 0) { break; } else { ASSERT(0); From 18c0e0ac7411e223741f60bfda569bb029884b0e Mon Sep 17 00:00:00 2001 From: Liu Jicong Date: Tue, 14 Jun 2022 15:31:47 +0800 Subject: [PATCH 7/8] fix(query): cast --- source/client/src/clientImpl.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/source/client/src/clientImpl.c b/source/client/src/clientImpl.c index c6cdd42584..6c3132a3ca 100644 --- a/source/client/src/clientImpl.c +++ b/source/client/src/clientImpl.c @@ -1415,7 +1415,7 @@ int32_t setResultDataPtr(SReqResultInfo* pResultInfo, TAOS_FIELD* pFields, int32 int32_t bytes = *(int32_t*)p; p += sizeof(int32_t); - ASSERT(type == pFields[i].type && bytes == pFields[i].bytes); + /*ASSERT(type == pFields[i].type && bytes == pFields[i].bytes);*/ } int32_t* colLength = (int32_t*)p; From 04593fa44ec4e37d13832ec3e8ce4efe07da24f5 Mon Sep 17 00:00:00 2001 From: Liu Jicong Date: 
Tue, 14 Jun 2022 16:38:33 +0800 Subject: [PATCH 8/8] fix(stream): build table name --- source/client/src/tmq.c | 12 ++++++++---- source/common/src/tdatablock.c | 1 + source/dnode/vnode/src/tq/tqSink.c | 20 +++++++++++--------- 3 files changed, 20 insertions(+), 13 deletions(-) diff --git a/source/client/src/tmq.c b/source/client/src/tmq.c index 48c85cf265..10c6c3623a 100644 --- a/source/client/src/tmq.c +++ b/source/client/src/tmq.c @@ -199,8 +199,8 @@ typedef struct { int8_t automatic; int8_t async; int8_t freeOffsets; - int8_t waitingRspNum; - int8_t totalRspNum; + int32_t waitingRspNum; + int32_t totalRspNum; tmq_resp_err_t rspErr; tmq_commit_cb* userCb; SArray* successfulOffsets; @@ -373,8 +373,9 @@ int32_t tmqCommitCb2(void* param, const SDataBuf* pBuf, int32_t code) { } else { taosArrayPush(pParamSet->successfulOffsets, &pParam->pOffset); } + // count down waiting rsp - int8_t waitingRspNum = atomic_sub_fetch_8(&pParam->params->waitingRspNum, 1); + int32_t waitingRspNum = atomic_sub_fetch_32(&pParamSet->waitingRspNum, 1); ASSERT(waitingRspNum >= 0); if (waitingRspNum == 0) { @@ -395,7 +396,8 @@ int32_t tmqCommitCb2(void* param, const SDataBuf* pBuf, int32_t code) { return 0; } -int32_t tmqComitInner2(tmq_t* tmq, int8_t automatic, int8_t async, tmq_commit_cb* userCb, void* userParam) { +int32_t tmqCommitInner2(tmq_t* tmq, const tmq_topic_vgroup_list_t* offsets, int8_t automatic, int8_t async, + tmq_commit_cb* userCb, void* userParam) { int32_t code = -1; SMqCommitCbParamSet* pParamSet = taosMemoryCalloc(1, sizeof(SMqCommitCbParamSet)); @@ -466,6 +468,8 @@ int32_t tmqComitInner2(tmq_t* tmq, int8_t automatic, int8_t async, tmq_commit_cb SEpSet epSet = getEpSet_s(&tmq->pTscObj->pAppInfo->mgmtEp); int64_t transporterId = 0; asyncSendMsgToServer(tmq->pTscObj->pAppInfo->pTransporter, &epSet, &transporterId, pMsgSendInfo); + pParamSet->waitingRspNum++; + pParamSet->totalRspNum++; } } diff --git a/source/common/src/tdatablock.c b/source/common/src/tdatablock.c index 
b4ece426b3..5a2aaed74e 100644 --- a/source/common/src/tdatablock.c +++ b/source/common/src/tdatablock.c @@ -1708,6 +1708,7 @@ char* buildCtbNameByGroupId(const char* stbName, uint64_t groupId) { pTag->keyLen = strlen(pTag->key); pTag->type = TSDB_DATA_TYPE_UBIGINT; pTag->u = groupId; + pTag->length = sizeof(uint64_t); taosArrayPush(tags, &pTag); void* cname = taosMemoryCalloc(1, TSDB_TABLE_NAME_LEN + 1); diff --git a/source/dnode/vnode/src/tq/tqSink.c b/source/dnode/vnode/src/tq/tqSink.c index 391a008440..0cca1d2e10 100644 --- a/source/dnode/vnode/src/tq/tqSink.c +++ b/source/dnode/vnode/src/tq/tqSink.c @@ -46,11 +46,12 @@ static SSubmitReq* tdBlockToSubmit(const SArray* pBlocks, const STSchema* pTSche createTbReq.type = TSDB_CHILD_TABLE; createTbReq.ctb.suid = suid; - STagVal tagVal = {.cid = pDataBlock->info.numOfCols + 1, - .type = TSDB_DATA_TYPE_UBIGINT, - .pData = (uint8_t*)&pDataBlock->info.groupId, - .nData = sizeof(uint64_t)}; - STag* pTag = NULL; + STagVal tagVal = { + .cid = pDataBlock->info.numOfCols + 1, + .type = TSDB_DATA_TYPE_UBIGINT, + .i64 = (int64_t)pDataBlock->info.groupId, + }; + STag* pTag = NULL; taosArrayClear(tagArray); taosArrayPush(tagArray, &tagVal); tTagNew(tagArray, 1, false, &pTag); @@ -110,10 +111,11 @@ static SSubmitReq* tdBlockToSubmit(const SArray* pBlocks, const STSchema* pTSche createTbReq.type = TSDB_CHILD_TABLE; createTbReq.ctb.suid = suid; - STagVal tagVal = {.cid = pDataBlock->info.numOfCols + 1, - .type = TSDB_DATA_TYPE_UBIGINT, - .pData = (uint8_t*)&pDataBlock->info.groupId, - .nData = sizeof(uint64_t)}; + STagVal tagVal = { + .cid = pDataBlock->info.numOfCols + 1, + .type = TSDB_DATA_TYPE_UBIGINT, + .i64 = (int64_t)pDataBlock->info.groupId, + }; taosArrayClear(tagArray); taosArrayPush(tagArray, &tagVal); STag* pTag = NULL;