From 7cab27110ad2b350df76600d59065565b99a2d8a Mon Sep 17 00:00:00 2001 From: Yihao Deng Date: Mon, 24 Jun 2024 12:26:10 +0000 Subject: [PATCH 01/34] add self check info --- source/libs/stream/inc/streamBackendRocksdb.h | 5 +- source/libs/stream/src/streamBackendRocksdb.c | 159 ++++++++++++++++-- source/libs/stream/src/streamCheckpoint.c | 63 +++---- source/libs/stream/test/backendTest.cpp | 38 ++--- 4 files changed, 197 insertions(+), 68 deletions(-) diff --git a/source/libs/stream/inc/streamBackendRocksdb.h b/source/libs/stream/inc/streamBackendRocksdb.h index 6b81ac87ee..ebeedcb5d2 100644 --- a/source/libs/stream/inc/streamBackendRocksdb.h +++ b/source/libs/stream/inc/streamBackendRocksdb.h @@ -136,7 +136,7 @@ void* streamBackendInit(const char* path, int64_t chkpId, int32_t vgId); void streamBackendCleanup(void* arg); void streamBackendHandleCleanup(void* arg); int32_t streamBackendLoadCheckpointInfo(void* pMeta); -int32_t streamBackendDoCheckpoint(void* pMeta, int64_t checkpointId); +int32_t streamBackendDoCheckpoint(void* pMeta, int64_t checkpointId, int64_t processver); SListNode* streamBackendAddCompare(void* backend, void* arg); void streamBackendDelCompare(void* backend, void* arg); int32_t streamStateCvtDataFormat(char* path, char* key, void* cfInst); @@ -144,7 +144,6 @@ int32_t streamStateCvtDataFormat(char* path, char* key, void* cfInst); STaskDbWrapper* taskDbOpen(const char* path, const char* key, int64_t chkptId); void taskDbDestroy(void* pBackend, bool flush); void taskDbDestroy2(void* pBackend); -int32_t taskDbDoCheckpoint(void* arg, int64_t chkpId); void taskDbUpdateChkpId(void* pTaskDb, int64_t chkpId); @@ -249,7 +248,7 @@ int32_t streamBackendDelInUseChkp(void* arg, int64_t chkpId); int32_t taskDbBuildSnap(void* arg, SArray* pSnap); int32_t taskDbDestroySnap(void* arg, SArray* pSnapInfo); -int32_t taskDbDoCheckpoint(void* arg, int64_t chkpId); +int32_t taskDbDoCheckpoint(void* arg, int64_t chkpId, int64_t processId); SBkdMgt* bkdMgtCreate(char* path); int32_t bkdMgtAddChkp(SBkdMgt* bm, char* task, char* path); diff --git a/source/libs/stream/src/streamBackendRocksdb.c b/source/libs/stream/src/streamBackendRocksdb.c index c151193284..4915d4b122 100644 --- a/source/libs/stream/src/streamBackendRocksdb.c +++ b/source/libs/stream/src/streamBackendRocksdb.c @@ -140,7 +140,7 @@ int32_t valueDecode(void* value, int32_t vlen, int64_t* ttl, char** dest); int32_t valueToString(void* k, char* buf); int32_t valueIsStale(void* k, int64_t ts); -void destroyCompare(void* arg); +void destroyCompare(void* arg); static void cleanDir(const char* pPath, const char* id); static bool streamStateIterSeekAndValid(rocksdb_iterator_t* iter, char* buf, size_t len); @@ -194,9 +194,7 @@ int32_t getCfIdx(const char* cfName) { return idx; } -bool isValidCheckpoint(const char* dir) { - return true; -} +bool isValidCheckpoint(const char* dir) { return true; } int32_t rebuildDirFromCheckpoint(const char* path, int64_t chkpId, char** dst) { // impl later @@ -486,9 +484,7 @@ _ERROR: return code; } -int32_t backendCopyFiles(const char* src, const char* dst) { - return backendFileCopyFilesImpl(src, dst); -} +int32_t backendCopyFiles(const char* src, const char* dst) { return backendFileCopyFilesImpl(src, dst); } static int32_t rebuildFromLocalCheckpoint(const char* pTaskIdStr, const char* checkpointPath, int64_t checkpointId, const char* defaultPath) { @@ -540,7 +536,8 @@ int32_t restoreCheckpointData(const char* path, const char* key, int64_t chkptId char* chkptPath = taosMemoryCalloc(1, pathLen); if (chkptId > 0) { - snprintf(chkptPath, pathLen, "%s%s%s%s%s%" PRId64 "", prefixPath, TD_DIRSEP, "checkpoints", TD_DIRSEP, "checkpoint", chkptId); + snprintf(chkptPath, pathLen, "%s%s%s%s%s%" PRId64 "", prefixPath, TD_DIRSEP, "checkpoints", TD_DIRSEP, "checkpoint", + chkptId); code = rebuildFromLocalCheckpoint(key, chkptPath, chkptId, defaultPath); if (code != 0) { @@ -549,11 +546,12 @@ int32_t restoreCheckpointData(const char* path, const char* key, int64_t chkptId if (code != 0) { stError("failed to start stream backend at %s, reason: %s, restart from default defaultPath:%s", chkptPath, - tstrerror(code), defaultPath); - code = 0; // reset the error code + tstrerror(code), defaultPath); + code = 0; // reset the error code } } else { // no valid checkpoint id - stInfo("%s no valid checkpoint ever generated, no need to copy checkpoint data, clean defaultPath:%s", key, defaultPath); + stInfo("%s no valid checkpoint ever generated, no need to copy checkpoint data, clean defaultPath:%s", key, + defaultPath); cleanDir(defaultPath, key); } @@ -1142,7 +1140,7 @@ int32_t taskDbBuildSnap(void* arg, SArray* pSnap) { int64_t chkpId = pTaskDb->chkpId; taskDbRefChkp(pTaskDb, chkpId); - code = taskDbDoCheckpoint(pTaskDb, chkpId); + code = taskDbDoCheckpoint(pTaskDb, chkpId, 0); if (code != 0) { taskDbUnRefChkp(pTaskDb, chkpId); } @@ -1230,7 +1228,106 @@ int64_t taskGetDBRef(void* arg) { return pDb->refId; } -int32_t taskDbDoCheckpoint(void* arg, int64_t chkpId) { +int32_t chkpLoadExtraInfo(char* pChkpIdDir, int64_t* chkpId, int64_t* processId) { + TdFilePtr pFile = NULL; + int32_t code = -1; + + int32_t len = strlen(pChkpIdDir); + if (len == 0) { + terrno = TSDB_CODE_INVALID_PARA; + stError("failed to load extra info, dir:%s, reason:%s", pChkpIdDir, tstrerror(terrno)); + return -1; + } + + char* pDst = taosMemoryCalloc(1, len + 64); + if (pDst == NULL) { + terrno = TSDB_CODE_OUT_OF_MEMORY; + stError("failed to alloc memory to load extra info, dir:%s", pChkpIdDir); + goto _EXIT; + } + + if (sprintf(pDst, "%s%sinfo", pChkpIdDir, TD_DIRSEP) <= 0) { + code = -1; + stError("failed to build dst to load extra info, dir:%s", pChkpIdDir); + goto _EXIT; + } + + pFile = taosOpenFile(pDst, TD_FILE_READ); + if (pFile == NULL) { + terrno = TAOS_SYSTEM_ERROR(errno); + stError("failed to open file to load extra info, file:%s", pDst); + goto _EXIT; + } + + char buf[256] = {0}; + if (taosReadFile(pFile, buf, sizeof(buf)) <= 0) { + terrno = TAOS_SYSTEM_ERROR(errno); + stError("failed to read file to load extra info, file:%s, reason:%s", pDst, tstrerror(terrno)); + code = -1; + goto _EXIT; + } + + if (sscanf(buf, "%" PRId64 " %" PRId64 "", chkpId, processId) < 2) { + terrno = TSDB_CODE_INVALID_PARA; + stError("failed to read file content to load extra info, file:%s, reason:%s", pDst, tstrerror(terrno)); + } + code = 0; +_EXIT: + taosMemoryFree(pDst); + taosCloseFile(&pFile); + return code; +} +int32_t chkpAddExtraInfo(char* pChkpIdDir, int64_t chkpId, int64_t processId) { + TdFilePtr pFile = NULL; + int32_t code = -1; + + int32_t len = strlen(pChkpIdDir); + if (len == 0) { + terrno = TSDB_CODE_INVALID_PARA; + stError("failed to add extra info, dir:%s, reason:%s", pChkpIdDir, tstrerror(terrno)); + return -1; + } + + char* pDst = taosMemoryCalloc(1, len + 64); + if (pDst == NULL) { + terrno = TSDB_CODE_OUT_OF_MEMORY; + stError("failed to alloc memory to add extra info, dir:%s", pChkpIdDir); + goto _EXIT; + } + + if (sprintf(pDst, "%s%sinfo", pChkpIdDir, TD_DIRSEP) < 0) { + stError("failed to build dst to add extra info, dir:%s", pChkpIdDir); + goto _EXIT; + } + + pFile = taosOpenFile(pDst, TD_FILE_CREATE | TD_FILE_WRITE | TD_FILE_TRUNC); + if (pFile == NULL) { + terrno = TAOS_SYSTEM_ERROR(errno); + stError("failed to open file to add extra info, file:%s", pDst); + goto _EXIT; + } + + char buf[256] = {0}; + int n = snprintf(buf, sizeof(buf), "%" PRId64 " %" PRId64 "", chkpId, processId); + if (n <= 0 || n >= sizeof(buf)) { + code = -1; + stError("failed to build content to add extra info, dir:%s", pChkpIdDir); + goto _EXIT; + } + + if (taosWriteFile(pFile, buf, strlen(buf)) <= 0) { + terrno = TAOS_SYSTEM_ERROR(errno); + stError("failed to write file to add extra info, file:%s, reason:%s", pDst, tstrerror(terrno)); + goto _EXIT; + } + code = 0; + +_EXIT: + taosCloseFile(&pFile); + taosMemoryFree(pDst); + return code; +} +int32_t taskDbDoCheckpoint(void* arg, int64_t chkpId, int64_t processId) { STaskDbWrapper* pTaskDb = arg; int64_t st = taosGetTimestampMs(); int32_t code = -1; @@ -1254,32 +1351,58 @@ int32_t taskDbDoCheckpoint(void* arg, int64_t chkpId) { int64_t written = atomic_load_64(&pTaskDb->dataWritten); + // flush db if (written > 0) { stDebug("stream backend:%p start to flush db at:%s, data written:%" PRId64 "", pTaskDb, pChkpIdDir, written); code = chkpPreFlushDb(pTaskDb->db, ppCf, nCf); + if (code != 0) goto _EXIT; } else { stDebug("stream backend:%p not need flush db at:%s, data written:%" PRId64 "", pTaskDb, pChkpIdDir, written); } + + // do checkpoint if ((code = chkpDoDbCheckpoint(pTaskDb->db, pChkpIdDir)) != 0) { stError("stream backend:%p failed to do checkpoint at:%s", pTaskDb, pChkpIdDir); + goto _EXIT; } else { stDebug("stream backend:%p end to do checkpoint at:%s, time cost:%" PRId64 "ms", pTaskDb, pChkpIdDir, taosGetTimestampMs() - st); } + // add extra info to checkpoint + if ((code = chkpAddExtraInfo(pChkpIdDir, chkpId, processId)) != 0) { + stError("stream backend:%p failed to add extra info to checkpoint at:%s", pTaskDb, pChkpIdDir); + goto _EXIT; + } + + // delete ttl checkpoint code = chkpMayDelObsolete(pTaskDb, chkpId, pChkpDir); + if (code < 0) { + goto _EXIT; + } + atomic_store_64(&pTaskDb->dataWritten, 0); pTaskDb->chkpId = chkpId; _EXIT: - taosMemoryFree(pChkpDir); + + // clear checkpoint dir if failed + if (code != 0 && pChkpDir != NULL) { + if (taosDirExist(pChkpIdDir)) { + taosRemoveDir(pChkpIdDir); + } + } taosMemoryFree(pChkpIdDir); + taosMemoryFree(pChkpDir); + taosReleaseRef(taskDbWrapperId, refId); taosMemoryFree(ppCf); return code; } -int32_t streamBackendDoCheckpoint(void* arg, int64_t chkpId) { return taskDbDoCheckpoint(arg, chkpId); } +int32_t streamBackendDoCheckpoint(void* arg, int64_t chkpId, int64_t processVer) { + return taskDbDoCheckpoint(arg, chkpId, processVer); +} SListNode* streamBackendAddCompare(void* backend, void* arg) { SBackendWrapper* pHandle = (SBackendWrapper*)backend; @@ -2205,7 +2328,8 @@ int32_t taskDbGenChkpUploadData__rsync(STaskDbWrapper* pDb, int64_t chkpId, char return code; } -int32_t taskDbGenChkpUploadData__s3(STaskDbWrapper* pDb, void* bkdChkpMgt, int64_t chkpId, char** path, SArray* list, const char* idStr) { +int32_t taskDbGenChkpUploadData__s3(STaskDbWrapper* pDb, void* bkdChkpMgt, int64_t chkpId, char** path, SArray* list, + const char* idStr) { int32_t code = 0; SBkdMgt* p = (SBkdMgt*)bkdChkpMgt; @@ -2224,7 +2348,8 @@ int32_t taskDbGenChkpUploadData__s3(STaskDbWrapper* pDb, void* bkdChkpMgt, int64 return code; } -int32_t taskDbGenChkpUploadData(void* arg, void* mgt, int64_t chkpId, int8_t type, char** path, SArray* list, const char* idStr) { +int32_t taskDbGenChkpUploadData(void* arg, void* mgt, int64_t chkpId, int8_t type, char** path, SArray* list, + const char* idStr) { int32_t code = -1; STaskDbWrapper* pDb = arg; ECHECKPOINT_BACKUP_TYPE utype = type; diff --git a/source/libs/stream/src/streamCheckpoint.c b/source/libs/stream/src/streamCheckpoint.c index 1fddb5a97d..af7e969c07 100644 --- a/source/libs/stream/src/streamCheckpoint.c +++ b/source/libs/stream/src/streamCheckpoint.c @@ -24,11 +24,13 @@ static int32_t streamTaskUploadCheckpoint(const char* id, const char* path); static int32_t deleteCheckpoint(const char* id); static int32_t downloadCheckpointByNameS3(const char* id, const char* fname, const char* dstName); static int32_t continueDispatchCheckpointTriggerBlock(SStreamDataBlock* pBlock, SStreamTask* pTask); -static int32_t appendCheckpointIntoInputQ(SStreamTask* pTask, int32_t checkpointType, int64_t checkpointId, int32_t transId); +static int32_t appendCheckpointIntoInputQ(SStreamTask* pTask, int32_t checkpointType, int64_t checkpointId, + int32_t transId); static int32_t doSendRetrieveTriggerMsg(SStreamTask* pTask, SArray* pNotSendList); static void checkpointTriggerMonitorFn(void* param, void* tmrId); -static SStreamDataBlock* createChkptTriggerBlock(SStreamTask* pTask, int32_t checkpointType, int64_t checkpointId, int32_t transId); +static SStreamDataBlock* createChkptTriggerBlock(SStreamTask* pTask, int32_t checkpointType, int64_t checkpointId, + int32_t transId); SStreamDataBlock* createChkptTriggerBlock(SStreamTask* pTask, int32_t checkpointType, int64_t checkpointId, int32_t transId) { @@ -96,7 +98,7 @@ int32_t streamTaskProcessCheckpointTriggerRsp(SStreamTask* pTask, SCheckpointTri if (pRsp->rspCode != TSDB_CODE_SUCCESS) { stDebug("s-task:%s retrieve checkpoint-trgger rsp from upstream:0x%x invalid, code:%s", pTask->id.idStr, - pRsp->upstreamTaskId, tstrerror(pRsp->rspCode)); + pRsp->upstreamTaskId, tstrerror(pRsp->rspCode)); return TSDB_CODE_SUCCESS; } @@ -108,7 +110,7 @@ int32_t streamTaskSendCheckpointTriggerMsg(SStreamTask* pTask, int32_t dstTaskId SRpcHandleInfo* pRpcInfo, int32_t code) { int32_t size = sizeof(SMsgHead) + sizeof(SCheckpointTriggerRsp); - void* pBuf = rpcMallocCont(size); + void* pBuf = rpcMallocCont(size); SCheckpointTriggerRsp* pRsp = POINTER_SHIFT(pBuf, sizeof(SMsgHead)); ((SMsgHead*)pBuf)->vgId = htonl(downstreamNodeId); @@ -162,15 +164,15 @@ int32_t streamProcessCheckpointTriggerBlock(SStreamTask* pTask, SStreamDataBlock taosThreadMutexLock(&pTask->lock); if (pTask->chkInfo.checkpointId > checkpointId) { stError("s-task:%s vgId:%d current checkpointId:%" PRId64 - " recv expired checkpoint-trigger block, checkpointId:%" PRId64 " transId:%d, discard", - id, vgId, pTask->chkInfo.checkpointId, checkpointId, transId); + " recv expired checkpoint-trigger block, checkpointId:%" PRId64 " transId:%d, discard", + id, vgId, pTask->chkInfo.checkpointId, checkpointId, transId); taosThreadMutexUnlock(&pTask->lock); return TSDB_CODE_SUCCESS; } if (pTask->chkInfo.checkpointId == checkpointId) { { // send checkpoint-ready msg to upstream - SRpcMsg msg ={0}; + SRpcMsg msg = {0}; SStreamUpstreamEpInfo* pInfo = streamTaskGetUpstreamTaskEpInfo(pTask, pBlock->srcTaskId); initCheckpointReadyMsg(pTask, pInfo->nodeId, pBlock->srcTaskId, pInfo->childId, checkpointId, &msg); @@ -362,7 +364,8 @@ int32_t streamProcessCheckpointReadyMsg(SStreamTask* pTask, int64_t checkpointId taosThreadMutexUnlock(&pInfo->lock); if (notReady == 0) { - stDebug("s-task:%s all downstream task(s) have completed build checkpoint, start to do checkpoint for current task", id); + stDebug("s-task:%s all downstream task(s) have completed build checkpoint, start to do checkpoint for current task", + id); appendCheckpointIntoInputQ(pTask, STREAM_INPUT__CHECKPOINT, checkpointId, transId); } @@ -371,11 +374,11 @@ int32_t streamProcessCheckpointReadyMsg(SStreamTask* pTask, int64_t checkpointId int32_t streamTaskProcessCheckpointReadyRsp(SStreamTask* pTask, int32_t upstreamTaskId, int64_t checkpointId) { SActiveCheckpointInfo* pInfo = pTask->chkInfo.pActiveInfo; - int64_t now = taosGetTimestampMs(); - int32_t numOfConfirmed = 0; + int64_t now = taosGetTimestampMs(); + int32_t numOfConfirmed = 0; taosThreadMutexLock(&pInfo->lock); - for(int32_t i = 0; i < taosArrayGetSize(pInfo->pReadyMsgList); ++i) { + for (int32_t i = 0; i < taosArrayGetSize(pInfo->pReadyMsgList); ++i) { STaskCheckpointReadyInfo* pReadyInfo = taosArrayGet(pInfo->pReadyMsgList, i); if (pReadyInfo->upstreamTaskId == upstreamTaskId && pReadyInfo->checkpointId == checkpointId) { pReadyInfo->sendCompleted = 1; @@ -385,7 +388,7 @@ int32_t streamTaskProcessCheckpointReadyRsp(SStreamTask* pTask, int32_t upstream } } - for(int32_t i = 0; i < taosArrayGetSize(pInfo->pReadyMsgList); ++i) { + for (int32_t i = 0; i < taosArrayGetSize(pInfo->pReadyMsgList); ++i) { STaskCheckpointReadyInfo* pReadyInfo = taosArrayGet(pInfo->pReadyMsgList, i); if (pReadyInfo->sendCompleted == 1) { numOfConfirmed += 1; @@ -568,12 +571,12 @@ static int32_t getCheckpointDataMeta(const char* id, const char* path, SArray* l } int32_t uploadCheckpointData(SStreamTask* pTask, int64_t checkpointId, int64_t dbRefId, ECHECKPOINT_BACKUP_TYPE type) { - char* path = NULL; - int32_t code = 0; - SArray* toDelFiles = taosArrayInit(4, POINTER_BYTES); - int64_t now = taosGetTimestampMs(); - SStreamMeta* pMeta = pTask->pMeta; - const char* idStr = pTask->id.idStr; + char* path = NULL; + int32_t code = 0; + SArray* toDelFiles = taosArrayInit(4, POINTER_BYTES); + int64_t now = taosGetTimestampMs(); + SStreamMeta* pMeta = pTask->pMeta; + const char* idStr = pTask->id.idStr; if ((code = taskDbGenChkpUploadData(pTask->pBackend, pMeta->bkdChkptMgt, checkpointId, type, &path, toDelFiles, pTask->id.idStr)) != 0) { @@ -619,8 +622,8 @@ int32_t uploadCheckpointData(SStreamTask* pTask, int64_t checkpointId, int64_t d idStr, checkpointId, el, path); taosRemoveDir(path); } else { - stDebug("s-task:%s failed to upload checkpointId:%" PRId64 " keep local checkpoint data, elapsed time:%.2fs", - idStr, checkpointId, el); + stDebug("s-task:%s failed to upload checkpointId:%" PRId64 " keep local checkpoint data, elapsed time:%.2fs", idStr, + checkpointId, el); } taosMemoryFree(path); @@ -639,9 +642,10 @@ int32_t streamTaskRemoteBackupCheckpoint(SStreamTask* pTask, int64_t checkpointI } int64_t dbRefId = taskGetDBRef(pTask->pBackend); - void* pBackend = taskAcquireDb(dbRefId); + void* pBackend = taskAcquireDb(dbRefId); if (pBackend == NULL) { - stError("s-task:%s failed to acquire db during update checkpoint data, failed to upload checkpointData", pTask->id.idStr); + stError("s-task:%s failed to acquire db during update checkpoint data, failed to upload checkpointData", + pTask->id.idStr); return -1; } @@ -663,7 +667,8 @@ int32_t streamTaskBuildCheckpoint(SStreamTask* pTask) { if (pTask->info.taskLevel != TASK_LEVEL__SINK) { stDebug("s-task:%s level:%d start gen checkpoint, checkpointId:%" PRId64, id, pTask->info.taskLevel, ckId); - code = streamBackendDoCheckpoint(pTask->pBackend, ckId); + int64_t ver = 0; + code = streamBackendDoCheckpoint(pTask->pBackend, ckId, ver); if (code != TSDB_CODE_SUCCESS) { stError("s-task:%s gen checkpoint:%" PRId64 " failed, code:%s", id, ckId, tstrerror(terrno)); } @@ -773,11 +778,11 @@ void checkpointTriggerMonitorFn(void* param, void* tmrId) { ASSERT(pTask->info.taskLevel > TASK_LEVEL__SOURCE); SArray* pNotSendList = taosArrayInit(4, sizeof(SStreamUpstreamEpInfo)); - for(int32_t i = 0; i < taosArrayGetSize(pList); ++i) { + for (int32_t i = 0; i < taosArrayGetSize(pList); ++i) { SStreamUpstreamEpInfo* pInfo = taosArrayGetP(pList, i); bool recved = false; - for(int32_t j = 0; j < taosArrayGetSize(pActiveInfo->pReadyMsgList); ++j) { + for (int32_t j = 0; j < taosArrayGetSize(pActiveInfo->pReadyMsgList); ++j) { STaskCheckpointReadyInfo* pReady = taosArrayGet(pActiveInfo->pReadyMsgList, j); if (pInfo->nodeId == pReady->upstreamNodeId) { recved = true; @@ -785,7 +790,7 @@ void checkpointTriggerMonitorFn(void* param, void* tmrId) { } } - if (!recved) { // make sure the inputQ is opened for not recv upstream checkpoint-trigger message + if (!recved) { // make sure the inputQ is opened for not recv upstream checkpoint-trigger message streamTaskOpenUpstreamInput(pTask, pInfo->taskId); taosArrayPush(pNotSendList, pInfo); } @@ -870,7 +875,7 @@ bool streamTaskAlreadySendTrigger(SStreamTask* pTask, int32_t downstreamNodeId) return false; } - for(int32_t i = 0; i < taosArrayGetSize(pInfo->pDispatchTriggerList); ++i) { + for (int32_t i = 0; i < taosArrayGetSize(pInfo->pDispatchTriggerList); ++i) { STaskTriggerSendInfo* pSendInfo = taosArrayGet(pInfo->pDispatchTriggerList, i); if (pSendInfo->nodeId != downstreamNodeId) { continue; @@ -939,10 +944,10 @@ int32_t streamTaskGetNumOfConfirmed(SStreamTask* pTask) { int32_t num = 0; taosThreadMutexLock(&pInfo->lock); - for(int32_t i = 0; i < taosArrayGetSize(pInfo->pDispatchTriggerList); ++i) { + for (int32_t i = 0; i < taosArrayGetSize(pInfo->pDispatchTriggerList); ++i) { STaskTriggerSendInfo* p = taosArrayGet(pInfo->pDispatchTriggerList, i); if (p->recved) { - num ++; + num++; } } taosThreadMutexUnlock(&pInfo->lock); diff --git a/source/libs/stream/test/backendTest.cpp b/source/libs/stream/test/backendTest.cpp index 2fb257fe4e..38d48a2a32 100644 --- a/source/libs/stream/test/backendTest.cpp +++ b/source/libs/stream/test/backendTest.cpp @@ -29,7 +29,7 @@ class BackendEnv : public ::testing::Test { void *backendCreate() { const char *streamPath = "/tmp"; - void * p = NULL; + void *p = NULL; // char *absPath = NULL; // // SBackendWrapper *p = (SBackendWrapper *)streamBackendInit(streamPath, -1, 2); @@ -52,7 +52,7 @@ SStreamState *stateCreate(const char *path) { } void *backendOpen() { streamMetaInit(); - const char * path = "/tmp/backend"; + const char *path = "/tmp/backend"; SStreamState *p = stateCreate(path); ASSERT(p != NULL); @@ -79,7 +79,7 @@ void *backendOpen() { const char *val = "value data"; int32_t len = 0; - char * newVal = NULL; + char *newVal = NULL; streamStateGet_rocksdb(p, &key, (void **)&newVal, &len); ASSERT(len == strlen(val)); } @@ -100,7 +100,7 @@ void *backendOpen() { const char *val = "value data"; int32_t len = 0; - char * newVal = NULL; + char *newVal = NULL; int32_t code = streamStateGet_rocksdb(p, &key, (void **)&newVal, &len); ASSERT(code != 0); } @@ -130,7 +130,7 @@ void *backendOpen() { winkey.groupId = 0; winkey.ts = tsArray[0]; - char * val = NULL; + char *val = NULL; int32_t len = 0; pCurr = streamStateSeekKeyNext_rocksdb(p, &winkey); @@ -157,7 +157,7 @@ void *backendOpen() { key.ts = tsArray[i]; key.exprIdx = i; - char * val = NULL; + char *val = NULL; int32_t len = 0; streamStateFuncGet_rocksdb(p, &key, (void **)&val, &len); ASSERT(len == strlen("Value")); @@ -168,7 +168,7 @@ void *backendOpen() { key.ts = tsArray[i]; key.exprIdx = i; - char * val = NULL; + char *val = NULL; int32_t len = 0; streamStateFuncDel_rocksdb(p, &key); } @@ -213,7 +213,7 @@ void *backendOpen() { { SSessionKey key; memset(&key, 0, sizeof(key)); - char * val = NULL; + char *val = NULL; int32_t vlen = 0; code = streamStateSessionGetKVByCur_rocksdb(pCurr, &key, (void **)&val, &vlen); ASSERT(code == 0); @@ -260,7 +260,7 @@ void *backendOpen() { SWinKey key = {0}; // {.groupId = (uint64_t)(i), .ts = tsArray[i]}; key.groupId = (uint64_t)(i); key.ts = tsArray[i]; - char * val = NULL; + char *val = NULL; int32_t vlen = 0; ASSERT(streamStateFillGet_rocksdb(p, &key, (void **)&val, &vlen) == 0); taosMemoryFreeClear(val); @@ -272,7 +272,7 @@ void *backendOpen() { SStreamStateCur *pCurr = streamStateFillGetCur_rocksdb(p, &key); ASSERT(pCurr != NULL); - char * val = NULL; + char *val = NULL; int32_t vlen = 0; ASSERT(0 == streamStateFillGetKVByCur_rocksdb(pCurr, &key, (const void **)&val, &vlen)); ASSERT(vlen == strlen("Value")); @@ -296,7 +296,7 @@ void *backendOpen() { SWinKey key = {0}; // {.groupId = (uint64_t)(i), .ts = tsArray[i]}; key.groupId = (uint64_t)(i); key.ts = tsArray[i]; - char * val = NULL; + char *val = NULL; int32_t vlen = 0; ASSERT(streamStateFillDel_rocksdb(p, &key) == 0); taosMemoryFreeClear(val); @@ -338,7 +338,7 @@ void *backendOpen() { char key[128] = {0}; sprintf(key, "tbname_%d", i); - char * val = NULL; + char *val = NULL; int32_t len = 0; code = streamDefaultGet_rocksdb(p, key, (void **)&val, &len); ASSERT(code == 0); @@ -354,7 +354,7 @@ TEST_F(BackendEnv, checkOpen) { SStreamState *p = (SStreamState *)backendOpen(); int64_t tsStart = taosGetTimestampMs(); { - void * pBatch = streamStateCreateBatch(); + void *pBatch = streamStateCreateBatch(); int32_t size = 0; for (int i = 0; i < size; i++) { char key[128] = {0}; @@ -368,7 +368,7 @@ TEST_F(BackendEnv, checkOpen) { streamStateDestroyBatch(pBatch); } { - void * pBatch = streamStateCreateBatch(); + void *pBatch = streamStateCreateBatch(); int32_t size = 0; char valBuf[256] = {0}; for (int i = 0; i < size; i++) { @@ -383,9 +383,9 @@ TEST_F(BackendEnv, checkOpen) { streamStateDestroyBatch(pBatch); } // do checkpoint 2 - taskDbDoCheckpoint(p->pTdbState->pOwner->pBackend, 2); + taskDbDoCheckpoint(p->pTdbState->pOwner->pBackend, 2, 0); { - void * pBatch = streamStateCreateBatch(); + void *pBatch = streamStateCreateBatch(); int32_t size = 0; char valBuf[256] = {0}; for (int i = 0; i < size; i++) { @@ -400,17 +400,17 @@ TEST_F(BackendEnv, checkOpen) { streamStateDestroyBatch(pBatch); } - taskDbDoCheckpoint(p->pTdbState->pOwner->pBackend, 3); + taskDbDoCheckpoint(p->pTdbState->pOwner->pBackend, 3, 0); const char *path = "/tmp/backend/stream"; const char *dump = "/tmp/backend/stream/dump"; // taosMkDir(dump); taosMulMkDir(dump); SBkdMgt *mgt = bkdMgtCreate((char *)path); - SArray * result = taosArrayInit(4, sizeof(void *)); + SArray *result = taosArrayInit(4, sizeof(void *)); bkdMgtGetDelta(mgt, p->pTdbState->idstr, 3, result, (char *)dump); - taskDbDoCheckpoint(p->pTdbState->pOwner->pBackend, 4); + taskDbDoCheckpoint(p->pTdbState->pOwner->pBackend, 4, 0); taosArrayClear(result); bkdMgtGetDelta(mgt, p->pTdbState->idstr, 4, result, (char *)dump); From 33aef6ddc550808185e720e2093d920ab35228ac Mon Sep 17 00:00:00 2001 From: Yihao Deng Date: Tue, 25 Jun 2024 07:12:09 +0000 Subject: [PATCH 02/34] add self check info --- source/libs/stream/inc/streamBackendRocksdb.h | 2 +- source/libs/stream/src/streamBackendRocksdb.c | 105 +++++++++++++++--- source/libs/stream/src/streamMeta.c | 28 +++-- 3 files changed, 104 insertions(+), 31 deletions(-) diff --git a/source/libs/stream/inc/streamBackendRocksdb.h b/source/libs/stream/inc/streamBackendRocksdb.h index ebeedcb5d2..24cd861550 100644 --- a/source/libs/stream/inc/streamBackendRocksdb.h +++ b/source/libs/stream/inc/streamBackendRocksdb.h @@ -141,7 +141,7 @@ SListNode* streamBackendAddCompare(void* backend, void* arg); void streamBackendDelCompare(void* backend, void* arg); int32_t streamStateCvtDataFormat(char* path, char* key, void* cfInst); -STaskDbWrapper* taskDbOpen(const char* path, const char* key, int64_t chkptId); +STaskDbWrapper* taskDbOpen(const char* path, const char* key, int64_t chkptId, int64_t* processVer); void taskDbDestroy(void* pBackend, bool flush); void taskDbDestroy2(void* pBackend); diff --git a/source/libs/stream/src/streamBackendRocksdb.c b/source/libs/stream/src/streamBackendRocksdb.c index 4915d4b122..4278757136 100644 --- a/source/libs/stream/src/streamBackendRocksdb.c +++ b/source/libs/stream/src/streamBackendRocksdb.c @@ -326,9 +326,11 @@ void cleanDir(const char* pPath, const char* id) { } } -void validateDir(const char* pPath) { +int32_t createDirIfNotExist(const char* pPath) { if (!taosIsDir(pPath)) { - taosMulMkDir(pPath); + return taosMulMkDir(pPath); + } else { + return 0; } } @@ -419,6 +421,9 @@ int32_t backendFileCopyFilesImpl(const char* src, const char* dst) { const char* current = "CURRENT"; size_t currLen = strlen(current); + const char* info = "info"; + size_t infoLen = strlen(info); + int32_t code = 0; int32_t sLen = strlen(src); int32_t dLen = strlen(dst); @@ -455,6 +460,14 @@ int32_t backendFileCopyFilesImpl(const char* src, const char* dst) { stError("failed to copy file, detail: %s to %s reason:%s", srcName, dstName, tstrerror(code)); goto _ERROR; } + } else if (strncmp(name, info, strlen(name) <= infoLen ? strlen(name) : infoLen) == 0) { + code = copyFiles_create(srcName, dstName, 0); + if (code != 0) { + code = TAOS_SYSTEM_ERROR(code); + stError("failed to copy file, detail: %s to %s reason:%s", srcName, dstName, tstrerror(code)); + goto _ERROR; + } + } else { code = copyFiles_hardlink(srcName, dstName, 0); if (code != 0) { @@ -487,7 +500,7 @@ _ERROR: int32_t backendCopyFiles(const char* src, const char* dst) { return backendFileCopyFilesImpl(src, dst); } static int32_t rebuildFromLocalCheckpoint(const char* pTaskIdStr, const char* checkpointPath, int64_t checkpointId, - const char* defaultPath) { + const char* defaultPath, int64_t* processVer) { int32_t code = 0; cleanDir(defaultPath, pTaskIdStr); @@ -512,34 +525,67 @@ static int32_t rebuildFromLocalCheckpoint(const char* pTaskIdStr, const char* ch return code; } -int32_t restoreCheckpointData(const char* path, const char* key, int64_t chkptId, char** dbPrefixPath, char** dbPath) { - int32_t code = 0; +int32_t restoreCheckpointData(const char* path, const char* key, int64_t chkptId, char** dbPrefixPath, char** dbPath, + int64_t* processVer) { + int32_t code = -1; + + size_t pathLen = strlen(path); + char* prefixPath = NULL; + char* defaultPath = NULL; + + // alloc buf + prefixPath = taosMemoryCalloc(1, pathLen + 64); + if (prefixPath == NULL) { + terrno = TSDB_CODE_OUT_OF_MEMORY; + goto _EXIT; + } - char* prefixPath = taosMemoryCalloc(1, strlen(path) + 128); sprintf(prefixPath, "%s%s%s", path, TD_DIRSEP, key); + code = createDirIfNotExist(prefixPath); + if (code != 0) { + goto _EXIT; + } - validateDir(prefixPath); + defaultPath = taosMemoryCalloc(1, pathLen + 128); + if (defaultPath == NULL) { + terrno = TSDB_CODE_OUT_OF_MEMORY; + goto _EXIT; + } - char* defaultPath = taosMemoryCalloc(1, strlen(path) + 256); sprintf(defaultPath, "%s%s%s", prefixPath, TD_DIRSEP, "state"); + code = createDirIfNotExist(defaultPath); + if (code != 0) { + goto _EXIT; + } - validateDir(defaultPath); - int32_t pathLen = strlen(path) + 256; + // int32_t pathLen = strlen(path) + 48; + char* checkpointRoot = taosMemoryCalloc(1, pathLen + 48); + if (checkpointRoot == NULL) { + terrno = TSDB_CODE_OUT_OF_MEMORY; + goto _EXIT; + } - char* checkpointRoot = taosMemoryCalloc(1, pathLen); sprintf(checkpointRoot, "%s%s%s", prefixPath, TD_DIRSEP, "checkpoints"); - - validateDir(checkpointRoot); - taosMemoryFree(checkpointRoot); + code = createDirIfNotExist(checkpointRoot); + if (code != 0) { + taosMemoryFreeClear(checkpointRoot); + goto _EXIT; + } + taosMemoryFreeClear(checkpointRoot); stDebug("%s check local backend dir:%s, checkpointId:%" PRId64 " succ", key, defaultPath, chkptId); char* chkptPath = taosMemoryCalloc(1, pathLen); + if (chkptPath == NULL) { + terrno = TSDB_CODE_OUT_OF_MEMORY; + goto _EXIT; + } + if (chkptId > 0) { snprintf(chkptPath, pathLen, "%s%s%s%s%s%" PRId64 "", prefixPath, TD_DIRSEP, "checkpoints", TD_DIRSEP, "checkpoint", chkptId); - code = rebuildFromLocalCheckpoint(key, chkptPath, chkptId, defaultPath); + code = rebuildFromLocalCheckpoint(key, chkptPath, chkptId, defaultPath, processVer); if (code != 0) { code = rebuildFromRemoteCheckpoint(key, chkptPath, chkptId, defaultPath); } @@ -559,7 +605,11 @@ int32_t restoreCheckpointData(const char* path, const char* key, int64_t chkptId *dbPath = defaultPath; *dbPrefixPath = prefixPath; + return 0; +_EXIT: + taosMemoryFree(defaultPath); + taosMemoryFree(prefixPath); return code; } @@ -2216,15 +2266,33 @@ _EXIT: return NULL; } -STaskDbWrapper* taskDbOpen(const char* path, const char* key, int64_t chkptId) { +STaskDbWrapper* taskDbOpen(const char* path, const char* key, int64_t chkptId, int64_t* processVer) { char* statePath = NULL; char* dbPath = NULL; - if (restoreCheckpointData(path, key, chkptId, &statePath, &dbPath) != 0) { + if (restoreCheckpointData(path, key, chkptId, &statePath, &dbPath, processVer) != 0) { + stError("failed to restore checkpoint data, path:%s, key:%s, checkpointId: %" PRId64 "reason:%s", path, key, + chkptId, tstrerror(terrno)); return NULL; } STaskDbWrapper* pTaskDb = taskDbOpenImpl(key, statePath, dbPath); + if (pTaskDb != NULL) { + int64_t chkpId = -1, ver = -1; + if (chkpLoadExtraInfo(dbPath, &chkpId, &ver) == 0) { + *processVer = ver; + } else { + if (terrno == TSDB_CODE_OUT_OF_MEMORY) { + taskDbDestroy(pTaskDb, false); + return NULL; + } else { + // not info file exists, caller handle this situation + terrno = 0; + *processVer = -1; + } + } + } + taosMemoryFree(dbPath); taosMemoryFree(statePath); return pTaskDb; @@ -2435,7 +2503,8 @@ int32_t streamStateCvtDataFormat(char* path, char* key, void* pCfInst) { int32_t code = 0; - STaskDbWrapper* pTaskDb = taskDbOpen(path, key, 0); + int64_t processVer = -1; + STaskDbWrapper* pTaskDb = taskDbOpen(path, key, 0, &processVer); RocksdbCfInst* pSrcBackend = pCfInst; for (int i = 0; i < nCf; i++) { diff --git a/source/libs/stream/src/streamMeta.c b/source/libs/stream/src/streamMeta.c index 03c7b93f91..864f9514da 100644 --- a/source/libs/stream/src/streamMeta.c +++ b/source/libs/stream/src/streamMeta.c @@ -151,7 +151,7 @@ int32_t streamMetaCheckBackendCompatible(SStreamMeta* pMeta) { int8_t ret = STREAM_STATA_COMPATIBLE; TBC* pCur = NULL; - if (tdbTbcOpen(pMeta->pTaskDb, &pCur, NULL) < 0) { // no task info, no stream + if (tdbTbcOpen(pMeta->pTaskDb, &pCur, NULL) < 0) { // no task info, no stream return ret; } @@ -262,8 +262,9 @@ int32_t streamTaskSetDb(SStreamMeta* pMeta, SStreamTask* pTask, const char* key) } STaskDbWrapper* pBackend = NULL; + int64_t processVer = -1; while (1) { - pBackend = taskDbOpen(pMeta->path, key, chkpId); + pBackend = taskDbOpen(pMeta->path, key, chkpId, &processVer); if (pBackend != NULL) { break; } @@ -557,7 +558,7 @@ int32_t streamMetaSaveTask(SStreamMeta* pMeta, SStreamTask* pTask) { return -1; } - if (pTask->ver < SSTREAM_TASK_SUBTABLE_CHANGED_VER){ + if (pTask->ver < SSTREAM_TASK_SUBTABLE_CHANGED_VER) { pTask->ver = SSTREAM_TASK_VER; } @@ -907,7 +908,7 @@ void streamMetaLoadAllTasks(SStreamMeta* pMeta) { if (p == NULL) { code = pMeta->buildTaskFn(pMeta->ahandle, pTask, pTask->chkInfo.checkpointVer + 1); if (code < 0) { - stError("failed to expand s-task:0x%"PRIx64", code:%s, continue", id.taskId, tstrerror(terrno)); + stError("failed to expand s-task:0x%" PRIx64 ", code:%s, continue", id.taskId, tstrerror(terrno)); tFreeStreamTask(pTask); continue; } @@ -1012,7 +1013,7 @@ static int32_t metaHeartbeatToMnodeImpl(SStreamMeta* pMeta) { for (int32_t i = 0; i < numOfTasks; ++i) { SStreamTaskId* pId = taosArrayGet(pMeta->pTaskList, i); - STaskId id = {.streamId = pId->streamId, .taskId = pId->taskId}; + STaskId id = {.streamId = pId->streamId, .taskId = pId->taskId}; SStreamTask** pTask = taosHashGet(pMeta->pTasksMap, &id, sizeof(id)); if (pTask == NULL) { continue; @@ -1052,12 +1053,14 @@ static int32_t metaHeartbeatToMnodeImpl(SStreamMeta* pMeta) { } if ((*pTask)->chkInfo.pActiveInfo->activeId != 0) { - entry.checkpointInfo.failed = ((*pTask)->chkInfo.pActiveInfo->failedId >= (*pTask)->chkInfo.pActiveInfo->activeId) ? 1 : 0; + entry.checkpointInfo.failed = + ((*pTask)->chkInfo.pActiveInfo->failedId >= (*pTask)->chkInfo.pActiveInfo->activeId) ? 1 : 0; entry.checkpointInfo.activeId = (*pTask)->chkInfo.pActiveInfo->activeId; entry.checkpointInfo.activeTransId = (*pTask)->chkInfo.pActiveInfo->transId; if (entry.checkpointInfo.failed) { - stInfo("s-task:%s set kill checkpoint trans in hb, transId:%d", (*pTask)->id.idStr, (*pTask)->chkInfo.pActiveInfo->transId); + stInfo("s-task:%s set kill checkpoint trans in hb, transId:%d", (*pTask)->id.idStr, + (*pTask)->chkInfo.pActiveInfo->transId); } } @@ -1384,7 +1387,7 @@ int32_t streamMetaStartAllTasks(SStreamMeta* pMeta, __stream_task_expand_fn expa int64_t now = taosGetTimestampMs(); int32_t numOfTasks = taosArrayGetSize(pMeta->pTaskList); - stInfo("vgId:%d start to check all %d stream task(s) downstream status, start ts:%"PRId64, vgId, numOfTasks, now); + stInfo("vgId:%d start to check all %d stream task(s) downstream status, start ts:%" PRId64, vgId, numOfTasks, now); if (numOfTasks == 0) { stInfo("vgId:%d no tasks to be started", pMeta->vgId); @@ -1513,8 +1516,8 @@ bool streamMetaAllTasksReady(const SStreamMeta* pMeta) { int32_t num = taosArrayGetSize(pMeta->pTaskList); for (int32_t i = 0; i < num; ++i) { SStreamTaskId* pId = taosArrayGet(pMeta->pTaskList, i); - STaskId id = {.streamId = pId->streamId, .taskId = pId->taskId}; - SStreamTask** ppTask = taosHashGet(pMeta->pTasksMap, &id, sizeof(id)); + STaskId id = {.streamId = pId->streamId, .taskId = pId->taskId}; + SStreamTask** ppTask = taosHashGet(pMeta->pTasksMap, &id, sizeof(id)); if (ppTask == NULL) { continue; } @@ -1598,7 +1601,7 @@ int32_t streamMetaAddTaskLaunchResult(SStreamMeta* pMeta, int64_t streamId, int3 if (pStartInfo->startAllTasks != 1) { int64_t el = endTs - startTs; stDebug("vgId:%d not start all task(s), not record status, s-task:0x%x launch succ:%d elapsed time:%" PRId64 "ms", - pMeta->vgId, taskId, ready, el); + pMeta->vgId, taskId, ready, el); streamMetaWUnLock(pMeta); return 0; } @@ -1725,7 +1728,8 @@ void streamMetaAddIntoUpdateTaskList(SStreamMeta* pMeta, SStreamTask* pTask, SSt taosHashPut(pMeta->updateInfo.pTasks, &hEntry, sizeof(hEntry), NULL, 0); stDebug("s-task:%s vgId:%d transId:%d task nodeEp update completed, streamTask/hTask closed, elapsed:%" PRId64 - " ms", id, vgId, transId, el); + " ms", + id, vgId, transId, el); } else { stDebug("s-task:%s vgId:%d transId:%d task nodeEp update completed, streamTask closed, elapsed time:%" PRId64 "ms", id, vgId, transId, el); From 49ba8132c0f7388aad1c81be1e3b61e0acdfdcef Mon Sep 17 00:00:00 2001 From: Yihao Deng Date: Tue, 25 Jun 2024 12:04:10 +0000 Subject: [PATCH 03/34] add self check info --- source/libs/stream/src/streamBackendRocksdb.c | 134 +++++++++++++++--- source/libs/stream/src/streamSnapshot.c | 25 +++- 2 files changed, 139 insertions(+), 20 deletions(-) diff --git a/source/libs/stream/src/streamBackendRocksdb.c b/source/libs/stream/src/streamBackendRocksdb.c index 4278757136..2642f608d9 100644 --- a/source/libs/stream/src/streamBackendRocksdb.c +++ b/source/libs/stream/src/streamBackendRocksdb.c @@ -1178,6 +1178,7 @@ int32_t chkpPreBuildDir(char* path, int64_t chkpId, char** chkpDir, char** chkpI } int32_t taskDbBuildSnap(void* arg, SArray* pSnap) { + // vnode task->db SStreamMeta* pMeta = arg; taosThreadMutexLock(&pMeta->backendMutex); @@ -1186,27 +1187,44 @@ int32_t taskDbBuildSnap(void* arg, SArray* pSnap) { while (pIter) { STaskDbWrapper* pTaskDb = *(STaskDbWrapper**)pIter; - taskDbAddRef(pTaskDb); - int64_t chkpId = pTaskDb->chkpId; - taskDbRefChkp(pTaskDb, chkpId); - code = taskDbDoCheckpoint(pTaskDb, chkpId, 0); - if (code != 0) { - taskDbUnRefChkp(pTaskDb, chkpId); + void* p = taskDbAddRef(pTaskDb); + if (p == NULL) { + terrno = 0; + pIter = taosHashIterate(pMeta->pTaskDbUnique, pIter); + continue; } - taskDbRemoveRef(pTaskDb); + // add chkpId to in-use-ckpkIdSet + taskDbRefChkp(pTaskDb, pTaskDb->chkpId); + + code = taskDbDoCheckpoint(pTaskDb, pTaskDb->chkpId, ((SStreamTask*)pTaskDb->pTask)->chkInfo.processedVer); + if (code != 0) { + // remove chkpId from in-use-ckpkIdSet + taskDbUnRefChkp(pTaskDb, pTaskDb->chkpId); + taskDbRemoveRef(pTaskDb); + code = -1; + break; + } SStreamTask* pTask = pTaskDb->pTask; SStreamTaskSnap snap = {.streamId = pTask->id.streamId, .taskId = pTask->id.taskId, .chkpId = pTaskDb->chkpId, .dbPrefixPath = taosStrdup(pTaskDb->path)}; + if (snap.dbPrefixPath == NULL) { + // remove chkpid from chkp-in-use set + taskDbUnRefChkp(pTaskDb, pTaskDb->chkpId); + taskDbRemoveRef(pTaskDb); + terrno = TSDB_CODE_OUT_OF_MEMORY; + code = -1; + break; + } taosArrayPush(pSnap, &snap); + pIter = taosHashIterate(pMeta->pTaskDbUnique, pIter); } taosThreadMutexUnlock(&pMeta->backendMutex); - return code; } int32_t taskDbDestroySnap(void* arg, SArray* pSnapInfo) { @@ -2172,23 +2190,35 @@ void taskDbDestroyChkpOpt(STaskDbWrapper* pTaskDb) { int32_t taskDbBuildFullPath(char* path, char* key, char** dbFullPath, char** stateFullPath) { int32_t code = 0; + char* statePath = taosMemoryCalloc(1, strlen(path) + 128); + if (statePath == NULL) { + terrno = TSDB_CODE_OUT_OF_MEMORY; + return -1; + } - char* statePath = taosMemoryCalloc(1, strlen(path) + 128); sprintf(statePath, "%s%s%s", path, TD_DIRSEP, key); if (!taosDirExist(statePath)) { code = taosMulMkDir(statePath); if (code != 0) { - stError("failed to create dir: %s, reason:%s", statePath, tstrerror(code)); + terrno = errno; + stError("failed to create dir: %s, reason:%s", statePath, tstrerror(terrno)); taosMemoryFree(statePath); return code; } } char* dbPath = taosMemoryCalloc(1, strlen(statePath) + 128); + if (dbPath == NULL) { + taosMemoryFree(statePath); + terrno = TSDB_CODE_OUT_OF_MEMORY; + return -1; + } + sprintf(dbPath, "%s%s%s", statePath, TD_DIRSEP, "state"); if (!taosDirExist(dbPath)) { code = taosMulMkDir(dbPath); if (code != 0) { + terrno = errno; stError("failed to create dir: %s, reason:%s", dbPath, tstrerror(code)); taosMemoryFree(statePath); taosMemoryFree(dbPath); @@ -2384,6 +2414,11 @@ int32_t taskDbGenChkpUploadData__rsync(STaskDbWrapper* pDb, int64_t chkpId, char } char* buf = taosMemoryCalloc(1, strlen(pDb->path) + 128); + if (buf == NULL) { + terrno = TSDB_CODE_OUT_OF_MEMORY; + return -1; + } + sprintf(buf, "%s%s%s%s%s%" PRId64 "", pDb->path, TD_DIRSEP, "checkpoints", TD_DIRSEP, "checkpoint", chkpId); if (taosIsDir(buf)) { code = 0; @@ -2402,6 +2437,11 @@ int32_t taskDbGenChkpUploadData__s3(STaskDbWrapper* pDb, void* bkdChkpMgt, int64 SBkdMgt* p = (SBkdMgt*)bkdChkpMgt; char* temp = taosMemoryCalloc(1, strlen(pDb->path) + 32); + if (temp == NULL) { + terrno = TSDB_CODE_OUT_OF_MEMORY; + return -1; + } + sprintf(temp, "%s%s%s%" PRId64, pDb->path, TD_DIRSEP, "tmp", chkpId); if (taosDirExist(temp)) { @@ -4239,14 +4279,12 @@ int32_t dbChkpGetDelta(SDbChkp* p, int64_t chkpId, SArray* list) { if (strlen(name) == currLen && strcmp(name, pCurrent) == 0) { taosMemoryFreeClear(p->pCurrent); p->pCurrent = taosStrdup(name); - // taosHashPut(p->pSstTbl[1 - p->idx], name, strlen(name), &dummy, sizeof(dummy)); continue; } if (strlen(name) >= maniLen && strncmp(name, pManifest, maniLen) == 0) { taosMemoryFreeClear(p->pManifest); p->pManifest = taosStrdup(name); - // taosHashPut(p->pSstTbl[1 - p->idx], name, strlen(name), &dummy, sizeof(dummy)); continue; } if (strlen(name) >= sstLen && strncmp(name + strlen(name) - 4, pSST, sstLen) == 0) { @@ -4301,31 +4339,75 @@ int32_t dbChkpGetDelta(SDbChkp* p, int64_t chkpId, SArray* list) { return 0; } +void dbChkpDestroy(SDbChkp* pChkp); + SDbChkp* dbChkpCreate(char* path, int64_t initChkpId) { SDbChkp* p = taosMemoryCalloc(1, sizeof(SDbChkp)); + if (p == NULL) { + terrno = TSDB_CODE_OUT_OF_MEMORY; + goto _EXIT; + } + p->curChkpId = initChkpId; p->preCkptId = -1; p->pSST = taosArrayInit(64, sizeof(void*)); + if (p->pSST == NULL) { + terrno = TSDB_CODE_OUT_OF_MEMORY; + dbChkpDestroy(p); + return NULL; + } + p->path = path; p->len = strlen(path) + 128; p->buf = taosMemoryCalloc(1, p->len); + if (p->buf == NULL) { + terrno = TSDB_CODE_OUT_OF_MEMORY; + goto _EXIT; + } p->idx = 0; p->pSstTbl[0] = taosHashInit(64, taosGetDefaultHashFunction(TSDB_DATA_TYPE_BINARY), false, HASH_ENTRY_LOCK); + if (p->pSstTbl[0] == NULL) { + terrno = TSDB_CODE_OUT_OF_MEMORY; + goto _EXIT; + } + p->pSstTbl[1] = taosHashInit(64, taosGetDefaultHashFunction(TSDB_DATA_TYPE_BINARY), false, HASH_ENTRY_LOCK); + if (p->pSstTbl[1] == NULL) { + terrno = TSDB_CODE_OUT_OF_MEMORY; + goto _EXIT; + } p->pAdd = taosArrayInit(64, sizeof(void*)); + if (p->pAdd == NULL) { + terrno = TSDB_CODE_OUT_OF_MEMORY; + goto _EXIT; + } + p->pDel = taosArrayInit(64, sizeof(void*)); + if (p->pDel == NULL) { + terrno = TSDB_CODE_OUT_OF_MEMORY; + goto _EXIT; + } + p->update = 0; taosThreadRwlockInit(&p->rwLock, NULL); SArray* list = NULL; int32_t code = dbChkpGetDelta(p, initChkpId, list); + if (code != 0) { + goto _EXIT; + } return p; +_EXIT: + dbChkpDestroy(p); + return NULL; } void dbChkpDestroy(SDbChkp* pChkp) { + if (pChkp == NULL) return; + taosMemoryFree(pChkp->buf); taosMemoryFree(pChkp->path); @@ -4357,6 +4439,11 @@ int32_t dbChkpDumpTo(SDbChkp* p, char* dname, SArray* list) { char* srcDir = taosMemoryCalloc(1, len); char* dstDir = taosMemoryCalloc(1, len); + if (srcBuf == NULL || dstBuf == NULL || srcDir == NULL || dstDir == NULL) { + terrno = TSDB_CODE_OUT_OF_MEMORY; + goto _ERROR; + } + sprintf(srcDir, "%s%s%s%s%s%" PRId64 "", p->path, TD_DIRSEP, "checkpoints", TD_DIRSEP, "checkpoint", p->curChkpId); sprintf(dstDir, "%s", dname); @@ -4375,7 +4462,8 @@ int32_t dbChkpDumpTo(SDbChkp* p, char* dname, SArray* list) { sprintf(dstBuf, "%s%s%s", dstDir, TD_DIRSEP, filename); if (taosCopyFile(srcBuf, dstBuf) < 0) { - stError("failed to copy file from %s to %s", srcBuf, dstBuf); + terrno = errno; + stError("failed to copy file from %s to %s, reason:%s", srcBuf, dstBuf, tstrerror(terrno)); goto _ERROR; } } @@ -4392,7 +4480,8 @@ int32_t dbChkpDumpTo(SDbChkp* p, char* dname, SArray* list) { sprintf(srcBuf, "%s%s%s", srcDir, TD_DIRSEP, p->pCurrent); sprintf(dstBuf, "%s%s%s_%" PRId64 "", dstDir, TD_DIRSEP, p->pCurrent, p->curChkpId); if (taosCopyFile(srcBuf, dstBuf) < 0) { - stError("failed to copy file from %s to %s", srcBuf, dstBuf); + terrno = errno; + stError("failed to copy file from %s to %s, reason:%s", srcBuf, dstBuf, tstrerror(terrno)); goto _ERROR; } @@ -4402,7 +4491,8 @@ int32_t dbChkpDumpTo(SDbChkp* p, char* dname, SArray* list) { sprintf(srcBuf, "%s%s%s", srcDir, TD_DIRSEP, p->pManifest); sprintf(dstBuf, "%s%s%s_%" PRId64 "", dstDir, TD_DIRSEP, p->pManifest, p->curChkpId); if (taosCopyFile(srcBuf, dstBuf) < 0) { - stError("failed to copy file from %s to %s", srcBuf, dstBuf); + terrno = errno; + stError("failed to copy file from %s to %s, reason:%s", srcBuf, dstBuf, tstrerror(terrno)); goto _ERROR; } @@ -4412,17 +4502,21 @@ int32_t dbChkpDumpTo(SDbChkp* p, char* dname, SArray* list) { TdFilePtr pFile = taosOpenFile(dstDir, TD_FILE_CREATE | TD_FILE_WRITE | TD_FILE_TRUNC); if (pFile == NULL) { - stError("chkp failed to create meta file: %s", dstDir); + terrno = errno; + stError("chkp failed to create meta file: %s, reason:%s", dstDir, tstrerror(terrno)); goto _ERROR; } char content[128] = {0}; snprintf(content, sizeof(content), "%s_%" PRId64 "\n%s_%" PRId64 "", p->pCurrent, p->curChkpId, p->pManifest, p->curChkpId); if (taosWriteFile(pFile, content, strlen(content)) <= 0) { - stError("chkp failed to write meta file: %s", dstDir); + terrno = errno; + stError("chkp failed to write meta file: %s,reason:%s", dstDir, tstrerror(terrno)); taosCloseFile(&pFile); + code = -1; goto _ERROR; } + taosCloseFile(&pFile); // clear delta data buf @@ -4471,6 +4565,12 @@ int32_t bkdMgtGetDelta(SBkdMgt* bm, char* taskId, int64_t chkpId, SArray* list, if (pChkp == NULL) { char* path = taosMemoryCalloc(1, strlen(bm->path) + 64); + if (path == NULL) { + terrno = TSDB_CODE_OUT_OF_MEMORY; + taosThreadRwlockUnlock(&bm->rwLock); + return -1; + } + sprintf(path, "%s%s%s", bm->path, TD_DIRSEP, taskId); SDbChkp* p = dbChkpCreate(path, chkpId); diff --git a/source/libs/stream/src/streamSnapshot.c b/source/libs/stream/src/streamSnapshot.c index adefe97f1f..868ff002bf 100644 --- a/source/libs/stream/src/streamSnapshot.c +++ b/source/libs/stream/src/streamSnapshot.c @@ -24,6 +24,7 @@ enum SBackendFileType { ROCKSDB_SST_TYPE = 3, ROCKSDB_CURRENT_TYPE = 4, ROCKSDB_CHECKPOINT_META_TYPE = 5, + ROCKSDB_CHECKPOINT_SELFCHECK_TYPE = 6, }; typedef struct SBackendFileItem { @@ -49,6 +50,7 @@ typedef struct SBackendSnapFiles2 { char* pOptions; SArray* pSst; char* pCheckpointMeta; + char* pCheckpointSelfcheck; char* path; int64_t checkpointId; @@ -111,6 +113,7 @@ const char* ROCKSDB_MAINFEST = "MANIFEST"; const char* ROCKSDB_SST = "sst"; const char* ROCKSDB_CURRENT = "CURRENT"; const char* ROCKSDB_CHECKPOINT_META = "CHECKPOINT"; +const char* ROCKSDB_CHECKPOINT_SELF_CHECK = "info"; static int64_t kBlockSize = 64 * 1024; int32_t streamSnapHandleInit(SStreamSnapHandle* handle, char* path, void* pMeta); @@ -127,6 +130,7 @@ int32_t streamGetFileSize(char* path, char* name, int64_t* sz) { int32_t ret = 0; char* fullname = taosMemoryCalloc(1, strlen(path) + 32); + sprintf(fullname, "%s%s%s", path, TD_DIRSEP, name); ret = taosStatFile(fullname, sz, NULL, NULL); @@ -148,7 +152,8 @@ int32_t streamDestroyTaskDbSnapInfo(void* arg, SArray* snap) { return taskDbDest void snapFileDebugInfo(SBackendSnapFile2* pSnapFile) { if (qDebugFlag & DEBUG_DEBUG) { - char* buf = taosMemoryCalloc(1, 512); + int16_t cap = 511; + char* buf = taosMemoryCalloc(1, cap + 1); sprintf(buf + strlen(buf), "["); if (pSnapFile->pCurrent) sprintf(buf, "current: %s,", pSnapFile->pCurrent); @@ -157,10 +162,10 @@ void snapFileDebugInfo(SBackendSnapFile2* pSnapFile) { if (pSnapFile->pSst) { for (int32_t i = 0; i < taosArrayGetSize(pSnapFile->pSst); i++) { char* name = taosArrayGetP(pSnapFile->pSst, i); - sprintf(buf + strlen(buf), "%s,", name); + if (strlen(buf) + strlen(name) < cap) sprintf(buf + strlen(buf), "%s,", name); } } - sprintf(buf + strlen(buf) - 1, "]"); + if ((strlen(buf)) < cap) sprintf(buf + strlen(buf) - 1, "]"); stInfo("%s %" PRId64 "-%" PRId64 " get file list: %s", STREAM_STATE_TRANSFER, pSnapFile->snapInfo.streamId, pSnapFile->snapInfo.taskId, buf); @@ -199,6 +204,13 @@ int32_t snapFileGenMeta(SBackendSnapFile2* pSnapFile) { // meta item.name = pSnapFile->pCheckpointMeta; item.type = ROCKSDB_CHECKPOINT_META_TYPE; + if (streamGetFileSize(pSnapFile->path, item.name, &item.size) == 0) { + taosArrayPush(pSnapFile->pFileList, &item); + } + + item.name = pSnapFile->pCheckpointSelfcheck; + item.type = ROCKSDB_CHECKPOINT_SELFCHECK_TYPE; + if (streamGetFileSize(pSnapFile->path, item.name, &item.size) == 0) { taosArrayPush(pSnapFile->pFileList, &item); } @@ -231,6 +243,11 @@ int32_t snapFileReadMeta(SBackendSnapFile2* pSnapFile) { pSnapFile->pCheckpointMeta = taosStrdup(name); continue; } + if (strlen(name) >= strlen(ROCKSDB_CHECKPOINT_SELF_CHECK) && + 0 == strncmp(name, ROCKSDB_CHECKPOINT_SELF_CHECK, strlen(ROCKSDB_CHECKPOINT_SELF_CHECK))) { + pSnapFile->pCheckpointSelfcheck = taosStrdup(name); + continue; + } if (strlen(name) >= strlen(ROCKSDB_SST) && 0 == strncmp(name + strlen(name) - strlen(ROCKSDB_SST), ROCKSDB_SST, strlen(ROCKSDB_SST))) { char* sst = taosStrdup(name); @@ -276,6 +293,7 @@ void snapFileDestroy(SBackendSnapFile2* pSnap) { taosMemoryFree(pSnap->pMainfest); taosMemoryFree(pSnap->pOptions); taosMemoryFree(pSnap->path); + taosMemoryFree(pSnap->pCheckpointSelfcheck); for (int32_t i = 0; i < taosArrayGetSize(pSnap->pSst); i++) { char* sst = taosArrayGetP(pSnap->pSst, i); taosMemoryFree(sst); @@ -298,6 +316,7 @@ int32_t streamSnapHandleInit(SStreamSnapHandle* pHandle, char* path, void* pMeta SArray* pSnapInfoSet = taosArrayInit(4, sizeof(SStreamTaskSnap)); int32_t code = streamCreateTaskDbSnapInfo(pMeta, path, pSnapInfoSet); if (code != 0) { + stError("failed to do task db snap info, reason:%s", tstrerror(terrno)); taosArrayDestroy(pSnapInfoSet); return -1; } From 061648071e2b1c07434d6ef7833b778c972f7c84 Mon Sep 17 00:00:00 2001 From: Yihao Deng Date: Tue, 25 Jun 2024 12:35:25 +0000 Subject: [PATCH 04/34] add self check info --- source/libs/stream/src/streamBackendRocksdb.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/source/libs/stream/src/streamBackendRocksdb.c b/source/libs/stream/src/streamBackendRocksdb.c index 2642f608d9..b5ae2ab3dd 100644 --- a/source/libs/stream/src/streamBackendRocksdb.c +++ b/source/libs/stream/src/streamBackendRocksdb.c @@ -4270,7 +4270,13 @@ int32_t dbChkpGetDelta(SDbChkp* p, int64_t chkpId, SArray* list) { taosArrayClearP(p->pDel, taosMemoryFree); taosHashClear(p->pSstTbl[1 - p->idx]); - TdDirPtr pDir = taosOpenDir(p->buf); + TdDirPtr pDir = taosOpenDir(p->buf); + if (pDir == NULL) { + terrno = errno; + taosThreadRwlockUnlock(&p->rwLock); + return -1; + } + TdDirEntryPtr de = NULL; int8_t dummy = 0; while ((de = taosReadDir(pDir)) != NULL) { From ac351c5b58515e5a3d5b17699c06f9520f715f20 Mon Sep 17 00:00:00 2001 From: Yihao Deng Date: Wed, 26 Jun 2024 01:26:39 +0000 Subject: [PATCH 05/34] add self check info --- source/libs/stream/src/streamBackendRocksdb.c | 25 ++++++++++--------- 1 file changed, 13 insertions(+), 12 deletions(-) diff --git a/source/libs/stream/src/streamBackendRocksdb.c b/source/libs/stream/src/streamBackendRocksdb.c index b5ae2ab3dd..5e8f45e8a2 100644 --- a/source/libs/stream/src/streamBackendRocksdb.c +++ b/source/libs/stream/src/streamBackendRocksdb.c @@ -4230,23 +4230,24 @@ void strArrayDebugInfo(SArray* pArr, char** buf) { *buf = p; } void dbChkpDebugInfo(SDbChkp* pDb) { - // stTrace("chkp get file list: curr"); - char* p[4] = {NULL}; + if (stDebugFlag & DEBUG_INFO) { + char* p[4] = {NULL}; - hashTableToDebug(pDb->pSstTbl[pDb->idx], &p[0]); - stTrace("chkp previous file: [%s]", p[0]); + hashTableToDebug(pDb->pSstTbl[pDb->idx], &p[0]); + stTrace("chkp previous file: [%s]", p[0]); - hashTableToDebug(pDb->pSstTbl[1 - pDb->idx], &p[1]); - stTrace("chkp curr file: [%s]", p[1]); + hashTableToDebug(pDb->pSstTbl[1 - pDb->idx], &p[1]); + stTrace("chkp curr file: [%s]", p[1]); - strArrayDebugInfo(pDb->pAdd, &p[2]); - stTrace("chkp newly addded file: [%s]", p[2]); + strArrayDebugInfo(pDb->pAdd, &p[2]); + stTrace("chkp newly addded file: [%s]", p[2]); - strArrayDebugInfo(pDb->pDel, &p[3]); - stTrace("chkp newly deleted file: [%s]", p[3]); + strArrayDebugInfo(pDb->pDel, &p[3]); + stTrace("chkp newly deleted file: [%s]", p[3]); - for (int i = 0; i < 4; i++) { - taosMemoryFree(p[i]); + for (int i = 0; i < 4; i++) { + taosMemoryFree(p[i]); + } } } int32_t dbChkpGetDelta(SDbChkp* p, int64_t chkpId, SArray* list) { From 0a35d7ef6b9a3f7cb26f0ea90d2c3fb0d46bdd47 Mon Sep 17 00:00:00 2001 From: Yihao Deng Date: Thu, 27 Jun 2024 01:33:25 +0000 Subject: [PATCH 06/34] add self check --- source/libs/stream/src/streamBackendRocksdb.c | 7 +++---- source/libs/stream/src/streamCheckpoint.c | 2 +- 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/source/libs/stream/src/streamBackendRocksdb.c b/source/libs/stream/src/streamBackendRocksdb.c index 5e8f45e8a2..c12ab68607 100644 --- a/source/libs/stream/src/streamBackendRocksdb.c +++ b/source/libs/stream/src/streamBackendRocksdb.c @@ -558,7 +558,6 @@ int32_t restoreCheckpointData(const char* path, const char* key, int64_t chkptId goto _EXIT; } - // int32_t pathLen = strlen(path) + 48; char* checkpointRoot = taosMemoryCalloc(1, pathLen + 48); if (checkpointRoot == NULL) { terrno = TSDB_CODE_OUT_OF_MEMORY; @@ -575,15 +574,15 @@ int32_t restoreCheckpointData(const char* path, const char* key, int64_t chkptId stDebug("%s check local backend dir:%s, checkpointId:%" PRId64 " succ", key, defaultPath, chkptId); - char* chkptPath = taosMemoryCalloc(1, pathLen); + char* chkptPath = taosMemoryCalloc(1, pathLen + 128); if (chkptPath == NULL) { terrno = TSDB_CODE_OUT_OF_MEMORY; goto _EXIT; } if (chkptId > 0) { - snprintf(chkptPath, pathLen, "%s%s%s%s%s%" PRId64 "", prefixPath, TD_DIRSEP, "checkpoints", TD_DIRSEP, "checkpoint", - chkptId); + snprintf(chkptPath, pathLen + 127, "%s%s%s%s%s%" PRId64 "", prefixPath, TD_DIRSEP, "checkpoints", TD_DIRSEP, + "checkpoint", chkptId); code = rebuildFromLocalCheckpoint(key, chkptPath, chkptId, defaultPath, processVer); if (code != 0) { diff --git a/source/libs/stream/src/streamCheckpoint.c b/source/libs/stream/src/streamCheckpoint.c index af7e969c07..26df7b1627 100644 --- a/source/libs/stream/src/streamCheckpoint.c +++ b/source/libs/stream/src/streamCheckpoint.c @@ -667,7 +667,7 @@ int32_t streamTaskBuildCheckpoint(SStreamTask* pTask) { if (pTask->info.taskLevel != TASK_LEVEL__SINK) { stDebug("s-task:%s level:%d start gen checkpoint, checkpointId:%" PRId64, id, pTask->info.taskLevel, ckId); - int64_t ver = 0; + int64_t ver = pTask->chkInfo.processedVer; code = streamBackendDoCheckpoint(pTask->pBackend, ckId, ver); if (code != TSDB_CODE_SUCCESS) { stError("s-task:%s gen checkpoint:%" PRId64 " failed, code:%s", id, ckId, tstrerror(terrno)); From f2fc09cd023a9e7340d2cb633dc72fec0f3e45e7 Mon Sep 17 00:00:00 2001 From: Yihao Deng Date: Thu, 27 Jun 2024 01:48:59 +0000 Subject: [PATCH 07/34] add self check --- source/libs/stream/src/streamBackendRocksdb.c | 4 ++++ source/libs/stream/src/streamMeta.c | 6 ++++-- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/source/libs/stream/src/streamBackendRocksdb.c b/source/libs/stream/src/streamBackendRocksdb.c index c12ab68607..59916b8c0d 100644 --- a/source/libs/stream/src/streamBackendRocksdb.c +++ b/source/libs/stream/src/streamBackendRocksdb.c @@ -615,6 +615,10 @@ _EXIT: bool streamBackendDataIsExist(const char* path, int64_t chkpId, int32_t vgId) { bool exist = true; char* state = taosMemoryCalloc(1, strlen(path) + 32); + if (state == NULL) { + terrno = TSDB_CODE_OUT_OF_MEMORY; + return false; + } sprintf(state, "%s%s%s", path, TD_DIRSEP, "state"); if (!taosDirExist(state)) { exist = false; diff --git a/source/libs/stream/src/streamMeta.c b/source/libs/stream/src/streamMeta.c index 864f9514da..08e373fa56 100644 --- a/source/libs/stream/src/streamMeta.c +++ b/source/libs/stream/src/streamMeta.c @@ -195,7 +195,8 @@ int32_t streamMetaCvtDbFormat(SStreamMeta* pMeta) { bool exist = streamBackendDataIsExist(pMeta->path, chkpId, pMeta->vgId); if (exist == false) { - return code; + stError("failed to check backend data exist, reason:%s", tstrerror(terrno)); + return -1; } SBackendWrapper* pBackend = streamBackendInit(pMeta->path, chkpId, pMeta->vgId); @@ -319,7 +320,8 @@ SStreamMeta* streamMetaOpen(const char* path, void* ahandle, FTaskBuild buildTas } if (streamMetaMayCvtDbFormat(pMeta) < 0) { - stError("vgId:%d convert sub info format failed, open stream meta failed", pMeta->vgId); + stError("vgId:%d convert sub info format failed, open stream meta failed, reason: %s", pMeta->vgId, + tstrerror(terrno)); goto _err; } From 7c328f0cfaed5a8dbfdd57d0ee7d4f86fde19ac9 Mon Sep 17 00:00:00 2001 From: Yihao Deng Date: Thu, 27 Jun 2024 05:28:06 +0000 Subject: [PATCH 08/34] add self check --- source/libs/stream/src/streamBackendRocksdb.c | 36 ++++++++++++++++--- 1 file changed, 32 insertions(+), 4 deletions(-) diff --git a/source/libs/stream/src/streamBackendRocksdb.c b/source/libs/stream/src/streamBackendRocksdb.c index 59916b8c0d..a710f2531a 100644 --- a/source/libs/stream/src/streamBackendRocksdb.c +++ b/source/libs/stream/src/streamBackendRocksdb.c @@ -234,10 +234,25 @@ int32_t rebuildDirFromCheckpoint(const char* path, int64_t chkpId, char** dst) { } int32_t remoteChkp_readMetaData(char* path, SArray* list) { - char* metaPath = taosMemoryCalloc(1, strlen(path)); - sprintf(metaPath, "%s%s%s", path, TD_DIRSEP, "META"); + int32_t cap = strlen(path); + char* metaPath = taosMemoryCalloc(1, cap + 32); + if (metaPath == NULL) { + terrno = TSDB_CODE_OUT_OF_MEMORY; + return -1; + } + + if (sprintf(metaPath, "%s%s%s", path, TD_DIRSEP, "META") >= (cap + 32)) { + terrno = TSDB_CODE_OUT_OF_MEMORY; + taosMemoryFree(metaPath); + return -1; + } TdFilePtr pFile = taosOpenFile(path, TD_FILE_READ); + if (pFile == NULL) { + terrno = TAOS_SYSTEM_ERROR(errno); + taosMemoryFree(metaPath); + return -1; + } char buf[128] = {0}; if (taosReadFile(pFile, buf, sizeof(buf)) <= 0) { @@ -281,6 +296,10 @@ int32_t remoteChkp_validAndCvtMeta(char* path, SArray* list, int64_t chkpId) { int32_t len = strlen(path) + 32; char* src = taosMemoryCalloc(1, len); char* dst = taosMemoryCalloc(1, len); + if (src == NULL || dst == NULL) { + terrno = TSDB_CODE_OUT_OF_MEMORY; + return -1; + } int8_t count = 0; for (int i = 0; i < taosArrayGetSize(list); i++) { @@ -4461,6 +4480,11 @@ int32_t dbChkpDumpTo(SDbChkp* p, char* dname, SArray* list) { stError("failed to dump srcDir %s, reason: not exist such dir", srcDir); goto _ERROR; } + int64_t chkpId = 0, processId = -1; + if (chkpLoadExtraInfo(srcDir, &chkpId, &processId) != 0) { + stError("failed to load extra info from %s, reason:%s", srcDir, terrno != 0 ? "unkown" : tstrerror(terrno)); + goto _ERROR; + } // add file to $name dir for (int i = 0; i < taosArrayGetSize(p->pAdd); i++) { @@ -4516,9 +4540,13 @@ int32_t dbChkpDumpTo(SDbChkp* p, char* dname, SArray* list) { stError("chkp failed to create meta file: %s, reason:%s", dstDir, tstrerror(terrno)); goto _ERROR; } + // META_ON_S3 + // current_checkpointID + // manifest_checkpointID + // processVer_processID char content[128] = {0}; - snprintf(content, sizeof(content), "%s_%" PRId64 "\n%s_%" PRId64 "", p->pCurrent, p->curChkpId, p->pManifest, - p->curChkpId); + snprintf(content, sizeof(content), "%s_%" PRId64 "\n%s_%" PRId64 "\n%s_%" PRId64 "", p->pCurrent, p->curChkpId, + p->pManifest, p->curChkpId, "processVer", processId); if (taosWriteFile(pFile, content, strlen(content)) <= 0) { terrno = errno; stError("chkp failed to write meta file: %s,reason:%s", dstDir, tstrerror(terrno)); From 1004ac69245014fe63854f6b28bcb4f44956acd9 Mon Sep 17 00:00:00 2001 From: Yihao Deng Date: Thu, 27 Jun 2024 05:28:17 +0000 Subject: [PATCH 09/34] add self check --- source/libs/stream/src/streamMeta.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/source/libs/stream/src/streamMeta.c b/source/libs/stream/src/streamMeta.c index 08e373fa56..c74689fa9e 100644 --- a/source/libs/stream/src/streamMeta.c +++ b/source/libs/stream/src/streamMeta.c @@ -196,7 +196,7 @@ int32_t streamMetaCvtDbFormat(SStreamMeta* pMeta) { bool exist = streamBackendDataIsExist(pMeta->path, chkpId, pMeta->vgId); if (exist == false) { stError("failed to check backend data exist, reason:%s", tstrerror(terrno)); - return -1; + return code; } SBackendWrapper* pBackend = streamBackendInit(pMeta->path, chkpId, pMeta->vgId); @@ -283,6 +283,8 @@ int32_t streamTaskSetDb(SStreamMeta* pMeta, SStreamTask* pTask, const char* key) pBackend->pTask = pTask; pBackend->pMeta = pMeta; + pTask->chkInfo.processedVer = processVer; + taosHashPut(pMeta->pTaskDbUnique, key, strlen(key), &pBackend, sizeof(void*)); taosThreadMutexUnlock(&pMeta->backendMutex); From de77ce6480ec6cbd4ffea080a92dcee1639e2e5f Mon Sep 17 00:00:00 2001 From: Yihao Deng Date: Thu, 27 Jun 2024 09:51:49 +0000 Subject: [PATCH 10/34] add self check --- out | Bin 0 -> 21360 bytes source/libs/stream/src/streamBackendRocksdb.c | 199 ++++++++++++------ source/libs/stream/src/streamCheckpoint.c | 56 +++-- t.c | 12 ++ 4 files changed, 182 insertions(+), 85 deletions(-) create mode 100755 out create mode 100644 t.c diff --git a/out b/out new file mode 100755 index 0000000000000000000000000000000000000000..21f5cbee379517922a226c62b551376a5b0ad2f4 GIT binary patch literal 21360 zcmeHPe{@vUoxg7;Ap;3XApFM4L;*pXA%Q>vK@vh>QWKH|Qh(ORWacG#W-^n$c>{@c z6&KwjC|hdludZ6y(~76nF0O4?yS7DLslU359?vdy&u(eER@hn>Q*==)v!Cz1_d84; z8T7dO$DTcNleypf{oL>Od%yR4?|bjQpEotPuJaj&;N%yV3gWg*P)LKWIMJdDAPr)f z7>|6em?Or3FOrz5Z%_oaDjo5SqydflK}l~K6-J@wDOfON4~ddqz7!m*YM6>j#gkq! z6*cdrZQ2i}T%T<(NXke-nP+K{?jO@!J&H|jjgqfjHIfeK_69X&Ju*jnS7^N}v>wx- zZjUL)lVU=@W!j#X)}oFQQ?FjH*7MS}N=-1OHYja+!;O8K_ByoQ;h;1lyp-Ev%I*CL z^e8T$H1RNR)a~UPhr>FaOliC+EsWb;OY0ZLV>NL*kshoWT(-1kX?-Y_3@wuNNZ@c2z9@S0K^(24KuBzuw{@baq9eVEzZ_RrzHc;|N>^mewev=N# zP$GNUMafgS96yp#@wi#ygi(6M`iFj18a>|#c;YBk>+;~#PH+_Yt~~PO|7h}8=8^ww z9{C&d$Um6}xAWjv=fRKW!SBt3zXJRM{LV_I+0&m)m?<~nxTY!0md$Nu%yO(AJLOu= z=C-DIGGT3wbj2-IH+qd3NktN-ov>XIjl|>0s7NIS96RB5iT(NM}w zI#wNn=>%Ifd*aEiNIbhASN?1TVM&eEo^l$fY0t#55v(_}6REv^?_TohV@a2cci`GL}0raS}6+w1qg zdN-!EUyQ}XBP>0`#va`#CecJhTBi4hy#Jb7rSJw1-k>LW*n_{X@eU8p8%3Z&*<<_6 z2R-E5!PWjv)f~vd zd2FcWi5y%X`lZ6b9DJNIA@k>R@X{Roa1K5`2QL&05hz5U5P?Dj3K97K7=b@joc}?1 z$6JB$?Zq!F6e4`0e|@TzAR4*lHucD&GK~MA3lC_mZwF<@bNEYd0GGr zACE8(@8(GF)(Yt#_#cGG+cW&59{$@N{s9kvmxtf&;cxZuf9m1WxqRL4TXw#3ZFuLe z!#n={M91dlx`TBugm6hbsK zV>3r9W^E_AXElZVFQ{_&l2?h$KONqABK-8*tHV#9@`sH>;a5((lVIQ@TuvYZfy~iv z85h>4@!P(Nx+~HPI>S3wy-pV%!aLt^%fh#>`Yo7a_nppUj>Qn{L&g1&GrkDjZ2P2t zd>bmM&T1i_vA;hwvx5soj-d5_2Sr;SurOT92@LPcdwX}LIL-*faG49|5UA99BOT< z6K*Ifs8lPAnf^;~+ArvmLC1e)GJ~KeK=*=*6PXO1bnXG&0y^|gCUY2c{%|I90(3oS zrM`JV_i8B_*KZWYV5Kp$tR%1pb%fJ13G-lME(xqHk+-g9qgapM;kR{L<(2Eor+vPn zbX#D%SUqFK#fxWOK;oo-4Sr$xBozrnBHYEV0r(yQbfUKIz;7SgJ3v@-dF7w`n#Pau z?T1Iomq!5Cp)cp>)_2Nk>FdkD7ovV%ZoR4M2`Z#Q1PT!-M4%9XLIesCC`6zTfkFfd z5jb}Qvd^9A=gIiFGJcLspGJsbZHD=b=F8Z;J_*20E}0ti^K3!Q^K)L+nx|Wzl%^|^ zeI9P6=J}jrhUWSCH#(1?#P#o<&LoLHpdS+BXTlnEfuFrPqU-rYEvP%f&rVGvBS@1K z$>v&34C?2q_}Q-`T7u4ZDDg9AQxz-1x`T`#(2j7w@73+|vtHc3JokYOmisWH`Ui0d z;SQec?{;kucPu5Z&C#i$`Eux zbg-^2R3ECXsY^???#4xnLbdhtH7rkc)imQtFw&bl;6t%6WMx$bjO^25Ei~x{+_W}! zl*VCV@&~9jc1Pv1BPs)r!JMx~akvr?=qG+*6&Zm$V#;&GFD~tt zyv!=6J__o)+9=(I21QZj1IUl~NTc#)^x+q>IPElf^Gko}I^Z`|+yl-(W#SjW2xIyi z*itL~^8;7H_=LYec5%fCvNeGo?_VH$7ntEjdW8S8k|`TI4TeK+K}5DW_8G7X*TD+0 z#@H5^s7+|rH}=N>>pqV@Q*CS!Gq<2Sy1@^GW2w8=?B!t#Mi)HQgI(#=mI|-6~$B#GnWJM zH&xt)p3rDdq^h-3S~R3*_l~Vc$j2GQ0hwTI3pAqvl$$aE*sRLCHMg`wBJsP09S7?>LF=mCTF2xm2@HGB5f5i5&fwWL^%?&b#O# z8H&eu{h%?>Ne1^zzI^G?W>M4x)qTbT6qdh|VG%{c@cLT@9gh6p))1_S{l>EdJ*=VM zLh(BWHRk`WhHAk)YEYp3-;uAeZ7fC!d>c=@f$=ok#?!`aT#M}d@kQvaaR>4DQT7mJk5hJ#vY%7- zI%RKCc9JrKoGYhnCS~&|TSD0y%GxQrhBBM7A!M`0R7^-gGf1znjF~#F7)_4#XICV?02Md>A_%S&6TR!%>!6!BakR#Y386fduyFbxt#%MC~s*!OaWA${hOI4+b@|DwL zRb;%BjMrm*RTdD^t?EhS#P|wPS{XyVB+oiW_@QO|(u3pI*a4h-a zPG4JOup{F3wpxiEx0fvEHQZF++;Lg|b)v5=8B52l&d!##WF35Da8tU!KjI8&E9)(H zgEi>3B=8zgI_lzGo{p&KYlOS&<(obmt=RQ=!6@6AyiL~CZEjp|c5b+AL;K|$nh}o( zUIl8k6P9Y2EH=`MN0E(oPp_-($qu5oF$&?iI#gFn)|)yvZEkOaak3`E*lKmVBV3eK zEeY3&w0B)+MY$4fxUvni-|8Q5lHEBrq(dW&TyDqXohfTwB;|G{w%Lgo)cZfqMIH0& zAUo(ph6Ezp;aFSkWI8o6x?Ra+976PxlX@dd&H_OlT-)5(+S=YE`Zijw?N~AIZ0c)v zoTSrcrBaa|YfZYl+j3-h@eWu|kPZgucND{%jSqU!*p2tZHbs*IdJt(2Z0>ceh_bGR zr+E-UcqOZ&3G+{+6DhkVVa0;c-iV{DH9F{1Q!?3Svl(imDVcEXo;12hCA__5IqmIh z)Zj{=WR^8Y9-3zFHtIUXM{hHjapxNQcgVi(KfW^{v301oG|WbC!qqE-6@C^ZM&&nyqFjC z#1es2yA{M0(Y2hYH9#A&=sV*w##=Rm%|Z@x*oq9OCeCbB=Zq|-Huh08;A#`5txo&6 z(TZDLZ`#{}v!dlg1VVkg{U?Wbnca?h0H(1jXtZ82qtVy}CRXvHc)UWC8XlV-8 zbSIKEU1>Y++6l#2gL+I;ig#SsMk(2EyEWZ(ps5*1%Ap6CmV2ujmtF@Y@vU~s?y}>y zJG83Tj>Rl((jJJ^*wJnXM7wLSH*~S1#H3l(osP!=Mp8qGXs?q@V7spInoQ9Fj+K%Q zx&T3Rr(0D-JjGD7_sGH8r&t z((XcLralygq5}hBAw?;+uqPU=sjI8G!rHt%ekF72L|#iE=H}9R{hl?VeYWwBZ~oj8 zSy#n@e0uOEdw=oU_DJKD08d~K}VCPb4#D76npe{8??C8BmAHTr?nK}e7%z8TTn2> zOtH5?A>{KyW!Z_D^?DBzNX;P-MF-;cs*{y&iie+oF+=j*F{ zo$n=$^L5yH;3?&c&+Di&=qU?Y6nfHV{v4AsRoU+ou*!!h7`Xb%*EJcRfIt~yn($uN zqm#s-z}46WmQ-{GaDC}`M8)4oyfXX!jk&tEI*)t}@ZbmwilGY+6MYi|Ur%O{HNYqM zCJR0r!+Kq&%SoPva_F_dgE>4Gug@dD1Na28q?K@fE7P68HKY+m9syoCN_?IKKFUxD zx`s2sHzPYg+1OEy2O1brlr9%l=FDfVeP81bd*n|8A1w~ke52#Fz{&r;9y@fBJDQyv z^4M?9BX8%ycS*cbwCG0Y8Udw?$i5GE7qDrMNSmULTNg^w!nVsc>4F}Bl#4Ygo}D`L z!2;9mH>32)0y@cwCH40N%ox5mkTN6bL4n=kK-_YzSg3Y%3Ho+`ZANH4YT|^;84~g+ zXvT0}GX#|!PA;!Uh{~O6?oouE>@rj7E_1C}TU)z)xrtVZZP6@!17Y39#0hBv0u>D295HV5yL@q9Wwt zY+Quoj0ibN8KjWat7mm@4Az)~lDaaWX1FrR=)pFrSv%2cVL(bNE_~#bpGPiv(jrI3sompN5$UCG28wOjc={UY{`K z`;59;%5s_Lh4|qDRaQUWcZx`s5+9!wCj?#7%+>dPuV^+4YRbnShG~71tIz8zra>0e z)T@6v@b_?Uko!*;tSK?&_HpG=Ca?bypt!J>?LXgNW!g)@MIs-m$ohPr(*{OOa{CRq zv`2|)Q0psH+2AtKFX2b`K3SjdcQWPm75mR}OmBof-AiSj@24`YrACmv{wq|7-6&8@ zS)cFkGUfZb+`hN}_h|h!+92OAW_m>9q)%5Bz54rrQHa=n_Bg4Jmo&d~lXv{@M`f;l zQ0p<>o2!g6ABLIk_vjzg6--%<%`ngOfJgtZu3)-{6*Xl&rcZkG`8#h+f9aV&tjA{Q z4p?sgd40*0pJQNoZ~UIq`aJ$3s3kQ$tXU=+W>)+hC2F7b4{HIYYqOH7;BEh30O#sY zt5$MM>Aq%e;`zha8z@jr^k$!3NAvf^yzRrS;eG#L-4rpQ)Wk|Dpz^u!;rq=u@?)i&C<;Q7LTK8~kjrbRUZ6 m10O%>x(d%7rJ?d;&5C}-W8C8o&TU86-xgN-LmmYWEB+hAz@k(D literal 0 HcmV?d00001 diff --git a/source/libs/stream/src/streamBackendRocksdb.c b/source/libs/stream/src/streamBackendRocksdb.c index a710f2531a..7ff651d190 100644 --- a/source/libs/stream/src/streamBackendRocksdb.c +++ b/source/libs/stream/src/streamBackendRocksdb.c @@ -19,6 +19,8 @@ #include "tcommon.h" #include "tref.h" +#define META_ON_S3_FORMATE "%s_%" PRId64 "\n%s_%" PRId64 "\n%s_%" PRId64 "" + typedef struct SCompactFilteFactory { void* status; } SCompactFilteFactory; @@ -233,15 +235,28 @@ int32_t rebuildDirFromCheckpoint(const char* path, int64_t chkpId, char** dst) { return 0; } -int32_t remoteChkp_readMetaData(char* path, SArray* list) { - int32_t cap = strlen(path); - char* metaPath = taosMemoryCalloc(1, cap + 32); +typedef struct { + char pCurrName[24]; + int64_t currChkptId; + + char pManifestName[24]; + int64_t manifestChkptId; + + char processName[24]; + int64_t processId; +} SSChkpMetaOnS3; + +int32_t remoteChkp_readMetaData(char* path, SSChkpMetaOnS3** pMeta) { + int32_t cap = strlen(path) + 32; + + char* metaPath = taosMemoryCalloc(1, cap); if (metaPath == NULL) { terrno = TSDB_CODE_OUT_OF_MEMORY; return -1; } - if (sprintf(metaPath, "%s%s%s", path, TD_DIRSEP, "META") >= (cap + 32)) { + int32_t n = sprintf(metaPath, "%s%s%s", path, TD_DIRSEP, "META"); + if (n <= 0 || n >= (cap - 1)) { terrno = TSDB_CODE_OUT_OF_MEMORY; taosMemoryFree(metaPath); return -1; @@ -254,23 +269,23 @@ int32_t remoteChkp_readMetaData(char* path, SArray* list) { return -1; } - char buf[128] = {0}; + char buf[256] = {0}; if (taosReadFile(pFile, buf, sizeof(buf)) <= 0) { + terrno = TAOS_SYSTEM_ERROR(errno); taosMemoryFree(metaPath); taosCloseFile(&pFile); return -1; } - int32_t len = strlen(buf); - for (int i = 0; i < len; i++) { - if (buf[i] == '\n') { - char* item = taosMemoryCalloc(1, i + 1); - memcpy(item, buf, i); - taosArrayPush(list, &item); - item = taosMemoryCalloc(1, len - i); - memcpy(item, buf + i + 1, len - i - 1); - taosArrayPush(list, &item); - } + SSChkpMetaOnS3* p = taosMemoryCalloc(1, sizeof(SSChkpMetaOnS3)); + n = sscanf(buf, META_ON_S3_FORMATE, p->pCurrName, &p->currChkptId, p->pManifestName, &p->manifestChkptId, + p->processName, &p->processId); + if (n != 6) { + terrno = TSDB_CODE_INVALID_MSG; + taosMemoryFree(p); + taosMemoryFree(metaPath); + taosCloseFile(&pFile); + return -1; } taosCloseFile(&pFile); @@ -291,7 +306,7 @@ int32_t remoteChkp_validMetaFile(char* name, char* prename, int64_t chkpId) { } return valid; } -int32_t remoteChkp_validAndCvtMeta(char* path, SArray* list, int64_t chkpId) { +int32_t remoteChkp_validAndCvtMeta(char* path, SSChkpMetaOnS3* pMeta, int64_t chkpId) { int32_t complete = 1; int32_t len = strlen(path) + 32; char* src = taosMemoryCalloc(1, len); @@ -301,33 +316,38 @@ int32_t remoteChkp_validAndCvtMeta(char* path, SArray* list, int64_t chkpId) { return -1; } + if (pMeta->currChkptId != chkpId || pMeta->manifestChkptId != chkpId) { + terrno = TSDB_CODE_OUT_OF_MEMORY; + return -1; + } + int8_t count = 0; - for (int i = 0; i < taosArrayGetSize(list); i++) { - char* p = taosArrayGetP(list, i); - sprintf(src, "%s%s%s", path, TD_DIRSEP, p); + // for (int i = 0; i < taosArrayGetSize(list); i++) { + // char* p = taosArrayGetP(list, i); + // sprintf(src, "%s%s%s", path, TD_DIRSEP, p); - // check file exist - if (taosStatFile(src, NULL, NULL, NULL) != 0) { - complete = 0; - break; - } + // // check file exist + // if (taosStatFile(src, NULL, NULL, NULL) != 0) { + // complete = 0; + // break; + // } - // check file name - char temp[64] = {0}; - if (remoteChkp_validMetaFile(p, temp, chkpId)) { - count++; - } + // // check file name + // char temp[64] = {0}; + // if (remoteChkp_validMetaFile(p, temp, chkpId)) { + // count++; + // } - // rename file - sprintf(dst, "%s%s%s", path, TD_DIRSEP, temp); - taosRenameFile(src, dst); + // // rename file + // sprintf(dst, "%s%s%s", path, TD_DIRSEP, temp); + // taosRenameFile(src, dst); - memset(src, 0, len); - memset(dst, 0, len); - } - if (count != taosArrayGetSize(list)) { - complete = 0; - } + // memset(src, 0, len); + // memset(dst, 0, len); + // } + // if (count != taosArrayGetSize(list)) { + // complete = 0; + // } taosMemoryFree(src); taosMemoryFree(dst); @@ -385,12 +405,14 @@ int32_t rebuildFromRemoteChkp_s3(const char* key, char* chkpPath, int64_t chkpId if (taosIsDir(tmp)) taosRemoveDir(tmp); if (taosIsDir(defaultPath)) taosRenameFile(defaultPath, tmp); - SArray* list = taosArrayInit(2, sizeof(void*)); - code = remoteChkp_readMetaData(chkpPath, list); + // SArray* list = taosArrayInit(2, sizeof(void*)); + SSChkpMetaOnS3* pMeta; + code = remoteChkp_readMetaData(chkpPath, &pMeta); if (code == 0) { - code = remoteChkp_validAndCvtMeta(chkpPath, list, chkpId); + code = remoteChkp_validAndCvtMeta(chkpPath, pMeta, chkpId); } - taosArrayDestroyP(list, taosMemoryFree); + taosMemoryFree(pMeta); + // taosArrayDestroyP(list, taosMemoryFree); if (code == 0) { taosMkDir(defaultPath); @@ -1322,6 +1344,9 @@ int32_t chkpLoadExtraInfo(char* pChkpIdDir, int64_t* chkpId, int64_t* processId) TdFilePtr pFile = NULL; int32_t code = -1; + char buf[256] = {0}; + int32_t nBytes = 0; + int32_t len = strlen(pChkpIdDir); if (len == 0) { terrno = TSDB_CODE_INVALID_PARA; @@ -1336,7 +1361,8 @@ int32_t chkpLoadExtraInfo(char* pChkpIdDir, int64_t* chkpId, int64_t* processId) goto _EXIT; } - if (sprintf(pDst, "%s%sinfo", pChkpIdDir, TD_DIRSEP) <= 0) { + nBytes = snprintf(pDst, len + 64, "%s%sinfo", pChkpIdDir, TD_DIRSEP); + if (nBytes != strlen(pDst)) { code = -1; stError("failed to build dst to load extra info, dir:%s", pChkpIdDir); goto _EXIT; @@ -1349,7 +1375,6 @@ int32_t chkpLoadExtraInfo(char* pChkpIdDir, int64_t* chkpId, int64_t* processId) goto _EXIT; } - char buf[256] = {0}; if (taosReadFile(pFile, buf, sizeof(buf)) <= 0) { terrno = TAOS_SYSTEM_ERROR(errno); stError("failed to read file to load extra info, file:%s, reason:%s", pDst, tstrerror(terrno)); @@ -1368,8 +1393,12 @@ _EXIT: return code; } int32_t chkpAddExtraInfo(char* pChkpIdDir, int64_t chkpId, int64_t processId) { + int32_t code = -1; + TdFilePtr pFile = NULL; - int32_t code = -1; + + char buf[256] = {0}; + int32_t nBytes = 0; int32_t len = strlen(pChkpIdDir); if (len == 0) { @@ -1385,7 +1414,8 @@ int32_t chkpAddExtraInfo(char* pChkpIdDir, int64_t chkpId, int64_t processId) { goto _EXIT; } - if (sprintf(pDst, "%s%sinfo", pChkpIdDir, TD_DIRSEP) < 0) { + nBytes = snprintf(pDst, len + 64, "%s%sinfo", pChkpIdDir, TD_DIRSEP); + if (nBytes != strlen(pDst)) { stError("failed to build dst to add extra info, dir:%s", pChkpIdDir); goto _EXIT; } @@ -1397,15 +1427,14 @@ int32_t chkpAddExtraInfo(char* pChkpIdDir, int64_t chkpId, int64_t processId) { goto _EXIT; } - char buf[256] = {0}; - int n = snprintf(buf, sizeof(buf), "%" PRId64 " %" PRId64 "", chkpId, processId); - if (n <= 0 || n >= sizeof(buf)) { + nBytes = snprintf(buf, sizeof(buf), "%" PRId64 " %" PRId64 "", chkpId, processId); + if (nBytes != strlen(buf)) { code = -1; stError("failed to build content to add extra info, dir:%s", pChkpIdDir); goto _EXIT; } - if (taosWriteFile(pFile, buf, strlen(buf)) <= 0) { + if (nBytes != taosWriteFile(pFile, buf, nBytes)) { terrno = TAOS_SYSTEM_ERROR(errno); stError("failed to write file to add extra info, file:%s, reason:%s", pDst, tstrerror(terrno)); goto _EXIT; @@ -2430,18 +2459,27 @@ void taskDbDestroy2(void* pDb) { taskDbDestroy(pDb, true); } int32_t taskDbGenChkpUploadData__rsync(STaskDbWrapper* pDb, int64_t chkpId, char** path) { int32_t code = -1; int64_t refId = pDb->refId; + int32_t nBytes = 0; if (taosAcquireRef(taskDbWrapperId, refId) == NULL) { return -1; } - char* buf = taosMemoryCalloc(1, strlen(pDb->path) + 128); + int32_t cap = strlen(pDb->path) + 128; + + char* buf = taosMemoryCalloc(1, cap); if (buf == NULL) { terrno = TSDB_CODE_OUT_OF_MEMORY; return -1; } - sprintf(buf, "%s%s%s%s%s%" PRId64 "", pDb->path, TD_DIRSEP, "checkpoints", TD_DIRSEP, "checkpoint", chkpId); + nBytes = + snprintf(buf, cap, "%s%s%s%s%s%" PRId64 "", pDb->path, TD_DIRSEP, "checkpoints", TD_DIRSEP, "checkpoint", chkpId); + if (nBytes != strlen(buf)) { + terrno = TSDB_CODE_OUT_OF_RANGE; + return -1; + } + if (taosIsDir(buf)) { code = 0; *path = buf; @@ -4473,8 +4511,18 @@ int32_t dbChkpDumpTo(SDbChkp* p, char* dname, SArray* list) { goto _ERROR; } - sprintf(srcDir, "%s%s%s%s%s%" PRId64 "", p->path, TD_DIRSEP, "checkpoints", TD_DIRSEP, "checkpoint", p->curChkpId); - sprintf(dstDir, "%s", dname); + int nBytes = snprintf(srcDir, len, "%s%s%s%s%s%" PRId64 "", p->path, TD_DIRSEP, "checkpoints", TD_DIRSEP, + "checkpoint", p->curChkpId); + if (nBytes != strlen(srcBuf)) { + terrno = TSDB_CODE_OUT_OF_RANGE; + goto _ERROR; + } + + nBytes = snprintf(dstDir, len, "%s", dname); + if (nBytes != strlen(dstBuf)) { + terrno = TSDB_CODE_OUT_OF_RANGE; + goto _ERROR; + } if (!taosDirExist(srcDir)) { stError("failed to dump srcDir %s, reason: not exist such dir", srcDir); @@ -4540,14 +4588,20 @@ int32_t dbChkpDumpTo(SDbChkp* p, char* dname, SArray* list) { stError("chkp failed to create meta file: %s, reason:%s", dstDir, tstrerror(terrno)); goto _ERROR; } - // META_ON_S3 - // current_checkpointID - // manifest_checkpointID - // processVer_processID - char content[128] = {0}; - snprintf(content, sizeof(content), "%s_%" PRId64 "\n%s_%" PRId64 "\n%s_%" PRId64 "", p->pCurrent, p->curChkpId, - p->pManifest, p->curChkpId, "processVer", processId); - if (taosWriteFile(pFile, content, strlen(content)) <= 0) { + + char content[256] = {0}; + nBytes = snprintf(content, sizeof(content), META_ON_S3_FORMATE, p->pCurrent, p->curChkpId, p->pManifest, p->curChkpId, + "processVer", processId); + if (nBytes != strlen(content)) { + terrno = TSDB_CODE_INVALID_MSG; + stError("chkp failed to format meta file: %s, reason: invalid msg", dstDir); + taosCloseFile(&pFile); + code = -1; + goto _ERROR; + } + + nBytes = taosWriteFile(pFile, content, strlen(content)); + if (nBytes != strlen(content)) { terrno = errno; stError("chkp failed to write meta file: %s,reason:%s", dstDir, tstrerror(terrno)); taosCloseFile(&pFile); @@ -4612,17 +4666,28 @@ int32_t bkdMgtGetDelta(SBkdMgt* bm, char* taskId, int64_t chkpId, SArray* list, sprintf(path, "%s%s%s", bm->path, TD_DIRSEP, taskId); SDbChkp* p = dbChkpCreate(path, chkpId); - taosHashPut(bm->pDbChkpTbl, taskId, strlen(taskId), &p, sizeof(void*)); + if (p == NULL) { + taosMemoryFree(path); + taosThreadRwlockUnlock(&bm->rwLock); + return -1; + } + + if (taosHashPut(bm->pDbChkpTbl, taskId, strlen(taskId), &p, sizeof(void*)) != 0) { + dbChkpDestroy(p); + taosMemoryFree(path); + taosThreadRwlockUnlock(&bm->rwLock); + return -1; + } pChkp = p; - code = dbChkpDumpTo(pChkp, dname, list); taosThreadRwlockUnlock(&bm->rwLock); return code; - } + } else { + code = dbChkpGetDelta(pChkp, chkpId, NULL); - code = dbChkpGetDelta(pChkp, chkpId, NULL); - code = dbChkpDumpTo(pChkp, dname, list); + if (code == 0) code = dbChkpDumpTo(pChkp, dname, list); + } taosThreadRwlockUnlock(&bm->rwLock); return code; diff --git a/source/libs/stream/src/streamCheckpoint.c b/source/libs/stream/src/streamCheckpoint.c index 26df7b1627..bc3762a6d5 100644 --- a/source/libs/stream/src/streamCheckpoint.c +++ b/source/libs/stream/src/streamCheckpoint.c @@ -527,27 +527,41 @@ void streamTaskSetFailedCheckpointId(SStreamTask* pTask) { } static int32_t getCheckpointDataMeta(const char* id, const char* path, SArray* list) { - char buf[128] = {0}; + TdFilePtr pFile = NULL; + int32_t cap = strlen(path) + 32; + char buf[128] = {0}; + int32_t code = 0; - char* file = taosMemoryCalloc(1, strlen(path) + 32); - sprintf(file, "%s%s%s", path, TD_DIRSEP, "META_TMP"); + char* filePath = taosMemoryCalloc(1, cap); + if (filePath == NULL) { + terrno = TSDB_CODE_OUT_OF_MEMORY; + return -1; + } - int32_t code = downloadCheckpointDataByName(id, "META", file); + int32_t nBytes = snprintf(filePath, cap, "%s%s%s", path, TD_DIRSEP, "META_TMP"); + if (nBytes != strlen(filePath)) { + taosMemoryFree(filePath); + terrno = TSDB_CODE_OUT_OF_RANGE; + return -1; + } + + code = downloadCheckpointDataByName(id, "META", filePath); if (code != 0) { - stDebug("%s chkp failed to download meta file:%s", id, file); - taosMemoryFree(file); + stDebug("%s chkp failed to download meta file:%s", id, filePath); + taosMemoryFree(filePath); return code; } - TdFilePtr pFile = taosOpenFile(file, TD_FILE_READ); + pFile = taosOpenFile(filePath, TD_FILE_READ); if (pFile == NULL) { - stError("%s failed to open meta file:%s for checkpoint", id, file); - code = -1; - return code; + terrno = TAOS_SYSTEM_ERROR(errno); + stError("%s failed to open meta file:%s for checkpoint", id, filePath); + taosMemoryFree(filePath); + return -1; } if (taosReadFile(pFile, buf, sizeof(buf)) <= 0) { - stError("%s failed to read meta file:%s for checkpoint", id, file); + stError("%s failed to read meta file:%s for checkpoint", id, filePath); code = -1; } else { int32_t len = strnlen(buf, tListLen(buf)); @@ -565,27 +579,33 @@ static int32_t getCheckpointDataMeta(const char* id, const char* path, SArray* l } taosCloseFile(&pFile); - taosRemoveFile(file); - taosMemoryFree(file); + taosRemoveFile(filePath); + taosMemoryFree(filePath); return code; } int32_t uploadCheckpointData(SStreamTask* pTask, int64_t checkpointId, int64_t dbRefId, ECHECKPOINT_BACKUP_TYPE type) { char* path = NULL; int32_t code = 0; - SArray* toDelFiles = taosArrayInit(4, POINTER_BYTES); - int64_t now = taosGetTimestampMs(); SStreamMeta* pMeta = pTask->pMeta; const char* idStr = pTask->id.idStr; + int64_t now = taosGetTimestampMs(); + + SArray* toDelFiles = taosArrayInit(4, POINTER_BYTES); + if (toDelFiles == NULL) { + terrno = TSDB_CODE_OUT_OF_MEMORY; + return -1; + } if ((code = taskDbGenChkpUploadData(pTask->pBackend, pMeta->bkdChkptMgt, checkpointId, type, &path, toDelFiles, pTask->id.idStr)) != 0) { - stError("s-task:%s failed to gen upload checkpoint:%" PRId64, idStr, checkpointId); + stError("s-task:%s failed to gen upload checkpoint:%" PRId64 ", reason:%s", idStr, checkpointId, tstrerror(terrno)); } if (type == DATA_UPLOAD_S3) { if (code == TSDB_CODE_SUCCESS && (code = getCheckpointDataMeta(idStr, path, toDelFiles)) != 0) { - stError("s-task:%s failed to get checkpointData for checkpointId:%" PRId64 " meta", idStr, checkpointId); + stError("s-task:%s failed to get checkpointData for checkpointId:%" PRId64 ", reason:%s", idStr, checkpointId, + tstrerror(terrno)); } } @@ -594,7 +614,7 @@ int32_t uploadCheckpointData(SStreamTask* pTask, int64_t checkpointId, int64_t d if (code == TSDB_CODE_SUCCESS) { stDebug("s-task:%s upload checkpointId:%" PRId64 " to remote succ", idStr, checkpointId); } else { - stError("s-task:%s failed to upload checkpointId:%" PRId64 " data:%s", idStr, checkpointId, path); + stError("s-task:%s failed to upload checkpointId:%" PRId64 " path:%s,reason:%s", idStr, checkpointId, path); } } diff --git a/t.c b/t.c new file mode 100644 index 0000000000..a79ed4c134 --- /dev/null +++ b/t.c @@ -0,0 +1,12 @@ +#include +#include +#include + +int main() { + char *buf = calloc(1, 4); + int n = snprintf(buf, 4, "size"); + + printf("write size:%d \t buf:%s \t len:%d\n", n, buf, (int)(strlen(buf))); + buf[4] = 10; + return 1; +} From 51e4abe2563d5d45f093d04792776386cecfc065 Mon Sep 17 00:00:00 2001 From: Yihao Deng Date: Fri, 28 Jun 2024 02:58:30 +0000 Subject: [PATCH 11/34] add self check --- source/libs/stream/src/streamBackendRocksdb.c | 179 +++++++++++++----- 1 file changed, 130 insertions(+), 49 deletions(-) diff --git a/source/libs/stream/src/streamBackendRocksdb.c b/source/libs/stream/src/streamBackendRocksdb.c index 7ff651d190..53b45f13a2 100644 --- a/source/libs/stream/src/streamBackendRocksdb.c +++ b/source/libs/stream/src/streamBackendRocksdb.c @@ -1354,37 +1354,45 @@ int32_t chkpLoadExtraInfo(char* pChkpIdDir, int64_t* chkpId, int64_t* processId) return -1; } - char* pDst = taosMemoryCalloc(1, len + 64); + int32_t cap = len + 64; + char* pDst = taosMemoryCalloc(1, cap); if (pDst == NULL) { terrno = TSDB_CODE_OUT_OF_MEMORY; stError("failed to alloc memory to load extra info, dir:%s", pChkpIdDir); goto _EXIT; } - nBytes = snprintf(pDst, len + 64, "%s%sinfo", pChkpIdDir, TD_DIRSEP); - if (nBytes != strlen(pDst)) { - code = -1; + nBytes = snprintf(pDst, cap, "%s%sinfo", pChkpIdDir, TD_DIRSEP); + if (nBytes <= 0 || nBytes >= cap) { + terrno = TSDB_CODE_OUT_OF_RANGE; stError("failed to build dst to load extra info, dir:%s", pChkpIdDir); goto _EXIT; } pFile = taosOpenFile(pDst, TD_FILE_READ); if (pFile == NULL) { - terrno = TAOS_SYSTEM_ERROR(errno); - stError("failed to open file to load extra info, file:%s", pDst); + if (errno == ENOENT) { + // compatible with previous version + *processId = -1; + code = 0; + goto _EXIT; + } else { + terrno = TAOS_SYSTEM_ERROR(errno); + stError("failed to open file to load extra info, file:%s", pDst); + } goto _EXIT; } if (taosReadFile(pFile, buf, sizeof(buf)) <= 0) { terrno = TAOS_SYSTEM_ERROR(errno); stError("failed to read file to load extra info, file:%s, reason:%s", pDst, tstrerror(terrno)); - code = -1; goto _EXIT; } if (sscanf(buf, "%" PRId64 " %" PRId64 "", chkpId, processId) < 2) { terrno = TSDB_CODE_INVALID_PARA; stError("failed to read file content to load extra info, file:%s, reason:%s", pDst, tstrerror(terrno)); + goto _EXIT; } code = 0; _EXIT: @@ -1406,16 +1414,16 @@ int32_t chkpAddExtraInfo(char* pChkpIdDir, int64_t chkpId, int64_t processId) { stError("failed to add extra info, dir:%s, reason:%s", pChkpIdDir, tstrerror(terrno)); return -1; } - - char* pDst = taosMemoryCalloc(1, len + 64); + int32_t cap = len + 64; + char* pDst = taosMemoryCalloc(1, cap); if (pDst == NULL) { terrno = TSDB_CODE_OUT_OF_MEMORY; stError("failed to alloc memory to add extra info, dir:%s", pChkpIdDir); goto _EXIT; } - nBytes = snprintf(pDst, len + 64, "%s%sinfo", pChkpIdDir, TD_DIRSEP); - if (nBytes != strlen(pDst)) { + nBytes = snprintf(pDst, cap, "%s%sinfo", pChkpIdDir, TD_DIRSEP); + if (nBytes <= 0 || nBytes >= cap) { stError("failed to build dst to add extra info, dir:%s", pChkpIdDir); goto _EXIT; } @@ -1428,8 +1436,8 @@ int32_t chkpAddExtraInfo(char* pChkpIdDir, int64_t chkpId, int64_t processId) { } nBytes = snprintf(buf, sizeof(buf), "%" PRId64 " %" PRId64 "", chkpId, processId); - if (nBytes != strlen(buf)) { - code = -1; + if (nBytes <= 0 || nBytes >= sizeof(buf)) { + terrno = TSDB_CODE_OUT_OF_RANGE; stError("failed to build content to add extra info, dir:%s", pChkpIdDir); goto _EXIT; } @@ -2475,7 +2483,7 @@ int32_t taskDbGenChkpUploadData__rsync(STaskDbWrapper* pDb, int64_t chkpId, char nBytes = snprintf(buf, cap, "%s%s%s%s%s%" PRId64 "", pDb->path, TD_DIRSEP, "checkpoints", TD_DIRSEP, "checkpoint", chkpId); - if (nBytes != strlen(buf)) { + if (nBytes <= 0 || nBytes >= cap) { terrno = TSDB_CODE_OUT_OF_RANGE; return -1; } @@ -4311,6 +4319,7 @@ void dbChkpDebugInfo(SDbChkp* pDb) { } } int32_t dbChkpGetDelta(SDbChkp* p, int64_t chkpId, SArray* list) { + int32_t nBytes; taosThreadRwlockWrlock(&p->rwLock); p->preCkptId = p->curChkpId; @@ -4368,6 +4377,11 @@ int32_t dbChkpGetDelta(SDbChkp* p, int64_t chkpId, SArray* list) { char* name = taosHashGetKey(pIter, &len); if (name != NULL && !isBkdDataMeta(name, len)) { char* fname = taosMemoryCalloc(1, len + 1); + if (fname == NULL) { + terrno = TSDB_CODE_OUT_OF_MEMORY; + taosThreadRwlockUnlock(&p->rwLock); + return -1; + } strncpy(fname, name, len); taosArrayPush(p->pAdd, &fname); } @@ -4496,30 +4510,32 @@ int32_t dbChkpInit(SDbChkp* p) { } #endif int32_t dbChkpDumpTo(SDbChkp* p, char* dname, SArray* list) { + static char* chkpMeta = "META"; + int32_t code = -1; + int32_t cap = p->len + 128; + taosThreadRwlockRdlock(&p->rwLock); - int32_t code = -1; - int32_t len = p->len + 128; - char* srcBuf = taosMemoryCalloc(1, len); - char* dstBuf = taosMemoryCalloc(1, len); + char* srcBuf = taosMemoryCalloc(1, cap); + char* dstBuf = taosMemoryCalloc(1, cap); - char* srcDir = taosMemoryCalloc(1, len); - char* dstDir = taosMemoryCalloc(1, len); + char* srcDir = taosMemoryCalloc(1, cap); + char* dstDir = taosMemoryCalloc(1, cap); if (srcBuf == NULL || dstBuf == NULL || srcDir == NULL || dstDir == NULL) { terrno = TSDB_CODE_OUT_OF_MEMORY; goto _ERROR; } - int nBytes = snprintf(srcDir, len, "%s%s%s%s%s%" PRId64 "", p->path, TD_DIRSEP, "checkpoints", TD_DIRSEP, + int nBytes = snprintf(srcDir, cap, "%s%s%s%s%s%" PRId64 "", p->path, TD_DIRSEP, "checkpoints", TD_DIRSEP, "checkpoint", p->curChkpId); - if (nBytes != strlen(srcBuf)) { + if (nBytes <= 0 || nBytes >= cap) { terrno = TSDB_CODE_OUT_OF_RANGE; goto _ERROR; } - nBytes = snprintf(dstDir, len, "%s", dname); - if (nBytes != strlen(dstBuf)) { + nBytes = snprintf(dstDir, cap, "%s", dname); + if (nBytes <= 0 || nBytes >= cap) { terrno = TSDB_CODE_OUT_OF_RANGE; goto _ERROR; } @@ -4536,12 +4552,21 @@ int32_t dbChkpDumpTo(SDbChkp* p, char* dname, SArray* list) { // add file to $name dir for (int i = 0; i < taosArrayGetSize(p->pAdd); i++) { - memset(srcBuf, 0, len); - memset(dstBuf, 0, len); + memset(srcBuf, 0, cap); + memset(dstBuf, 0, cap); char* filename = taosArrayGetP(p->pAdd, i); - sprintf(srcBuf, "%s%s%s", srcDir, TD_DIRSEP, filename); - sprintf(dstBuf, "%s%s%s", dstDir, TD_DIRSEP, filename); + nBytes = snprintf(srcBuf, cap, "%s%s%s", srcDir, TD_DIRSEP, filename); + if (nBytes <= 0 || nBytes >= cap) { + terrno = TSDB_CODE_OUT_OF_RANGE; + goto _ERROR; + } + + nBytes = snprintf(dstBuf, cap, "%s%s%s", dstDir, TD_DIRSEP, filename); + if (nBytes <= 0 || nBytes >= cap) { + terrno = TSDB_CODE_OUT_OF_RANGE; + goto _ERROR; + } if (taosCopyFile(srcBuf, dstBuf) < 0) { terrno = errno; @@ -4553,14 +4578,29 @@ int32_t dbChkpDumpTo(SDbChkp* p, char* dname, SArray* list) { for (int i = 0; i < taosArrayGetSize(p->pDel); i++) { char* filename = taosArrayGetP(p->pDel, i); char* p = taosStrdup(filename); + if (p == NULL) { + terrno = TSDB_CODE_OUT_OF_MEMORY; + goto _ERROR; + } taosArrayPush(list, &p); } // copy current file to dst dir - memset(srcBuf, 0, len); - memset(dstBuf, 0, len); - sprintf(srcBuf, "%s%s%s", srcDir, TD_DIRSEP, p->pCurrent); - sprintf(dstBuf, "%s%s%s_%" PRId64 "", dstDir, TD_DIRSEP, p->pCurrent, p->curChkpId); + memset(srcBuf, 0, cap); + memset(dstBuf, 0, cap); + + nBytes = snprintf(srcBuf, cap, "%s%s%s", srcDir, TD_DIRSEP, p->pCurrent); + if (nBytes <= 0 || nBytes >= cap) { + terrno = TSDB_CODE_OUT_OF_RANGE; + goto _ERROR; + } + + nBytes = snprintf(dstBuf, cap, "%s%s%s_%" PRId64 "", dstDir, TD_DIRSEP, p->pCurrent, p->curChkpId); + if (nBytes <= 0 || nBytes >= cap) { + terrno = TSDB_CODE_OUT_OF_RANGE; + goto _ERROR; + } + if (taosCopyFile(srcBuf, dstBuf) < 0) { terrno = errno; stError("failed to copy file from %s to %s, reason:%s", srcBuf, dstBuf, tstrerror(terrno)); @@ -4568,23 +4608,37 @@ int32_t dbChkpDumpTo(SDbChkp* p, char* dname, SArray* list) { } // copy manifest file to dst dir - memset(srcBuf, 0, len); - memset(dstBuf, 0, len); - sprintf(srcBuf, "%s%s%s", srcDir, TD_DIRSEP, p->pManifest); - sprintf(dstBuf, "%s%s%s_%" PRId64 "", dstDir, TD_DIRSEP, p->pManifest, p->curChkpId); + memset(srcBuf, 0, cap); + memset(dstBuf, 0, cap); + + nBytes = snprintf(srcBuf, cap, "%s%s%s", srcDir, TD_DIRSEP, p->pManifest); + if (nBytes <= 0 || nBytes >= cap) { + terrno = TSDB_CODE_OUT_OF_RANGE; + goto _ERROR; + } + + nBytes = snprintf(dstBuf, cap, "%s%s%s_%" PRId64 "", dstDir, TD_DIRSEP, p->pManifest, p->curChkpId); + if (nBytes <= 0 || nBytes >= cap) { + terrno = TSDB_CODE_OUT_OF_RANGE; + goto _ERROR; + } + if (taosCopyFile(srcBuf, dstBuf) < 0) { terrno = errno; stError("failed to copy file from %s to %s, reason:%s", srcBuf, dstBuf, tstrerror(terrno)); goto _ERROR; } - static char* chkpMeta = "META"; - memset(dstBuf, 0, len); - sprintf(dstDir, "%s%s%s", dstDir, TD_DIRSEP, chkpMeta); + memset(dstBuf, 0, cap); + nBytes = snprintf(dstDir, cap, "%s%s%s", dstDir, TD_DIRSEP, chkpMeta); + if (nBytes <= 0 || nBytes >= cap) { + terrno = TSDB_CODE_OUT_OF_RANGE; + goto _ERROR; + } TdFilePtr pFile = taosOpenFile(dstDir, TD_FILE_CREATE | TD_FILE_WRITE | TD_FILE_TRUNC); if (pFile == NULL) { - terrno = errno; + terrno = TAOS_SYSTEM_ERROR(errno); stError("chkp failed to create meta file: %s, reason:%s", dstDir, tstrerror(terrno)); goto _ERROR; } @@ -4592,23 +4646,20 @@ int32_t dbChkpDumpTo(SDbChkp* p, char* dname, SArray* list) { char content[256] = {0}; nBytes = snprintf(content, sizeof(content), META_ON_S3_FORMATE, p->pCurrent, p->curChkpId, p->pManifest, p->curChkpId, "processVer", processId); - if (nBytes != strlen(content)) { - terrno = TSDB_CODE_INVALID_MSG; + if (nBytes <= 0 || nBytes >= sizeof(content)) { + terrno = TSDB_CODE_OUT_OF_RANGE; stError("chkp failed to format meta file: %s, reason: invalid msg", dstDir); taosCloseFile(&pFile); - code = -1; goto _ERROR; } nBytes = taosWriteFile(pFile, content, strlen(content)); if (nBytes != strlen(content)) { - terrno = errno; + terrno = TAOS_SYSTEM_ERROR(errno); stError("chkp failed to write meta file: %s,reason:%s", dstDir, tstrerror(terrno)); taosCloseFile(&pFile); - code = -1; goto _ERROR; } - taosCloseFile(&pFile); // clear delta data buf @@ -4624,11 +4675,34 @@ _ERROR: taosMemoryFree(dstDir); return code; } + SBkdMgt* bkdMgtCreate(char* path) { SBkdMgt* p = taosMemoryCalloc(1, sizeof(SBkdMgt)); + if (p == NULL) { + terrno = TSDB_CODE_OUT_OF_MEMORY; + return NULL; + } + p->pDbChkpTbl = taosHashInit(64, taosGetDefaultHashFunction(TSDB_DATA_TYPE_BINARY), true, HASH_ENTRY_LOCK); + if (p->pDbChkpTbl == NULL) { + terrno = TSDB_CODE_OUT_OF_MEMORY; + bkdMgtDestroy(p); + return NULL; + } + p->path = taosStrdup(path); - taosThreadRwlockInit(&p->rwLock, NULL); + if (p->path == NULL) { + terrno = TSDB_CODE_OUT_OF_MEMORY; + bkdMgtDestroy(p); + return NULL; + } + + if (taosThreadRwlockInit(&p->rwLock, NULL) != 0) { + terrno = TAOS_SYSTEM_ERROR(errno); + bkdMgtDestroy(p); + return NULL; + } + return p; } @@ -4656,14 +4730,21 @@ int32_t bkdMgtGetDelta(SBkdMgt* bm, char* taskId, int64_t chkpId, SArray* list, SDbChkp* pChkp = ppChkp != NULL ? *ppChkp : NULL; if (pChkp == NULL) { - char* path = taosMemoryCalloc(1, strlen(bm->path) + 64); + int32_t cap = strlen(bm->path) + 64; + char* path = taosMemoryCalloc(1, cap); if (path == NULL) { terrno = TSDB_CODE_OUT_OF_MEMORY; taosThreadRwlockUnlock(&bm->rwLock); return -1; } - sprintf(path, "%s%s%s", bm->path, TD_DIRSEP, taskId); + int32_t nBytes = snprintf(path, cap, "%s%s%s", bm->path, TD_DIRSEP, taskId); + if (nBytes <= 0 || nBytes >= cap) { + terrno = TSDB_CODE_OUT_OF_RANGE; + taosMemoryFree(path); + taosThreadRwlockUnlock(&bm->rwLock); + return -1; + } SDbChkp* p = dbChkpCreate(path, chkpId); if (p == NULL) { From 8fe57c166902cbd3fce2ae99c457f70f6e4bb225 Mon Sep 17 00:00:00 2001 From: Yihao Deng Date: Fri, 28 Jun 2024 03:08:02 +0000 Subject: [PATCH 12/34] add self check --- source/libs/stream/src/streamCheckpoint.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/source/libs/stream/src/streamCheckpoint.c b/source/libs/stream/src/streamCheckpoint.c index bc3762a6d5..bd81ee5b75 100644 --- a/source/libs/stream/src/streamCheckpoint.c +++ b/source/libs/stream/src/streamCheckpoint.c @@ -614,7 +614,8 @@ int32_t uploadCheckpointData(SStreamTask* pTask, int64_t checkpointId, int64_t d if (code == TSDB_CODE_SUCCESS) { stDebug("s-task:%s upload checkpointId:%" PRId64 " to remote succ", idStr, checkpointId); } else { - stError("s-task:%s failed to upload checkpointId:%" PRId64 " path:%s,reason:%s", idStr, checkpointId, path); + stError("s-task:%s failed to upload checkpointId:%" PRId64 " path:%s,reason:%s", idStr, checkpointId, path, + tstrerror(errno)); } } From 2ae54486b513db655b460978cbe935e311144a67 Mon Sep 17 00:00:00 2001 From: Yihao Deng Date: Fri, 28 Jun 2024 07:01:45 +0000 Subject: [PATCH 13/34] add self check --- source/libs/stream/src/streamBackendRocksdb.c | 139 +++++++++++------- 1 file changed, 86 insertions(+), 53 deletions(-) diff --git a/source/libs/stream/src/streamBackendRocksdb.c b/source/libs/stream/src/streamBackendRocksdb.c index 53b45f13a2..cbe6dcc886 100644 --- a/source/libs/stream/src/streamBackendRocksdb.c +++ b/source/libs/stream/src/streamBackendRocksdb.c @@ -373,24 +373,24 @@ int32_t createDirIfNotExist(const char* pPath) { } } -int32_t rebuildFromRemoteChkp_rsync(const char* key, char* chkptPath, int64_t checkpointId, char* defaultPath) { +int32_t rebuildFromRemoteChkp_rsync(const char* key, char* checkpointPath, int64_t checkpointId, char* defaultPath) { int32_t code = 0; - if (taosIsDir(chkptPath)) { - taosRemoveDir(chkptPath); - stDebug("remove local checkpoint data dir:%s succ", chkptPath); + if (taosIsDir(checkpointPath)) { + taosRemoveDir(checkpointPath); + stDebug("remove local checkpoint data dir:%s succ", checkpointPath); } cleanDir(defaultPath, key); stDebug("clear local default dir before downloading checkpoint data:%s succ", defaultPath); - code = streamTaskDownloadCheckpointData(key, chkptPath); + code = streamTaskDownloadCheckpointData(key, checkpointPath); if (code != 0) { stError("failed to download checkpoint data:%s", key); return code; } stDebug("download remote checkpoint data for checkpointId:%" PRId64 ", %s", checkpointId, key); - return backendCopyFiles(chkptPath, defaultPath); + return backendCopyFiles(checkpointPath, defaultPath); } int32_t rebuildFromRemoteChkp_s3(const char* key, char* chkpPath, int64_t chkpId, char* defaultPath) { @@ -399,29 +399,45 @@ int32_t rebuildFromRemoteChkp_s3(const char* key, char* chkpPath, int64_t chkpId return code; } - int32_t len = strlen(defaultPath) + 32; - char* tmp = taosMemoryCalloc(1, len); - sprintf(tmp, "%s%s", defaultPath, "_tmp"); + int32_t nBytes; + int32_t cap = strlen(defaultPath) + 32; + + char* tmp = taosMemoryCalloc(1, cap); + if (tmp == NULL) { + terrno = TSDB_CODE_OUT_OF_MEMORY; + return -1; + } + + nBytes = snprintf(tmp, cap, "%s%s", defaultPath, "_tmp"); + if (nBytes <= 0 || nBytes >= cap) { + terrno = TSDB_CODE_OUT_OF_RANGE; + taosMemoryFree(tmp); + return -1; + } + if (taosIsDir(tmp)) taosRemoveDir(tmp); if (taosIsDir(defaultPath)) taosRenameFile(defaultPath, tmp); // SArray* list = taosArrayInit(2, sizeof(void*)); SSChkpMetaOnS3* pMeta; code = remoteChkp_readMetaData(chkpPath, &pMeta); - if (code == 0) { - code = remoteChkp_validAndCvtMeta(chkpPath, pMeta, chkpId); - } + if (code == 0) code = remoteChkp_validAndCvtMeta(chkpPath, pMeta, chkpId); + taosMemoryFree(pMeta); - // taosArrayDestroyP(list, taosMemoryFree); if (code == 0) { - taosMkDir(defaultPath); + code = taosMkDir(defaultPath); + } + + if (code == 0) { code = backendCopyFiles(chkpPath, defaultPath); } if (code != 0) { if (taosIsDir(defaultPath)) taosRemoveDir(defaultPath); - if (taosIsDir(tmp)) taosRenameFile(tmp, defaultPath); + if (taosIsDir(tmp)) { + code = taosRenameFile(tmp, defaultPath); + } } else { taosRemoveDir(tmp); } @@ -430,12 +446,12 @@ int32_t rebuildFromRemoteChkp_s3(const char* key, char* chkpPath, int64_t chkpId return code; } -int32_t rebuildFromRemoteCheckpoint(const char* key, char* chkptPath, int64_t checkpointId, char* defaultPath) { +int32_t rebuildFromRemoteCheckpoint(const char* key, char* checkpointPath, int64_t checkpointId, char* defaultPath) { ECHECKPOINT_BACKUP_TYPE type = streamGetCheckpointBackupType(); if (type == DATA_UPLOAD_S3) { - return rebuildFromRemoteChkp_s3(key, chkptPath, checkpointId, defaultPath); + return rebuildFromRemoteChkp_s3(key, checkpointPath, checkpointId, defaultPath); } else if (type == DATA_UPLOAD_RSYNC) { - return rebuildFromRemoteChkp_rsync(key, chkptPath, checkpointId, defaultPath); + return rebuildFromRemoteChkp_rsync(key, checkpointPath, checkpointId, defaultPath); } else { stError("%s no remote backup checkpoint data for:%" PRId64, key, checkpointId); } @@ -570,69 +586,78 @@ int32_t restoreCheckpointData(const char* path, const char* key, int64_t chkptId int64_t* processVer) { int32_t code = -1; - size_t pathLen = strlen(path); - char* prefixPath = NULL; - char* defaultPath = NULL; + char* prefixPath = NULL; + char* defaultPath = NULL; + char* checkpointPath = NULL; + char* checkpointRoot = NULL; + + int32_t cap = strlen(path) + 128; + int32_t nBytes; // alloc buf - prefixPath = taosMemoryCalloc(1, pathLen + 64); - if (prefixPath == NULL) { + prefixPath = taosMemoryCalloc(1, cap); + defaultPath = taosMemoryCalloc(1, cap); + checkpointPath = taosMemoryCalloc(1, cap); + checkpointRoot = taosMemoryCalloc(1, cap); + if (prefixPath == NULL || defaultPath == NULL || checkpointPath == NULL || checkpointRoot == NULL) { terrno = TSDB_CODE_OUT_OF_MEMORY; goto _EXIT; } - sprintf(prefixPath, "%s%s%s", path, TD_DIRSEP, key); + nBytes = snprintf(prefixPath, cap, "%s%s%s", path, TD_DIRSEP, key); + if (nBytes <= 0 || nBytes >= cap) { + terrno = TSDB_CODE_OUT_OF_RANGE; + goto _EXIT; + } + code = createDirIfNotExist(prefixPath); if (code != 0) { + terrno = TAOS_SYSTEM_ERROR(errno); goto _EXIT; } - defaultPath = taosMemoryCalloc(1, pathLen + 128); - if (defaultPath == NULL) { - terrno = TSDB_CODE_OUT_OF_MEMORY; + nBytes = snprintf(defaultPath, cap, "%s%s%s", prefixPath, TD_DIRSEP, "state"); + if (nBytes <= 0 || nBytes >= cap) { + terrno = TSDB_CODE_OUT_OF_RANGE; goto _EXIT; } - sprintf(defaultPath, "%s%s%s", prefixPath, TD_DIRSEP, "state"); code = createDirIfNotExist(defaultPath); if (code != 0) { + terrno = TAOS_SYSTEM_ERROR(errno); goto _EXIT; } - char* checkpointRoot = taosMemoryCalloc(1, pathLen + 48); - if (checkpointRoot == NULL) { - terrno = TSDB_CODE_OUT_OF_MEMORY; + nBytes = snprintf(checkpointRoot, cap, "%s%s%s", prefixPath, TD_DIRSEP, "checkpoints"); + if (nBytes <= 0 || nBytes >= cap) { + terrno = TSDB_CODE_OUT_OF_RANGE; goto _EXIT; } - sprintf(checkpointRoot, "%s%s%s", prefixPath, TD_DIRSEP, "checkpoints"); code = createDirIfNotExist(checkpointRoot); if (code != 0) { - taosMemoryFreeClear(checkpointRoot); + terrno = TAOS_SYSTEM_ERROR(errno); goto _EXIT; } - taosMemoryFreeClear(checkpointRoot); stDebug("%s check local backend dir:%s, checkpointId:%" PRId64 " succ", key, defaultPath, chkptId); - - char* chkptPath = taosMemoryCalloc(1, pathLen + 128); - if (chkptPath == NULL) { - terrno = TSDB_CODE_OUT_OF_MEMORY; - goto _EXIT; - } - if (chkptId > 0) { - snprintf(chkptPath, pathLen + 127, "%s%s%s%s%s%" PRId64 "", prefixPath, TD_DIRSEP, "checkpoints", TD_DIRSEP, - "checkpoint", chkptId); + nBytes = snprintf(checkpointPath, cap, "%s%s%s%s%s%" PRId64 "", prefixPath, TD_DIRSEP, "checkpoints", TD_DIRSEP, + "checkpoint", chkptId); + if (nBytes <= 0 || nBytes >= cap) { + terrno = TSDB_CODE_OUT_OF_RANGE; + goto _EXIT; + } - code = rebuildFromLocalCheckpoint(key, chkptPath, chkptId, defaultPath, processVer); + code = rebuildFromLocalCheckpoint(key, checkpointPath, chkptId, defaultPath, processVer); if (code != 0) { - code = rebuildFromRemoteCheckpoint(key, chkptPath, chkptId, defaultPath); + terrno = 0; + code = rebuildFromRemoteCheckpoint(key, checkpointPath, chkptId, defaultPath); } if (code != 0) { - stError("failed to start stream backend at %s, reason: %s, restart from default defaultPath:%s", chkptPath, - tstrerror(code), defaultPath); + stError("failed to start stream backend at %s, reason: %s, restart from default defaultPath:%s, reason:%s", + checkpointPath, tstrerror(code), defaultPath, tstrerror(terrno)); code = 0; // reset the error code } } else { // no valid checkpoint id @@ -641,15 +666,18 @@ int32_t restoreCheckpointData(const char* path, const char* key, int64_t chkptId cleanDir(defaultPath, key); } - taosMemoryFree(chkptPath); - *dbPath = defaultPath; *dbPrefixPath = prefixPath; - return 0; + defaultPath = NULL; + prefixPath = NULL; + + code = 0; _EXIT: taosMemoryFree(defaultPath); taosMemoryFree(prefixPath); + taosMemoryFree(checkpointPath); + taosMemoryFree(checkpointRoot); return code; } @@ -4334,7 +4362,14 @@ int32_t dbChkpGetDelta(SDbChkp* p, int64_t chkpId, SArray* list) { int32_t sstLen = strlen(pSST); memset(p->buf, 0, p->len); - sprintf(p->buf, "%s%s%s%scheckpoint%" PRId64 "", p->path, TD_DIRSEP, "checkpoints", TD_DIRSEP, chkpId); + + nBytes = + snprintf(p->buf, p->len, "%s%s%s%scheckpoint%" PRId64 "", p->path, TD_DIRSEP, "checkpoints", TD_DIRSEP, chkpId); + if (nBytes <= 0 || nBytes >= p->len) { + terrno = TSDB_CODE_OUT_OF_RANGE; + taosThreadRwlockUnlock(&p->rwLock); + return -1; + } taosArrayClearP(p->pAdd, taosMemoryFree); taosArrayClearP(p->pDel, taosMemoryFree); @@ -4518,10 +4553,8 @@ int32_t dbChkpDumpTo(SDbChkp* p, char* dname, SArray* list) { char* srcBuf = taosMemoryCalloc(1, cap); char* dstBuf = taosMemoryCalloc(1, cap); - char* srcDir = taosMemoryCalloc(1, cap); char* dstDir = taosMemoryCalloc(1, cap); - if (srcBuf == NULL || dstBuf == NULL || srcDir == NULL || dstDir == NULL) { terrno = TSDB_CODE_OUT_OF_MEMORY; goto _ERROR; From 2e59284388e264655dbb27c0db302b512c8a3e8d Mon Sep 17 00:00:00 2001 From: Yihao Deng Date: Fri, 28 Jun 2024 08:14:34 +0000 Subject: [PATCH 14/34] add self check --- source/libs/stream/src/streamBackendRocksdb.c | 142 +++++++++++------- 1 file changed, 91 insertions(+), 51 deletions(-) diff --git a/source/libs/stream/src/streamBackendRocksdb.c b/source/libs/stream/src/streamBackendRocksdb.c index cbe6dcc886..eff0481d5b 100644 --- a/source/libs/stream/src/streamBackendRocksdb.c +++ b/source/libs/stream/src/streamBackendRocksdb.c @@ -196,28 +196,54 @@ int32_t getCfIdx(const char* cfName) { return idx; } -bool isValidCheckpoint(const char* dir) { return true; } +bool isValidCheckpoint(const char* dir) { + // not implement yet + return true; +} +/* + *copy pChkpIdDir's file to state dir + */ int32_t rebuildDirFromCheckpoint(const char* path, int64_t chkpId, char** dst) { // impl later int32_t code = 0; + int32_t cap = strlen(path) + 64; + int32_t nBytes = 0; + + char* state = taosMemoryCalloc(1, cap); + if (state == NULL) { + terrno = TSDB_CODE_OUT_OF_MEMORY; + return -1; + } + + nBytes = snprintf(state, cap, "%s%s%s", path, TD_DIRSEP, "state"); + if (nBytes <= 0 || nBytes >= cap) { + terrno = TSDB_CODE_OUT_OF_RANGE; + taosMemoryFree(state); + return -1; + } - /*param@1: checkpointId dir - param@2: state - copy pChkpIdDir's file to state dir - opt to set hard link to previous file - */ - char* state = taosMemoryCalloc(1, strlen(path) + 32); - sprintf(state, "%s%s%s", path, TD_DIRSEP, "state"); if (chkpId != 0) { - char* chkp = taosMemoryCalloc(1, strlen(path) + 64); - sprintf(chkp, "%s%s%s%scheckpoint%" PRId64 "", path, TD_DIRSEP, "checkpoints", TD_DIRSEP, chkpId); + char* chkp = taosMemoryCalloc(1, cap); + if (chkp == NULL) { + terrno = TSDB_CODE_OUT_OF_MEMORY; + taosMemoryFree(state); + return -1; + } + + nBytes = snprintf(chkp, cap, "%s%s%s%scheckpoint%" PRId64 "", path, TD_DIRSEP, "checkpoints", TD_DIRSEP, chkpId); + if (nBytes <= 0 || nBytes >= cap) { + terrno = TSDB_CODE_OUT_OF_RANGE; + taosMemoryFree(state); + taosMemoryFree(chkp); + return -1; + } + if (taosIsDir(chkp) && isValidCheckpoint(chkp)) { cleanDir(state, ""); code = backendCopyFiles(chkp, state); - stInfo("copy snap file from %s to %s", chkp, state); if (code != 0) { - stError("failed to restart stream backend from %s, reason: %s", chkp, tstrerror(TAOS_SYSTEM_ERROR(errno))); + stError("failed to restart stream backend from %s, reason: %s", chkp, tstrerror(TAOS_SYSTEM_ERROR(terrno))); } else { stInfo("start to restart stream backend at checkpoint path: %s", chkp); } @@ -225,7 +251,10 @@ int32_t rebuildDirFromCheckpoint(const char* path, int64_t chkpId, char** dst) { } else { stError("failed to start stream backend at %s, reason: %s, restart from default state dir:%s", chkp, tstrerror(TAOS_SYSTEM_ERROR(errno)), state); - taosMkDir(state); + code = taosMkDir(state); + if (code != 0) { + terrno = TAOS_SYSTEM_ERROR(errno); + } } taosMemoryFree(chkp); @@ -247,7 +276,9 @@ typedef struct { } SSChkpMetaOnS3; int32_t remoteChkp_readMetaData(char* path, SSChkpMetaOnS3** pMeta) { - int32_t cap = strlen(path) + 32; + int32_t code = -1; + int32_t cap = strlen(path) + 32; + TdFilePtr pFile = NULL; char* metaPath = taosMemoryCalloc(1, cap); if (metaPath == NULL) { @@ -256,41 +287,42 @@ int32_t remoteChkp_readMetaData(char* path, SSChkpMetaOnS3** pMeta) { } int32_t n = sprintf(metaPath, "%s%s%s", path, TD_DIRSEP, "META"); - if (n <= 0 || n >= (cap - 1)) { + if (n <= 0 || n >= cap) { terrno = TSDB_CODE_OUT_OF_MEMORY; taosMemoryFree(metaPath); return -1; } - TdFilePtr pFile = taosOpenFile(path, TD_FILE_READ); + pFile = taosOpenFile(path, TD_FILE_READ); if (pFile == NULL) { terrno = TAOS_SYSTEM_ERROR(errno); - taosMemoryFree(metaPath); - return -1; + goto _EXIT; } char buf[256] = {0}; if (taosReadFile(pFile, buf, sizeof(buf)) <= 0) { terrno = TAOS_SYSTEM_ERROR(errno); - taosMemoryFree(metaPath); - taosCloseFile(&pFile); - return -1; + goto _EXIT; } SSChkpMetaOnS3* p = taosMemoryCalloc(1, sizeof(SSChkpMetaOnS3)); + if (p == NULL) { + terrno = TSDB_CODE_OUT_OF_MEMORY; + goto _EXIT; + } n = sscanf(buf, META_ON_S3_FORMATE, p->pCurrName, &p->currChkptId, p->pManifestName, &p->manifestChkptId, p->processName, &p->processId); if (n != 6) { terrno = TSDB_CODE_INVALID_MSG; taosMemoryFree(p); - taosMemoryFree(metaPath); - taosCloseFile(&pFile); - return -1; + goto _EXIT; } - + *pMeta = p; + code = 0; +_EXIT: taosCloseFile(&pFile); taosMemoryFree(metaPath); - return 0; + return code; } int32_t remoteChkp_validMetaFile(char* name, char* prename, int64_t chkpId) { int8_t valid = 0; @@ -321,7 +353,6 @@ int32_t remoteChkp_validAndCvtMeta(char* path, SSChkpMetaOnS3* pMeta, int64_t ch return -1; } - int8_t count = 0; // for (int i = 0; i < taosArrayGetSize(list); i++) { // char* p = taosArrayGetP(list, i); // sprintf(src, "%s%s%s", path, TD_DIRSEP, p); @@ -419,7 +450,7 @@ int32_t rebuildFromRemoteChkp_s3(const char* key, char* chkpPath, int64_t chkpId if (taosIsDir(defaultPath)) taosRenameFile(defaultPath, tmp); // SArray* list = taosArrayInit(2, sizeof(void*)); - SSChkpMetaOnS3* pMeta; + SSChkpMetaOnS3* pMeta = NULL; code = remoteChkp_readMetaData(chkpPath, &pMeta); if (code == 0) code = remoteChkp_validAndCvtMeta(chkpPath, pMeta, chkpId); @@ -481,76 +512,84 @@ int32_t backendFileCopyFilesImpl(const char* src, const char* dst) { const char* info = "info"; size_t infoLen = strlen(info); - int32_t code = 0; + int32_t code = -1; int32_t sLen = strlen(src); int32_t dLen = strlen(dst); - char* srcName = taosMemoryCalloc(1, sLen + 64); - char* dstName = taosMemoryCalloc(1, dLen + 64); + int32_t cap = TMAX(sLen, dLen) + 64; + int32_t nBytes = 0; + + char* srcName = taosMemoryCalloc(1, cap); + char* dstName = taosMemoryCalloc(1, cap); + if (srcName == NULL || dstName == NULL) { + terrno = TSDB_CODE_OUT_OF_MEMORY; + return -1; + } // copy file to dst TdDirPtr pDir = taosOpenDir(src); if (pDir == NULL) { - taosMemoryFree(srcName); - taosMemoryFree(dstName); - code = TAOS_SYSTEM_ERROR(errno); - - errno = 0; - return code; + terrno = TAOS_SYSTEM_ERROR(errno); } errno = 0; TdDirEntryPtr de = NULL; - while ((de = taosReadDir(pDir)) != NULL) { char* name = taosGetDirEntryName(de); if (strcmp(name, ".") == 0 || strcmp(name, "..") == 0) { continue; } - sprintf(srcName, "%s%s%s", src, TD_DIRSEP, name); - sprintf(dstName, "%s%s%s", dst, TD_DIRSEP, name); + nBytes = snprintf(srcName, cap, "%s%s%s", src, TD_DIRSEP, name); + if (nBytes <= 0 || nBytes >= cap) { + terrno = TSDB_CODE_OUT_OF_RANGE; + goto _ERROR; + } + + nBytes = snprintf(dstName, cap, "%s%s%s", dst, TD_DIRSEP, name); + if (nBytes <= 0 || nBytes >= cap) { + terrno = TSDB_CODE_OUT_OF_RANGE; + goto _ERROR; + } if (strncmp(name, current, strlen(name) <= currLen ? strlen(name) : currLen) == 0) { code = copyFiles_create(srcName, dstName, 0); if (code != 0) { - code = TAOS_SYSTEM_ERROR(code); - stError("failed to copy file, detail: %s to %s reason:%s", srcName, dstName, tstrerror(code)); + terrno = TAOS_SYSTEM_ERROR(errno); + stError("failed to copy file, detail: %s to %s reason:%s", srcName, dstName, tstrerror(terrno)); goto _ERROR; } } else if (strncmp(name, info, strlen(name) <= infoLen ? strlen(name) : infoLen) == 0) { code = copyFiles_create(srcName, dstName, 0); if (code != 0) { - code = TAOS_SYSTEM_ERROR(code); - stError("failed to copy file, detail: %s to %s reason:%s", srcName, dstName, tstrerror(code)); + terrno = TAOS_SYSTEM_ERROR(errno); + stError("failed to copy file, detail: %s to %s reason:%s", srcName, dstName, tstrerror(terrno)); goto _ERROR; } } else { code = copyFiles_hardlink(srcName, dstName, 0); if (code != 0) { - code = TAOS_SYSTEM_ERROR(code); - stError("failed to hard link file, detail:%s to %s, reason:%s", srcName, dstName, tstrerror(code)); + terrno = TAOS_SYSTEM_ERROR(errno); + stError("failed to hard link file, detail:%s to %s, reason:%s", srcName, dstName, tstrerror(terrno)); goto _ERROR; } else { stDebug("succ hard link file:%s to %s", srcName, dstName); } } - memset(srcName, 0, sLen + 64); - memset(dstName, 0, dLen + 64); + memset(srcName, 0, cap); + memset(dstName, 0, cap); } taosMemoryFreeClear(srcName); taosMemoryFreeClear(dstName); taosCloseDir(&pDir); - errno = 0; return code; _ERROR: taosMemoryFreeClear(srcName); taosMemoryFreeClear(dstName); taosCloseDir(&pDir); - errno = 0; return code; } @@ -568,7 +607,8 @@ static int32_t rebuildFromLocalCheckpoint(const char* pTaskIdStr, const char* ch if (code != TSDB_CODE_SUCCESS) { cleanDir(defaultPath, pTaskIdStr); stError("%s failed to start stream backend from local %s, reason:%s, try download checkpoint from remote", - pTaskIdStr, checkpointPath, tstrerror(TAOS_SYSTEM_ERROR(errno))); + pTaskIdStr, checkpointPath, tstrerror(TAOS_SYSTEM_ERROR(terrno))); + terrno = 0; code = TSDB_CODE_SUCCESS; } else { stInfo("%s copy checkpoint data from:%s to:%s succ, try to start stream backend", pTaskIdStr, checkpointPath, From f023e7780cea869b72a26bf0f1983ff1bddc299e Mon Sep 17 00:00:00 2001 From: Yihao Deng Date: Sat, 29 Jun 2024 04:33:14 +0000 Subject: [PATCH 15/34] add self check --- source/libs/stream/src/streamCheckpoint.c | 88 +++++++++++++++++------ 1 file changed, 66 insertions(+), 22 deletions(-) diff --git a/source/libs/stream/src/streamCheckpoint.c b/source/libs/stream/src/streamCheckpoint.c index 9948847ecb..69c2ead7d2 100644 --- a/source/libs/stream/src/streamCheckpoint.c +++ b/source/libs/stream/src/streamCheckpoint.c @@ -56,6 +56,13 @@ SStreamDataBlock* createChkptTriggerBlock(SStreamTask* pTask, int32_t checkpoint pBlock->info.childId = pTask->info.selfChildId; pChkpoint->blocks = taosArrayInit(4, sizeof(SSDataBlock)); // pBlock; + if (pChkpoint->blocks == NULL) { + taosMemoryFree(pBlock); + taosFreeQitem(pChkpoint); + terrno = TSDB_CODE_OUT_OF_MEMORY; + return NULL; + } + taosArrayPush(pChkpoint->blocks, pBlock); taosMemoryFree(pBlock); @@ -110,7 +117,12 @@ int32_t streamTaskSendCheckpointTriggerMsg(SStreamTask* pTask, int32_t dstTaskId SRpcHandleInfo* pRpcInfo, int32_t code) { int32_t size = sizeof(SMsgHead) + sizeof(SCheckpointTriggerRsp); - void* pBuf = rpcMallocCont(size); + void* pBuf = rpcMallocCont(size); + if (pBuf == NULL) { + terrno = TSDB_CODE_OUT_OF_MEMORY; + return -1; + } + SCheckpointTriggerRsp* pRsp = POINTER_SHIFT(pBuf, sizeof(SMsgHead)); ((SMsgHead*)pBuf)->vgId = htonl(downstreamNodeId); @@ -131,6 +143,7 @@ int32_t streamTaskSendCheckpointTriggerMsg(SStreamTask* pTask, int32_t dstTaskId SRpcMsg rspMsg = {.code = 0, .pCont = pRsp, .contLen = size, .info = *pRpcInfo}; tmsgSendRsp(&rspMsg); + return 0; } @@ -1006,52 +1019,78 @@ void streamTaskSetTriggerDispatchConfirmed(SStreamTask* pTask, int32_t vgId) { } static int32_t uploadCheckpointToS3(const char* id, const char* path) { + int32_t code = 0; + int32_t nBytes = 0; + + if (s3Init() != 0) { + return -1; + } + TdDirPtr pDir = taosOpenDir(path); if (pDir == NULL) return -1; TdDirEntryPtr de = NULL; - s3Init(); while ((de = taosReadDir(pDir)) != NULL) { char* name = taosGetDirEntryName(de); if (strcmp(name, ".") == 0 || strcmp(name, "..") == 0 || taosDirEntryIsDir(de)) continue; char filename[PATH_MAX] = {0}; if (path[strlen(path) - 1] == TD_DIRSEP_CHAR) { - snprintf(filename, sizeof(filename), "%s%s", path, name); + nBytes = snprintf(filename, sizeof(filename), "%s%s", path, name); + if (nBytes <= 0 || nBytes >= sizeof(filename)) { + code = -1; + break; + } } else { - snprintf(filename, sizeof(filename), "%s%s%s", path, TD_DIRSEP, name); + nBytes = snprintf(filename, sizeof(filename), "%s%s%s", path, TD_DIRSEP, name); + if (nBytes <= 0 || nBytes >= sizeof(filename)) { + code = -1; + break; + } } char object[PATH_MAX] = {0}; - snprintf(object, sizeof(object), "%s%s%s", id, TD_DIRSEP, name); + nBytes = snprintf(object, sizeof(object), "%s%s%s", id, TD_DIRSEP, name); + if (nBytes <= 0 || nBytes >= sizeof(object)) { + code = -1; + break; + } if (s3PutObjectFromFile2(filename, object, 0) != 0) { - taosCloseDir(&pDir); - return -1; + code = -1; + stError("[s3] failed to upload checkpoint:%s", filename); + } else { + stDebug("[s3] upload checkpoint:%s", filename); } - stDebug("[s3] upload checkpoint:%s", filename); - // break; } - taosCloseDir(&pDir); - return 0; + return code; } int32_t downloadCheckpointByNameS3(const char* id, const char* fname, const char* dstName) { - int32_t code = 0; - char* buf = taosMemoryCalloc(1, strlen(id) + strlen(dstName) + 4); + int32_t nBytes; + int32_t cap = strlen(id) + strlen(dstName) + 16; + + char* buf = taosMemoryCalloc(1, cap); if (buf == NULL) { - code = terrno = TSDB_CODE_OUT_OF_MEMORY; - return code; + terrno = TSDB_CODE_OUT_OF_MEMORY; + return -1; + } + + nBytes = snprintf(buf, cap, "%s/%s", id, fname); + if (nBytes <= 0 || nBytes >= cap) { + taosMemoryFree(buf); + terrno = TSDB_CODE_OUT_OF_RANGE; + return -1; } - sprintf(buf, "%s/%s", id, fname); if (s3GetObjectToFile(buf, dstName) != 0) { - code = errno; + taosMemoryFree(buf); + terrno = TAOS_SYSTEM_ERROR(errno); + return -1; } - taosMemoryFree(buf); - return code; + return 0; } ECHECKPOINT_BACKUP_TYPE streamGetCheckpointBackupType() { @@ -1082,6 +1121,7 @@ int32_t streamTaskUploadCheckpoint(const char* id, const char* path) { // fileName: CURRENT int32_t downloadCheckpointDataByName(const char* id, const char* fname, const char* dstName) { if (id == NULL || fname == NULL || strlen(id) == 0 || strlen(fname) == 0 || strlen(fname) >= PATH_MAX) { + terrno = TSDB_CODE_INVALID_PARA; stError("down load checkpoint data parameters invalid"); return -1; } @@ -1125,9 +1165,13 @@ int32_t deleteCheckpoint(const char* id) { int32_t deleteCheckpointFile(const char* id, const char* name) { char object[128] = {0}; - snprintf(object, sizeof(object), "%s/%s", id, name); + + int32_t nBytes = snprintf(object, sizeof(object), "%s/%s", id, name); + if (nBytes <= 0 || nBytes >= sizeof(object)) { + terrno = TSDB_CODE_OUT_OF_RANGE; + return -1; + } char* tmp = object; - s3DeleteObjects((const char**)&tmp, 1); - return 0; + return s3DeleteObjects((const char**)&tmp, 1); } From 7290920c6ffaea6efc9d158e08c1fab3d5d190b3 Mon Sep 17 00:00:00 2001 From: Yihao Deng Date: Sat, 29 Jun 2024 04:35:54 +0000 Subject: [PATCH 16/34] add self check --- source/dnode/vnode/src/tq/tqStreamStateSnap.c | 7 ++- source/libs/stream/src/streamCheckpoint.c | 7 +++ source/libs/stream/src/streamSnapshot.c | 61 ++++++++++++++++--- 3 files changed, 64 insertions(+), 11 deletions(-) diff --git a/source/dnode/vnode/src/tq/tqStreamStateSnap.c b/source/dnode/vnode/src/tq/tqStreamStateSnap.c index 290266d94a..a2b9254db7 100644 --- a/source/dnode/vnode/src/tq/tqStreamStateSnap.c +++ b/source/dnode/vnode/src/tq/tqStreamStateSnap.c @@ -67,7 +67,7 @@ int32_t streamStateSnapReaderOpen(STQ* pTq, int64_t sver, int64_t ever, SStreamS _err: tqError("vgId:%d, vnode %s snapshot reader failed to open since %s", TD_VID(pTq->pVnode), STREAM_STATE_TRANSFER, - tstrerror(code)); + tstrerror(terrno)); *ppReader = NULL; return code; } @@ -145,14 +145,15 @@ int32_t streamStateSnapWriterOpen(STQ* pTq, int64_t sver, int64_t ever, SStreamS goto _err; } - tqDebug("vgId:%d, vnode %s snapshot writer opened, path:%s", TD_VID(pTq->pVnode), STREAM_STATE_TRANSFER, pTq->pStreamMeta->path); + tqDebug("vgId:%d, vnode %s snapshot writer opened, path:%s", TD_VID(pTq->pVnode), STREAM_STATE_TRANSFER, + pTq->pStreamMeta->path); pWriter->pWriterImpl = pSnapWriter; *ppWriter = pWriter; return code; _err: tqError("vgId:%d, vnode %s snapshot writer failed to open since %s", TD_VID(pTq->pVnode), STREAM_STATE_TRANSFER, - tstrerror(code)); + tstrerror(terrno)); taosMemoryFree(pWriter); *ppWriter = NULL; return -1; diff --git a/source/libs/stream/src/streamCheckpoint.c b/source/libs/stream/src/streamCheckpoint.c index 69c2ead7d2..bc5067d4d6 100644 --- a/source/libs/stream/src/streamCheckpoint.c +++ b/source/libs/stream/src/streamCheckpoint.c @@ -813,6 +813,11 @@ void checkpointTriggerMonitorFn(void* param, void* tmrId) { SArray* pList = pTask->upstreamInfo.pList; ASSERT(pTask->info.taskLevel > TASK_LEVEL__SOURCE); SArray* pNotSendList = taosArrayInit(4, sizeof(SStreamUpstreamEpInfo)); + if (pNotSendList == NULL) { + terrno = TSDB_CODE_OUT_OF_MEMORY; + stDebug("s-task:%s start to triggerMonitor, reason:%s", id, tstrerror(terrno)); + return; + } for (int32_t i = 0; i < taosArrayGetSize(pList); ++i) { SStreamUpstreamEpInfo* pInfo = taosArrayGetP(pList, i); @@ -1057,6 +1062,7 @@ static int32_t uploadCheckpointToS3(const char* id, const char* path) { } if (s3PutObjectFromFile2(filename, object, 0) != 0) { + terrno = TAOS_SYSTEM_ERROR(errno); code = -1; stError("[s3] failed to upload checkpoint:%s", filename); } else { @@ -1152,6 +1158,7 @@ int32_t streamTaskDownloadCheckpointData(const char* id, char* path) { int32_t deleteCheckpoint(const char* id) { if (id == NULL || strlen(id) == 0) { + terrno = TSDB_CODE_INVALID_PARA; stError("deleteCheckpoint parameters invalid"); return -1; } diff --git a/source/libs/stream/src/streamSnapshot.c b/source/libs/stream/src/streamSnapshot.c index 868ff002bf..7ef4e8ec09 100644 --- a/source/libs/stream/src/streamSnapshot.c +++ b/source/libs/stream/src/streamSnapshot.c @@ -130,7 +130,7 @@ int32_t streamGetFileSize(char* path, char* name, int64_t* sz) { int32_t ret = 0; char* fullname = taosMemoryCalloc(1, strlen(path) + 32); - + sprintf(fullname, "%s%s%s", path, TD_DIRSEP, name); ret = taosStatFile(fullname, sz, NULL, NULL); @@ -259,17 +259,33 @@ int32_t snapFileReadMeta(SBackendSnapFile2* pSnapFile) { } int32_t streamBackendSnapInitFile(char* metaPath, SStreamTaskSnap* pSnap, SBackendSnapFile2* pSnapFile) { int32_t code = -1; + int32_t nBytes = 0; + int32_t cap = strlen(pSnap->dbPrefixPath) + 256; + + char* path = taosMemoryCalloc(1, cap); + if (path == NULL) { + return -1; + } + + nBytes = snprintf(path, cap, "%s%s%s%s%s%" PRId64 "", pSnap->dbPrefixPath, TD_DIRSEP, "checkpoints", TD_DIRSEP, + "checkpoint", pSnap->chkpId); + if (nBytes <= 0 || nBytes >= cap) { + terrno = TSDB_CODE_OUT_OF_RANGE; + goto _ERROR; + } - char* path = taosMemoryCalloc(1, strlen(pSnap->dbPrefixPath) + 256); - // char idstr[64] = {0}; - sprintf(path, "%s%s%s%s%s%" PRId64 "", pSnap->dbPrefixPath, TD_DIRSEP, "checkpoints", TD_DIRSEP, "checkpoint", - pSnap->chkpId); if (!taosIsDir(path)) { + terrno = TSDB_CODE_INVALID_MSG; goto _ERROR; } pSnapFile->pSst = taosArrayInit(16, sizeof(void*)); pSnapFile->pFileList = taosArrayInit(64, sizeof(SBackendFileItem)); + if (pSnapFile->pSst == NULL || pSnapFile->pFileList == NULL) { + terrno = TSDB_CODE_OUT_OF_MEMORY; + goto _ERROR; + } + pSnapFile->path = path; pSnapFile->snapInfo = *pSnap; if ((code = snapFileReadMeta(pSnapFile)) != 0) { @@ -313,8 +329,15 @@ void snapFileDestroy(SBackendSnapFile2* pSnap) { } int32_t streamSnapHandleInit(SStreamSnapHandle* pHandle, char* path, void* pMeta) { // impl later + int32_t code = 0; + SArray* pSnapInfoSet = taosArrayInit(4, sizeof(SStreamTaskSnap)); - int32_t code = streamCreateTaskDbSnapInfo(pMeta, path, pSnapInfoSet); + if (pSnapInfoSet == NULL) { + terrno = TSDB_CODE_OUT_OF_MEMORY; + return -1; + } + + code = streamCreateTaskDbSnapInfo(pMeta, path, pSnapInfoSet); if (code != 0) { stError("failed to do task db snap info, reason:%s", tstrerror(terrno)); taosArrayDestroy(pSnapInfoSet); @@ -322,6 +345,11 @@ int32_t streamSnapHandleInit(SStreamSnapHandle* pHandle, char* path, void* pMeta } SArray* pDbSnapSet = taosArrayInit(8, sizeof(SBackendSnapFile2)); + if (pDbSnapSet == NULL) { + terrno = TSDB_CODE_OUT_OF_MEMORY; + taosArrayDestroy(pSnapInfoSet); + return -1; + } for (int32_t i = 0; i < taosArrayGetSize(pSnapInfoSet); i++) { SStreamTaskSnap* pSnap = taosArrayGet(pSnapInfoSet, i); @@ -369,7 +397,8 @@ int32_t streamSnapReaderOpen(void* pMeta, int64_t sver, int64_t chkpId, char* pa // impl later SStreamSnapReader* pReader = taosMemoryCalloc(1, sizeof(SStreamSnapReader)); if (pReader == NULL) { - return TSDB_CODE_OUT_OF_MEMORY; + terrno = TSDB_CODE_OUT_OF_MEMORY; + return -1; } if (streamSnapHandleInit(&pReader->handle, (char*)path, pMeta) < 0) { @@ -501,11 +530,27 @@ int32_t streamSnapWriterOpen(void* pMeta, int64_t sver, int64_t ever, char* path SStreamSnapHandle* pHandle = &pWriter->handle; pHandle->currIdx = 0; + pHandle->metaPath = taosStrdup(path); + if (pHandle->metaPath == NULL) { + terrno = TSDB_CODE_OUT_OF_MEMORY; + taosMemoryFree(pWriter); + } + pHandle->pDbSnapSet = taosArrayInit(8, sizeof(SBackendSnapFile2)); + if (pHandle->pDbSnapSet == NULL) { + terrno = TSDB_CODE_OUT_OF_MEMORY; + taosMemoryFree(pHandle->metaPath); + taosMemoryFree(pWriter); + return -1; + } SBackendSnapFile2 snapFile = {0}; - taosArrayPush(pHandle->pDbSnapSet, &snapFile); + if (taosArrayPush(pHandle->pDbSnapSet, &snapFile) == NULL) { + terrno = TSDB_CODE_OUT_OF_MEMORY; + streamSnapWriterClose(pWriter, 0); + return -1; + } *ppWriter = pWriter; return 0; From 6c6bff611a01cd591a29407cfca3d0c2acc8bdaa Mon Sep 17 00:00:00 2001 From: Yihao Deng Date: Sat, 29 Jun 2024 12:56:36 +0000 Subject: [PATCH 17/34] add self check --- source/dnode/vnode/src/tq/tqStreamStateSnap.c | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/source/dnode/vnode/src/tq/tqStreamStateSnap.c b/source/dnode/vnode/src/tq/tqStreamStateSnap.c index a2b9254db7..655778568b 100644 --- a/source/dnode/vnode/src/tq/tqStreamStateSnap.c +++ b/source/dnode/vnode/src/tq/tqStreamStateSnap.c @@ -75,7 +75,7 @@ _err: int32_t streamStateSnapReaderClose(SStreamStateReader* pReader) { int32_t code = 0; tqDebug("vgId:%d, vnode %s snapshot reader closed", TD_VID(pReader->pTq->pVnode), STREAM_STATE_TRANSFER); - streamSnapReaderClose(pReader->pReaderImpl); + code = streamSnapReaderClose(pReader->pReaderImpl); taosMemoryFree(pReader); return code; } @@ -138,7 +138,12 @@ int32_t streamStateSnapWriterOpen(STQ* pTq, int64_t sver, int64_t ever, SStreamS pWriter->sver = sver; pWriter->ever = ever; - taosMkDir(pTq->pStreamMeta->path); + if (taosMkDir(pTq->pStreamMeta->path) != 0) { + terrno = TAOS_SYSTEM_ERROR(errno); + tqError("vgId:%d, vnode %s snapshot writer failed to create directory %s since %s", TD_VID(pTq->pVnode), + STREAM_STATE_TRANSFER, pTq->pStreamMeta->path, tstrerror(terrno)); + goto _err; + } SStreamSnapWriter* pSnapWriter = NULL; if (streamSnapWriterOpen(pTq, sver, ever, pTq->pStreamMeta->path, &pSnapWriter) < 0) { @@ -151,6 +156,7 @@ int32_t streamStateSnapWriterOpen(STQ* pTq, int64_t sver, int64_t ever, SStreamS *ppWriter = pWriter; return code; + _err: tqError("vgId:%d, vnode %s snapshot writer failed to open since %s", TD_VID(pTq->pVnode), STREAM_STATE_TRANSFER, tstrerror(terrno)); @@ -160,11 +166,8 @@ _err: } int32_t streamStateSnapWriterClose(SStreamStateWriter* pWriter, int8_t rollback) { - int32_t code = 0; tqDebug("vgId:%d, vnode %s snapshot writer closed", TD_VID(pWriter->pTq->pVnode), STREAM_STATE_TRANSFER); - code = streamSnapWriterClose(pWriter->pWriterImpl, rollback); - - return code; + return streamSnapWriterClose(pWriter->pWriterImpl, rollback); } int32_t streamStateSnapWrite(SStreamStateWriter* pWriter, uint8_t* pData, uint32_t nData) { From ca1562a990059a891e1b893778a05e9c1363d485 Mon Sep 17 00:00:00 2001 From: Yihao Deng Date: Tue, 2 Jul 2024 08:23:56 +0000 Subject: [PATCH 18/34] add self check --- source/libs/stream/inc/streamBackendRocksdb.h | 3 + source/libs/stream/src/streamBackendRocksdb.c | 228 ++++++++++++------ source/libs/stream/src/streamCheckpoint.c | 38 +-- 3 files changed, 162 insertions(+), 107 deletions(-) diff --git a/source/libs/stream/inc/streamBackendRocksdb.h b/source/libs/stream/inc/streamBackendRocksdb.h index 24cd861550..e4c5787020 100644 --- a/source/libs/stream/inc/streamBackendRocksdb.h +++ b/source/libs/stream/inc/streamBackendRocksdb.h @@ -131,6 +131,8 @@ typedef struct { TdThreadRwlock rwLock; } SBkdMgt; +#define META_ON_S3_FORMATE "%s_%" PRId64 "\n%s_%" PRId64 "\n%s_%" PRId64 "" + bool streamBackendDataIsExist(const char* path, int64_t chkpId, int32_t vgId); void* streamBackendInit(const char* path, int64_t chkpId, int32_t vgId); void streamBackendCleanup(void* arg); @@ -258,6 +260,7 @@ void bkdMgtDestroy(SBkdMgt* bm); int32_t taskDbGenChkpUploadData(void* arg, void* bkdMgt, int64_t chkpId, int8_t type, char** path, SArray* list, const char* id); +int32_t remoteChkpGetDelFile(char* path, SArray* toDel); void* taskAcquireDb(int64_t refId); void taskReleaseDb(int64_t refId); diff --git a/source/libs/stream/src/streamBackendRocksdb.c b/source/libs/stream/src/streamBackendRocksdb.c index 1042e6dfc9..0074251669 100644 --- a/source/libs/stream/src/streamBackendRocksdb.c +++ b/source/libs/stream/src/streamBackendRocksdb.c @@ -19,8 +19,6 @@ #include "tcommon.h" #include "tref.h" -#define META_ON_S3_FORMATE "%s_%" PRId64 "\n%s_%" PRId64 "\n%s_%" PRId64 "" - typedef struct SCompactFilteFactory { void* status; } SCompactFilteFactory; @@ -152,6 +150,9 @@ static rocksdb_iterator_t* streamStateIterCreate(SStreamState* pState, const cha void taskDbRefChkp(STaskDbWrapper* pTaskDb, int64_t chkp); void taskDbUnRefChkp(STaskDbWrapper* pTaskDb, int64_t chkp); +int32_t chkpAddExtraInfo(char* pChkpIdDir, int64_t chkpId, int64_t processId); +int32_t chkpLoadExtraInfo(char* pChkpIdDir, int64_t* chkpId, int64_t* processId); + #define GEN_COLUMN_FAMILY_NAME(name, idstr, SUFFIX) sprintf(name, "%s_%s", idstr, (SUFFIX)); int32_t copyFiles(const char* src, const char* dst); uint32_t nextPow2(uint32_t x); @@ -286,7 +287,7 @@ int32_t remoteChkp_readMetaData(char* path, SSChkpMetaOnS3** pMeta) { return -1; } - int32_t n = sprintf(metaPath, "%s%s%s", path, TD_DIRSEP, "META"); + int32_t n = snprintf(metaPath, cap, "%s%s%s", path, TD_DIRSEP, "META"); if (n <= 0 || n >= cap) { terrno = TSDB_CODE_OUT_OF_MEMORY; taosMemoryFree(metaPath); @@ -317,6 +318,12 @@ int32_t remoteChkp_readMetaData(char* path, SSChkpMetaOnS3** pMeta) { taosMemoryFree(p); goto _EXIT; } + + if (p->currChkptId != p->manifestChkptId) { + terrno = TSDB_CODE_INVALID_MSG; + taosMemoryFree(p); + goto _EXIT; + } *pMeta = p; code = 0; _EXIT: @@ -324,66 +331,100 @@ _EXIT: taosMemoryFree(metaPath); return code; } -int32_t remoteChkp_validMetaFile(char* name, char* prename, int64_t chkpId) { - int8_t valid = 0; - for (int i = 0; i < strlen(name); i++) { - if (name[i] == '_') { - memcpy(prename, name, i); - if (taosStr2int64(name + i + 1) != chkpId) { - break; - } else { - valid = 1; - } - } - } - return valid; -} + int32_t remoteChkp_validAndCvtMeta(char* path, SSChkpMetaOnS3* pMeta, int64_t chkpId) { - int32_t complete = 1; - int32_t len = strlen(path) + 32; - char* src = taosMemoryCalloc(1, len); - char* dst = taosMemoryCalloc(1, len); + int32_t code = -1; + int32_t nBytes = 0; + int32_t cap = strlen(path) + 64; + char* src = taosMemoryCalloc(1, cap); + char* dst = taosMemoryCalloc(1, cap); if (src == NULL || dst == NULL) { terrno = TSDB_CODE_OUT_OF_MEMORY; - return -1; + return code; } if (pMeta->currChkptId != chkpId || pMeta->manifestChkptId != chkpId) { - terrno = TSDB_CODE_OUT_OF_MEMORY; - return -1; + terrno = TSDB_CODE_INVALID_CFG; + return code; } + // rename current_chkp/mainfest to current + for (int i = 0; i < 2; i++) { + char* key = (i == 0 ? pMeta->pCurrName : pMeta->pManifestName); + if (strlen(key) <= 0) { + terrno = TSDB_CODE_INVALID_PARA; + } - // for (int i = 0; i < taosArrayGetSize(list); i++) { - // char* p = taosArrayGetP(list, i); - // sprintf(src, "%s%s%s", path, TD_DIRSEP, p); + nBytes = snprintf(src, cap, "%s%s%s_%" PRId64 "", path, TD_DIRSEP, key, pMeta->currChkptId); + if (nBytes <= 0 || nBytes >= cap) { + terrno = TSDB_CODE_OUT_OF_RANGE; + goto _EXIT; + } - // // check file exist - // if (taosStatFile(src, NULL, NULL, NULL) != 0) { - // complete = 0; - // break; - // } + if (taosStatFile(src, NULL, NULL, NULL) != 0) { + terrno = TAOS_SYSTEM_ERROR(errno); + goto _EXIT; + } - // // check file name - // char temp[64] = {0}; - // if (remoteChkp_validMetaFile(p, temp, chkpId)) { - // count++; - // } + nBytes = snprintf(dst, cap, "%s%s%s", path, TD_DIRSEP, key); + if (nBytes <= 0 || nBytes >= cap) { + terrno = TSDB_CODE_OUT_OF_RANGE; + goto _EXIT; + } - // // rename file - // sprintf(dst, "%s%s%s", path, TD_DIRSEP, temp); - // taosRenameFile(src, dst); + if (taosRenameFile(src, dst) != 0) { + terrno = TAOS_SYSTEM_ERROR(errno); + goto _EXIT; + } - // memset(src, 0, len); - // memset(dst, 0, len); - // } - // if (count != taosArrayGetSize(list)) { - // complete = 0; - // } + memset(src, 0, cap); + memset(dst, 0, cap); + } + code = 0; +// rename manifest_chkp to manifest +_EXIT: taosMemoryFree(src); taosMemoryFree(dst); + return code; +} +int32_t remoteChkpGetDelFile(char* path, SArray* toDel) { + int32_t code = -1; + int32_t nBytes = 0; - return complete == 1 ? 0 : -1; + SSChkpMetaOnS3* pMeta = NULL; + code = remoteChkp_readMetaData(path, &pMeta); + if (code != 0) { + return code; + } + + for (int i = 0; i < 2; i++) { + char* key = (i == 0 ? pMeta->pCurrName : pMeta->pManifestName); + + int32_t cap = strlen(key) + 32; + char* p = taosMemoryCalloc(1, cap); + if (p == NULL) { + terrno = TSDB_CODE_OUT_OF_MEMORY; + taosMemoryFree(pMeta); + return -1; + } + + nBytes = snprintf(p, cap, "%s_%" PRId64 "", key, pMeta->currChkptId); + if (nBytes <= 0 || nBytes >= cap) { + terrno = TSDB_CODE_OUT_OF_RANGE; + taosMemoryFree(pMeta); + taosMemoryFree(p); + return code; + } + if (taosArrayPush(toDel, &p) == NULL) { + terrno = TSDB_CODE_OUT_OF_MEMORY; + taosMemoryFree(pMeta); + taosMemoryFree(p); + return code; + } + } + code = 0; + + return code; } void cleanDir(const char* pPath, const char* id) { @@ -424,56 +465,91 @@ int32_t rebuildFromRemoteChkp_rsync(const char* key, char* checkpointPath, int64 return backendCopyFiles(checkpointPath, defaultPath); } +int32_t rebuildDataFromS3(char* chkpPath, int64_t chkpId) { + SSChkpMetaOnS3* pMeta = NULL; + + int32_t code = remoteChkp_readMetaData(chkpPath, &pMeta); + if (code != 0) { + return -1; + } + + if (pMeta->currChkptId != chkpId || pMeta->manifestChkptId != chkpId) { + taosMemoryFree(pMeta); + terrno = TSDB_CODE_INVALID_PARA; + return -1; + } + + code = remoteChkp_validAndCvtMeta(chkpPath, pMeta, chkpId); + if (code != 0) { + taosMemoryFree(pMeta); + return -1; + } + + return chkpAddExtraInfo(chkpPath, chkpId, pMeta->processId); +} + int32_t rebuildFromRemoteChkp_s3(const char* key, char* chkpPath, int64_t chkpId, char* defaultPath) { + int8_t rename = 0; int32_t code = streamTaskDownloadCheckpointData(key, chkpPath); if (code != 0) { return code; } - int32_t nBytes; int32_t cap = strlen(defaultPath) + 32; - char* tmp = taosMemoryCalloc(1, cap); - if (tmp == NULL) { + char* defaultTmp = taosMemoryCalloc(1, cap); + if (defaultTmp == NULL) { terrno = TSDB_CODE_OUT_OF_MEMORY; return -1; } - nBytes = snprintf(tmp, cap, "%s%s", defaultPath, "_tmp"); + int32_t nBytes = snprintf(defaultPath, cap, "%s%s", defaultPath, "_tmp"); if (nBytes <= 0 || nBytes >= cap) { terrno = TSDB_CODE_OUT_OF_RANGE; - taosMemoryFree(tmp); + taosMemoryFree(defaultPath); return -1; } - if (taosIsDir(tmp)) taosRemoveDir(tmp); - if (taosIsDir(defaultPath)) taosRenameFile(defaultPath, tmp); - - // SArray* list = taosArrayInit(2, sizeof(void*)); - SSChkpMetaOnS3* pMeta = NULL; - code = remoteChkp_readMetaData(chkpPath, &pMeta); - if (code == 0) code = remoteChkp_validAndCvtMeta(chkpPath, pMeta, chkpId); - - taosMemoryFree(pMeta); - - if (code == 0) { - code = taosMkDir(defaultPath); - } - - if (code == 0) { - code = backendCopyFiles(chkpPath, defaultPath); - } - - if (code != 0) { - if (taosIsDir(defaultPath)) taosRemoveDir(defaultPath); - if (taosIsDir(tmp)) { - code = taosRenameFile(tmp, defaultPath); + if (taosIsDir(defaultTmp)) taosRemoveDir(defaultTmp); + if (taosIsDir(defaultPath)) { + code = taosRenameFile(defaultPath, defaultTmp); + if (code != 0) { + terrno = TAOS_SYSTEM_ERROR(errno); + goto _EXIT; + } else { + rename = 1; } } else { - taosRemoveDir(tmp); + code = taosMkDir(defaultPath); + if (code != 0) { + terrno = TAOS_SYSTEM_ERROR(errno); + goto _EXIT; + } } - taosMemoryFree(tmp); + code = rebuildDataFromS3(chkpPath, chkpId); + if (code != 0) { + goto _EXIT; + } + + code = backendCopyFiles(chkpPath, defaultPath); + if (code != 0) { + goto _EXIT; + } + code = 0; + +_EXIT: + if (code != 0) { + if (rename) { + taosRenameFile(defaultTmp, defaultPath); + } + } + + if (taosIsDir(defaultPath)) { + taosRemoveDir(defaultPath); + } + + taosMemoryFree(defaultTmp); return code; } diff --git a/source/libs/stream/src/streamCheckpoint.c b/source/libs/stream/src/streamCheckpoint.c index bc5067d4d6..8b75e74d3b 100644 --- a/source/libs/stream/src/streamCheckpoint.c +++ b/source/libs/stream/src/streamCheckpoint.c @@ -542,7 +542,7 @@ void streamTaskSetFailedCheckpointId(SStreamTask* pTask) { static int32_t getCheckpointDataMeta(const char* id, const char* path, SArray* list) { TdFilePtr pFile = NULL; - int32_t cap = strlen(path) + 32; + int32_t cap = strlen(path) + 64; char buf[128] = {0}; int32_t code = 0; @@ -553,7 +553,7 @@ static int32_t getCheckpointDataMeta(const char* id, const char* path, SArray* l } int32_t nBytes = snprintf(filePath, cap, "%s%s%s", path, TD_DIRSEP, "META_TMP"); - if (nBytes != strlen(filePath)) { + if (nBytes <= 0 || nBytes >= cap) { taosMemoryFree(filePath); terrno = TSDB_CODE_OUT_OF_RANGE; return -1; @@ -561,41 +561,17 @@ static int32_t getCheckpointDataMeta(const char* id, const char* path, SArray* l code = downloadCheckpointDataByName(id, "META", filePath); if (code != 0) { - stDebug("%s chkp failed to download meta file:%s", id, filePath); + stError("%s chkp failed to download meta file:%s", id, filePath); taosMemoryFree(filePath); return code; } - pFile = taosOpenFile(filePath, TD_FILE_READ); - if (pFile == NULL) { - terrno = TAOS_SYSTEM_ERROR(errno); - stError("%s failed to open meta file:%s for checkpoint", id, filePath); + code = remoteChkpGetDelFile(filePath, list); + if (code != 0) { + stError("%s chkp failed to get to del:%s", id, filePath); taosMemoryFree(filePath); - return -1; } - - if (taosReadFile(pFile, buf, sizeof(buf)) <= 0) { - stError("%s failed to read meta file:%s for checkpoint", id, filePath); - code = -1; - } else { - int32_t len = strnlen(buf, tListLen(buf)); - for (int i = 0; i < len; i++) { - if (buf[i] == '\n') { - char* item = taosMemoryCalloc(1, i + 1); - memcpy(item, buf, i); - taosArrayPush(list, &item); - - item = taosMemoryCalloc(1, len - i); - memcpy(item, buf + i + 1, len - i - 1); - taosArrayPush(list, &item); - } - } - } - - taosCloseFile(&pFile); - taosRemoveFile(filePath); - taosMemoryFree(filePath); - return code; + return 0; } int32_t uploadCheckpointData(SStreamTask* pTask, int64_t checkpointId, int64_t dbRefId, ECHECKPOINT_BACKUP_TYPE type) { From 95469124f8ac2b9fac2c801b74798dccc936bf03 Mon Sep 17 00:00:00 2001 From: Yihao Deng Date: Wed, 3 Jul 2024 06:47:52 +0000 Subject: [PATCH 19/34] fix stream restart crash --- source/libs/stream/src/streamMeta.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/source/libs/stream/src/streamMeta.c b/source/libs/stream/src/streamMeta.c index 958c3bc00f..2244861bc7 100644 --- a/source/libs/stream/src/streamMeta.c +++ b/source/libs/stream/src/streamMeta.c @@ -273,7 +273,7 @@ int32_t streamTaskSetDb(SStreamMeta* pMeta, SStreamTask* pTask, const char* key) pBackend->pTask = pTask; pBackend->pMeta = pMeta; - pTask->chkInfo.processedVer = processVer; + if (processVer != -1) pTask->chkInfo.processedVer = processVer; taosHashPut(pMeta->pTaskDbUnique, key, strlen(key), &pBackend, sizeof(void*)); taosThreadMutexUnlock(&pMeta->backendMutex); @@ -905,7 +905,7 @@ void streamMetaLoadAllTasks(SStreamMeta* pMeta) { if (p == NULL) { code = pMeta->buildTaskFn(pMeta->ahandle, pTask, pTask->chkInfo.checkpointVer + 1); if (code < 0) { - stError("failed to load s-task:0x%"PRIx64", code:%s, continue", id.taskId, tstrerror(terrno)); + stError("failed to load s-task:0x%" PRIx64 ", code:%s, continue", id.taskId, tstrerror(terrno)); tFreeStreamTask(pTask); continue; } @@ -990,7 +990,7 @@ void streamMetaNotifyClose(SStreamMeta* pMeta) { streamMetaGetHbSendInfo(pMeta->pHbInfo, &startTs, &sendCount); stInfo("vgId:%d notify all stream tasks that current vnode is closing. isLeader:%d startHb:%" PRId64 ", totalHb:%d", - vgId, (pMeta->role == NODE_ROLE_LEADER), startTs, sendCount); + vgId, (pMeta->role == NODE_ROLE_LEADER), startTs, sendCount); // wait for the stream meta hb function stopping streamMetaWaitForHbTmrQuit(pMeta); @@ -1175,7 +1175,7 @@ int32_t streamMetaStartAllTasks(SStreamMeta* pMeta) { int64_t now = taosGetTimestampMs(); int32_t numOfTasks = taosArrayGetSize(pMeta->pTaskList); - stInfo("vgId:%d start to consensus checkpointId for all %d task(s), start ts:%"PRId64, vgId, numOfTasks, now); + stInfo("vgId:%d start to consensus checkpointId for all %d task(s), start ts:%" PRId64, vgId, numOfTasks, now); if (numOfTasks == 0) { stInfo("vgId:%d no tasks exist, quit from consensus checkpointId", pMeta->vgId); From 0bd51f21333b6753530f3a472581f966418fb27a Mon Sep 17 00:00:00 2001 From: Yihao Deng Date: Thu, 4 Jul 2024 06:36:36 +0000 Subject: [PATCH 20/34] fix comment --- out | Bin 21360 -> 0 bytes source/dnode/vnode/src/tq/tqStreamStateSnap.c | 8 +- source/libs/stream/src/streamBackendRocksdb.c | 82 ++++++++++++------ source/libs/stream/src/streamCheckpoint.c | 2 +- source/libs/stream/src/streamSnapshot.c | 51 +++++++++-- t.c | 12 --- 6 files changed, 105 insertions(+), 50 deletions(-) delete mode 100755 out delete mode 100644 t.c diff --git a/out b/out deleted file mode 100755 index 21f5cbee379517922a226c62b551376a5b0ad2f4..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 21360 zcmeHPe{@vUoxg7;Ap;3XApFM4L;*pXA%Q>vK@vh>QWKH|Qh(ORWacG#W-^n$c>{@c z6&KwjC|hdludZ6y(~76nF0O4?yS7DLslU359?vdy&u(eER@hn>Q*==)v!Cz1_d84; z8T7dO$DTcNleypf{oL>Od%yR4?|bjQpEotPuJaj&;N%yV3gWg*P)LKWIMJdDAPr)f z7>|6em?Or3FOrz5Z%_oaDjo5SqydflK}l~K6-J@wDOfON4~ddqz7!m*YM6>j#gkq! z6*cdrZQ2i}T%T<(NXke-nP+K{?jO@!J&H|jjgqfjHIfeK_69X&Ju*jnS7^N}v>wx- zZjUL)lVU=@W!j#X)}oFQQ?FjH*7MS}N=-1OHYja+!;O8K_ByoQ;h;1lyp-Ev%I*CL z^e8T$H1RNR)a~UPhr>FaOliC+EsWb;OY0ZLV>NL*kshoWT(-1kX?-Y_3@wuNNZ@c2z9@S0K^(24KuBzuw{@baq9eVEzZ_RrzHc;|N>^mewev=N# zP$GNUMafgS96yp#@wi#ygi(6M`iFj18a>|#c;YBk>+;~#PH+_Yt~~PO|7h}8=8^ww z9{C&d$Um6}xAWjv=fRKW!SBt3zXJRM{LV_I+0&m)m?<~nxTY!0md$Nu%yO(AJLOu= z=C-DIGGT3wbj2-IH+qd3NktN-ov>XIjl|>0s7NIS96RB5iT(NM}w zI#wNn=>%Ifd*aEiNIbhASN?1TVM&eEo^l$fY0t#55v(_}6REv^?_TohV@a2cci`GL}0raS}6+w1qg zdN-!EUyQ}XBP>0`#va`#CecJhTBi4hy#Jb7rSJw1-k>LW*n_{X@eU8p8%3Z&*<<_6 z2R-E5!PWjv)f~vd zd2FcWi5y%X`lZ6b9DJNIA@k>R@X{Roa1K5`2QL&05hz5U5P?Dj3K97K7=b@joc}?1 z$6JB$?Zq!F6e4`0e|@TzAR4*lHucD&GK~MA3lC_mZwF<@bNEYd0GGr zACE8(@8(GF)(Yt#_#cGG+cW&59{$@N{s9kvmxtf&;cxZuf9m1WxqRL4TXw#3ZFuLe z!#n={M91dlx`TBugm6hbsK zV>3r9W^E_AXElZVFQ{_&l2?h$KONqABK-8*tHV#9@`sH>;a5((lVIQ@TuvYZfy~iv z85h>4@!P(Nx+~HPI>S3wy-pV%!aLt^%fh#>`Yo7a_nppUj>Qn{L&g1&GrkDjZ2P2t zd>bmM&T1i_vA;hwvx5soj-d5_2Sr;SurOT92@LPcdwX}LIL-*faG49|5UA99BOT< z6K*Ifs8lPAnf^;~+ArvmLC1e)GJ~KeK=*=*6PXO1bnXG&0y^|gCUY2c{%|I90(3oS zrM`JV_i8B_*KZWYV5Kp$tR%1pb%fJ13G-lME(xqHk+-g9qgapM;kR{L<(2Eor+vPn zbX#D%SUqFK#fxWOK;oo-4Sr$xBozrnBHYEV0r(yQbfUKIz;7SgJ3v@-dF7w`n#Pau z?T1Iomq!5Cp)cp>)_2Nk>FdkD7ovV%ZoR4M2`Z#Q1PT!-M4%9XLIesCC`6zTfkFfd z5jb}Qvd^9A=gIiFGJcLspGJsbZHD=b=F8Z;J_*20E}0ti^K3!Q^K)L+nx|Wzl%^|^ zeI9P6=J}jrhUWSCH#(1?#P#o<&LoLHpdS+BXTlnEfuFrPqU-rYEvP%f&rVGvBS@1K z$>v&34C?2q_}Q-`T7u4ZDDg9AQxz-1x`T`#(2j7w@73+|vtHc3JokYOmisWH`Ui0d z;SQec?{;kucPu5Z&C#i$`Eux zbg-^2R3ECXsY^???#4xnLbdhtH7rkc)imQtFw&bl;6t%6WMx$bjO^25Ei~x{+_W}! zl*VCV@&~9jc1Pv1BPs)r!JMx~akvr?=qG+*6&Zm$V#;&GFD~tt zyv!=6J__o)+9=(I21QZj1IUl~NTc#)^x+q>IPElf^Gko}I^Z`|+yl-(W#SjW2xIyi z*itL~^8;7H_=LYec5%fCvNeGo?_VH$7ntEjdW8S8k|`TI4TeK+K}5DW_8G7X*TD+0 z#@H5^s7+|rH}=N>>pqV@Q*CS!Gq<2Sy1@^GW2w8=?B!t#Mi)HQgI(#=mI|-6~$B#GnWJM zH&xt)p3rDdq^h-3S~R3*_l~Vc$j2GQ0hwTI3pAqvl$$aE*sRLCHMg`wBJsP09S7?>LF=mCTF2xm2@HGB5f5i5&fwWL^%?&b#O# z8H&eu{h%?>Ne1^zzI^G?W>M4x)qTbT6qdh|VG%{c@cLT@9gh6p))1_S{l>EdJ*=VM zLh(BWHRk`WhHAk)YEYp3-;uAeZ7fC!d>c=@f$=ok#?!`aT#M}d@kQvaaR>4DQT7mJk5hJ#vY%7- zI%RKCc9JrKoGYhnCS~&|TSD0y%GxQrhBBM7A!M`0R7^-gGf1znjF~#F7)_4#XICV?02Md>A_%S&6TR!%>!6!BakR#Y386fduyFbxt#%MC~s*!OaWA${hOI4+b@|DwL zRb;%BjMrm*RTdD^t?EhS#P|wPS{XyVB+oiW_@QO|(u3pI*a4h-a zPG4JOup{F3wpxiEx0fvEHQZF++;Lg|b)v5=8B52l&d!##WF35Da8tU!KjI8&E9)(H zgEi>3B=8zgI_lzGo{p&KYlOS&<(obmt=RQ=!6@6AyiL~CZEjp|c5b+AL;K|$nh}o( zUIl8k6P9Y2EH=`MN0E(oPp_-($qu5oF$&?iI#gFn)|)yvZEkOaak3`E*lKmVBV3eK zEeY3&w0B)+MY$4fxUvni-|8Q5lHEBrq(dW&TyDqXohfTwB;|G{w%Lgo)cZfqMIH0& zAUo(ph6Ezp;aFSkWI8o6x?Ra+976PxlX@dd&H_OlT-)5(+S=YE`Zijw?N~AIZ0c)v zoTSrcrBaa|YfZYl+j3-h@eWu|kPZgucND{%jSqU!*p2tZHbs*IdJt(2Z0>ceh_bGR zr+E-UcqOZ&3G+{+6DhkVVa0;c-iV{DH9F{1Q!?3Svl(imDVcEXo;12hCA__5IqmIh z)Zj{=WR^8Y9-3zFHtIUXM{hHjapxNQcgVi(KfW^{v301oG|WbC!qqE-6@C^ZM&&nyqFjC z#1es2yA{M0(Y2hYH9#A&=sV*w##=Rm%|Z@x*oq9OCeCbB=Zq|-Huh08;A#`5txo&6 z(TZDLZ`#{}v!dlg1VVkg{U?Wbnca?h0H(1jXtZ82qtVy}CRXvHc)UWC8XlV-8 zbSIKEU1>Y++6l#2gL+I;ig#SsMk(2EyEWZ(ps5*1%Ap6CmV2ujmtF@Y@vU~s?y}>y zJG83Tj>Rl((jJJ^*wJnXM7wLSH*~S1#H3l(osP!=Mp8qGXs?q@V7spInoQ9Fj+K%Q zx&T3Rr(0D-JjGD7_sGH8r&t z((XcLralygq5}hBAw?;+uqPU=sjI8G!rHt%ekF72L|#iE=H}9R{hl?VeYWwBZ~oj8 zSy#n@e0uOEdw=oU_DJKD08d~K}VCPb4#D76npe{8??C8BmAHTr?nK}e7%z8TTn2> zOtH5?A>{KyW!Z_D^?DBzNX;P-MF-;cs*{y&iie+oF+=j*F{ zo$n=$^L5yH;3?&c&+Di&=qU?Y6nfHV{v4AsRoU+ou*!!h7`Xb%*EJcRfIt~yn($uN zqm#s-z}46WmQ-{GaDC}`M8)4oyfXX!jk&tEI*)t}@ZbmwilGY+6MYi|Ur%O{HNYqM zCJR0r!+Kq&%SoPva_F_dgE>4Gug@dD1Na28q?K@fE7P68HKY+m9syoCN_?IKKFUxD zx`s2sHzPYg+1OEy2O1brlr9%l=FDfVeP81bd*n|8A1w~ke52#Fz{&r;9y@fBJDQyv z^4M?9BX8%ycS*cbwCG0Y8Udw?$i5GE7qDrMNSmULTNg^w!nVsc>4F}Bl#4Ygo}D`L z!2;9mH>32)0y@cwCH40N%ox5mkTN6bL4n=kK-_YzSg3Y%3Ho+`ZANH4YT|^;84~g+ zXvT0}GX#|!PA;!Uh{~O6?oouE>@rj7E_1C}TU)z)xrtVZZP6@!17Y39#0hBv0u>D295HV5yL@q9Wwt zY+Quoj0ibN8KjWat7mm@4Az)~lDaaWX1FrR=)pFrSv%2cVL(bNE_~#bpGPiv(jrI3sompN5$UCG28wOjc={UY{`K z`;59;%5s_Lh4|qDRaQUWcZx`s5+9!wCj?#7%+>dPuV^+4YRbnShG~71tIz8zra>0e z)T@6v@b_?Uko!*;tSK?&_HpG=Ca?bypt!J>?LXgNW!g)@MIs-m$ohPr(*{OOa{CRq zv`2|)Q0psH+2AtKFX2b`K3SjdcQWPm75mR}OmBof-AiSj@24`YrACmv{wq|7-6&8@ zS)cFkGUfZb+`hN}_h|h!+92OAW_m>9q)%5Bz54rrQHa=n_Bg4Jmo&d~lXv{@M`f;l zQ0p<>o2!g6ABLIk_vjzg6--%<%`ngOfJgtZu3)-{6*Xl&rcZkG`8#h+f9aV&tjA{Q z4p?sgd40*0pJQNoZ~UIq`aJ$3s3kQ$tXU=+W>)+hC2F7b4{HIYYqOH7;BEh30O#sY zt5$MM>Aq%e;`zha8z@jr^k$!3NAvf^yzRrS;eG#L-4rpQ)Wk|Dpz^u!;rq=u@?)i&C<;Q7LTK8~kjrbRUZ6 m10O%>x(d%7rJ?d;&5C}-W8C8o&TU86-xgN-LmmYWEB+hAz@k(D diff --git a/source/dnode/vnode/src/tq/tqStreamStateSnap.c b/source/dnode/vnode/src/tq/tqStreamStateSnap.c index 655778568b..be768e375e 100644 --- a/source/dnode/vnode/src/tq/tqStreamStateSnap.c +++ b/source/dnode/vnode/src/tq/tqStreamStateSnap.c @@ -38,7 +38,8 @@ int32_t streamStateSnapReaderOpen(STQ* pTq, int64_t sver, int64_t ever, SStreamS // alloc pReader = (SStreamStateReader*)taosMemoryCalloc(1, sizeof(SStreamStateReader)); if (pReader == NULL) { - code = TSDB_CODE_OUT_OF_MEMORY; + terrno = TSDB_CODE_OUT_OF_MEMORY; + code = terrno; goto _err; } @@ -54,7 +55,7 @@ int32_t streamStateSnapReaderOpen(STQ* pTq, int64_t sver, int64_t ever, SStreamS if (streamSnapReaderOpen(meta, sver, chkpId, meta->path, &pSnapReader) == 0) { pReader->complete = 1; } else { - code = -1; + code = terrno; taosMemoryFree(pReader); goto _err; } @@ -131,7 +132,8 @@ int32_t streamStateSnapWriterOpen(STQ* pTq, int64_t sver, int64_t ever, SStreamS // alloc pWriter = (SStreamStateWriter*)taosMemoryCalloc(1, sizeof(*pWriter)); if (pWriter == NULL) { - code = TSDB_CODE_OUT_OF_MEMORY; + terrno = TSDB_CODE_OUT_OF_MEMORY; + code = terrno; goto _err; } pWriter->pTq = pTq; diff --git a/source/libs/stream/src/streamBackendRocksdb.c b/source/libs/stream/src/streamBackendRocksdb.c index 4ba409e3f0..54abba8bdc 100644 --- a/source/libs/stream/src/streamBackendRocksdb.c +++ b/source/libs/stream/src/streamBackendRocksdb.c @@ -214,14 +214,14 @@ int32_t rebuildDirFromCheckpoint(const char* path, int64_t chkpId, char** dst) { char* state = taosMemoryCalloc(1, cap); if (state == NULL) { terrno = TSDB_CODE_OUT_OF_MEMORY; - return -1; + return terrno; } nBytes = snprintf(state, cap, "%s%s%s", path, TD_DIRSEP, "state"); if (nBytes <= 0 || nBytes >= cap) { terrno = TSDB_CODE_OUT_OF_RANGE; taosMemoryFree(state); - return -1; + return terrno; } if (chkpId != 0) { @@ -229,7 +229,7 @@ int32_t rebuildDirFromCheckpoint(const char* path, int64_t chkpId, char** dst) { if (chkp == NULL) { terrno = TSDB_CODE_OUT_OF_MEMORY; taosMemoryFree(state); - return -1; + return terrno; } nBytes = snprintf(chkp, cap, "%s%s%s%scheckpoint%" PRId64 "", path, TD_DIRSEP, "checkpoints", TD_DIRSEP, chkpId); @@ -237,7 +237,7 @@ int32_t rebuildDirFromCheckpoint(const char* path, int64_t chkpId, char** dst) { terrno = TSDB_CODE_OUT_OF_RANGE; taosMemoryFree(state); taosMemoryFree(chkp); - return -1; + return terrno; } if (taosIsDir(chkp) && isValidCheckpoint(chkp)) { @@ -255,6 +255,7 @@ int32_t rebuildDirFromCheckpoint(const char* path, int64_t chkpId, char** dst) { code = taosMkDir(state); if (code != 0) { terrno = TAOS_SYSTEM_ERROR(errno); + code = terrno; } } @@ -262,7 +263,7 @@ int32_t rebuildDirFromCheckpoint(const char* path, int64_t chkpId, char** dst) { } *dst = state; - return 0; + return code; } typedef struct { @@ -284,14 +285,14 @@ int32_t remoteChkp_readMetaData(char* path, SSChkpMetaOnS3** pMeta) { char* metaPath = taosMemoryCalloc(1, cap); if (metaPath == NULL) { terrno = TSDB_CODE_OUT_OF_MEMORY; - return -1; + return terrno; } int32_t n = snprintf(metaPath, cap, "%s%s%s", path, TD_DIRSEP, "META"); if (n <= 0 || n >= cap) { terrno = TSDB_CODE_OUT_OF_MEMORY; taosMemoryFree(metaPath); - return -1; + return terrno; } pFile = taosOpenFile(path, TD_FILE_READ); @@ -329,29 +330,32 @@ int32_t remoteChkp_readMetaData(char* path, SSChkpMetaOnS3** pMeta) { _EXIT: taosCloseFile(&pFile); taosMemoryFree(metaPath); + code = terrno; return code; } int32_t remoteChkp_validAndCvtMeta(char* path, SSChkpMetaOnS3* pMeta, int64_t chkpId) { int32_t code = -1; int32_t nBytes = 0; + int32_t cap = strlen(path) + 64; char* src = taosMemoryCalloc(1, cap); char* dst = taosMemoryCalloc(1, cap); if (src == NULL || dst == NULL) { terrno = TSDB_CODE_OUT_OF_MEMORY; - return code; + goto _EXIT; } if (pMeta->currChkptId != chkpId || pMeta->manifestChkptId != chkpId) { terrno = TSDB_CODE_INVALID_CFG; - return code; + goto _EXIT; } // rename current_chkp/mainfest to current for (int i = 0; i < 2; i++) { char* key = (i == 0 ? pMeta->pCurrName : pMeta->pManifestName); if (strlen(key) <= 0) { terrno = TSDB_CODE_INVALID_PARA; + goto _EXIT; } nBytes = snprintf(src, cap, "%s%s%s_%" PRId64 "", path, TD_DIRSEP, key, pMeta->currChkptId); @@ -385,6 +389,7 @@ int32_t remoteChkp_validAndCvtMeta(char* path, SSChkpMetaOnS3* pMeta, int64_t ch _EXIT: taosMemoryFree(src); taosMemoryFree(dst); + code = terrno; return code; } int32_t remoteChkpGetDelFile(char* path, SArray* toDel) { @@ -1495,7 +1500,7 @@ int32_t chkpLoadExtraInfo(char* pChkpIdDir, int64_t* chkpId, int64_t* processId) if (len == 0) { terrno = TSDB_CODE_INVALID_PARA; stError("failed to load extra info, dir:%s, reason:%s", pChkpIdDir, tstrerror(terrno)); - return -1; + return terrno; } int32_t cap = len + 64; @@ -1542,7 +1547,7 @@ int32_t chkpLoadExtraInfo(char* pChkpIdDir, int64_t* chkpId, int64_t* processId) _EXIT: taosMemoryFree(pDst); taosCloseFile(&pFile); - return code; + return terrno; } int32_t chkpAddExtraInfo(char* pChkpIdDir, int64_t chkpId, int64_t processId) { int32_t code = -1; @@ -4399,6 +4404,10 @@ int32_t compareHashTableImpl(SHashObj* p1, SHashObj* p2, SArray* diff) { char* name = taosHashGetKey(pIter, &len); if (!isBkdDataMeta(name, len) && !taosHashGet(p1, name, len)) { char* fname = taosMemoryCalloc(1, len + 1); + if (fname == NULL) { + terrno = TSDB_CODE_OUT_OF_MEMORY; + return terrno; + } strncpy(fname, name, len); taosArrayPush(diff, &fname); } @@ -4410,7 +4419,9 @@ int32_t compareHashTable(SHashObj* p1, SHashObj* p2, SArray* add, SArray* del) { int32_t code = 0; code = compareHashTableImpl(p1, p2, add); - code = compareHashTableImpl(p2, p1, del); + if (code != 0) { + code = compareHashTableImpl(p2, p1, del); + } return code; } @@ -4493,7 +4504,7 @@ int32_t dbChkpGetDelta(SDbChkp* p, int64_t chkpId, SArray* list) { if (nBytes <= 0 || nBytes >= p->len) { terrno = TSDB_CODE_OUT_OF_RANGE; taosThreadRwlockUnlock(&p->rwLock); - return -1; + return terrno; } taosArrayClearP(p->pAdd, taosMemoryFree); @@ -4502,9 +4513,9 @@ int32_t dbChkpGetDelta(SDbChkp* p, int64_t chkpId, SArray* list) { TdDirPtr pDir = taosOpenDir(p->buf); if (pDir == NULL) { - terrno = errno; + terrno = TAOS_SYSTEM_ERROR(errno); taosThreadRwlockUnlock(&p->rwLock); - return -1; + return terrno; } TdDirEntryPtr de = NULL; @@ -4514,21 +4525,36 @@ int32_t dbChkpGetDelta(SDbChkp* p, int64_t chkpId, SArray* list) { if (strcmp(name, ".") == 0 || strcmp(name, "..") == 0) continue; if (strlen(name) == currLen && strcmp(name, pCurrent) == 0) { taosMemoryFreeClear(p->pCurrent); + p->pCurrent = taosStrdup(name); + if (p->pCurrent == NULL) { + terrno = TSDB_CODE_OUT_OF_MEMORY; + break; + } continue; } if (strlen(name) >= maniLen && strncmp(name, pManifest, maniLen) == 0) { taosMemoryFreeClear(p->pManifest); p->pManifest = taosStrdup(name); + if (p->pManifest == NULL) { + terrno = TSDB_CODE_OUT_OF_MEMORY; + break; + } continue; } if (strlen(name) >= sstLen && strncmp(name + strlen(name) - 4, pSST, sstLen) == 0) { - taosHashPut(p->pSstTbl[1 - p->idx], name, strlen(name), &dummy, sizeof(dummy)); + if (taosHashPut(p->pSstTbl[1 - p->idx], name, strlen(name), &dummy, sizeof(dummy)) != 0) { + break; + } continue; } } taosCloseDir(&pDir); + if (terrno != 0) { + taosThreadRwlockUnlock(&p->rwLock); + return terrno; + } if (p->init == 0) { void* pIter = taosHashIterate(p->pSstTbl[1 - p->idx], NULL); @@ -4542,6 +4568,7 @@ int32_t dbChkpGetDelta(SDbChkp* p, int64_t chkpId, SArray* list) { taosThreadRwlockUnlock(&p->rwLock); return -1; } + strncpy(fname, name, len); taosArrayPush(p->pAdd, &fname); } @@ -4560,7 +4587,7 @@ int32_t dbChkpGetDelta(SDbChkp* p, int64_t chkpId, SArray* list) { taosArrayClearP(p->pDel, taosMemoryFree); taosHashClear(p->pSstTbl[1 - p->idx]); p->update = 0; - return code; + return terrno; } if (taosArrayGetSize(p->pAdd) == 0 && taosArrayGetSize(p->pDel) == 0) { @@ -4831,7 +4858,8 @@ _ERROR: taosMemoryFree(dstBuf); taosMemoryFree(srcDir); taosMemoryFree(dstDir); - return code; + + return terrno; } SBkdMgt* bkdMgtCreate(char* path) { @@ -4893,7 +4921,7 @@ int32_t bkdMgtGetDelta(SBkdMgt* bm, char* taskId, int64_t chkpId, SArray* list, if (path == NULL) { terrno = TSDB_CODE_OUT_OF_MEMORY; taosThreadRwlockUnlock(&bm->rwLock); - return -1; + return terrno; } int32_t nBytes = snprintf(path, cap, "%s%s%s", bm->path, TD_DIRSEP, taskId); @@ -4901,21 +4929,20 @@ int32_t bkdMgtGetDelta(SBkdMgt* bm, char* taskId, int64_t chkpId, SArray* list, terrno = TSDB_CODE_OUT_OF_RANGE; taosMemoryFree(path); taosThreadRwlockUnlock(&bm->rwLock); - return -1; + return terrno; } SDbChkp* p = dbChkpCreate(path, chkpId); if (p == NULL) { taosMemoryFree(path); taosThreadRwlockUnlock(&bm->rwLock); - return -1; + return terrno; } if (taosHashPut(bm->pDbChkpTbl, taskId, strlen(taskId), &p, sizeof(void*)) != 0) { dbChkpDestroy(p); - taosMemoryFree(path); taosThreadRwlockUnlock(&bm->rwLock); - return -1; + return terrno; } pChkp = p; @@ -4923,13 +4950,14 @@ int32_t bkdMgtGetDelta(SBkdMgt* bm, char* taskId, int64_t chkpId, SArray* list, taosThreadRwlockUnlock(&bm->rwLock); return code; } else { - code = dbChkpGetDelta(pChkp, chkpId, NULL); - - if (code == 0) code = dbChkpDumpTo(pChkp, dname, list); + terrno = dbChkpGetDelta(pChkp, chkpId, NULL); + if (code == 0) { + terrno = dbChkpDumpTo(pChkp, dname, list); + } } taosThreadRwlockUnlock(&bm->rwLock); - return code; + return terrno; } #ifdef BUILD_NO_CALL diff --git a/source/libs/stream/src/streamCheckpoint.c b/source/libs/stream/src/streamCheckpoint.c index d6612cd4d8..df73b9f1c8 100644 --- a/source/libs/stream/src/streamCheckpoint.c +++ b/source/libs/stream/src/streamCheckpoint.c @@ -123,7 +123,7 @@ int32_t streamTaskSendCheckpointTriggerMsg(SStreamTask* pTask, int32_t dstTaskId void* pBuf = rpcMallocCont(size); if (pBuf == NULL) { terrno = TSDB_CODE_OUT_OF_MEMORY; - return -1; + return terrno; } SCheckpointTriggerRsp* pRsp = POINTER_SHIFT(pBuf, sizeof(SMsgHead)); diff --git a/source/libs/stream/src/streamSnapshot.c b/source/libs/stream/src/streamSnapshot.c index 7ef4e8ec09..160bd3525d 100644 --- a/source/libs/stream/src/streamSnapshot.c +++ b/source/libs/stream/src/streamSnapshot.c @@ -152,9 +152,20 @@ int32_t streamDestroyTaskDbSnapInfo(void* arg, SArray* snap) { return taskDbDest void snapFileDebugInfo(SBackendSnapFile2* pSnapFile) { if (qDebugFlag & DEBUG_DEBUG) { - int16_t cap = 511; - char* buf = taosMemoryCalloc(1, cap + 1); - sprintf(buf + strlen(buf), "["); + int16_t cap = 512; + + char* buf = taosMemoryCalloc(1, cap); + if (buf == NULL) { + stError("%s failed to alloc memory", STREAM_STATE_TRANSFER, tstrerror(TSDB_CODE_OUT_OF_MEMORY)); + return; + } + + int32_t nBytes = snprintf(buf + strlen(buf), cap, "["); + if (nBytes <= 0 || nBytes >= cap) { + taosMemoryFree(buf); + stError("%s failed to write buf, reason:%s", STREAM_STATE_TRANSFER, tstrerror(TSDB_CODE_OUT_OF_RANGE)); + return; + } if (pSnapFile->pCurrent) sprintf(buf, "current: %s,", pSnapFile->pCurrent); if (pSnapFile->pMainfest) sprintf(buf + strlen(buf), "MANIFEST: %s,", pSnapFile->pMainfest); @@ -219,8 +230,9 @@ int32_t snapFileGenMeta(SBackendSnapFile2* pSnapFile) { int32_t snapFileReadMeta(SBackendSnapFile2* pSnapFile) { TdDirPtr pDir = taosOpenDir(pSnapFile->path); if (NULL == pDir) { + terrno = TAOS_SYSTEM_ERROR(errno); stError("%s failed to open %s", STREAM_STATE_TRANSFER, pSnapFile->path); - return -1; + return terrno; } TdDirEntryPtr pDirEntry; @@ -228,34 +240,58 @@ int32_t snapFileReadMeta(SBackendSnapFile2* pSnapFile) { char* name = taosGetDirEntryName(pDirEntry); if (strlen(name) >= strlen(ROCKSDB_CURRENT) && 0 == strncmp(name, ROCKSDB_CURRENT, strlen(ROCKSDB_CURRENT))) { pSnapFile->pCurrent = taosStrdup(name); + if (pSnapFile->pCurrent == NULL) { + terrno = TSDB_CODE_OUT_OF_MEMORY; + break; + } continue; } if (strlen(name) >= strlen(ROCKSDB_MAINFEST) && 0 == strncmp(name, ROCKSDB_MAINFEST, strlen(ROCKSDB_MAINFEST))) { pSnapFile->pMainfest = taosStrdup(name); + if (pSnapFile->pMainfest == NULL) { + terrno = TSDB_CODE_OUT_OF_MEMORY; + break; + } continue; } if (strlen(name) >= strlen(ROCKSDB_OPTIONS) && 0 == strncmp(name, ROCKSDB_OPTIONS, strlen(ROCKSDB_OPTIONS))) { pSnapFile->pOptions = taosStrdup(name); + if (pSnapFile->pOptions == NULL) { + terrno = TSDB_CODE_OUT_OF_MEMORY; + break; + } continue; } if (strlen(name) >= strlen(ROCKSDB_CHECKPOINT_META) && 0 == strncmp(name, ROCKSDB_CHECKPOINT_META, strlen(ROCKSDB_CHECKPOINT_META))) { pSnapFile->pCheckpointMeta = taosStrdup(name); + if (pSnapFile->pCheckpointMeta == NULL) { + terrno = TSDB_CODE_OUT_OF_MEMORY; + break; + } continue; } if (strlen(name) >= strlen(ROCKSDB_CHECKPOINT_SELF_CHECK) && 0 == strncmp(name, ROCKSDB_CHECKPOINT_SELF_CHECK, strlen(ROCKSDB_CHECKPOINT_SELF_CHECK))) { pSnapFile->pCheckpointSelfcheck = taosStrdup(name); + if (pSnapFile->pCheckpointSelfcheck == NULL) { + terrno = TSDB_CODE_OUT_OF_MEMORY; + break; + } continue; } if (strlen(name) >= strlen(ROCKSDB_SST) && 0 == strncmp(name + strlen(name) - strlen(ROCKSDB_SST), ROCKSDB_SST, strlen(ROCKSDB_SST))) { char* sst = taosStrdup(name); + if (sst == NULL) { + terrno = TSDB_CODE_OUT_OF_MEMORY; + break; + } taosArrayPush(pSnapFile->pSst, &sst); } } taosCloseDir(&pDir); - return 0; + return terrno; } int32_t streamBackendSnapInitFile(char* metaPath, SStreamTaskSnap* pSnap, SBackendSnapFile2* pSnapFile) { int32_t code = -1; @@ -535,6 +571,7 @@ int32_t streamSnapWriterOpen(void* pMeta, int64_t sver, int64_t ever, char* path if (pHandle->metaPath == NULL) { terrno = TSDB_CODE_OUT_OF_MEMORY; taosMemoryFree(pWriter); + return terrno; } pHandle->pDbSnapSet = taosArrayInit(8, sizeof(SBackendSnapFile2)); @@ -542,14 +579,14 @@ int32_t streamSnapWriterOpen(void* pMeta, int64_t sver, int64_t ever, char* path terrno = TSDB_CODE_OUT_OF_MEMORY; taosMemoryFree(pHandle->metaPath); taosMemoryFree(pWriter); - return -1; + return terrno; } SBackendSnapFile2 snapFile = {0}; if (taosArrayPush(pHandle->pDbSnapSet, &snapFile) == NULL) { terrno = TSDB_CODE_OUT_OF_MEMORY; streamSnapWriterClose(pWriter, 0); - return -1; + return terrno; } *ppWriter = pWriter; diff --git a/t.c b/t.c deleted file mode 100644 index a79ed4c134..0000000000 --- a/t.c +++ /dev/null @@ -1,12 +0,0 @@ -#include -#include -#include - -int main() { - char *buf = calloc(1, 4); - int n = snprintf(buf, 4, "size"); - - printf("write size:%d \t buf:%s \t len:%d\n", n, buf, (int)(strlen(buf))); - buf[4] = 10; - return 1; -} From ea01f1eb85dcc83a1cafd8946a224bcc1b0646b3 Mon Sep 17 00:00:00 2001 From: Yihao Deng Date: Thu, 4 Jul 2024 06:38:14 +0000 Subject: [PATCH 21/34] fix comment --- source/libs/stream/src/streamSnapshot.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/source/libs/stream/src/streamSnapshot.c b/source/libs/stream/src/streamSnapshot.c index 160bd3525d..bbf7f5499d 100644 --- a/source/libs/stream/src/streamSnapshot.c +++ b/source/libs/stream/src/streamSnapshot.c @@ -577,8 +577,7 @@ int32_t streamSnapWriterOpen(void* pMeta, int64_t sver, int64_t ever, char* path pHandle->pDbSnapSet = taosArrayInit(8, sizeof(SBackendSnapFile2)); if (pHandle->pDbSnapSet == NULL) { terrno = TSDB_CODE_OUT_OF_MEMORY; - taosMemoryFree(pHandle->metaPath); - taosMemoryFree(pWriter); + streamSnapWriterClose(pWriter, 0); return terrno; } From eb1a5e3cc64ab939252835c78398229338a8edf2 Mon Sep 17 00:00:00 2001 From: Yihao Deng Date: Thu, 4 Jul 2024 08:05:51 +0000 Subject: [PATCH 22/34] fix comment --- source/libs/stream/src/streamSnapshot.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/source/libs/stream/src/streamSnapshot.c b/source/libs/stream/src/streamSnapshot.c index bbf7f5499d..3514ad218d 100644 --- a/source/libs/stream/src/streamSnapshot.c +++ b/source/libs/stream/src/streamSnapshot.c @@ -156,7 +156,7 @@ void snapFileDebugInfo(SBackendSnapFile2* pSnapFile) { char* buf = taosMemoryCalloc(1, cap); if (buf == NULL) { - stError("%s failed to alloc memory", STREAM_STATE_TRANSFER, tstrerror(TSDB_CODE_OUT_OF_MEMORY)); + stError("%s failed to alloc memory, reason:%s", STREAM_STATE_TRANSFER, tstrerror(TSDB_CODE_OUT_OF_MEMORY)); return; } From 5b1dddf4d55c6adba643fc42ea40bc813c0faf38 Mon Sep 17 00:00:00 2001 From: Yihao Deng Date: Thu, 4 Jul 2024 08:14:50 +0000 Subject: [PATCH 23/34] fix comment --- source/libs/stream/src/streamCheckpoint.c | 10 ++++--- source/libs/stream/src/streamSnapshot.c | 35 +++++++++++------------ 2 files changed, 23 insertions(+), 22 deletions(-) diff --git a/source/libs/stream/src/streamCheckpoint.c b/source/libs/stream/src/streamCheckpoint.c index df73b9f1c8..66dcfec86e 100644 --- a/source/libs/stream/src/streamCheckpoint.c +++ b/source/libs/stream/src/streamCheckpoint.c @@ -1150,12 +1150,14 @@ int32_t deleteCheckpointFile(const char* id, const char* name) { int32_t nBytes = snprintf(object, sizeof(object), "%s/%s", id, name); if (nBytes <= 0 || nBytes >= sizeof(object)) { - terrno = TSDB_CODE_OUT_OF_RANGE; - return -1; + return TSDB_CODE_OUT_OF_RANGE; } - char* tmp = object; - return s3DeleteObjects((const char**)&tmp, 1); + char* tmp = object; + int32_t code = s3DeleteObjects((const char**)&tmp, 1); + if (code != 0) { + return code; + } } int32_t streamTaskSendRestoreChkptMsg(SStreamTask* pTask) { diff --git a/source/libs/stream/src/streamSnapshot.c b/source/libs/stream/src/streamSnapshot.c index 3514ad218d..57723132d8 100644 --- a/source/libs/stream/src/streamSnapshot.c +++ b/source/libs/stream/src/streamSnapshot.c @@ -228,6 +228,7 @@ int32_t snapFileGenMeta(SBackendSnapFile2* pSnapFile) { return 0; } int32_t snapFileReadMeta(SBackendSnapFile2* pSnapFile) { + terrno = 0; TdDirPtr pDir = taosOpenDir(pSnapFile->path); if (NULL == pDir) { terrno = TAOS_SYSTEM_ERROR(errno); @@ -294,13 +295,13 @@ int32_t snapFileReadMeta(SBackendSnapFile2* pSnapFile) { return terrno; } int32_t streamBackendSnapInitFile(char* metaPath, SStreamTaskSnap* pSnap, SBackendSnapFile2* pSnapFile) { - int32_t code = -1; + terrno = 0; int32_t nBytes = 0; int32_t cap = strlen(pSnap->dbPrefixPath) + 256; char* path = taosMemoryCalloc(1, cap); if (path == NULL) { - return -1; + return TSDB_CODE_OUT_OF_MEMORY; } nBytes = snprintf(path, cap, "%s%s%s%s%s%" PRId64 "", pSnap->dbPrefixPath, TD_DIRSEP, "checkpoints", TD_DIRSEP, @@ -324,20 +325,19 @@ int32_t streamBackendSnapInitFile(char* metaPath, SStreamTaskSnap* pSnap, SBacke pSnapFile->path = path; pSnapFile->snapInfo = *pSnap; - if ((code = snapFileReadMeta(pSnapFile)) != 0) { + if ((terrno = snapFileReadMeta(pSnapFile)) != 0) { goto _ERROR; } - if ((code = snapFileGenMeta(pSnapFile)) != 0) { + if ((terrno = snapFileGenMeta(pSnapFile)) != 0) { goto _ERROR; } snapFileDebugInfo(pSnapFile); path = NULL; - code = 0; _ERROR: taosMemoryFree(path); - return code; + return terrno; } void snapFileDestroy(SBackendSnapFile2* pSnap) { taosMemoryFree(pSnap->pCheckpointMeta); @@ -365,19 +365,19 @@ void snapFileDestroy(SBackendSnapFile2* pSnap) { } int32_t streamSnapHandleInit(SStreamSnapHandle* pHandle, char* path, void* pMeta) { // impl later - int32_t code = 0; + terrno = 0; SArray* pSnapInfoSet = taosArrayInit(4, sizeof(SStreamTaskSnap)); if (pSnapInfoSet == NULL) { terrno = TSDB_CODE_OUT_OF_MEMORY; - return -1; + return terrno; } - code = streamCreateTaskDbSnapInfo(pMeta, path, pSnapInfoSet); - if (code != 0) { + terrno = streamCreateTaskDbSnapInfo(pMeta, path, pSnapInfoSet); + if (terrno != 0) { stError("failed to do task db snap info, reason:%s", tstrerror(terrno)); taosArrayDestroy(pSnapInfoSet); - return -1; + return terrno; } SArray* pDbSnapSet = taosArrayInit(8, sizeof(SBackendSnapFile2)); @@ -391,8 +391,8 @@ int32_t streamSnapHandleInit(SStreamSnapHandle* pHandle, char* path, void* pMeta SStreamTaskSnap* pSnap = taosArrayGet(pSnapInfoSet, i); SBackendSnapFile2 snapFile = {0}; - code = streamBackendSnapInitFile(path, pSnap, &snapFile); - ASSERT(code == 0); + terrno = streamBackendSnapInitFile(path, pSnap, &snapFile); + ASSERT(terrno == 0); taosArrayPush(pDbSnapSet, &snapFile); } pHandle->pDbSnapSet = pDbSnapSet; @@ -403,9 +403,7 @@ int32_t streamSnapHandleInit(SStreamSnapHandle* pHandle, char* path, void* pMeta _err: streamSnapHandleDestroy(pHandle); - - code = -1; - return code; + return terrno; } void streamSnapHandleDestroy(SStreamSnapHandle* handle) { @@ -437,9 +435,10 @@ int32_t streamSnapReaderOpen(void* pMeta, int64_t sver, int64_t chkpId, char* pa return -1; } - if (streamSnapHandleInit(&pReader->handle, (char*)path, pMeta) < 0) { + int32_t code = streamSnapHandleInit(&pReader->handle, (char*)path, pMeta); + if (code != 0) { taosMemoryFree(pReader); - return -1; + return code; } *ppReader = pReader; From 7171b6dd6d2bfa92baf76cd13427c10d51875979 Mon Sep 17 00:00:00 2001 From: Yihao Deng Date: Thu, 4 Jul 2024 08:34:42 +0000 Subject: [PATCH 24/34] fix comment --- source/libs/stream/src/streamCheckpoint.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/source/libs/stream/src/streamCheckpoint.c b/source/libs/stream/src/streamCheckpoint.c index 66dcfec86e..1e77e70efa 100644 --- a/source/libs/stream/src/streamCheckpoint.c +++ b/source/libs/stream/src/streamCheckpoint.c @@ -1135,7 +1135,7 @@ int32_t deleteCheckpoint(const char* id) { if (id == NULL || strlen(id) == 0) { terrno = TSDB_CODE_INVALID_PARA; stError("deleteCheckpoint parameters invalid"); - return -1; + return terrno; } if (strlen(tsSnodeAddress) != 0) { return deleteRsync(id); From 8428a5be374aef352de78f9101ad4af5a3c809f8 Mon Sep 17 00:00:00 2001 From: Yihao Deng Date: Thu, 4 Jul 2024 10:25:37 +0000 Subject: [PATCH 25/34] refactor backend --- include/util/taoserror.h | 1 + source/dnode/vnode/src/tq/tqStreamStateSnap.c | 11 +- source/libs/stream/src/streamBackendRocksdb.c | 429 +++++++++--------- source/libs/stream/src/streamCheckpoint.c | 58 ++- source/libs/stream/src/streamSnapshot.c | 92 ++-- source/util/src/terror.c | 1 + 6 files changed, 295 insertions(+), 297 deletions(-) diff --git a/include/util/taoserror.h b/include/util/taoserror.h index 2de336d036..359872e8cd 100644 --- a/include/util/taoserror.h +++ b/include/util/taoserror.h @@ -136,6 +136,7 @@ int32_t taosGetErrSize(); #define TSDB_CODE_TIMEOUT_ERROR TAOS_DEF_ERROR_CODE(0, 0x012C) #define TSDB_CODE_MSG_ENCODE_ERROR TAOS_DEF_ERROR_CODE(0, 0x012D) #define TSDB_CODE_NO_ENOUGH_DISKSPACE TAOS_DEF_ERROR_CODE(0, 0x012E) +#define TSDB_CODE_THIRDPARTY_ERROR TAOS_DEF_ERROR_CODE(0, 0x012F) #define TSDB_CODE_APP_IS_STARTING TAOS_DEF_ERROR_CODE(0, 0x0130) #define TSDB_CODE_APP_IS_STOPPING TAOS_DEF_ERROR_CODE(0, 0x0131) diff --git a/source/dnode/vnode/src/tq/tqStreamStateSnap.c b/source/dnode/vnode/src/tq/tqStreamStateSnap.c index be768e375e..07bfd52a9c 100644 --- a/source/dnode/vnode/src/tq/tqStreamStateSnap.c +++ b/source/dnode/vnode/src/tq/tqStreamStateSnap.c @@ -132,8 +132,7 @@ int32_t streamStateSnapWriterOpen(STQ* pTq, int64_t sver, int64_t ever, SStreamS // alloc pWriter = (SStreamStateWriter*)taosMemoryCalloc(1, sizeof(*pWriter)); if (pWriter == NULL) { - terrno = TSDB_CODE_OUT_OF_MEMORY; - code = terrno; + code = TSDB_CODE_OUT_OF_MEMORY; goto _err; } pWriter->pTq = pTq; @@ -141,14 +140,14 @@ int32_t streamStateSnapWriterOpen(STQ* pTq, int64_t sver, int64_t ever, SStreamS pWriter->ever = ever; if (taosMkDir(pTq->pStreamMeta->path) != 0) { - terrno = TAOS_SYSTEM_ERROR(errno); + code = TAOS_SYSTEM_ERROR(errno); tqError("vgId:%d, vnode %s snapshot writer failed to create directory %s since %s", TD_VID(pTq->pVnode), STREAM_STATE_TRANSFER, pTq->pStreamMeta->path, tstrerror(terrno)); goto _err; } SStreamSnapWriter* pSnapWriter = NULL; - if (streamSnapWriterOpen(pTq, sver, ever, pTq->pStreamMeta->path, &pSnapWriter) < 0) { + if ((code = streamSnapWriterOpen(pTq, sver, ever, pTq->pStreamMeta->path, &pSnapWriter)) < 0) { goto _err; } @@ -157,14 +156,14 @@ int32_t streamStateSnapWriterOpen(STQ* pTq, int64_t sver, int64_t ever, SStreamS pWriter->pWriterImpl = pSnapWriter; *ppWriter = pWriter; - return code; + return 0; _err: tqError("vgId:%d, vnode %s snapshot writer failed to open since %s", TD_VID(pTq->pVnode), STREAM_STATE_TRANSFER, tstrerror(terrno)); taosMemoryFree(pWriter); *ppWriter = NULL; - return -1; + return code; } int32_t streamStateSnapWriterClose(SStreamStateWriter* pWriter, int8_t rollback) { diff --git a/source/libs/stream/src/streamBackendRocksdb.c b/source/libs/stream/src/streamBackendRocksdb.c index 54abba8bdc..057ff56aa9 100644 --- a/source/libs/stream/src/streamBackendRocksdb.c +++ b/source/libs/stream/src/streamBackendRocksdb.c @@ -213,38 +213,34 @@ int32_t rebuildDirFromCheckpoint(const char* path, int64_t chkpId, char** dst) { char* state = taosMemoryCalloc(1, cap); if (state == NULL) { - terrno = TSDB_CODE_OUT_OF_MEMORY; - return terrno; + return TSDB_CODE_OUT_OF_MEMORY; } nBytes = snprintf(state, cap, "%s%s%s", path, TD_DIRSEP, "state"); if (nBytes <= 0 || nBytes >= cap) { - terrno = TSDB_CODE_OUT_OF_RANGE; taosMemoryFree(state); - return terrno; + return TSDB_CODE_OUT_OF_RANGE; } if (chkpId != 0) { char* chkp = taosMemoryCalloc(1, cap); if (chkp == NULL) { - terrno = TSDB_CODE_OUT_OF_MEMORY; taosMemoryFree(state); - return terrno; + return TSDB_CODE_OUT_OF_MEMORY; } nBytes = snprintf(chkp, cap, "%s%s%s%scheckpoint%" PRId64 "", path, TD_DIRSEP, "checkpoints", TD_DIRSEP, chkpId); if (nBytes <= 0 || nBytes >= cap) { - terrno = TSDB_CODE_OUT_OF_RANGE; taosMemoryFree(state); taosMemoryFree(chkp); - return terrno; + return TSDB_CODE_OUT_OF_RANGE; } if (taosIsDir(chkp) && isValidCheckpoint(chkp)) { cleanDir(state, ""); code = backendCopyFiles(chkp, state); if (code != 0) { - stError("failed to restart stream backend from %s, reason: %s", chkp, tstrerror(TAOS_SYSTEM_ERROR(terrno))); + stError("failed to restart stream backend from %s, reason: %s", chkp, tstrerror(TAOS_SYSTEM_ERROR(code))); } else { stInfo("start to restart stream backend at checkpoint path: %s", chkp); } @@ -254,8 +250,7 @@ int32_t rebuildDirFromCheckpoint(const char* path, int64_t chkpId, char** dst) { tstrerror(TAOS_SYSTEM_ERROR(errno)), state); code = taosMkDir(state); if (code != 0) { - terrno = TAOS_SYSTEM_ERROR(errno); - code = terrno; + code = TAOS_SYSTEM_ERROR(errno); } } @@ -278,50 +273,48 @@ typedef struct { } SSChkpMetaOnS3; int32_t remoteChkp_readMetaData(char* path, SSChkpMetaOnS3** pMeta) { - int32_t code = -1; + int32_t code = 0; int32_t cap = strlen(path) + 32; TdFilePtr pFile = NULL; char* metaPath = taosMemoryCalloc(1, cap); if (metaPath == NULL) { - terrno = TSDB_CODE_OUT_OF_MEMORY; - return terrno; + return TSDB_CODE_OUT_OF_MEMORY; } int32_t n = snprintf(metaPath, cap, "%s%s%s", path, TD_DIRSEP, "META"); if (n <= 0 || n >= cap) { - terrno = TSDB_CODE_OUT_OF_MEMORY; taosMemoryFree(metaPath); - return terrno; + return TSDB_CODE_OUT_OF_MEMORY; } pFile = taosOpenFile(path, TD_FILE_READ); if (pFile == NULL) { - terrno = TAOS_SYSTEM_ERROR(errno); + code = TAOS_SYSTEM_ERROR(errno); goto _EXIT; } char buf[256] = {0}; if (taosReadFile(pFile, buf, sizeof(buf)) <= 0) { - terrno = TAOS_SYSTEM_ERROR(errno); + code = TAOS_SYSTEM_ERROR(errno); goto _EXIT; } SSChkpMetaOnS3* p = taosMemoryCalloc(1, sizeof(SSChkpMetaOnS3)); if (p == NULL) { - terrno = TSDB_CODE_OUT_OF_MEMORY; + code = TSDB_CODE_OUT_OF_MEMORY; goto _EXIT; } n = sscanf(buf, META_ON_S3_FORMATE, p->pCurrName, &p->currChkptId, p->pManifestName, &p->manifestChkptId, p->processName, &p->processId); if (n != 6) { - terrno = TSDB_CODE_INVALID_MSG; + code = TSDB_CODE_INVALID_MSG; taosMemoryFree(p); goto _EXIT; } if (p->currChkptId != p->manifestChkptId) { - terrno = TSDB_CODE_INVALID_MSG; + code = TSDB_CODE_INVALID_MSG; taosMemoryFree(p); goto _EXIT; } @@ -330,53 +323,52 @@ int32_t remoteChkp_readMetaData(char* path, SSChkpMetaOnS3** pMeta) { _EXIT: taosCloseFile(&pFile); taosMemoryFree(metaPath); - code = terrno; return code; } int32_t remoteChkp_validAndCvtMeta(char* path, SSChkpMetaOnS3* pMeta, int64_t chkpId) { - int32_t code = -1; + int32_t code = 0; int32_t nBytes = 0; int32_t cap = strlen(path) + 64; char* src = taosMemoryCalloc(1, cap); char* dst = taosMemoryCalloc(1, cap); if (src == NULL || dst == NULL) { - terrno = TSDB_CODE_OUT_OF_MEMORY; + code = TSDB_CODE_OUT_OF_MEMORY; goto _EXIT; } if (pMeta->currChkptId != chkpId || pMeta->manifestChkptId != chkpId) { - terrno = TSDB_CODE_INVALID_CFG; + code = TSDB_CODE_INVALID_CFG; goto _EXIT; } // rename current_chkp/mainfest to current for (int i = 0; i < 2; i++) { char* key = (i == 0 ? pMeta->pCurrName : pMeta->pManifestName); if (strlen(key) <= 0) { - terrno = TSDB_CODE_INVALID_PARA; + code = TSDB_CODE_INVALID_PARA; goto _EXIT; } nBytes = snprintf(src, cap, "%s%s%s_%" PRId64 "", path, TD_DIRSEP, key, pMeta->currChkptId); if (nBytes <= 0 || nBytes >= cap) { - terrno = TSDB_CODE_OUT_OF_RANGE; + code = TSDB_CODE_OUT_OF_RANGE; goto _EXIT; } if (taosStatFile(src, NULL, NULL, NULL) != 0) { - terrno = TAOS_SYSTEM_ERROR(errno); + code = TAOS_SYSTEM_ERROR(errno); goto _EXIT; } nBytes = snprintf(dst, cap, "%s%s%s", path, TD_DIRSEP, key); if (nBytes <= 0 || nBytes >= cap) { - terrno = TSDB_CODE_OUT_OF_RANGE; + code = TSDB_CODE_OUT_OF_RANGE; goto _EXIT; } if (taosRenameFile(src, dst) != 0) { - terrno = TAOS_SYSTEM_ERROR(errno); + code = TAOS_SYSTEM_ERROR(errno); goto _EXIT; } @@ -389,11 +381,10 @@ int32_t remoteChkp_validAndCvtMeta(char* path, SSChkpMetaOnS3* pMeta, int64_t ch _EXIT: taosMemoryFree(src); taosMemoryFree(dst); - code = terrno; return code; } int32_t remoteChkpGetDelFile(char* path, SArray* toDel) { - int32_t code = -1; + int32_t code = 0; int32_t nBytes = 0; SSChkpMetaOnS3* pMeta = NULL; @@ -408,28 +399,24 @@ int32_t remoteChkpGetDelFile(char* path, SArray* toDel) { int32_t cap = strlen(key) + 32; char* p = taosMemoryCalloc(1, cap); if (p == NULL) { - terrno = TSDB_CODE_OUT_OF_MEMORY; taosMemoryFree(pMeta); - return -1; + return TSDB_CODE_OUT_OF_MEMORY; } nBytes = snprintf(p, cap, "%s_%" PRId64 "", key, pMeta->currChkptId); if (nBytes <= 0 || nBytes >= cap) { - terrno = TSDB_CODE_OUT_OF_RANGE; taosMemoryFree(pMeta); taosMemoryFree(p); - return code; + return TSDB_CODE_OUT_OF_RANGE; } if (taosArrayPush(toDel, &p) == NULL) { - terrno = TSDB_CODE_OUT_OF_MEMORY; taosMemoryFree(pMeta); taosMemoryFree(p); - return code; + return TSDB_CODE_OUT_OF_MEMORY; } } - code = 0; - return code; + return 0; } void cleanDir(const char* pPath, const char* id) { @@ -475,19 +462,18 @@ int32_t rebuildDataFromS3(char* chkpPath, int64_t chkpId) { int32_t code = remoteChkp_readMetaData(chkpPath, &pMeta); if (code != 0) { - return -1; + return code; } if (pMeta->currChkptId != chkpId || pMeta->manifestChkptId != chkpId) { taosMemoryFree(pMeta); - terrno = TSDB_CODE_INVALID_PARA; - return -1; + return TSDB_CODE_INVALID_PARA; } code = remoteChkp_validAndCvtMeta(chkpPath, pMeta, chkpId); if (code != 0) { taosMemoryFree(pMeta); - return -1; + return code; } return chkpAddExtraInfo(chkpPath, chkpId, pMeta->processId); @@ -504,22 +490,20 @@ int32_t rebuildFromRemoteChkp_s3(const char* key, char* chkpPath, int64_t chkpId char* defaultTmp = taosMemoryCalloc(1, cap); if (defaultTmp == NULL) { - terrno = TSDB_CODE_OUT_OF_MEMORY; - return -1; + return TSDB_CODE_OUT_OF_MEMORY; } int32_t nBytes = snprintf(defaultPath, cap, "%s%s", defaultPath, "_tmp"); if (nBytes <= 0 || nBytes >= cap) { - terrno = TSDB_CODE_OUT_OF_RANGE; taosMemoryFree(defaultPath); - return -1; + return TSDB_CODE_OUT_OF_RANGE; } if (taosIsDir(defaultTmp)) taosRemoveDir(defaultTmp); if (taosIsDir(defaultPath)) { code = taosRenameFile(defaultPath, defaultTmp); if (code != 0) { - terrno = TAOS_SYSTEM_ERROR(errno); + code = TAOS_SYSTEM_ERROR(errno); goto _EXIT; } else { rename = 1; @@ -527,7 +511,7 @@ int32_t rebuildFromRemoteChkp_s3(const char* key, char* chkpPath, int64_t chkpId } else { code = taosMkDir(defaultPath); if (code != 0) { - terrno = TAOS_SYSTEM_ERROR(errno); + code = TAOS_SYSTEM_ERROR(errno); goto _EXIT; } } @@ -593,7 +577,7 @@ int32_t backendFileCopyFilesImpl(const char* src, const char* dst) { const char* info = "info"; size_t infoLen = strlen(info); - int32_t code = -1; + int32_t code = 0; int32_t sLen = strlen(src); int32_t dLen = strlen(dst); int32_t cap = TMAX(sLen, dLen) + 64; @@ -602,14 +586,17 @@ int32_t backendFileCopyFilesImpl(const char* src, const char* dst) { char* srcName = taosMemoryCalloc(1, cap); char* dstName = taosMemoryCalloc(1, cap); if (srcName == NULL || dstName == NULL) { - terrno = TSDB_CODE_OUT_OF_MEMORY; - return -1; + taosMemoryFree(srcName); + taosMemoryFree(dstName); + code = TSDB_CODE_OUT_OF_MEMORY; + return code; } // copy file to dst TdDirPtr pDir = taosOpenDir(src); if (pDir == NULL) { - terrno = TAOS_SYSTEM_ERROR(errno); + code = TAOS_SYSTEM_ERROR(errno); + goto _ERROR; } errno = 0; @@ -622,36 +609,36 @@ int32_t backendFileCopyFilesImpl(const char* src, const char* dst) { nBytes = snprintf(srcName, cap, "%s%s%s", src, TD_DIRSEP, name); if (nBytes <= 0 || nBytes >= cap) { - terrno = TSDB_CODE_OUT_OF_RANGE; + code = TSDB_CODE_OUT_OF_RANGE; goto _ERROR; } nBytes = snprintf(dstName, cap, "%s%s%s", dst, TD_DIRSEP, name); if (nBytes <= 0 || nBytes >= cap) { - terrno = TSDB_CODE_OUT_OF_RANGE; + code = TSDB_CODE_OUT_OF_RANGE; goto _ERROR; } if (strncmp(name, current, strlen(name) <= currLen ? strlen(name) : currLen) == 0) { code = copyFiles_create(srcName, dstName, 0); if (code != 0) { - terrno = TAOS_SYSTEM_ERROR(errno); - stError("failed to copy file, detail: %s to %s reason:%s", srcName, dstName, tstrerror(terrno)); + code = TAOS_SYSTEM_ERROR(errno); + stError("failed to copy file, detail: %s to %s reason:%s", srcName, dstName, tstrerror(code)); goto _ERROR; } } else if (strncmp(name, info, strlen(name) <= infoLen ? strlen(name) : infoLen) == 0) { code = copyFiles_create(srcName, dstName, 0); if (code != 0) { - terrno = TAOS_SYSTEM_ERROR(errno); - stError("failed to copy file, detail: %s to %s reason:%s", srcName, dstName, tstrerror(terrno)); + code = TAOS_SYSTEM_ERROR(errno); + stError("failed to copy file, detail: %s to %s reason:%s", srcName, dstName, tstrerror(code)); goto _ERROR; } } else { code = copyFiles_hardlink(srcName, dstName, 0); if (code != 0) { - terrno = TAOS_SYSTEM_ERROR(errno); - stError("failed to hard link file, detail:%s to %s, reason:%s", srcName, dstName, tstrerror(terrno)); + code = TAOS_SYSTEM_ERROR(errno); + stError("failed to hard link file, detail:%s to %s, reason:%s", srcName, dstName, tstrerror(code)); goto _ERROR; } else { stDebug("succ hard link file:%s to %s", srcName, dstName); @@ -688,8 +675,7 @@ static int32_t rebuildFromLocalCheckpoint(const char* pTaskIdStr, const char* ch if (code != TSDB_CODE_SUCCESS) { cleanDir(defaultPath, pTaskIdStr); stError("%s failed to start stream backend from local %s, reason:%s, try download checkpoint from remote", - pTaskIdStr, checkpointPath, tstrerror(TAOS_SYSTEM_ERROR(terrno))); - terrno = 0; + pTaskIdStr, checkpointPath, tstrerror(TAOS_SYSTEM_ERROR(code))); code = TSDB_CODE_SUCCESS; } else { stInfo("%s copy checkpoint data from:%s to:%s succ, try to start stream backend", pTaskIdStr, checkpointPath, @@ -705,7 +691,7 @@ static int32_t rebuildFromLocalCheckpoint(const char* pTaskIdStr, const char* ch int32_t restoreCheckpointData(const char* path, const char* key, int64_t chkptId, char** dbPrefixPath, char** dbPath, int64_t* processVer) { - int32_t code = -1; + int32_t code = 0; char* prefixPath = NULL; char* defaultPath = NULL; @@ -721,43 +707,43 @@ int32_t restoreCheckpointData(const char* path, const char* key, int64_t chkptId checkpointPath = taosMemoryCalloc(1, cap); checkpointRoot = taosMemoryCalloc(1, cap); if (prefixPath == NULL || defaultPath == NULL || checkpointPath == NULL || checkpointRoot == NULL) { - terrno = TSDB_CODE_OUT_OF_MEMORY; + code = TSDB_CODE_OUT_OF_MEMORY; goto _EXIT; } nBytes = snprintf(prefixPath, cap, "%s%s%s", path, TD_DIRSEP, key); if (nBytes <= 0 || nBytes >= cap) { - terrno = TSDB_CODE_OUT_OF_RANGE; + code = TSDB_CODE_OUT_OF_RANGE; goto _EXIT; } code = createDirIfNotExist(prefixPath); if (code != 0) { - terrno = TAOS_SYSTEM_ERROR(errno); + code = TAOS_SYSTEM_ERROR(errno); goto _EXIT; } nBytes = snprintf(defaultPath, cap, "%s%s%s", prefixPath, TD_DIRSEP, "state"); if (nBytes <= 0 || nBytes >= cap) { - terrno = TSDB_CODE_OUT_OF_RANGE; + code = TSDB_CODE_OUT_OF_RANGE; goto _EXIT; } code = createDirIfNotExist(defaultPath); if (code != 0) { - terrno = TAOS_SYSTEM_ERROR(errno); + code = TAOS_SYSTEM_ERROR(errno); goto _EXIT; } nBytes = snprintf(checkpointRoot, cap, "%s%s%s", prefixPath, TD_DIRSEP, "checkpoints"); if (nBytes <= 0 || nBytes >= cap) { - terrno = TSDB_CODE_OUT_OF_RANGE; + code = TSDB_CODE_OUT_OF_RANGE; goto _EXIT; } code = createDirIfNotExist(checkpointRoot); if (code != 0) { - terrno = TAOS_SYSTEM_ERROR(errno); + code = TAOS_SYSTEM_ERROR(errno); goto _EXIT; } @@ -766,19 +752,18 @@ int32_t restoreCheckpointData(const char* path, const char* key, int64_t chkptId nBytes = snprintf(checkpointPath, cap, "%s%s%s%s%s%" PRId64 "", prefixPath, TD_DIRSEP, "checkpoints", TD_DIRSEP, "checkpoint", chkptId); if (nBytes <= 0 || nBytes >= cap) { - terrno = TSDB_CODE_OUT_OF_RANGE; + code = TSDB_CODE_OUT_OF_RANGE; goto _EXIT; } code = rebuildFromLocalCheckpoint(key, checkpointPath, chkptId, defaultPath, processVer); if (code != 0) { - terrno = 0; code = rebuildFromRemoteCheckpoint(key, checkpointPath, chkptId, defaultPath); } if (code != 0) { stError("failed to start stream backend at %s, reason: %s, restart from default defaultPath:%s, reason:%s", - checkpointPath, tstrerror(code), defaultPath, tstrerror(terrno)); + checkpointPath, tstrerror(code), defaultPath, tstrerror(code)); code = 0; // reset the error code } } else { // no valid checkpoint id @@ -802,21 +787,6 @@ _EXIT: return code; } -bool streamBackendDataIsExist(const char* path, int64_t chkpId, int32_t vgId) { - bool exist = true; - char* state = taosMemoryCalloc(1, strlen(path) + 32); - if (state == NULL) { - terrno = TSDB_CODE_OUT_OF_MEMORY; - return false; - } - sprintf(state, "%s%s%s", path, TD_DIRSEP, "state"); - if (!taosDirExist(state)) { - exist = false; - } - taosMemoryFree(state); - return exist; -} - void* streamBackendInit(const char* streamPath, int64_t chkpId, int32_t vgId) { char* backendPath = NULL; int32_t code = rebuildDirFromCheckpoint(streamPath, chkpId, &backendPath); @@ -1313,12 +1283,14 @@ int32_t chkpDoDbCheckpoint(rocksdb_t* db, char* path) { if (cp == NULL || err != NULL) { stError("failed to do checkpoint at:%s, reason:%s", path, err); taosMemoryFreeClear(err); + code = TSDB_CODE_THIRDPARTY_ERROR; goto _ERROR; } rocksdb_checkpoint_create(cp, path, UINT64_MAX, &err); if (err != NULL) { stError("failed to do checkpoint at:%s, reason:%s", path, err); taosMemoryFreeClear(err); + code = TSDB_CODE_THIRDPARTY_ERROR; } else { code = 0; } @@ -1332,13 +1304,17 @@ int32_t chkpPreFlushDb(rocksdb_t* db, rocksdb_column_family_handle_t** cf, int32 char* err = NULL; rocksdb_flushoptions_t* flushOpt = rocksdb_flushoptions_create(); + if (flushOpt == NULL) { + return TSDB_CODE_OUT_OF_MEMORY; + } + rocksdb_flushoptions_set_wait(flushOpt, 1); rocksdb_flush_cfs(db, flushOpt, cf, nCf, &err); if (err != NULL) { stError("failed to flush db before streamBackend clean up, reason:%s", err); taosMemoryFree(err); - code = -1; + code = TSDB_CODE_THIRDPARTY_ERROR; } rocksdb_flushoptions_destroy(flushOpt); return code; @@ -1346,28 +1322,47 @@ int32_t chkpPreFlushDb(rocksdb_t* db, rocksdb_column_family_handle_t** cf, int32 int32_t chkpPreBuildDir(char* path, int64_t chkpId, char** chkpDir, char** chkpIdDir) { int32_t code = 0; - char* pChkpDir = taosMemoryCalloc(1, 256); - char* pChkpIdDir = taosMemoryCalloc(1, 256); + int32_t cap = strlen(path) + 256; + int32_t nBytes = 0; - sprintf(pChkpDir, "%s%s%s", path, TD_DIRSEP, "checkpoints"); - code = taosMulModeMkDir(pChkpDir, 0755, true); - if (code != 0) { - stError("failed to prepare checkpoint dir, path:%s, reason:%s", path, tstrerror(code)); - taosMemoryFree(pChkpDir); - taosMemoryFree(pChkpIdDir); - code = -1; - return code; + char* pChkpDir = taosMemoryCalloc(1, cap); + char* pChkpIdDir = taosMemoryCalloc(1, cap); + if (pChkpDir == NULL || pChkpIdDir == NULL) { + code = TSDB_CODE_OUT_OF_MEMORY; + goto _EXIT; + } + + nBytes = snprintf(pChkpDir, cap, "%s%s%s", path, TD_DIRSEP, "checkpoints"); + if (nBytes <= 0 || nBytes >= cap) { + code = TSDB_CODE_OUT_OF_RANGE; + goto _EXIT; + } + + nBytes = snprintf(pChkpIdDir, cap, "%s%s%s%" PRId64, pChkpDir, TD_DIRSEP, "checkpoint", chkpId); + if (nBytes <= 0 || nBytes >= cap) { + code = TSDB_CODE_OUT_OF_RANGE; + goto _EXIT; + } + + code = taosMulModeMkDir(pChkpDir, 0755, true); + if (code != 0) { + code = TAOS_SYSTEM_ERROR(errno); + stError("failed to prepare checkpoint dir, path:%s, reason:%s", path, tstrerror(code)); + goto _EXIT; } - sprintf(pChkpIdDir, "%s%s%s%" PRId64, pChkpDir, TD_DIRSEP, "checkpoint", chkpId); if (taosIsDir(pChkpIdDir)) { stInfo("stream rm exist checkpoint%s", pChkpIdDir); taosRemoveDir(pChkpIdDir); } + *chkpDir = pChkpDir; *chkpIdDir = pChkpIdDir; - return 0; +_EXIT: + taosMemoryFree(pChkpDir); + taosMemoryFree(pChkpIdDir); + return code; } int32_t taskDbBuildSnap(void* arg, SArray* pSnap) { @@ -1396,7 +1391,6 @@ int32_t taskDbBuildSnap(void* arg, SArray* pSnap) { // remove chkpId from in-use-ckpkIdSet taskDbUnRefChkp(pTaskDb, pTaskDb->chkpId); taskDbRemoveRef(pTaskDb); - code = -1; break; } @@ -1409,8 +1403,7 @@ int32_t taskDbBuildSnap(void* arg, SArray* pSnap) { // remove chkpid from chkp-in-use set taskDbUnRefChkp(pTaskDb, pTaskDb->chkpId); taskDbRemoveRef(pTaskDb); - terrno = TSDB_CODE_OUT_OF_MEMORY; - code = -1; + code = TSDB_CODE_OUT_OF_MEMORY; break; } taosArrayPush(pSnap, &snap); @@ -1491,29 +1484,29 @@ int64_t taskGetDBRef(void* arg) { int32_t chkpLoadExtraInfo(char* pChkpIdDir, int64_t* chkpId, int64_t* processId) { TdFilePtr pFile = NULL; - int32_t code = -1; + int32_t code = 0; char buf[256] = {0}; int32_t nBytes = 0; int32_t len = strlen(pChkpIdDir); if (len == 0) { - terrno = TSDB_CODE_INVALID_PARA; - stError("failed to load extra info, dir:%s, reason:%s", pChkpIdDir, tstrerror(terrno)); - return terrno; + code = TSDB_CODE_INVALID_PARA; + stError("failed to load extra info, dir:%s, reason:%s", pChkpIdDir, tstrerror(code)); + return code; } int32_t cap = len + 64; char* pDst = taosMemoryCalloc(1, cap); if (pDst == NULL) { - terrno = TSDB_CODE_OUT_OF_MEMORY; + code = TSDB_CODE_OUT_OF_MEMORY; stError("failed to alloc memory to load extra info, dir:%s", pChkpIdDir); goto _EXIT; } nBytes = snprintf(pDst, cap, "%s%sinfo", pChkpIdDir, TD_DIRSEP); if (nBytes <= 0 || nBytes >= cap) { - terrno = TSDB_CODE_OUT_OF_RANGE; + code = TSDB_CODE_OUT_OF_RANGE; stError("failed to build dst to load extra info, dir:%s", pChkpIdDir); goto _EXIT; } @@ -1526,31 +1519,31 @@ int32_t chkpLoadExtraInfo(char* pChkpIdDir, int64_t* chkpId, int64_t* processId) code = 0; goto _EXIT; } else { - terrno = TAOS_SYSTEM_ERROR(errno); - stError("failed to open file to load extra info, file:%s", pDst); + code = TAOS_SYSTEM_ERROR(errno); + stError("failed to open file to load extra info, file:%s, reason:%s", pDst, tstrerror(code)); } goto _EXIT; } if (taosReadFile(pFile, buf, sizeof(buf)) <= 0) { - terrno = TAOS_SYSTEM_ERROR(errno); - stError("failed to read file to load extra info, file:%s, reason:%s", pDst, tstrerror(terrno)); + code = TAOS_SYSTEM_ERROR(errno); + stError("failed to read file to load extra info, file:%s, reason:%s", pDst, tstrerror(code)); goto _EXIT; } if (sscanf(buf, "%" PRId64 " %" PRId64 "", chkpId, processId) < 2) { - terrno = TSDB_CODE_INVALID_PARA; - stError("failed to read file content to load extra info, file:%s, reason:%s", pDst, tstrerror(terrno)); + code = TSDB_CODE_INVALID_PARA; + stError("failed to read file content to load extra info, file:%s, reason:%s", pDst, tstrerror(code)); goto _EXIT; } code = 0; _EXIT: taosMemoryFree(pDst); taosCloseFile(&pFile); - return terrno; + return code; } int32_t chkpAddExtraInfo(char* pChkpIdDir, int64_t chkpId, int64_t processId) { - int32_t code = -1; + int32_t code = 0; TdFilePtr pFile = NULL; @@ -1559,41 +1552,43 @@ int32_t chkpAddExtraInfo(char* pChkpIdDir, int64_t chkpId, int64_t processId) { int32_t len = strlen(pChkpIdDir); if (len == 0) { - terrno = TSDB_CODE_INVALID_PARA; - stError("failed to add extra info, dir:%s, reason:%s", pChkpIdDir, tstrerror(terrno)); - return -1; + code = TSDB_CODE_INVALID_PARA; + stError("failed to add extra info, dir:%s, reason:%s", pChkpIdDir, tstrerror(code)); + return code; } + int32_t cap = len + 64; char* pDst = taosMemoryCalloc(1, cap); if (pDst == NULL) { - terrno = TSDB_CODE_OUT_OF_MEMORY; + code = TSDB_CODE_OUT_OF_MEMORY; stError("failed to alloc memory to add extra info, dir:%s", pChkpIdDir); goto _EXIT; } nBytes = snprintf(pDst, cap, "%s%sinfo", pChkpIdDir, TD_DIRSEP); if (nBytes <= 0 || nBytes >= cap) { - stError("failed to build dst to add extra info, dir:%s", pChkpIdDir); + code = TSDB_CODE_OUT_OF_RANGE; + stError("failed to build dst to add extra info, dir:%s, reason:%d", pChkpIdDir, tstrerror(code)); goto _EXIT; } pFile = taosOpenFile(pDst, TD_FILE_CREATE | TD_FILE_WRITE | TD_FILE_TRUNC); if (pFile == NULL) { - terrno = TAOS_SYSTEM_ERROR(errno); - stError("failed to open file to add extra info, file:%s", pDst); + code = TAOS_SYSTEM_ERROR(errno); + stError("failed to open file to add extra info, file:%s, reason:%s", pDst, tstrerror(code)); goto _EXIT; } nBytes = snprintf(buf, sizeof(buf), "%" PRId64 " %" PRId64 "", chkpId, processId); if (nBytes <= 0 || nBytes >= sizeof(buf)) { - terrno = TSDB_CODE_OUT_OF_RANGE; - stError("failed to build content to add extra info, dir:%s", pChkpIdDir); + code = TSDB_CODE_OUT_OF_RANGE; + stError("failed to build content to add extra info, dir:%s,reason:%d", pChkpIdDir, tstrerror(code)); goto _EXIT; } if (nBytes != taosWriteFile(pFile, buf, nBytes)) { - terrno = TAOS_SYSTEM_ERROR(errno); - stError("failed to write file to add extra info, file:%s, reason:%s", pDst, tstrerror(terrno)); + code = TAOS_SYSTEM_ERROR(errno); + stError("failed to write file to add extra info, file:%s, reason:%s", pDst, tstrerror(code)); goto _EXIT; } code = 0; @@ -1606,17 +1601,18 @@ _EXIT: int32_t taskDbDoCheckpoint(void* arg, int64_t chkpId, int64_t processId) { STaskDbWrapper* pTaskDb = arg; int64_t st = taosGetTimestampMs(); - int32_t code = -1; + int32_t code = 0; int64_t refId = pTaskDb->refId; if (taosAcquireRef(taskDbWrapperId, refId) == NULL) { - return -1; + code = terrno; + terrno = 0; + return code; } char* pChkpDir = NULL; char* pChkpIdDir = NULL; - if (chkpPreBuildDir(pTaskDb->path, chkpId, &pChkpDir, &pChkpIdDir) != 0) { - code = -1; + if ((code = chkpPreBuildDir(pTaskDb->path, chkpId, &pChkpDir, &pChkpIdDir)) < 0) { goto _EXIT; } // Get all cf and acquire cfWrappter @@ -2404,16 +2400,15 @@ int32_t taskDbBuildFullPath(char* path, char* key, char** dbFullPath, char** sta int32_t code = 0; char* statePath = taosMemoryCalloc(1, strlen(path) + 128); if (statePath == NULL) { - terrno = TSDB_CODE_OUT_OF_MEMORY; - return -1; + return TSDB_CODE_OUT_OF_MEMORY; } sprintf(statePath, "%s%s%s", path, TD_DIRSEP, key); if (!taosDirExist(statePath)) { code = taosMulMkDir(statePath); if (code != 0) { - terrno = errno; - stError("failed to create dir: %s, reason:%s", statePath, tstrerror(terrno)); + code = TAOS_SYSTEM_ERROR(errno); + stError("failed to create dir: %s, reason:%s", statePath, tstrerror(code)); taosMemoryFree(statePath); return code; } @@ -2422,15 +2417,14 @@ int32_t taskDbBuildFullPath(char* path, char* key, char** dbFullPath, char** sta char* dbPath = taosMemoryCalloc(1, strlen(statePath) + 128); if (dbPath == NULL) { taosMemoryFree(statePath); - terrno = TSDB_CODE_OUT_OF_MEMORY; - return -1; + return TSDB_CODE_OUT_OF_MEMORY; } sprintf(dbPath, "%s%s%s", statePath, TD_DIRSEP, "state"); if (!taosDirExist(dbPath)) { code = taosMulMkDir(dbPath); if (code != 0) { - terrno = errno; + code = TAOS_SYSTEM_ERROR(errno); stError("failed to create dir: %s, reason:%s", dbPath, tstrerror(code)); taosMemoryFree(statePath); taosMemoryFree(dbPath); @@ -2511,8 +2505,9 @@ _EXIT: STaskDbWrapper* taskDbOpen(const char* path, const char* key, int64_t chkptId, int64_t* processVer) { char* statePath = NULL; char* dbPath = NULL; - - if (restoreCheckpointData(path, key, chkptId, &statePath, &dbPath, processVer) != 0) { + int code = 0; + if ((code = restoreCheckpointData(path, key, chkptId, &statePath, &dbPath, processVer)) < 0) { + terrno = code; stError("failed to restore checkpoint data, path:%s, key:%s, checkpointId: %" PRId64 "reason:%s", path, key, chkptId, tstrerror(terrno)); return NULL; @@ -2521,17 +2516,14 @@ STaskDbWrapper* taskDbOpen(const char* path, const char* key, int64_t chkptId, i STaskDbWrapper* pTaskDb = taskDbOpenImpl(key, statePath, dbPath); if (pTaskDb != NULL) { int64_t chkpId = -1, ver = -1; - if (chkpLoadExtraInfo(dbPath, &chkpId, &ver) == 0) { + if ((code = chkpLoadExtraInfo(dbPath, &chkpId, &ver) == 0)) { *processVer = ver; } else { - if (terrno == TSDB_CODE_OUT_OF_MEMORY) { - taskDbDestroy(pTaskDb, false); - return NULL; - } else { - // not info file exists, caller handle this situation - terrno = 0; - *processVer = -1; - } + terrno = code; + stError("failed to load extra info, path:%s, key:%s, checkpointId: %" PRId64 "reason:%s", path, key, chkptId, + tstrerror(terrno)); + taskDbDestroy(pTaskDb, false); + return NULL; } } @@ -2623,27 +2615,27 @@ void taskDbDestroy(void* pDb, bool flush) { void taskDbDestroy2(void* pDb) { taskDbDestroy(pDb, true); } int32_t taskDbGenChkpUploadData__rsync(STaskDbWrapper* pDb, int64_t chkpId, char** path) { - int32_t code = -1; + int32_t code = 0; int64_t refId = pDb->refId; int32_t nBytes = 0; if (taosAcquireRef(taskDbWrapperId, refId) == NULL) { - return -1; + code = terrno; + return code; } int32_t cap = strlen(pDb->path) + 128; char* buf = taosMemoryCalloc(1, cap); if (buf == NULL) { - terrno = TSDB_CODE_OUT_OF_MEMORY; - return -1; + return TSDB_CODE_OUT_OF_MEMORY; } nBytes = snprintf(buf, cap, "%s%s%s%s%s%" PRId64 "", pDb->path, TD_DIRSEP, "checkpoints", TD_DIRSEP, "checkpoint", chkpId); if (nBytes <= 0 || nBytes >= cap) { - terrno = TSDB_CODE_OUT_OF_RANGE; - return -1; + taosMemoryFree(buf); + return TSDB_CODE_OUT_OF_RANGE; } if (taosIsDir(buf)) { @@ -2660,20 +2652,28 @@ int32_t taskDbGenChkpUploadData__rsync(STaskDbWrapper* pDb, int64_t chkpId, char int32_t taskDbGenChkpUploadData__s3(STaskDbWrapper* pDb, void* bkdChkpMgt, int64_t chkpId, char** path, SArray* list, const char* idStr) { int32_t code = 0; + int32_t cap = strlen(pDb->path) + 32; SBkdMgt* p = (SBkdMgt*)bkdChkpMgt; - char* temp = taosMemoryCalloc(1, strlen(pDb->path) + 32); + char* temp = taosMemoryCalloc(1, cap); if (temp == NULL) { - terrno = TSDB_CODE_OUT_OF_MEMORY; - return -1; + return TSDB_CODE_OUT_OF_MEMORY; } - sprintf(temp, "%s%s%s%" PRId64, pDb->path, TD_DIRSEP, "tmp", chkpId); + int32_t nBytes = snprintf(temp, cap, "%s%s%s%" PRId64, pDb->path, TD_DIRSEP, "tmp", chkpId); + if (nBytes <= 0 || nBytes >= cap) { + taosMemoryFree(temp); + return TSDB_CODE_OUT_OF_RANGE; + } if (taosDirExist(temp)) { cleanDir(temp, idStr); } else { - taosMkDir(temp); + code = taosMkDir(temp); + if (code != 0) { + taosMemoryFree(temp); + return TAOS_SYSTEM_ERROR(errno); + } } code = bkdMgtGetDelta(p, pDb->idstr, chkpId, list, temp); @@ -4405,8 +4405,7 @@ int32_t compareHashTableImpl(SHashObj* p1, SHashObj* p2, SArray* diff) { if (!isBkdDataMeta(name, len) && !taosHashGet(p1, name, len)) { char* fname = taosMemoryCalloc(1, len + 1); if (fname == NULL) { - terrno = TSDB_CODE_OUT_OF_MEMORY; - return terrno; + return TSDB_CODE_OUT_OF_MEMORY; } strncpy(fname, name, len); taosArrayPush(diff, &fname); @@ -4483,6 +4482,7 @@ void dbChkpDebugInfo(SDbChkp* pDb) { } } int32_t dbChkpGetDelta(SDbChkp* p, int64_t chkpId, SArray* list) { + int32_t code = 0; int32_t nBytes; taosThreadRwlockWrlock(&p->rwLock); @@ -4502,9 +4502,8 @@ int32_t dbChkpGetDelta(SDbChkp* p, int64_t chkpId, SArray* list) { nBytes = snprintf(p->buf, p->len, "%s%s%s%scheckpoint%" PRId64 "", p->path, TD_DIRSEP, "checkpoints", TD_DIRSEP, chkpId); if (nBytes <= 0 || nBytes >= p->len) { - terrno = TSDB_CODE_OUT_OF_RANGE; taosThreadRwlockUnlock(&p->rwLock); - return terrno; + return TSDB_CODE_OUT_OF_RANGE; } taosArrayClearP(p->pAdd, taosMemoryFree); @@ -4513,9 +4512,8 @@ int32_t dbChkpGetDelta(SDbChkp* p, int64_t chkpId, SArray* list) { TdDirPtr pDir = taosOpenDir(p->buf); if (pDir == NULL) { - terrno = TAOS_SYSTEM_ERROR(errno); taosThreadRwlockUnlock(&p->rwLock); - return terrno; + return TAOS_SYSTEM_ERROR(errno); } TdDirEntryPtr de = NULL; @@ -4528,7 +4526,7 @@ int32_t dbChkpGetDelta(SDbChkp* p, int64_t chkpId, SArray* list) { p->pCurrent = taosStrdup(name); if (p->pCurrent == NULL) { - terrno = TSDB_CODE_OUT_OF_MEMORY; + code = TSDB_CODE_OUT_OF_MEMORY; break; } continue; @@ -4538,7 +4536,7 @@ int32_t dbChkpGetDelta(SDbChkp* p, int64_t chkpId, SArray* list) { taosMemoryFreeClear(p->pManifest); p->pManifest = taosStrdup(name); if (p->pManifest == NULL) { - terrno = TSDB_CODE_OUT_OF_MEMORY; + code = TSDB_CODE_OUT_OF_MEMORY; break; } continue; @@ -4551,9 +4549,9 @@ int32_t dbChkpGetDelta(SDbChkp* p, int64_t chkpId, SArray* list) { } } taosCloseDir(&pDir); - if (terrno != 0) { + if (code != 0) { taosThreadRwlockUnlock(&p->rwLock); - return terrno; + return code; } if (p->init == 0) { @@ -4564,9 +4562,8 @@ int32_t dbChkpGetDelta(SDbChkp* p, int64_t chkpId, SArray* list) { if (name != NULL && !isBkdDataMeta(name, len)) { char* fname = taosMemoryCalloc(1, len + 1); if (fname == NULL) { - terrno = TSDB_CODE_OUT_OF_MEMORY; taosThreadRwlockUnlock(&p->rwLock); - return -1; + return TSDB_CODE_OUT_OF_MEMORY; } strncpy(fname, name, len); @@ -4587,7 +4584,7 @@ int32_t dbChkpGetDelta(SDbChkp* p, int64_t chkpId, SArray* list) { taosArrayClearP(p->pDel, taosMemoryFree); taosHashClear(p->pSstTbl[1 - p->idx]); p->update = 0; - return terrno; + return code; } if (taosArrayGetSize(p->pAdd) == 0 && taosArrayGetSize(p->pDel) == 0) { @@ -4604,7 +4601,7 @@ int32_t dbChkpGetDelta(SDbChkp* p, int64_t chkpId, SArray* list) { taosThreadRwlockUnlock(&p->rwLock); - return 0; + return code; } void dbChkpDestroy(SDbChkp* pChkp); @@ -4698,7 +4695,7 @@ int32_t dbChkpInit(SDbChkp* p) { #endif int32_t dbChkpDumpTo(SDbChkp* p, char* dname, SArray* list) { static char* chkpMeta = "META"; - int32_t code = -1; + int32_t code = 0; int32_t cap = p->len + 128; taosThreadRwlockRdlock(&p->rwLock); @@ -4708,30 +4705,33 @@ int32_t dbChkpDumpTo(SDbChkp* p, char* dname, SArray* list) { char* srcDir = taosMemoryCalloc(1, cap); char* dstDir = taosMemoryCalloc(1, cap); if (srcBuf == NULL || dstBuf == NULL || srcDir == NULL || dstDir == NULL) { - terrno = TSDB_CODE_OUT_OF_MEMORY; + code = TSDB_CODE_OUT_OF_MEMORY; goto _ERROR; } int nBytes = snprintf(srcDir, cap, "%s%s%s%s%s%" PRId64 "", p->path, TD_DIRSEP, "checkpoints", TD_DIRSEP, "checkpoint", p->curChkpId); if (nBytes <= 0 || nBytes >= cap) { - terrno = TSDB_CODE_OUT_OF_RANGE; + code = TSDB_CODE_OUT_OF_RANGE; goto _ERROR; } nBytes = snprintf(dstDir, cap, "%s", dname); if (nBytes <= 0 || nBytes >= cap) { - terrno = TSDB_CODE_OUT_OF_RANGE; + code = TSDB_CODE_OUT_OF_RANGE; goto _ERROR; } if (!taosDirExist(srcDir)) { stError("failed to dump srcDir %s, reason: not exist such dir", srcDir); + code = TSDB_CODE_INVALID_PARA; goto _ERROR; } int64_t chkpId = 0, processId = -1; - if (chkpLoadExtraInfo(srcDir, &chkpId, &processId) != 0) { - stError("failed to load extra info from %s, reason:%s", srcDir, terrno != 0 ? "unkown" : tstrerror(terrno)); + code = chkpLoadExtraInfo(srcDir, &chkpId, &processId); + if (code < 0) { + stError("failed to load extra info from %s, reason:%s", srcDir, code != 0 ? "unkown" : tstrerror(code)); + goto _ERROR; } @@ -4743,19 +4743,19 @@ int32_t dbChkpDumpTo(SDbChkp* p, char* dname, SArray* list) { char* filename = taosArrayGetP(p->pAdd, i); nBytes = snprintf(srcBuf, cap, "%s%s%s", srcDir, TD_DIRSEP, filename); if (nBytes <= 0 || nBytes >= cap) { - terrno = TSDB_CODE_OUT_OF_RANGE; + code = TSDB_CODE_OUT_OF_RANGE; goto _ERROR; } nBytes = snprintf(dstBuf, cap, "%s%s%s", dstDir, TD_DIRSEP, filename); if (nBytes <= 0 || nBytes >= cap) { - terrno = TSDB_CODE_OUT_OF_RANGE; + code = TSDB_CODE_OUT_OF_RANGE; goto _ERROR; } if (taosCopyFile(srcBuf, dstBuf) < 0) { - terrno = errno; - stError("failed to copy file from %s to %s, reason:%s", srcBuf, dstBuf, tstrerror(terrno)); + code = TAOS_SYSTEM_ERROR(errno); + stError("failed to copy file from %s to %s, reason:%s", srcBuf, dstBuf, tstrerror(code)); goto _ERROR; } } @@ -4764,7 +4764,7 @@ int32_t dbChkpDumpTo(SDbChkp* p, char* dname, SArray* list) { char* filename = taosArrayGetP(p->pDel, i); char* p = taosStrdup(filename); if (p == NULL) { - terrno = TSDB_CODE_OUT_OF_MEMORY; + code = TSDB_CODE_OUT_OF_MEMORY; goto _ERROR; } taosArrayPush(list, &p); @@ -4776,19 +4776,19 @@ int32_t dbChkpDumpTo(SDbChkp* p, char* dname, SArray* list) { nBytes = snprintf(srcBuf, cap, "%s%s%s", srcDir, TD_DIRSEP, p->pCurrent); if (nBytes <= 0 || nBytes >= cap) { - terrno = TSDB_CODE_OUT_OF_RANGE; + code = TSDB_CODE_OUT_OF_RANGE; goto _ERROR; } nBytes = snprintf(dstBuf, cap, "%s%s%s_%" PRId64 "", dstDir, TD_DIRSEP, p->pCurrent, p->curChkpId); if (nBytes <= 0 || nBytes >= cap) { - terrno = TSDB_CODE_OUT_OF_RANGE; + code = TSDB_CODE_OUT_OF_RANGE; goto _ERROR; } if (taosCopyFile(srcBuf, dstBuf) < 0) { - terrno = errno; - stError("failed to copy file from %s to %s, reason:%s", srcBuf, dstBuf, tstrerror(terrno)); + code = TAOS_SYSTEM_ERROR(errno); + stError("failed to copy file from %s to %s, reason:%s", srcBuf, dstBuf, tstrerror(code)); goto _ERROR; } @@ -4798,33 +4798,33 @@ int32_t dbChkpDumpTo(SDbChkp* p, char* dname, SArray* list) { nBytes = snprintf(srcBuf, cap, "%s%s%s", srcDir, TD_DIRSEP, p->pManifest); if (nBytes <= 0 || nBytes >= cap) { - terrno = TSDB_CODE_OUT_OF_RANGE; + code = TSDB_CODE_OUT_OF_RANGE; goto _ERROR; } nBytes = snprintf(dstBuf, cap, "%s%s%s_%" PRId64 "", dstDir, TD_DIRSEP, p->pManifest, p->curChkpId); if (nBytes <= 0 || nBytes >= cap) { - terrno = TSDB_CODE_OUT_OF_RANGE; + code = TSDB_CODE_OUT_OF_RANGE; goto _ERROR; } if (taosCopyFile(srcBuf, dstBuf) < 0) { - terrno = errno; - stError("failed to copy file from %s to %s, reason:%s", srcBuf, dstBuf, tstrerror(terrno)); + code = TAOS_SYSTEM_ERROR(errno); + stError("failed to copy file from %s to %s, reason:%s", srcBuf, dstBuf, tstrerror(code)); goto _ERROR; } memset(dstBuf, 0, cap); nBytes = snprintf(dstDir, cap, "%s%s%s", dstDir, TD_DIRSEP, chkpMeta); if (nBytes <= 0 || nBytes >= cap) { - terrno = TSDB_CODE_OUT_OF_RANGE; + code = TSDB_CODE_OUT_OF_RANGE; goto _ERROR; } TdFilePtr pFile = taosOpenFile(dstDir, TD_FILE_CREATE | TD_FILE_WRITE | TD_FILE_TRUNC); if (pFile == NULL) { - terrno = TAOS_SYSTEM_ERROR(errno); - stError("chkp failed to create meta file: %s, reason:%s", dstDir, tstrerror(terrno)); + code = TAOS_SYSTEM_ERROR(errno); + stError("chkp failed to create meta file: %s, reason:%s", dstDir, tstrerror(code)); goto _ERROR; } @@ -4832,7 +4832,7 @@ int32_t dbChkpDumpTo(SDbChkp* p, char* dname, SArray* list) { nBytes = snprintf(content, sizeof(content), META_ON_S3_FORMATE, p->pCurrent, p->curChkpId, p->pManifest, p->curChkpId, "processVer", processId); if (nBytes <= 0 || nBytes >= sizeof(content)) { - terrno = TSDB_CODE_OUT_OF_RANGE; + code = TSDB_CODE_OUT_OF_RANGE; stError("chkp failed to format meta file: %s, reason: invalid msg", dstDir); taosCloseFile(&pFile); goto _ERROR; @@ -4840,8 +4840,8 @@ int32_t dbChkpDumpTo(SDbChkp* p, char* dname, SArray* list) { nBytes = taosWriteFile(pFile, content, strlen(content)); if (nBytes != strlen(content)) { - terrno = TAOS_SYSTEM_ERROR(errno); - stError("chkp failed to write meta file: %s,reason:%s", dstDir, tstrerror(terrno)); + code = TAOS_SYSTEM_ERROR(errno); + stError("chkp failed to write meta file: %s,reason:%s", dstDir, tstrerror(code)); taosCloseFile(&pFile); goto _ERROR; } @@ -4859,10 +4859,11 @@ _ERROR: taosMemoryFree(srcDir); taosMemoryFree(dstDir); - return terrno; + return code; } SBkdMgt* bkdMgtCreate(char* path) { + terrno = 0; SBkdMgt* p = taosMemoryCalloc(1, sizeof(SBkdMgt)); if (p == NULL) { terrno = TSDB_CODE_OUT_OF_MEMORY; @@ -4910,7 +4911,6 @@ void bkdMgtDestroy(SBkdMgt* bm) { } int32_t bkdMgtGetDelta(SBkdMgt* bm, char* taskId, int64_t chkpId, SArray* list, char* dname) { int32_t code = 0; - taosThreadRwlockWrlock(&bm->rwLock); SDbChkp** ppChkp = taosHashGet(bm->pDbChkpTbl, taskId, strlen(taskId)); SDbChkp* pChkp = ppChkp != NULL ? *ppChkp : NULL; @@ -4919,30 +4919,31 @@ int32_t bkdMgtGetDelta(SBkdMgt* bm, char* taskId, int64_t chkpId, SArray* list, int32_t cap = strlen(bm->path) + 64; char* path = taosMemoryCalloc(1, cap); if (path == NULL) { - terrno = TSDB_CODE_OUT_OF_MEMORY; taosThreadRwlockUnlock(&bm->rwLock); - return terrno; + return TSDB_CODE_OUT_OF_MEMORY; } int32_t nBytes = snprintf(path, cap, "%s%s%s", bm->path, TD_DIRSEP, taskId); if (nBytes <= 0 || nBytes >= cap) { - terrno = TSDB_CODE_OUT_OF_RANGE; taosMemoryFree(path); taosThreadRwlockUnlock(&bm->rwLock); - return terrno; + code = TSDB_CODE_OUT_OF_RANGE; + return code; } SDbChkp* p = dbChkpCreate(path, chkpId); if (p == NULL) { taosMemoryFree(path); taosThreadRwlockUnlock(&bm->rwLock); - return terrno; + code = terrno; + return code; } if (taosHashPut(bm->pDbChkpTbl, taskId, strlen(taskId), &p, sizeof(void*)) != 0) { dbChkpDestroy(p); taosThreadRwlockUnlock(&bm->rwLock); - return terrno; + code = terrno; + return code; } pChkp = p; @@ -4950,14 +4951,14 @@ int32_t bkdMgtGetDelta(SBkdMgt* bm, char* taskId, int64_t chkpId, SArray* list, taosThreadRwlockUnlock(&bm->rwLock); return code; } else { - terrno = dbChkpGetDelta(pChkp, chkpId, NULL); + code = dbChkpGetDelta(pChkp, chkpId, NULL); if (code == 0) { - terrno = dbChkpDumpTo(pChkp, dname, list); + code = dbChkpDumpTo(pChkp, dname, list); } } taosThreadRwlockUnlock(&bm->rwLock); - return terrno; + return code; } #ifdef BUILD_NO_CALL diff --git a/source/libs/stream/src/streamCheckpoint.c b/source/libs/stream/src/streamCheckpoint.c index 1e77e70efa..a66c7a7cfa 100644 --- a/source/libs/stream/src/streamCheckpoint.c +++ b/source/libs/stream/src/streamCheckpoint.c @@ -548,15 +548,13 @@ static int32_t getCheckpointDataMeta(const char* id, const char* path, SArray* l char* filePath = taosMemoryCalloc(1, cap); if (filePath == NULL) { - terrno = TSDB_CODE_OUT_OF_MEMORY; - return -1; + return TSDB_CODE_OUT_OF_MEMORY; } int32_t nBytes = snprintf(filePath, cap, "%s%s%s", path, TD_DIRSEP, "META_TMP"); if (nBytes <= 0 || nBytes >= cap) { taosMemoryFree(filePath); - terrno = TSDB_CODE_OUT_OF_RANGE; - return -1; + return TSDB_CODE_OUT_OF_RANGE; } code = downloadCheckpointDataByName(id, "META", filePath); @@ -584,19 +582,18 @@ int32_t uploadCheckpointData(SStreamTask* pTask, int64_t checkpointId, int64_t d SArray* toDelFiles = taosArrayInit(4, POINTER_BYTES); if (toDelFiles == NULL) { - terrno = TSDB_CODE_OUT_OF_MEMORY; - return -1; + return TSDB_CODE_OUT_OF_MEMORY; } if ((code = taskDbGenChkpUploadData(pTask->pBackend, pMeta->bkdChkptMgt, checkpointId, type, &path, toDelFiles, pTask->id.idStr)) != 0) { - stError("s-task:%s failed to gen upload checkpoint:%" PRId64 ", reason:%s", idStr, checkpointId, tstrerror(terrno)); + stError("s-task:%s failed to gen upload checkpoint:%" PRId64 ", reason:%s", idStr, checkpointId, tstrerror(code)); } if (type == DATA_UPLOAD_S3) { if (code == TSDB_CODE_SUCCESS && (code = getCheckpointDataMeta(idStr, path, toDelFiles)) != 0) { stError("s-task:%s failed to get checkpointData for checkpointId:%" PRId64 ", reason:%s", idStr, checkpointId, - tstrerror(terrno)); + tstrerror(code)); } } @@ -1003,11 +1000,13 @@ static int32_t uploadCheckpointToS3(const char* id, const char* path) { int32_t nBytes = 0; if (s3Init() != 0) { - return -1; + return TSDB_CODE_THIRDPARTY_ERROR; } TdDirPtr pDir = taosOpenDir(path); - if (pDir == NULL) return -1; + if (pDir == NULL) { + return TAOS_SYSTEM_ERROR(errno); + } TdDirEntryPtr de = NULL; while ((de = taosReadDir(pDir)) != NULL) { @@ -1018,13 +1017,13 @@ static int32_t uploadCheckpointToS3(const char* id, const char* path) { if (path[strlen(path) - 1] == TD_DIRSEP_CHAR) { nBytes = snprintf(filename, sizeof(filename), "%s%s", path, name); if (nBytes <= 0 || nBytes >= sizeof(filename)) { - code = -1; + code = TSDB_CODE_OUT_OF_RANGE; break; } } else { nBytes = snprintf(filename, sizeof(filename), "%s%s%s", path, TD_DIRSEP, name); if (nBytes <= 0 || nBytes >= sizeof(filename)) { - code = -1; + code = TSDB_CODE_OUT_OF_RANGE; break; } } @@ -1032,14 +1031,13 @@ static int32_t uploadCheckpointToS3(const char* id, const char* path) { char object[PATH_MAX] = {0}; nBytes = snprintf(object, sizeof(object), "%s%s%s", id, TD_DIRSEP, name); if (nBytes <= 0 || nBytes >= sizeof(object)) { - code = -1; + code = TSDB_CODE_OUT_OF_RANGE; break; } - if (s3PutObjectFromFile2(filename, object, 0) != 0) { - terrno = TAOS_SYSTEM_ERROR(errno); - code = -1; - stError("[s3] failed to upload checkpoint:%s", filename); + code = s3PutObjectFromFile2(filename, object, 0); + if (code != 0) { + stError("[s3] failed to upload checkpoint:%s, reason:%s", filename, tstrerror(code)); } else { stDebug("[s3] upload checkpoint:%s", filename); } @@ -1054,21 +1052,18 @@ int32_t downloadCheckpointByNameS3(const char* id, const char* fname, const char char* buf = taosMemoryCalloc(1, cap); if (buf == NULL) { - terrno = TSDB_CODE_OUT_OF_MEMORY; - return -1; + return TSDB_CODE_OUT_OF_MEMORY; } nBytes = snprintf(buf, cap, "%s/%s", id, fname); if (nBytes <= 0 || nBytes >= cap) { taosMemoryFree(buf); - terrno = TSDB_CODE_OUT_OF_RANGE; - return -1; + return TSDB_CODE_OUT_OF_RANGE; } - - if (s3GetObjectToFile(buf, dstName) != 0) { + int32_t code = s3GetObjectToFile(buf, dstName); + if (code != 0) { taosMemoryFree(buf); - terrno = TAOS_SYSTEM_ERROR(errno); - return -1; + return TAOS_SYSTEM_ERROR(errno); } taosMemoryFree(buf); return 0; @@ -1102,9 +1097,8 @@ int32_t streamTaskUploadCheckpoint(const char* id, const char* path) { // fileName: CURRENT int32_t downloadCheckpointDataByName(const char* id, const char* fname, const char* dstName) { if (id == NULL || fname == NULL || strlen(id) == 0 || strlen(fname) == 0 || strlen(fname) >= PATH_MAX) { - terrno = TSDB_CODE_INVALID_PARA; stError("down load checkpoint data parameters invalid"); - return -1; + return TSDB_CODE_INVALID_PARA; } if (strlen(tsSnodeAddress) != 0) { @@ -1133,9 +1127,8 @@ int32_t streamTaskDownloadCheckpointData(const char* id, char* path) { int32_t deleteCheckpoint(const char* id) { if (id == NULL || strlen(id) == 0) { - terrno = TSDB_CODE_INVALID_PARA; stError("deleteCheckpoint parameters invalid"); - return terrno; + return TSDB_CODE_INVALID_PARA; } if (strlen(tsSnodeAddress) != 0) { return deleteRsync(id); @@ -1156,8 +1149,9 @@ int32_t deleteCheckpointFile(const char* id, const char* name) { char* tmp = object; int32_t code = s3DeleteObjects((const char**)&tmp, 1); if (code != 0) { - return code; + return TSDB_CODE_THIRDPARTY_ERROR; } + return code; } int32_t streamTaskSendRestoreChkptMsg(SStreamTask* pTask) { @@ -1180,14 +1174,14 @@ int32_t streamTaskSendRestoreChkptMsg(SStreamTask* pTask) { tEncodeSize(tEncodeRestoreCheckpointInfo, &req, tlen, code); if (code < 0) { stError("s-task:%s vgId:%d encode stream task latest-checkpoint-id failed, code:%s", id, vgId, tstrerror(code)); - return -1; + return TSDB_CODE_INVALID_MSG; } void* buf = rpcMallocCont(tlen); if (buf == NULL) { stError("s-task:%s vgId:%d encode stream task latest-checkpoint-id msg failed, code:%s", id, vgId, tstrerror(TSDB_CODE_OUT_OF_MEMORY)); - return -1; + return TSDB_CODE_OUT_OF_MEMORY; } SEncoder encoder; diff --git a/source/libs/stream/src/streamSnapshot.c b/source/libs/stream/src/streamSnapshot.c index 57723132d8..878cb2ac71 100644 --- a/source/libs/stream/src/streamSnapshot.c +++ b/source/libs/stream/src/streamSnapshot.c @@ -228,12 +228,12 @@ int32_t snapFileGenMeta(SBackendSnapFile2* pSnapFile) { return 0; } int32_t snapFileReadMeta(SBackendSnapFile2* pSnapFile) { - terrno = 0; + int32_t code = 0; TdDirPtr pDir = taosOpenDir(pSnapFile->path); if (NULL == pDir) { - terrno = TAOS_SYSTEM_ERROR(errno); - stError("%s failed to open %s", STREAM_STATE_TRANSFER, pSnapFile->path); - return terrno; + code = TAOS_SYSTEM_ERROR(errno); + stError("%s failed to open %s, reason:%s", STREAM_STATE_TRANSFER, pSnapFile->path, tstrerror(code)); + return code; } TdDirEntryPtr pDirEntry; @@ -242,7 +242,7 @@ int32_t snapFileReadMeta(SBackendSnapFile2* pSnapFile) { if (strlen(name) >= strlen(ROCKSDB_CURRENT) && 0 == strncmp(name, ROCKSDB_CURRENT, strlen(ROCKSDB_CURRENT))) { pSnapFile->pCurrent = taosStrdup(name); if (pSnapFile->pCurrent == NULL) { - terrno = TSDB_CODE_OUT_OF_MEMORY; + code = TSDB_CODE_OUT_OF_MEMORY; break; } continue; @@ -250,7 +250,7 @@ int32_t snapFileReadMeta(SBackendSnapFile2* pSnapFile) { if (strlen(name) >= strlen(ROCKSDB_MAINFEST) && 0 == strncmp(name, ROCKSDB_MAINFEST, strlen(ROCKSDB_MAINFEST))) { pSnapFile->pMainfest = taosStrdup(name); if (pSnapFile->pMainfest == NULL) { - terrno = TSDB_CODE_OUT_OF_MEMORY; + code = TSDB_CODE_OUT_OF_MEMORY; break; } continue; @@ -258,7 +258,7 @@ int32_t snapFileReadMeta(SBackendSnapFile2* pSnapFile) { if (strlen(name) >= strlen(ROCKSDB_OPTIONS) && 0 == strncmp(name, ROCKSDB_OPTIONS, strlen(ROCKSDB_OPTIONS))) { pSnapFile->pOptions = taosStrdup(name); if (pSnapFile->pOptions == NULL) { - terrno = TSDB_CODE_OUT_OF_MEMORY; + code = TSDB_CODE_OUT_OF_MEMORY; break; } continue; @@ -267,7 +267,7 @@ int32_t snapFileReadMeta(SBackendSnapFile2* pSnapFile) { 0 == strncmp(name, ROCKSDB_CHECKPOINT_META, strlen(ROCKSDB_CHECKPOINT_META))) { pSnapFile->pCheckpointMeta = taosStrdup(name); if (pSnapFile->pCheckpointMeta == NULL) { - terrno = TSDB_CODE_OUT_OF_MEMORY; + code = TSDB_CODE_OUT_OF_MEMORY; break; } continue; @@ -276,7 +276,7 @@ int32_t snapFileReadMeta(SBackendSnapFile2* pSnapFile) { 0 == strncmp(name, ROCKSDB_CHECKPOINT_SELF_CHECK, strlen(ROCKSDB_CHECKPOINT_SELF_CHECK))) { pSnapFile->pCheckpointSelfcheck = taosStrdup(name); if (pSnapFile->pCheckpointSelfcheck == NULL) { - terrno = TSDB_CODE_OUT_OF_MEMORY; + code = TSDB_CODE_OUT_OF_MEMORY; break; } continue; @@ -285,17 +285,17 @@ int32_t snapFileReadMeta(SBackendSnapFile2* pSnapFile) { 0 == strncmp(name + strlen(name) - strlen(ROCKSDB_SST), ROCKSDB_SST, strlen(ROCKSDB_SST))) { char* sst = taosStrdup(name); if (sst == NULL) { - terrno = TSDB_CODE_OUT_OF_MEMORY; + code = TSDB_CODE_OUT_OF_MEMORY; break; } taosArrayPush(pSnapFile->pSst, &sst); } } taosCloseDir(&pDir); - return terrno; + return code; } int32_t streamBackendSnapInitFile(char* metaPath, SStreamTaskSnap* pSnap, SBackendSnapFile2* pSnapFile) { - terrno = 0; + int32_t code = 0; int32_t nBytes = 0; int32_t cap = strlen(pSnap->dbPrefixPath) + 256; @@ -307,28 +307,28 @@ int32_t streamBackendSnapInitFile(char* metaPath, SStreamTaskSnap* pSnap, SBacke nBytes = snprintf(path, cap, "%s%s%s%s%s%" PRId64 "", pSnap->dbPrefixPath, TD_DIRSEP, "checkpoints", TD_DIRSEP, "checkpoint", pSnap->chkpId); if (nBytes <= 0 || nBytes >= cap) { - terrno = TSDB_CODE_OUT_OF_RANGE; + code = TSDB_CODE_OUT_OF_RANGE; goto _ERROR; } if (!taosIsDir(path)) { - terrno = TSDB_CODE_INVALID_MSG; + code = TSDB_CODE_INVALID_MSG; goto _ERROR; } pSnapFile->pSst = taosArrayInit(16, sizeof(void*)); pSnapFile->pFileList = taosArrayInit(64, sizeof(SBackendFileItem)); if (pSnapFile->pSst == NULL || pSnapFile->pFileList == NULL) { - terrno = TSDB_CODE_OUT_OF_MEMORY; + code = TSDB_CODE_OUT_OF_MEMORY; goto _ERROR; } pSnapFile->path = path; pSnapFile->snapInfo = *pSnap; - if ((terrno = snapFileReadMeta(pSnapFile)) != 0) { + if ((code = snapFileReadMeta(pSnapFile)) != 0) { goto _ERROR; } - if ((terrno = snapFileGenMeta(pSnapFile)) != 0) { + if ((code = snapFileGenMeta(pSnapFile)) != 0) { goto _ERROR; } @@ -337,7 +337,7 @@ int32_t streamBackendSnapInitFile(char* metaPath, SStreamTaskSnap* pSnap, SBacke _ERROR: taosMemoryFree(path); - return terrno; + return code; } void snapFileDestroy(SBackendSnapFile2* pSnap) { taosMemoryFree(pSnap->pCheckpointMeta); @@ -365,34 +365,32 @@ void snapFileDestroy(SBackendSnapFile2* pSnap) { } int32_t streamSnapHandleInit(SStreamSnapHandle* pHandle, char* path, void* pMeta) { // impl later - - terrno = 0; + int32_t code = 0; SArray* pSnapInfoSet = taosArrayInit(4, sizeof(SStreamTaskSnap)); if (pSnapInfoSet == NULL) { - terrno = TSDB_CODE_OUT_OF_MEMORY; - return terrno; + return TSDB_CODE_OUT_OF_MEMORY; } - terrno = streamCreateTaskDbSnapInfo(pMeta, path, pSnapInfoSet); - if (terrno != 0) { - stError("failed to do task db snap info, reason:%s", tstrerror(terrno)); + code = streamCreateTaskDbSnapInfo(pMeta, path, pSnapInfoSet); + if (code != 0) { + stError("failed to do task db snap info, reason:%s", tstrerror(code)); taosArrayDestroy(pSnapInfoSet); - return terrno; + return code; } SArray* pDbSnapSet = taosArrayInit(8, sizeof(SBackendSnapFile2)); if (pDbSnapSet == NULL) { - terrno = TSDB_CODE_OUT_OF_MEMORY; taosArrayDestroy(pSnapInfoSet); - return -1; + code = TSDB_CODE_OUT_OF_MEMORY; + return code; } for (int32_t i = 0; i < taosArrayGetSize(pSnapInfoSet); i++) { SStreamTaskSnap* pSnap = taosArrayGet(pSnapInfoSet, i); SBackendSnapFile2 snapFile = {0}; - terrno = streamBackendSnapInitFile(path, pSnap, &snapFile); - ASSERT(terrno == 0); + code = streamBackendSnapInitFile(path, pSnap, &snapFile); + ASSERT(code == 0); taosArrayPush(pDbSnapSet, &snapFile); } pHandle->pDbSnapSet = pDbSnapSet; @@ -403,7 +401,7 @@ int32_t streamSnapHandleInit(SStreamSnapHandle* pHandle, char* path, void* pMeta _err: streamSnapHandleDestroy(pHandle); - return terrno; + return code; } void streamSnapHandleDestroy(SStreamSnapHandle* handle) { @@ -431,8 +429,7 @@ int32_t streamSnapReaderOpen(void* pMeta, int64_t sver, int64_t chkpId, char* pa // impl later SStreamSnapReader* pReader = taosMemoryCalloc(1, sizeof(SStreamSnapReader)); if (pReader == NULL) { - terrno = TSDB_CODE_OUT_OF_MEMORY; - return -1; + return TSDB_CODE_OUT_OF_MEMORY; } int32_t code = streamSnapHandleInit(&pReader->handle, (char*)path, pMeta); @@ -498,10 +495,10 @@ _NEXT: int64_t nread = taosPReadFile(pSnapFile->fd, buf + sizeof(SStreamSnapBlockHdr), kBlockSize, pSnapFile->offset); if (nread == -1) { taosMemoryFree(buf); - code = TAOS_SYSTEM_ERROR(terrno); + code = TAOS_SYSTEM_ERROR(errno); stError("%s snap failed to read snap, file name:%s, type:%d,reason:%s", STREAM_STATE_TRANSFER, item->name, item->type, tstrerror(code)); - return -1; + return code; } else if (nread > 0 && nread <= kBlockSize) { // left bytes less than kBlockSize stDebug("%s read file %s, current offset:%" PRId64 ",size:% " PRId64 ", file no.%d", STREAM_STATE_TRANSFER, @@ -558,6 +555,7 @@ _NEXT: // SMetaSnapWriter ======================================== int32_t streamSnapWriterOpen(void* pMeta, int64_t sver, int64_t ever, char* path, SStreamSnapWriter** ppWriter) { // impl later + int32_t code = 0; SStreamSnapWriter* pWriter = taosMemoryCalloc(1, sizeof(SStreamSnapWriter)); if (pWriter == NULL) { return TSDB_CODE_OUT_OF_MEMORY; @@ -568,23 +566,23 @@ int32_t streamSnapWriterOpen(void* pMeta, int64_t sver, int64_t ever, char* path pHandle->metaPath = taosStrdup(path); if (pHandle->metaPath == NULL) { - terrno = TSDB_CODE_OUT_OF_MEMORY; taosMemoryFree(pWriter); - return terrno; + code = TSDB_CODE_OUT_OF_MEMORY; + return code; } pHandle->pDbSnapSet = taosArrayInit(8, sizeof(SBackendSnapFile2)); if (pHandle->pDbSnapSet == NULL) { - terrno = TSDB_CODE_OUT_OF_MEMORY; streamSnapWriterClose(pWriter, 0); - return terrno; + code = TSDB_CODE_OUT_OF_MEMORY; + return code; } SBackendSnapFile2 snapFile = {0}; if (taosArrayPush(pHandle->pDbSnapSet, &snapFile) == NULL) { - terrno = TSDB_CODE_OUT_OF_MEMORY; streamSnapWriterClose(pWriter, 0); - return terrno; + code = TSDB_CODE_OUT_OF_MEMORY; + return code; } *ppWriter = pWriter; @@ -607,7 +605,7 @@ int32_t streamSnapWriteImpl(SStreamSnapWriter* pWriter, uint8_t* pData, uint32_t if (pSnapFile->fd == 0) { pSnapFile->fd = streamOpenFile(pSnapFile->path, pItem->name, TD_FILE_CREATE | TD_FILE_WRITE | TD_FILE_APPEND); if (pSnapFile->fd == NULL) { - code = TAOS_SYSTEM_ERROR(terrno); + code = TAOS_SYSTEM_ERROR(errno); stError("%s failed to open file name:%s%s%s, reason:%s", STREAM_STATE_TRANSFER, pHandle->metaPath, TD_DIRSEP, pHdr->name, tstrerror(code)); } @@ -615,7 +613,7 @@ int32_t streamSnapWriteImpl(SStreamSnapWriter* pWriter, uint8_t* pData, uint32_t if (strlen(pHdr->name) == strlen(pItem->name) && strcmp(pHdr->name, pItem->name) == 0) { int64_t bytes = taosPWriteFile(pSnapFile->fd, pHdr->data, pHdr->size, pSnapFile->offset); if (bytes != pHdr->size) { - code = TAOS_SYSTEM_ERROR(terrno); + code = TAOS_SYSTEM_ERROR(errno); stError("%s failed to write snap, file name:%s, reason:%s", STREAM_STATE_TRANSFER, pHdr->name, tstrerror(code)); return code; } else { @@ -636,12 +634,16 @@ int32_t streamSnapWriteImpl(SStreamSnapWriter* pWriter, uint8_t* pData, uint32_t SBackendFileItem* pItem = taosArrayGet(pSnapFile->pFileList, pSnapFile->currFileIdx); pSnapFile->fd = streamOpenFile(pSnapFile->path, pItem->name, TD_FILE_CREATE | TD_FILE_WRITE | TD_FILE_APPEND); if (pSnapFile->fd == NULL) { - code = TAOS_SYSTEM_ERROR(terrno); + code = TAOS_SYSTEM_ERROR(errno); stError("%s failed to open file name:%s%s%s, reason:%s", STREAM_STATE_TRANSFER, pSnapFile->path, TD_DIRSEP, pHdr->name, tstrerror(code)); } - taosPWriteFile(pSnapFile->fd, pHdr->data, pHdr->size, pSnapFile->offset); + if (taosPWriteFile(pSnapFile->fd, pHdr->data, pHdr->size, pSnapFile->offset) != pHdr->size) { + code = TAOS_SYSTEM_ERROR(errno); + stError("%s failed to write snap, file name:%s, reason:%s", STREAM_STATE_TRANSFER, pHdr->name, tstrerror(code)); + return code; + } stInfo("succ to write data %s", pItem->name); pSnapFile->offset += pHdr->size; } diff --git a/source/util/src/terror.c b/source/util/src/terror.c index c5bba6fa53..4563e21c6e 100644 --- a/source/util/src/terror.c +++ b/source/util/src/terror.c @@ -96,6 +96,7 @@ TAOS_DEFINE_ERROR(TSDB_CODE_NOT_FOUND, "Not found") TAOS_DEFINE_ERROR(TSDB_CODE_NO_DISKSPACE, "Out of disk space") TAOS_DEFINE_ERROR(TSDB_CODE_TIMEOUT_ERROR, "Operation timeout") TAOS_DEFINE_ERROR(TSDB_CODE_NO_ENOUGH_DISKSPACE, "No enough disk space") +TAOS_DEFINE_ERROR(TSDB_CODE_THIRDPARTY_ERROR, "third party error, please check the log") TAOS_DEFINE_ERROR(TSDB_CODE_APP_IS_STARTING, "Database is starting up") TAOS_DEFINE_ERROR(TSDB_CODE_APP_IS_STOPPING, "Database is closing down") From 92ab689c46a1f3691104a00d4f09c20337254340 Mon Sep 17 00:00:00 2001 From: Yihao Deng Date: Thu, 4 Jul 2024 10:26:58 +0000 Subject: [PATCH 26/34] refactor backend --- source/libs/stream/src/streamBackendRocksdb.c | 1 - 1 file changed, 1 deletion(-) diff --git a/source/libs/stream/src/streamBackendRocksdb.c b/source/libs/stream/src/streamBackendRocksdb.c index 057ff56aa9..cb40864c6e 100644 --- a/source/libs/stream/src/streamBackendRocksdb.c +++ b/source/libs/stream/src/streamBackendRocksdb.c @@ -1606,7 +1606,6 @@ int32_t taskDbDoCheckpoint(void* arg, int64_t chkpId, int64_t processId) { if (taosAcquireRef(taskDbWrapperId, refId) == NULL) { code = terrno; - terrno = 0; return code; } From 0cdfae3a2c2fa34a3bbbd8a8405fc86ecc866220 Mon Sep 17 00:00:00 2001 From: Yihao Deng Date: Thu, 4 Jul 2024 12:13:17 +0000 Subject: [PATCH 27/34] refactor backend --- source/dnode/vnode/src/tq/tqStreamStateSnap.c | 12 +++++------- source/libs/stream/src/streamMeta.c | 6 +++++- 2 files changed, 10 insertions(+), 8 deletions(-) diff --git a/source/dnode/vnode/src/tq/tqStreamStateSnap.c b/source/dnode/vnode/src/tq/tqStreamStateSnap.c index 07bfd52a9c..c79fc66a06 100644 --- a/source/dnode/vnode/src/tq/tqStreamStateSnap.c +++ b/source/dnode/vnode/src/tq/tqStreamStateSnap.c @@ -38,8 +38,7 @@ int32_t streamStateSnapReaderOpen(STQ* pTq, int64_t sver, int64_t ever, SStreamS // alloc pReader = (SStreamStateReader*)taosMemoryCalloc(1, sizeof(SStreamStateReader)); if (pReader == NULL) { - terrno = TSDB_CODE_OUT_OF_MEMORY; - code = terrno; + code = TSDB_CODE_OUT_OF_MEMORY; goto _err; } @@ -52,10 +51,9 @@ int32_t streamStateSnapReaderOpen(STQ* pTq, int64_t sver, int64_t ever, SStreamS SStreamSnapReader* pSnapReader = NULL; - if (streamSnapReaderOpen(meta, sver, chkpId, meta->path, &pSnapReader) == 0) { + if ((code = streamSnapReaderOpen(meta, sver, chkpId, meta->path, &pSnapReader)) == 0) { pReader->complete = 1; } else { - code = terrno; taosMemoryFree(pReader); goto _err; } @@ -68,7 +66,7 @@ int32_t streamStateSnapReaderOpen(STQ* pTq, int64_t sver, int64_t ever, SStreamS _err: tqError("vgId:%d, vnode %s snapshot reader failed to open since %s", TD_VID(pTq->pVnode), STREAM_STATE_TRANSFER, - tstrerror(terrno)); + tstrerror(code)); *ppReader = NULL; return code; } @@ -142,7 +140,7 @@ int32_t streamStateSnapWriterOpen(STQ* pTq, int64_t sver, int64_t ever, SStreamS if (taosMkDir(pTq->pStreamMeta->path) != 0) { code = TAOS_SYSTEM_ERROR(errno); tqError("vgId:%d, vnode %s snapshot writer failed to create directory %s since %s", TD_VID(pTq->pVnode), - STREAM_STATE_TRANSFER, pTq->pStreamMeta->path, tstrerror(terrno)); + STREAM_STATE_TRANSFER, pTq->pStreamMeta->path, tstrerror(code)); goto _err; } @@ -160,7 +158,7 @@ int32_t streamStateSnapWriterOpen(STQ* pTq, int64_t sver, int64_t ever, SStreamS _err: tqError("vgId:%d, vnode %s snapshot writer failed to open since %s", TD_VID(pTq->pVnode), STREAM_STATE_TRANSFER, - tstrerror(terrno)); + tstrerror(code)); taosMemoryFree(pWriter); *ppWriter = NULL; return code; diff --git a/source/libs/stream/src/streamMeta.c b/source/libs/stream/src/streamMeta.c index 2244861bc7..901d91d02d 100644 --- a/source/libs/stream/src/streamMeta.c +++ b/source/libs/stream/src/streamMeta.c @@ -398,6 +398,9 @@ SStreamMeta* streamMetaOpen(const char* path, void* ahandle, FTaskBuild buildTas pMeta->qHandle = taosInitScheduler(32, 1, "stream-chkp", NULL); pMeta->bkdChkptMgt = bkdMgtCreate(tpath); + if (pMeta->bkdChkptMgt == NULL) { + goto _err; + } taosThreadMutexInit(&pMeta->backendMutex, NULL); return pMeta; @@ -413,9 +416,10 @@ _err: if (pMeta->updateInfo.pTasks) taosHashCleanup(pMeta->updateInfo.pTasks); if (pMeta->startInfo.pReadyTaskSet) taosHashCleanup(pMeta->startInfo.pReadyTaskSet); if (pMeta->startInfo.pFailedTaskSet) taosHashCleanup(pMeta->startInfo.pFailedTaskSet); + if (pMeta->bkdChkptMgt) bkdMgtDestroy(pMeta->bkdChkptMgt); taosMemoryFree(pMeta); - stError("failed to open stream meta"); + stError("failed to open stream meta, reason:%s", tstrerror(terrno)); return NULL; } From f9679feaa72bb51a5587531589bc41f8879bb827 Mon Sep 17 00:00:00 2001 From: Yihao Deng Date: Fri, 5 Jul 2024 02:42:22 +0000 Subject: [PATCH 28/34] fix compile error --- source/libs/stream/src/streamBackendRocksdb.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/source/libs/stream/src/streamBackendRocksdb.c b/source/libs/stream/src/streamBackendRocksdb.c index f27462fb08..1f4603c466 100644 --- a/source/libs/stream/src/streamBackendRocksdb.c +++ b/source/libs/stream/src/streamBackendRocksdb.c @@ -1568,7 +1568,7 @@ int32_t chkpAddExtraInfo(char* pChkpIdDir, int64_t chkpId, int64_t processId) { nBytes = snprintf(pDst, cap, "%s%sinfo", pChkpIdDir, TD_DIRSEP); if (nBytes <= 0 || nBytes >= cap) { code = TSDB_CODE_OUT_OF_RANGE; - stError("failed to build dst to add extra info, dir:%s, reason:%d", pChkpIdDir, tstrerror(code)); + stError("failed to build dst to add extra info, dir:%s, reason:%s", pChkpIdDir, tstrerror(code)); goto _EXIT; } @@ -1582,7 +1582,7 @@ int32_t chkpAddExtraInfo(char* pChkpIdDir, int64_t chkpId, int64_t processId) { nBytes = snprintf(buf, sizeof(buf), "%" PRId64 " %" PRId64 "", chkpId, processId); if (nBytes <= 0 || nBytes >= sizeof(buf)) { code = TSDB_CODE_OUT_OF_RANGE; - stError("failed to build content to add extra info, dir:%s,reason:%d", pChkpIdDir, tstrerror(code)); + stError("failed to build content to add extra info, dir:%s,reason:%s", pChkpIdDir, tstrerror(code)); goto _EXIT; } From 52a80826223543797ab2503370704f1c4bfa6eb4 Mon Sep 17 00:00:00 2001 From: Yihao Deng Date: Fri, 5 Jul 2024 03:40:19 +0000 Subject: [PATCH 29/34] refactor backend --- source/libs/stream/inc/streamBackendRocksdb.h | 2 +- source/libs/stream/src/streamBackendRocksdb.c | 23 +++++++++++++++++++ source/libs/stream/src/streamMeta.c | 10 ++++++-- 3 files changed, 32 insertions(+), 3 deletions(-) diff --git a/source/libs/stream/inc/streamBackendRocksdb.h b/source/libs/stream/inc/streamBackendRocksdb.h index e4c5787020..f0647f44a3 100644 --- a/source/libs/stream/inc/streamBackendRocksdb.h +++ b/source/libs/stream/inc/streamBackendRocksdb.h @@ -133,7 +133,7 @@ typedef struct { #define META_ON_S3_FORMATE "%s_%" PRId64 "\n%s_%" PRId64 "\n%s_%" PRId64 "" -bool streamBackendDataIsExist(const char* path, int64_t chkpId, int32_t vgId); +bool streamBackendDataIsExist(const char* path, int64_t chkpId); void* streamBackendInit(const char* path, int64_t chkpId, int32_t vgId); void streamBackendCleanup(void* arg); void streamBackendHandleCleanup(void* arg); diff --git a/source/libs/stream/src/streamBackendRocksdb.c b/source/libs/stream/src/streamBackendRocksdb.c index 1f4603c466..8d39db33e8 100644 --- a/source/libs/stream/src/streamBackendRocksdb.c +++ b/source/libs/stream/src/streamBackendRocksdb.c @@ -786,6 +786,29 @@ _EXIT: taosMemoryFree(checkpointRoot); return code; } +bool streamBackendDataIsExist(const char* path, int64_t chkpId) { + bool exist = true; + int32_t cap = strlen(path) + 32; + + char* state = taosMemoryCalloc(1, cap); + if (state == NULL) { + terrno = TSDB_CODE_OUT_OF_MEMORY; + return false; + } + + int16_t nBytes = snprintf(state, cap, "%s%s%s", path, TD_DIRSEP, "state"); + if (nBytes <= 0 || nBytes >= cap) { + terrno = TSDB_CODE_OUT_OF_RANGE; + exist = false; + } else { + if (!taosDirExist(state)) { + exist = false; + } + } + + taosMemoryFree(state); + return exist; +} void* streamBackendInit(const char* streamPath, int64_t chkpId, int32_t vgId) { char* backendPath = NULL; diff --git a/source/libs/stream/src/streamMeta.c b/source/libs/stream/src/streamMeta.c index 901d91d02d..a97b803703 100644 --- a/source/libs/stream/src/streamMeta.c +++ b/source/libs/stream/src/streamMeta.c @@ -183,9 +183,15 @@ int32_t streamMetaCvtDbFormat(SStreamMeta* pMeta) { int32_t code = 0; int64_t chkpId = streamMetaGetLatestCheckpointId(pMeta); - bool exist = streamBackendDataIsExist(pMeta->path, chkpId, pMeta->vgId); + bool exist = streamBackendDataIsExist(pMeta->path, chkpId); if (exist == false) { - stError("failed to check backend data exist, reason:%s", tstrerror(terrno)); + if (terrno != 0) { + code = terrno; + terrno = 0; + stError("failed to check backend data exist, reason:%s", tstrerror(code)); + } else { + stInfo("not need to convert stream backend formate"); + } return code; } From 4ac897be44f61e545971f3fed0fd4d9711cd2a53 Mon Sep 17 00:00:00 2001 From: Yihao Deng Date: Fri, 5 Jul 2024 06:51:13 +0000 Subject: [PATCH 30/34] refactor backend --- source/libs/stream/src/streamBackendRocksdb.c | 5 +++-- source/libs/stream/src/streamMeta.c | 2 +- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/source/libs/stream/src/streamBackendRocksdb.c b/source/libs/stream/src/streamBackendRocksdb.c index 8d39db33e8..e8a5e30661 100644 --- a/source/libs/stream/src/streamBackendRocksdb.c +++ b/source/libs/stream/src/streamBackendRocksdb.c @@ -762,8 +762,8 @@ int32_t restoreCheckpointData(const char* path, const char* key, int64_t chkptId } if (code != 0) { - stError("failed to start stream backend at %s, reason: %s, restart from default defaultPath:%s, reason:%s", - checkpointPath, tstrerror(code), defaultPath, tstrerror(code)); + stError("failed to start stream backend at %s, restart from default defaultPath:%s, reason:%s", checkpointPath, + defaultPath, tstrerror(code)); code = 0; // reset the error code } } else { // no valid checkpoint id @@ -2528,6 +2528,7 @@ STaskDbWrapper* taskDbOpen(const char* path, const char* key, int64_t chkptId, i char* statePath = NULL; char* dbPath = NULL; int code = 0; + terrno = 0; if ((code = restoreCheckpointData(path, key, chkptId, &statePath, &dbPath, processVer)) < 0) { terrno = code; stError("failed to restore checkpoint data, path:%s, key:%s, checkpointId: %" PRId64 "reason:%s", path, key, diff --git a/source/libs/stream/src/streamMeta.c b/source/libs/stream/src/streamMeta.c index a97b803703..15aa42e741 100644 --- a/source/libs/stream/src/streamMeta.c +++ b/source/libs/stream/src/streamMeta.c @@ -182,7 +182,7 @@ int32_t streamMetaCheckBackendCompatible(SStreamMeta* pMeta) { int32_t streamMetaCvtDbFormat(SStreamMeta* pMeta) { int32_t code = 0; int64_t chkpId = streamMetaGetLatestCheckpointId(pMeta); - + terrno = 0; bool exist = streamBackendDataIsExist(pMeta->path, chkpId); if (exist == false) { if (terrno != 0) { From 8cbe534f931cc3f37eb199a8ed0a542ba296bf25 Mon Sep 17 00:00:00 2001 From: Yihao Deng Date: Sun, 7 Jul 2024 06:04:59 +0000 Subject: [PATCH 31/34] fix invalid read --- source/libs/stream/src/streamBackendRocksdb.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/source/libs/stream/src/streamBackendRocksdb.c b/source/libs/stream/src/streamBackendRocksdb.c index 8c1f962f00..8c390c189c 100644 --- a/source/libs/stream/src/streamBackendRocksdb.c +++ b/source/libs/stream/src/streamBackendRocksdb.c @@ -2074,7 +2074,9 @@ int32_t valueEncode(void* value, int32_t vlen, int64_t ttl, char** dest) { len += taosEncodeFixedI32((void**)&buf, key.len); len += taosEncodeFixedI32((void**)&buf, key.rawLen); len += taosEncodeFixedI8((void**)&buf, key.compress); - len += taosEncodeBinary((void**)&buf, (char*)value, key.len); + if (value != NULL && key.len != 0) { + len += taosEncodeBinary((void**)&buf, (char*)value, key.len); + } *dest = p; } else { char* buf = *dest; @@ -2082,7 +2084,9 @@ int32_t valueEncode(void* value, int32_t vlen, int64_t ttl, char** dest) { len += taosEncodeFixedI32((void**)&buf, key.len); len += taosEncodeFixedI32((void**)&buf, key.rawLen); len += taosEncodeFixedI8((void**)&buf, key.compress); - len += taosEncodeBinary((void**)&buf, (char*)value, key.len); + if (value != NULL && key.len != 0) { + len += taosEncodeBinary((void**)&buf, (char*)value, key.len); + } } taosMemoryFree(dst); From 982fed581d3be22708c26103368524f74b4f8980 Mon Sep 17 00:00:00 2001 From: Yihao Deng Date: Mon, 8 Jul 2024 07:34:06 +0000 Subject: [PATCH 32/34] fix invalid read --- source/libs/stream/src/streamBackendRocksdb.c | 13 ++++--------- 1 file changed, 4 insertions(+), 9 deletions(-) diff --git a/source/libs/stream/src/streamBackendRocksdb.c b/source/libs/stream/src/streamBackendRocksdb.c index 8c390c189c..231fc2ce5b 100644 --- a/source/libs/stream/src/streamBackendRocksdb.c +++ b/source/libs/stream/src/streamBackendRocksdb.c @@ -1536,15 +1536,10 @@ int32_t chkpLoadExtraInfo(char* pChkpIdDir, int64_t* chkpId, int64_t* processId) pFile = taosOpenFile(pDst, TD_FILE_READ); if (pFile == NULL) { - if (errno == ENOENT) { - // compatible with previous version - *processId = -1; - code = 0; - goto _EXIT; - } else { - code = TAOS_SYSTEM_ERROR(errno); - stError("failed to open file to load extra info, file:%s, reason:%s", pDst, tstrerror(code)); - } + // compatible with previous version + *processId = -1; + code = 0; + stError("failed to open file to load extra info, file:%s, reason:%s", pDst, tstrerror(TAOS_SYSTEM_ERROR(errno))); goto _EXIT; } From ee09e26f470a03ea22149b44f47a715c7bf13fd0 Mon Sep 17 00:00:00 2001 From: Yihao Deng Date: Tue, 9 Jul 2024 08:24:30 +0000 Subject: [PATCH 33/34] refactor code --- source/libs/stream/src/streamBackendRocksdb.c | 24 ++++++++++--------- source/libs/stream/src/streamCheckpoint.c | 16 +++++++------ 2 files changed, 22 insertions(+), 18 deletions(-) diff --git a/source/libs/stream/src/streamBackendRocksdb.c b/source/libs/stream/src/streamBackendRocksdb.c index 231fc2ce5b..8b87019ee0 100644 --- a/source/libs/stream/src/streamBackendRocksdb.c +++ b/source/libs/stream/src/streamBackendRocksdb.c @@ -475,6 +475,7 @@ int32_t rebuildDataFromS3(char* chkpPath, int64_t chkpId) { taosMemoryFree(pMeta); return code; } + taosMemoryFree(pMeta); return chkpAddExtraInfo(chkpPath, chkpId, pMeta->processId); } @@ -2648,6 +2649,7 @@ int32_t taskDbGenChkpUploadData__rsync(STaskDbWrapper* pDb, int64_t chkpId, char char* buf = taosMemoryCalloc(1, cap); if (buf == NULL) { + taosReleaseRef(taskDbWrapperId, refId); return TSDB_CODE_OUT_OF_MEMORY; } @@ -2655,6 +2657,7 @@ int32_t taskDbGenChkpUploadData__rsync(STaskDbWrapper* pDb, int64_t chkpId, char snprintf(buf, cap, "%s%s%s%s%s%" PRId64 "", pDb->path, TD_DIRSEP, "checkpoints", TD_DIRSEP, "checkpoint", chkpId); if (nBytes <= 0 || nBytes >= cap) { taosMemoryFree(buf); + taosReleaseRef(taskDbWrapperId, refId); return TSDB_CODE_OUT_OF_RANGE; } @@ -4716,19 +4719,22 @@ int32_t dbChkpInit(SDbChkp* p) { int32_t dbChkpDumpTo(SDbChkp* p, char* dname, SArray* list) { static char* chkpMeta = "META"; int32_t code = 0; - int32_t cap = p->len + 128; taosThreadRwlockRdlock(&p->rwLock); - char* srcBuf = taosMemoryCalloc(1, cap); - char* dstBuf = taosMemoryCalloc(1, cap); - char* srcDir = taosMemoryCalloc(1, cap); - char* dstDir = taosMemoryCalloc(1, cap); - if (srcBuf == NULL || dstBuf == NULL || srcDir == NULL || dstDir == NULL) { + int32_t cap = p->len + 128; + + char* buffer = taosMemoryCalloc(4, cap); + if (buffer == NULL) { code = TSDB_CODE_OUT_OF_MEMORY; goto _ERROR; } + char* srcBuf = buffer; + char* dstBuf = &srcBuf[cap]; + char* srcDir = &dstBuf[cap]; + char* dstDir = &srcDir[cap]; + int nBytes = snprintf(srcDir, cap, "%s%s%s%s%s%" PRId64 "", p->path, TD_DIRSEP, "checkpoints", TD_DIRSEP, "checkpoint", p->curChkpId); if (nBytes <= 0 || nBytes >= cap) { @@ -4872,12 +4878,8 @@ int32_t dbChkpDumpTo(SDbChkp* p, char* dname, SArray* list) { code = 0; _ERROR: + taosMemoryFree(buffer); taosThreadRwlockUnlock(&p->rwLock); - taosMemoryFree(srcBuf); - taosMemoryFree(dstBuf); - taosMemoryFree(srcDir); - taosMemoryFree(dstDir); - return code; } diff --git a/source/libs/stream/src/streamCheckpoint.c b/source/libs/stream/src/streamCheckpoint.c index a66c7a7cfa..731b6e9586 100644 --- a/source/libs/stream/src/streamCheckpoint.c +++ b/source/libs/stream/src/streamCheckpoint.c @@ -541,10 +541,8 @@ void streamTaskSetFailedCheckpointId(SStreamTask* pTask) { } static int32_t getCheckpointDataMeta(const char* id, const char* path, SArray* list) { - TdFilePtr pFile = NULL; - int32_t cap = strlen(path) + 64; - char buf[128] = {0}; - int32_t code = 0; + int32_t code = 0; + int32_t cap = strlen(path) + 64; char* filePath = taosMemoryCalloc(1, cap); if (filePath == NULL) { @@ -603,7 +601,7 @@ int32_t uploadCheckpointData(SStreamTask* pTask, int64_t checkpointId, int64_t d stDebug("s-task:%s upload checkpointId:%" PRId64 " to remote succ", idStr, checkpointId); } else { stError("s-task:%s failed to upload checkpointId:%" PRId64 " path:%s,reason:%s", idStr, checkpointId, path, - tstrerror(errno)); + tstrerror(code)); } } @@ -1080,13 +1078,17 @@ ECHECKPOINT_BACKUP_TYPE streamGetCheckpointBackupType() { } int32_t streamTaskUploadCheckpoint(const char* id, const char* path) { + int32_t code = 0; if (id == NULL || path == NULL || strlen(id) == 0 || strlen(path) == 0 || strlen(path) >= PATH_MAX) { stError("invalid parameters in upload checkpoint, %s", id); - return -1; + return TSDB_CODE_INVALID_CFG; } if (strlen(tsSnodeAddress) != 0) { - return uploadByRsync(id, path); + code = uploadByRsync(id, path); + if (code != 0) { + return TAOS_SYSTEM_ERROR(errno); + } } else if (tsS3StreamEnabled) { return uploadCheckpointToS3(id, path); } From 41f8553d53c43059b2a0191074bfcd4d90e34570 Mon Sep 17 00:00:00 2001 From: Yihao Deng Date: Tue, 9 Jul 2024 08:35:04 +0000 Subject: [PATCH 34/34] refactor code --- source/libs/stream/src/streamMeta.c | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/source/libs/stream/src/streamMeta.c b/source/libs/stream/src/streamMeta.c index 15aa42e741..b38b8d73c0 100644 --- a/source/libs/stream/src/streamMeta.c +++ b/source/libs/stream/src/streamMeta.c @@ -185,13 +185,7 @@ int32_t streamMetaCvtDbFormat(SStreamMeta* pMeta) { terrno = 0; bool exist = streamBackendDataIsExist(pMeta->path, chkpId); if (exist == false) { - if (terrno != 0) { - code = terrno; - terrno = 0; - stError("failed to check backend data exist, reason:%s", tstrerror(code)); - } else { - stInfo("not need to convert stream backend formate"); - } + code = terrno; return code; }