From c012704a5b4efc8ed95151a17a18886c4448be01 Mon Sep 17 00:00:00 2001 From: Hongze Cheng Date: Sun, 10 Nov 2019 19:54:05 +0800 Subject: [PATCH 01/20] Submit the first version of merge import --- src/system/detail/src/vnodeCache.c | 88 +- src/system/detail/src/vnodeFile.c | 4 +- src/system/detail/src/vnodeImport.c | 2215 +++++++++++++++++---------- 3 files changed, 1492 insertions(+), 815 deletions(-) diff --git a/src/system/detail/src/vnodeCache.c b/src/system/detail/src/vnodeCache.c index 8b51bc4609..ac7e19524a 100644 --- a/src/system/detail/src/vnodeCache.c +++ b/src/system/detail/src/vnodeCache.c @@ -372,13 +372,60 @@ void vnodeCancelCommit(SVnodeObj *pVnode) { taosTmrReset(vnodeProcessCommitTimer, pVnode->cfg.commitTime * 1000, pVnode, vnodeTmrCtrl, &pVnode->commitTimer); } +/* The vnode cache lock should be hold before calling this interface + */ +SCacheBlock *vnodeGetFreeCacheBlock(SVnodeObj *pVnode) { + SCachePool *pPool = (SCachePool *)(pVnode->pCachePool); + SVnodeCfg *pCfg = &(pVnode->cfg); + SCacheBlock *pCacheBlock = NULL; + int skipped = 0; + + while (1) { + pCacheBlock = (SCacheBlock *)(pPool->pMem[((int64_t)pPool->freeSlot)]); + if (pCacheBlock->blockId == 0) break; + + if (pCacheBlock->notFree) { + pPool->freeSlot++; + pPool->freeSlot = pPool->freeSlot % pCfg->cacheNumOfBlocks.totalBlocks; + skipped++; + if (skipped > pPool->threshold) { + vnodeCreateCommitThread(pVnode); + pthread_mutex_unlock(&pPool->vmutex); + dError("vid:%d committing process is too slow, notFreeSlots:%d....", pVnode->vnode, pPool->notFreeSlots); + return NULL; + } + } else { + SMeterObj * pRelObj = pCacheBlock->pMeterObj; + SCacheInfo *pRelInfo = (SCacheInfo *)pRelObj->pCache; + int firstSlot = (pRelInfo->currentSlot - pRelInfo->numOfBlocks + 1 + pRelInfo->maxBlocks) % pRelInfo->maxBlocks; + pCacheBlock = pRelInfo->cacheBlocks[firstSlot]; + if (pCacheBlock) { + pPool->freeSlot = pCacheBlock->index; + vnodeFreeCacheBlock(pCacheBlock); + break; + } else { + pPool->freeSlot = (pPool->freeSlot + 1) % pCfg->cacheNumOfBlocks.totalBlocks; + skipped++; + } + } + } + + pCacheBlock = (SCacheBlock *)(pPool->pMem[pPool->freeSlot]); + pCacheBlock->index = pPool->freeSlot; + pCacheBlock->notFree = 1; + pPool->freeSlot = (pPool->freeSlot + 1) % pCfg->cacheNumOfBlocks.totalBlocks; + pPool->notFreeSlots++; + + return pCacheBlock; +} + int vnodeAllocateCacheBlock(SMeterObj *pObj) { int index; SCachePool * pPool; SCacheBlock *pCacheBlock; SCacheInfo * pInfo; SVnodeObj * pVnode; - int skipped = 0, commit = 0; + int commit = 0; pVnode = vnodeList + pObj->vnode; pPool = (SCachePool *)pVnode->pCachePool; @@ -406,45 +453,10 @@ int vnodeAllocateCacheBlock(SMeterObj *pObj) { return -1; } - while (1) { - pCacheBlock = (SCacheBlock *)(pPool->pMem[((int64_t)pPool->freeSlot)]); - if (pCacheBlock->blockId == 0) break; - - if (pCacheBlock->notFree) { - pPool->freeSlot++; - pPool->freeSlot = pPool->freeSlot % pCfg->cacheNumOfBlocks.totalBlocks; - skipped++; - if (skipped > pPool->threshold) { - vnodeCreateCommitThread(pVnode); - pthread_mutex_unlock(&pPool->vmutex); - dError("vid:%d sid:%d id:%s, committing process is too slow, notFreeSlots:%d....", - pObj->vnode, pObj->sid, pObj->meterId, pPool->notFreeSlots); - return -1; - } - } else { - SMeterObj *pRelObj = pCacheBlock->pMeterObj; - SCacheInfo *pRelInfo = (SCacheInfo *)pRelObj->pCache; - int firstSlot = (pRelInfo->currentSlot - pRelInfo->numOfBlocks + 1 + pRelInfo->maxBlocks) % pRelInfo->maxBlocks; - pCacheBlock = pRelInfo->cacheBlocks[firstSlot]; - if (pCacheBlock) { - pPool->freeSlot = pCacheBlock->index; - vnodeFreeCacheBlock(pCacheBlock); - break; - } else { - pPool->freeSlot = (pPool->freeSlot + 1) % pCfg->cacheNumOfBlocks.totalBlocks; - skipped++; - } - } - } - - index = pPool->freeSlot; - pPool->freeSlot++; - pPool->freeSlot = pPool->freeSlot % pCfg->cacheNumOfBlocks.totalBlocks; - pPool->notFreeSlots++; + if ((pCacheBlock = vnodeGetFreeCacheBlock(pVnode)) == NULL) return -1; + index = pCacheBlock->index; pCacheBlock->pMeterObj = pObj; - pCacheBlock->notFree = 1; - pCacheBlock->index = index; pCacheBlock->offset[0] = ((char *)(pCacheBlock)) + sizeof(SCacheBlock) + pObj->numOfColumns * sizeof(char *); for (int col = 1; col < pObj->numOfColumns; ++col) diff --git a/src/system/detail/src/vnodeFile.c b/src/system/detail/src/vnodeFile.c index df94c883ac..f95ef01765 100644 --- a/src/system/detail/src/vnodeFile.c +++ b/src/system/detail/src/vnodeFile.c @@ -103,8 +103,8 @@ void vnodeGetDnameFromLname(char *lhead, char *ldata, char *llast, char *dhead, } void vnodeGetHeadTname(char *nHeadName, char *nLastName, int vnode, int fileId) { - sprintf(nHeadName, "%s/vnode%d/db/v%df%d.t", tsDirectory, vnode, vnode, fileId); - sprintf(nLastName, "%s/vnode%d/db/v%df%d.l", tsDirectory, vnode, vnode, fileId); + if (nHeadName != NULL) sprintf(nHeadName, "%s/vnode%d/db/v%df%d.t", tsDirectory, vnode, vnode, fileId); + if (nLastName != NULL) sprintf(nLastName, "%s/vnode%d/db/v%df%d.l", tsDirectory, vnode, vnode, fileId); } void vnodeCreateDataDirIfNeeded(int vnode, char *path) { diff --git a/src/system/detail/src/vnodeImport.c b/src/system/detail/src/vnodeImport.c index f50b6f4946..96aeb99e20 100644 --- a/src/system/detail/src/vnodeImport.c +++ b/src/system/detail/src/vnodeImport.c @@ -15,31 +15,24 @@ #define _DEFAULT_SOURCE #include +#include +#include #include -#include #include -#include "trpc.h" -#include "ttimer.h" #include "vnode.h" -#include "vnodeMgmt.h" -#include "vnodeShell.h" -#include "vnodeShell.h" #include "vnodeUtil.h" -#pragma GCC diagnostic ignored "-Wpointer-sign" -#pragma GCC diagnostic ignored "-Wint-conversion" -typedef struct { - SCompHeader *headList; - SCompInfo compInfo; - int last; // 0:last block in data file, 1:not the last block - int newBlocks; - int oldNumOfBlocks; - int64_t compInfoOffset; // offset for compInfo in head file - int64_t leftOffset; // copy from this offset to end of head file - int64_t hfdSize; // old head file size -} SHeadInfo; +extern void vnodeGetHeadTname(char *nHeadName, char *nLastName, int vnode, int fileId); +extern int vnodeReadColumnToMem(int fd, SCompBlock *pBlock, SField **fields, int col, char *data, int dataSize, + char *temp, char *buffer, int bufferSize); +extern int vnodeSendShellSubmitRspMsg(SShellObj *pObj, int code, int numOfPoints); +extern void vnodeGetHeadDataLname(char *headName, char *dataName, char *lastName, int vnode, int fileId); +extern int vnodeCreateEmptyCompFile(int vnode, int fileId); +extern int vnodeUpdateFreeSlot(SVnodeObj *pVnode); +extern SCacheBlock *vnodeGetFreeCacheBlock(SVnodeObj *pVnode); +#define KEY_AT_INDEX(payload, step, idx) (*(TSKEY *)((char *)(payload) + (step) * (idx))) typedef struct { void * signature; SShellObj *pShell; @@ -56,678 +49,62 @@ typedef struct { // only for file int numOfPoints; - int fileId; int64_t offset; // offset in data file - SData *sdata[TSDB_MAX_COLUMNS]; - char *buffer; - char *payload; - char *opayload; + char * payload; + char * opayload; // allocated space for payload from client int rows; } SImportInfo; -int vnodeImportData(SMeterObj *pObj, SImportInfo *pImport); +typedef struct { + // in .head file + SCompHeader *pHeader; + size_t pHeaderSize; -int vnodeGetImportStartPart(SMeterObj *pObj, char *payload, int rows, TSKEY key1) { - int i; + SCompInfo compInfo; + SCompBlock *pBlocks; + // in .data file + int blockId; + uint8_t blockLoadState; - for (i = 0; i < rows; ++i) { - TSKEY key = *((TSKEY *)(payload + i * pObj->bytesPerPoint)); - if (key >= key1) break; - } - - return i; -} - -int vnodeGetImportEndPart(SMeterObj *pObj, char *payload, int rows, char **pStart, TSKEY key0) { - int i; - - for (i = 0; i < rows; ++i) { - TSKEY key = *((TSKEY *)(payload + i * pObj->bytesPerPoint)); - if (key > key0) break; - } - - *pStart = payload + i * pObj->bytesPerPoint; - return rows - i; -} - -int vnodeCloseFileForImport(SMeterObj *pObj, SHeadInfo *pHinfo) { - SVnodeObj *pVnode = &vnodeList[pObj->vnode]; - SVnodeCfg *pCfg = &pVnode->cfg; - TSCKSUM chksum = 0; - - if (pHinfo->newBlocks == 0 || pHinfo->compInfoOffset == 0) return 0; - - if (pHinfo->oldNumOfBlocks == 0) twrite(pVnode->nfd, &chksum, sizeof(TSCKSUM)); - - int leftSize = pHinfo->hfdSize - pHinfo->leftOffset; - if (leftSize > 0) { - lseek(pVnode->hfd, pHinfo->leftOffset, SEEK_SET); - tsendfile(pVnode->nfd, pVnode->hfd, NULL, leftSize); - } - - pHinfo->compInfo.numOfBlocks += pHinfo->newBlocks; - int offset = (pHinfo->compInfo.numOfBlocks - pHinfo->oldNumOfBlocks) * sizeof(SCompBlock); - if (pHinfo->oldNumOfBlocks == 0) offset += sizeof(SCompInfo) + sizeof(TSCKSUM); - - pHinfo->headList[pObj->sid].compInfoOffset = pHinfo->compInfoOffset; - for (int sid = pObj->sid + 1; sid < pCfg->maxSessions; ++sid) { - if (pHinfo->headList[sid].compInfoOffset) pHinfo->headList[sid].compInfoOffset += offset; - } - - lseek(pVnode->nfd, TSDB_FILE_HEADER_LEN, SEEK_SET); - int tmsize = sizeof(SCompHeader) * pCfg->maxSessions + sizeof(TSCKSUM); - taosCalcChecksumAppend(0, (uint8_t *)pHinfo->headList, tmsize); - twrite(pVnode->nfd, pHinfo->headList, tmsize); - - int size = pHinfo->compInfo.numOfBlocks * sizeof(SCompBlock); - char *buffer = malloc(size); - lseek(pVnode->nfd, pHinfo->compInfoOffset + sizeof(SCompInfo), SEEK_SET); - read(pVnode->nfd, buffer, size); - SCompBlock *pBlock = (SCompBlock *)(buffer + (pHinfo->compInfo.numOfBlocks - 1) * sizeof(SCompBlock)); - - pHinfo->compInfo.uid = pObj->uid; - pHinfo->compInfo.delimiter = TSDB_VNODE_DELIMITER; - pHinfo->compInfo.last = pBlock->last; - - taosCalcChecksumAppend(0, (uint8_t *)(&pHinfo->compInfo), sizeof(SCompInfo)); - lseek(pVnode->nfd, pHinfo->compInfoOffset, SEEK_SET); - twrite(pVnode->nfd, &pHinfo->compInfo, sizeof(SCompInfo)); - - chksum = taosCalcChecksum(0, (uint8_t *)buffer, size); - lseek(pVnode->nfd, pHinfo->compInfoOffset + sizeof(SCompInfo) + size, SEEK_SET); - twrite(pVnode->nfd, &chksum, sizeof(TSCKSUM)); - free(buffer); - - vnodeCloseCommitFiles(pVnode); - - return 0; -} - -int vnodeProcessLastBlock(SImportInfo *pImport, SHeadInfo *pHinfo, SData *data[]) { - SMeterObj *pObj = pImport->pObj; - SVnodeObj *pVnode = &vnodeList[pObj->vnode]; - SCompBlock lastBlock; - int code = 0; - - if (pHinfo->compInfo.last == 0) return 0; - - // read into memory - uint64_t offset = - pHinfo->compInfoOffset + (pHinfo->compInfo.numOfBlocks - 1) * sizeof(SCompBlock) + sizeof(SCompInfo); - lseek(pVnode->hfd, offset, SEEK_SET); - read(pVnode->hfd, &lastBlock, sizeof(SCompBlock)); - assert(lastBlock.last); - - if (lastBlock.sversion != pObj->sversion) { - lseek(pVnode->lfd, lastBlock.offset, SEEK_SET); - lastBlock.offset = lseek(pVnode->dfd, 0, SEEK_END); - tsendfile(pVnode->dfd, pVnode->lfd, NULL, lastBlock.len); - - lastBlock.last = 0; - lseek(pVnode->hfd, offset, SEEK_SET); - twrite(pVnode->hfd, &lastBlock, sizeof(SCompBlock)); - } else { - vnodeReadLastBlockToMem(pObj, &lastBlock, data); - pHinfo->compInfo.numOfBlocks--; - code = lastBlock.numOfPoints; - } - - return code; -} - -int vnodeOpenFileForImport(SImportInfo *pImport, char *payload, SHeadInfo *pHinfo, SData *data[]) { - SMeterObj *pObj = pImport->pObj; - SVnodeObj *pVnode = &vnodeList[pObj->vnode]; - SVnodeCfg *pCfg = &pVnode->cfg; - TSKEY firstKey = *((TSKEY *)payload); - struct stat filestat; - int sid, rowsBefore = 0; - - if (pVnode->nfd <= 0 || firstKey > pVnode->commitLastKey) { - if (pVnode->nfd > 0) vnodeCloseFileForImport(pObj, pHinfo); - - pVnode->commitFirstKey = firstKey; - if (vnodeOpenCommitFiles(pVnode, pObj->sid) < 0) return -1; - - fstat(pVnode->hfd, &filestat); - pHinfo->hfdSize = filestat.st_size; - pHinfo->newBlocks = 0; - pHinfo->last = 1; // by default, new blockes are at the end of block list - - lseek(pVnode->hfd, TSDB_FILE_HEADER_LEN, SEEK_SET); - read(pVnode->hfd, pHinfo->headList, sizeof(SCompHeader) * pCfg->maxSessions); - - if (pHinfo->headList[pObj->sid].compInfoOffset > 0) { - lseek(pVnode->hfd, pHinfo->headList[pObj->sid].compInfoOffset, SEEK_SET); - if (read(pVnode->hfd, &pHinfo->compInfo, sizeof(SCompInfo)) != sizeof(SCompInfo)) { - dError("vid:%d sid:%d, failed to read compInfo from file:%s", pObj->vnode, pObj->sid, pVnode->cfn); - return -1; - } - - if (pHinfo->compInfo.uid == pObj->uid) { - pHinfo->compInfoOffset = pHinfo->headList[pObj->sid].compInfoOffset; - pHinfo->leftOffset = pHinfo->headList[pObj->sid].compInfoOffset + sizeof(SCompInfo); - } else { - pHinfo->headList[pObj->sid].compInfoOffset = 0; - } - } - - if ( pHinfo->headList[pObj->sid].compInfoOffset == 0 ) { - memset(&pHinfo->compInfo, 0, sizeof(SCompInfo)); - pHinfo->compInfo.uid = pObj->uid; - - for (sid = pObj->sid + 1; sid < pCfg->maxSessions; ++sid) - if (pHinfo->headList[sid].compInfoOffset > 0) break; - - pHinfo->compInfoOffset = (sid == pCfg->maxSessions) ? pHinfo->hfdSize : pHinfo->headList[sid].compInfoOffset; - pHinfo->leftOffset = pHinfo->compInfoOffset; - } - - pHinfo->oldNumOfBlocks = pHinfo->compInfo.numOfBlocks; - lseek(pVnode->hfd, 0, SEEK_SET); - lseek(pVnode->nfd, 0, SEEK_SET); - tsendfile(pVnode->nfd, pVnode->hfd, NULL, pHinfo->compInfoOffset); - twrite(pVnode->nfd, &pHinfo->compInfo, sizeof(SCompInfo)); - if (pHinfo->headList[pObj->sid].compInfoOffset > 0) lseek(pVnode->hfd, sizeof(SCompInfo), SEEK_CUR); - - if (pVnode->commitFileId < pImport->fileId) { - if (pHinfo->compInfo.numOfBlocks > 0) - pHinfo->leftOffset += pHinfo->compInfo.numOfBlocks * sizeof(SCompBlock); - - rowsBefore = vnodeProcessLastBlock(pImport, pHinfo, data); - - // copy all existing compBlockInfo - lseek(pVnode->hfd, pHinfo->compInfoOffset + sizeof(SCompInfo), SEEK_SET); - if (pHinfo->compInfo.numOfBlocks > 0) - tsendfile(pVnode->nfd, pVnode->hfd, NULL, pHinfo->compInfo.numOfBlocks * sizeof(SCompBlock)); - - } else if (pVnode->commitFileId == pImport->fileId) { - int slots = pImport->pos ? pImport->slot + 1 : pImport->slot; - pHinfo->leftOffset += slots * sizeof(SCompBlock); - - // check if last block is at last file, if it is, read into memory - if (pImport->pos == 0 && pHinfo->compInfo.numOfBlocks > 0 && pImport->slot == pHinfo->compInfo.numOfBlocks && - pHinfo->compInfo.last) { - rowsBefore = vnodeProcessLastBlock(pImport, pHinfo, data); - if ( rowsBefore > 0 ) pImport->slot--; - } - - // this block will be replaced by new blocks - if (pImport->pos > 0) pHinfo->compInfo.numOfBlocks--; - - if (pImport->slot > 0) { - lseek(pVnode->hfd, pHinfo->compInfoOffset + sizeof(SCompInfo), SEEK_SET); - tsendfile(pVnode->nfd, pVnode->hfd, NULL, pImport->slot * sizeof(SCompBlock)); - } - - if (pImport->slot < pHinfo->compInfo.numOfBlocks) - pHinfo->last = 0; // new blocks are not at the end of block list - - } else { - // nothing - - pHinfo->last = 0; // new blocks are not at the end of block list - } - } - - return rowsBefore; -} - -extern int vnodeSendShellSubmitRspMsg(SShellObj *pObj, int code, int numOfPoints); -int vnodeImportToFile(SImportInfo *pImport); - -void vnodeProcessImportTimer(void *param, void *tmrId) { - SImportInfo *pImport = (SImportInfo *)param; - if (pImport == NULL || pImport->signature != param) { - dError("import timer is messed up, signature:%p", pImport); - return; - } - - SMeterObj *pObj = pImport->pObj; - SVnodeObj *pVnode = &vnodeList[pObj->vnode]; - SCachePool *pPool = (SCachePool *)pVnode->pCachePool; - SShellObj *pShell = pImport->pShell; - - pImport->retry++; - - //slow query will block the import operation - int32_t state = vnodeSetMeterState(pObj, TSDB_METER_STATE_IMPORTING); - if (state >= TSDB_METER_STATE_DELETING) { - dError("vid:%d sid:%d id:%s, meter is deleted, failed to import, state:%d", - pObj->vnode, pObj->sid, pObj->meterId, state); - return; - } - - int32_t num = 0; - pthread_mutex_lock(&pVnode->vmutex); - num = pObj->numOfQueries; - pthread_mutex_unlock(&pVnode->vmutex); - - //if the num == 0, it will never be increased before state is set to TSDB_METER_STATE_READY - int32_t commitInProcess = 0; - pthread_mutex_lock(&pPool->vmutex); - if (((commitInProcess = pPool->commitInProcess) == 1) || num > 0 || state != TSDB_METER_STATE_READY) { - pthread_mutex_unlock(&pPool->vmutex); - vnodeClearMeterState(pObj, TSDB_METER_STATE_IMPORTING); - - if (pImport->retry < 1000) { - dTrace("vid:%d sid:%d id:%s, import failed, retry later. commit in process or queries on it, or not ready." - "commitInProcess:%d, numOfQueries:%d, state:%d", pObj->vnode, pObj->sid, pObj->meterId, - commitInProcess, num, state); - - taosTmrStart(vnodeProcessImportTimer, 10, pImport, vnodeTmrCtrl); - return; - } else { - pShell->code = TSDB_CODE_TOO_SLOW; - } - } else { - pPool->commitInProcess = 1; - pthread_mutex_unlock(&pPool->vmutex); - int code = vnodeImportData(pObj, pImport); - if (pShell) { - pShell->code = code; - pShell->numOfTotalPoints += pImport->importedRows; - } - } - - vnodeClearMeterState(pObj, TSDB_METER_STATE_IMPORTING); - - pVnode->version++; - - // send response back to shell - if (pShell) { - pShell->count--; - if (pShell->count <= 0) vnodeSendShellSubmitRspMsg(pImport->pShell, pShell->code, pShell->numOfTotalPoints); - } - - pImport->signature = NULL; - free(pImport->opayload); - free(pImport); -} - -int vnodeImportToFile(SImportInfo *pImport) { - SMeterObj *pObj = pImport->pObj; - SVnodeObj *pVnode = &vnodeList[pObj->vnode]; - SVnodeCfg *pCfg = &pVnode->cfg; - SHeadInfo headInfo; - int code = 0, col; - SCompBlock compBlock; - char * payload = pImport->payload; - int rows = pImport->rows; - SCachePool *pPool = (SCachePool *)pVnode->pCachePool; - - TSKEY lastKey = *((TSKEY *)(payload + pObj->bytesPerPoint * (rows - 1))); - TSKEY firstKey = *((TSKEY *)payload); - memset(&headInfo, 0, sizeof(headInfo)); - headInfo.headList = malloc(sizeof(SCompHeader) * pCfg->maxSessions + sizeof(TSCKSUM)); - - SData *cdata[TSDB_MAX_COLUMNS]; - char *buffer1 = - malloc(pObj->bytesPerPoint * pCfg->rowsInFileBlock + (sizeof(SData) + EXTRA_BYTES) * pObj->numOfColumns); - cdata[0] = (SData *)buffer1; + SField *pField; + size_t pFieldSize; SData *data[TSDB_MAX_COLUMNS]; - char *buffer2 = - malloc(pObj->bytesPerPoint * pCfg->rowsInFileBlock + (sizeof(SData) + EXTRA_BYTES) * pObj->numOfColumns); - data[0] = (SData *)buffer2; + char * buffer; - for (col = 1; col < pObj->numOfColumns; ++col) { - cdata[col] = (SData *)(((char *)cdata[col - 1]) + sizeof(SData) + EXTRA_BYTES + - pObj->pointsPerFileBlock * pObj->schema[col - 1].bytes); - data[col] = (SData *)(((char *)data[col - 1]) + sizeof(SData) + EXTRA_BYTES + - pObj->pointsPerFileBlock * pObj->schema[col - 1].bytes); - } + char *temp; - int rowsBefore = 0; - int rowsRead = 0; - int rowsUnread = 0; - int leftRows = rows; // left number of rows of imported data - int row, rowsToWrite; - int64_t offset[TSDB_MAX_COLUMNS]; + char * tempBuffer; + size_t tempBufferSize; + // Variables for sendfile + int64_t compInfoOffset; + int64_t nextNo0Offset; // next sid whose compInfoOffset > 0 + int64_t hfSize; + int64_t driftOffset; - if (pImport->pos > 0) { - for (col = 0; col < pObj->numOfColumns; ++col) - memcpy(data[col]->data, pImport->sdata[col]->data, pImport->pos * pObj->schema[col].bytes); + int oldNumOfBlocks; + int newNumOfBlocks; + int last; +} SImportHandle; - rowsBefore = pImport->pos; - rowsRead = pImport->pos; - rowsUnread = pImport->numOfPoints - pImport->pos; - } +typedef struct { + int slot; + int pos; + int oslot; // old slot + TSKEY nextKey; +} SBlockIter; - dTrace("vid:%d sid:%d id:%s, %d rows data will be imported to file, firstKey:%ld lastKey:%ld", - pObj->vnode, pObj->sid, pObj->meterId, rows, firstKey, lastKey); - do { - if (leftRows > 0) { - code = vnodeOpenFileForImport(pImport, payload, &headInfo, data); - if (code < 0) goto _exit; - if (code > 0) { - rowsBefore = code; - code = 0; - }; - } else { - // if payload is already imported, rows unread shall still be processed - rowsBefore = 0; - } +typedef struct { + int64_t spos; + int64_t epos; + int64_t totalRows; + char * offset[]; +} SMergeBuffer; - int rowsToProcess = pObj->pointsPerFileBlock - rowsBefore; - if (rowsToProcess > leftRows) rowsToProcess = leftRows; - - for (col = 0; col < pObj->numOfColumns; ++col) { - offset[col] = data[col]->data + rowsBefore * pObj->schema[col].bytes; - } - - row = 0; - if (leftRows > 0) { - for (row = 0; row < rowsToProcess; ++row) { - if (*((TSKEY *)payload) > pVnode->commitLastKey) break; - - for (col = 0; col < pObj->numOfColumns; ++col) { - memcpy((void *)offset[col], payload, pObj->schema[col].bytes); - payload += pObj->schema[col].bytes; - offset[col] += pObj->schema[col].bytes; - } - } - } - - leftRows -= row; - rowsToWrite = rowsBefore + row; - rowsBefore = 0; - - if (leftRows == 0 && rowsUnread > 0) { - // copy the unread - int rowsToCopy = pObj->pointsPerFileBlock - rowsToWrite; - if (rowsToCopy > rowsUnread) rowsToCopy = rowsUnread; - - for (col = 0; col < pObj->numOfColumns; ++col) { - int bytes = pObj->schema[col].bytes; - memcpy(data[col]->data + rowsToWrite * bytes, pImport->sdata[col]->data + rowsRead * bytes, rowsToCopy * bytes); - } - - rowsRead += rowsToCopy; - rowsUnread -= rowsToCopy; - rowsToWrite += rowsToCopy; - } - - for (col = 0; col < pObj->numOfColumns; ++col) { - data[col]->len = rowsToWrite * pObj->schema[col].bytes; - } - - compBlock.last = headInfo.last; - vnodeWriteBlockToFile(pObj, &compBlock, data, cdata, rowsToWrite); - twrite(pVnode->nfd, &compBlock, sizeof(SCompBlock)); - - rowsToWrite = 0; - headInfo.newBlocks++; - - } while (leftRows > 0 || rowsUnread > 0); - - if (compBlock.keyLast > pObj->lastKeyOnFile) - pObj->lastKeyOnFile = compBlock.keyLast; - - vnodeCloseFileForImport(pObj, &headInfo); - dTrace("vid:%d sid:%d id:%s, %d rows data are imported to file", pObj->vnode, pObj->sid, pObj->meterId, rows); - - SCacheInfo *pInfo = (SCacheInfo *)pObj->pCache; - pthread_mutex_lock(&pPool->vmutex); - - if (pInfo->numOfBlocks > 0) { - int slot = (pInfo->currentSlot - pInfo->numOfBlocks + 1 + pInfo->maxBlocks) % pInfo->maxBlocks; - TSKEY firstKeyInCache = *((TSKEY *)(pInfo->cacheBlocks[slot]->offset[0])); - - // data may be in commited cache, cache shall be released - if (lastKey > firstKeyInCache) { - while (slot != pInfo->commitSlot) { - SCacheBlock *pCacheBlock = pInfo->cacheBlocks[slot]; - vnodeFreeCacheBlock(pCacheBlock); - slot = (slot + 1 + pInfo->maxBlocks) % pInfo->maxBlocks; - } - - // last slot, the uncommitted slots shall be shifted - SCacheBlock *pCacheBlock = pInfo->cacheBlocks[slot]; - int points = pCacheBlock->numOfPoints - pInfo->commitPoint; - if (points > 0) { - for (int col = 0; col < pObj->numOfColumns; ++col) { - int size = points * pObj->schema[col].bytes; - memmove(pCacheBlock->offset[col], pCacheBlock->offset[col] + pObj->schema[col].bytes * pInfo->commitPoint, size); - } - } - - if (pInfo->commitPoint != pObj->pointsPerBlock) { - // commit point shall be set to 0 if last block is not full - pInfo->commitPoint = 0; - pCacheBlock->numOfPoints = points; - if (slot == pInfo->currentSlot) { - __sync_fetch_and_add(&pObj->freePoints, pInfo->commitPoint); - } - } else { - // if last block is full and committed - SCacheBlock *pCacheBlock = pInfo->cacheBlocks[slot]; - if (pCacheBlock->pMeterObj == pObj) { - vnodeFreeCacheBlock(pCacheBlock); - } - } - } - } - - if (lastKey > pObj->lastKeyOnFile) pObj->lastKeyOnFile = lastKey; - - pthread_mutex_unlock(&pPool->vmutex); - -_exit: - tfree(headInfo.headList); - tfree(buffer1); - tfree(buffer2); - tfree(pImport->buffer); - - return code; -} - -int vnodeImportToCache(SImportInfo *pImport, char *payload, int rows) { - SMeterObj *pObj = pImport->pObj; - SVnodeObj *pVnode = &vnodeList[pObj->vnode]; - SVnodeCfg *pCfg = &pVnode->cfg; - int code = -1; - SCacheInfo *pInfo = (SCacheInfo *)pObj->pCache; - int slot, pos, row, col, points, tpoints; - - char *data[TSDB_MAX_COLUMNS], *current[TSDB_MAX_COLUMNS]; - int slots = pInfo->unCommittedBlocks + 1; - int trows = slots * pObj->pointsPerBlock + rows; // max rows in buffer - int tsize = (trows / pObj->pointsPerBlock + 1) * pCfg->cacheBlockSize; - TSKEY firstKey = *((TSKEY *)payload); - TSKEY lastKey = *((TSKEY *)(payload + pObj->bytesPerPoint * (rows - 1))); - - if (pObj->freePoints < rows || pObj->freePoints < (pObj->pointsPerBlock << 1)) { - dError("vid:%d sid:%d id:%s, import failed, cache is full, freePoints:%d", pObj->vnode, pObj->sid, pObj->meterId, - pObj->freePoints); - pImport->importedRows = 0; - pImport->commit = 1; - code = TSDB_CODE_ACTION_IN_PROGRESS; - return code; - } - - dTrace("vid:%d sid:%d id:%s, %d rows data will be imported to cache, firstKey:%ld lastKey:%ld", - pObj->vnode, pObj->sid, pObj->meterId, rows, firstKey, lastKey); - - pthread_mutex_lock(&(pVnode->vmutex)); - if (firstKey < pVnode->firstKey) pVnode->firstKey = firstKey; - pthread_mutex_unlock(&(pVnode->vmutex)); - - char *buffer = malloc(tsize); // buffer to hold unCommitted data plus import data - data[0] = buffer; - current[0] = data[0]; - for (col = 1; col < pObj->numOfColumns; ++col) { - data[col] = data[col - 1] + trows * pObj->schema[col - 1].bytes; - current[col] = data[col]; - } - - // write import data into buffer first - for (row = 0; row < rows; ++row) { - for (col = 0; col < pObj->numOfColumns; ++col) { - memcpy(current[col], payload, pObj->schema[col].bytes); - payload += pObj->schema[col].bytes; - current[col] += pObj->schema[col].bytes; - } - } - - // copy the overwritten data into buffer - tpoints = rows; - pos = pImport->pos; - slot = pImport->slot; - while (1) { - points = pInfo->cacheBlocks[slot]->numOfPoints - pos; - for (col = 0; col < pObj->numOfColumns; ++col) { - int size = points * pObj->schema[col].bytes; - memcpy(current[col], pInfo->cacheBlocks[slot]->offset[col] + pos * pObj->schema[col].bytes, size); - current[col] += size; - } - pos = 0; - tpoints += points; - - if (slot == pInfo->currentSlot) break; - slot = (slot + 1) % pInfo->maxBlocks; - } - - for (col = 0; col < pObj->numOfColumns; ++col) current[col] = data[col]; - pos = pImport->pos; - - // write back to existing slots first - slot = pImport->slot; - while (1) { - points = (tpoints > pObj->pointsPerBlock - pos) ? pObj->pointsPerBlock - pos : tpoints; - SCacheBlock *pCacheBlock = pInfo->cacheBlocks[slot]; - for (col = 0; col < pObj->numOfColumns; ++col) { - int size = points * pObj->schema[col].bytes; - memcpy(pCacheBlock->offset[col] + pos * pObj->schema[col].bytes, current[col], size); - current[col] += size; - } - pCacheBlock->numOfPoints = points + pos; - pos = 0; - tpoints -= points; - - if (slot == pInfo->currentSlot) break; - slot = (slot + 1) % pInfo->maxBlocks; - } - - // allocate new cache block if there are still data left - while (tpoints > 0) { - pImport->commit = vnodeAllocateCacheBlock(pObj); - if (pImport->commit < 0) goto _exit; - points = (tpoints > pObj->pointsPerBlock) ? pObj->pointsPerBlock : tpoints; - SCacheBlock *pCacheBlock = pInfo->cacheBlocks[pInfo->currentSlot]; - for (col = 0; col < pObj->numOfColumns; ++col) { - int size = points * pObj->schema[col].bytes; - memcpy(pCacheBlock->offset[col] + pos * pObj->schema[col].bytes, current[col], size); - current[col] += size; - } - tpoints -= points; - pCacheBlock->numOfPoints = points; - } - - code = 0; - __sync_fetch_and_sub(&pObj->freePoints, rows); - dTrace("vid:%d sid:%d id:%s, %d rows data are imported to cache", pObj->vnode, pObj->sid, pObj->meterId, rows); - -_exit: - free(buffer); - return code; -} - -int vnodeFindKeyInFile(SImportInfo *pImport, int order) { - SMeterObj *pObj = pImport->pObj; - SVnodeObj *pVnode = &vnodeList[pObj->vnode]; - int code = -1; - SQuery query; - SColumnInfoEx colList[TSDB_MAX_COLUMNS] = {0}; - - TSKEY key = order ? pImport->firstKey : pImport->lastKey; - memset(&query, 0, sizeof(query)); - query.order.order = order; - query.skey = key; - query.ekey = order ? INT64_MAX : 0; - query.colList = colList; - query.numOfCols = pObj->numOfColumns; - - for (int16_t i = 0; i < pObj->numOfColumns; ++i) { - colList[i].data.colId = pObj->schema[i].colId; - colList[i].data.bytes = pObj->schema[i].bytes; - colList[i].data.type = pObj->schema[i].type; - - colList[i].colIdx = i; - colList[i].colIdxInBuf = i; - } - - int ret = vnodeSearchPointInFile(pObj, &query); - - if (ret >= 0) { - if (query.slot < 0) { - pImport->slot = 0; - pImport->pos = 0; - pImport->key = 0; - pImport->fileId = pVnode->fileId - pVnode->numOfFiles + 1; - dTrace("vid:%d sid:%d id:%s, import to head of file", pObj->vnode, pObj->sid, pObj->meterId); - code = 0; - } else if (query.slot >= 0) { - code = 0; - pImport->slot = query.slot; - pImport->pos = query.pos; - pImport->key = query.key; - pImport->fileId = query.fileId; - SCompBlock *pBlock = &query.pBlock[query.slot]; - pImport->numOfPoints = pBlock->numOfPoints; - - if (pImport->key != key) { - if (order == 0) { - pImport->pos++; - - if (pImport->pos >= pBlock->numOfPoints) { - pImport->slot++; - pImport->pos = 0; - } - } else { - if (pImport->pos < 0) pImport->pos = 0; - } - } - - if (pImport->key != key && pImport->pos > 0) { - if ( pObj->sversion != pBlock->sversion ) { - dError("vid:%d sid:%d id:%s, import sversion not matached, expected:%d received:%d", pObj->vnode, pObj->sid, - pBlock->sversion, pObj->sversion); - code = TSDB_CODE_OTHERS; - } else { - pImport->offset = pBlock->offset; - - pImport->buffer = - malloc(pObj->bytesPerPoint * pVnode->cfg.rowsInFileBlock + sizeof(SData) * pObj->numOfColumns); - pImport->sdata[0] = (SData *)pImport->buffer; - for (int col = 1; col < pObj->numOfColumns; ++col) - pImport->sdata[col] = (SData *)(((char *)pImport->sdata[col - 1]) + sizeof(SData) + - pObj->pointsPerFileBlock * pObj->schema[col - 1].bytes); - - code = vnodeReadCompBlockToMem(pObj, &query, pImport->sdata); - if (code < 0) { - code = -code; - tfree(pImport->buffer); - } - } - } - } - } else { - dError("vid:%d sid:%d id:%s, file is corrupted, import failed", pObj->vnode, pObj->sid, pObj->meterId); - code = -ret; - } - - tclose(query.hfd); - tclose(query.dfd); - tclose(query.lfd); - vnodeFreeFields(&query); - tfree(query.pBlock); - - return code; -} +int vnodeImportData(SMeterObj *pObj, SImportInfo *pImport); int vnodeFindKeyInCache(SImportInfo *pImport, int order) { - SMeterObj *pObj = pImport->pObj; + SMeterObj * pObj = pImport->pObj; int code = 0; SQuery query; SCacheInfo *pInfo = (SCacheInfo *)pObj->pCache; @@ -769,98 +146,97 @@ int vnodeFindKeyInCache(SImportInfo *pImport, int order) { return code; } -int vnodeImportStartToCache(SImportInfo *pImport, char *payload, int rows) { - int code = 0; - SMeterObj *pObj = pImport->pObj; +void vnodeGetValidDataRange(int vnode, TSKEY now, TSKEY *minKey, TSKEY *maxKey) { + SVnodeObj *pVnode = vnodeList + vnode; - code = vnodeFindKeyInCache(pImport, 1); - if (code != 0) return code; - - if (pImport->key != pImport->firstKey) { - rows = vnodeGetImportStartPart(pObj, payload, rows, pImport->key); - pImport->importedRows = rows; - code = vnodeImportToCache(pImport, payload, rows); - } else { - dTrace("vid:%d sid:%d id:%s, data is already imported to cache", pObj->vnode, pObj->sid, pObj->meterId); - } - - return code; + int64_t delta = pVnode->cfg.daysPerFile * tsMsPerDay[pVnode->cfg.precision]; + int fid = now / delta; + *minKey = (fid - pVnode->maxFiles + 1) * delta; + *maxKey = (fid + 2) * delta - 1; + return; } -int vnodeImportStartToFile(SImportInfo *pImport, char *payload, int rows) { - int code = 0; - SMeterObj *pObj = pImport->pObj; - - code = vnodeFindKeyInFile(pImport, 1); - if (code != 0) return code; - - if (pImport->key != pImport->firstKey) { - pImport->payload = payload; - pImport->rows = vnodeGetImportStartPart(pObj, payload, rows, pImport->key); - pImport->importedRows = pImport->rows; - code = vnodeImportToFile(pImport); - } else { - dTrace("vid:%d sid:%d id:%s, data is already imported to file", pObj->vnode, pObj->sid, pObj->meterId); +void vnodeProcessImportTimer(void *param, void *tmrId) { + SImportInfo *pImport = (SImportInfo *)param; + if (pImport == NULL || pImport->signature != param) { + dError("import timer is messed up, signature:%p", pImport); + return; } - return code; -} + SMeterObj * pObj = pImport->pObj; + SVnodeObj * pVnode = &vnodeList[pObj->vnode]; + SCachePool *pPool = (SCachePool *)pVnode->pCachePool; + SShellObj * pShell = pImport->pShell; -int vnodeImportWholeToFile(SImportInfo *pImport, char *payload, int rows) { - int code = 0; - SMeterObj *pObj = pImport->pObj; + pImport->retry++; - code = vnodeFindKeyInFile(pImport, 0); - if (code != 0) return code; - - if (pImport->key != pImport->lastKey) { - pImport->payload = payload; - pImport->rows = vnodeGetImportEndPart(pObj, payload, rows, &pImport->payload, pImport->key); - pImport->importedRows = pImport->rows; - code = vnodeImportToFile(pImport); - } else { - code = vnodeImportStartToFile(pImport, payload, rows); + // slow query will block the import operation + int32_t state = vnodeSetMeterState(pObj, TSDB_METER_STATE_IMPORTING); + if (state >= TSDB_METER_STATE_DELETING) { + dError("vid:%d sid:%d id:%s, meter is deleted, failed to import, state:%d", pObj->vnode, pObj->sid, pObj->meterId, + state); + return; } - return code; -} + int32_t num = 0; + pthread_mutex_lock(&pVnode->vmutex); + num = pObj->numOfQueries; + pthread_mutex_unlock(&pVnode->vmutex); -int vnodeImportWholeToCache(SImportInfo *pImport, char *payload, int rows) { - int code = 0; - SMeterObj *pObj = pImport->pObj; + // if the num == 0, it will never be increased before state is set to TSDB_METER_STATE_READY + int32_t commitInProcess = 0; + pthread_mutex_lock(&pPool->vmutex); + if (((commitInProcess = pPool->commitInProcess) == 1) || num > 0 || state != TSDB_METER_STATE_READY) { + pthread_mutex_unlock(&pPool->vmutex); + vnodeClearMeterState(pObj, TSDB_METER_STATE_IMPORTING); - code = vnodeFindKeyInCache(pImport, 0); - if (code != 0) return code; + if (pImport->retry < 1000) { + dTrace( + "vid:%d sid:%d id:%s, import failed, retry later. commit in process or queries on it, or not ready." + "commitInProcess:%d, numOfQueries:%d, state:%d", + pObj->vnode, pObj->sid, pObj->meterId, commitInProcess, num, state); - if (pImport->key != pImport->lastKey) { - char *pStart; - if ( pImport->key < pObj->lastKeyOnFile ) pImport->key = pObj->lastKeyOnFile; - rows = vnodeGetImportEndPart(pObj, payload, rows, &pStart, pImport->key); - pImport->importedRows = rows; - code = vnodeImportToCache(pImport, pStart, rows); + taosTmrStart(vnodeProcessImportTimer, 10, pImport, vnodeTmrCtrl); + return; + } else { + pShell->code = TSDB_CODE_TOO_SLOW; + } } else { - if (pImport->firstKey > pObj->lastKeyOnFile) { - code = vnodeImportStartToCache(pImport, payload, rows); - } else if (pImport->firstKey < pObj->lastKeyOnFile) { - code = vnodeImportStartToFile(pImport, payload, rows); - } else { // firstKey == pObj->lastKeyOnFile - dTrace("vid:%d sid:%d id:%s, data is already there", pObj->vnode, pObj->sid, pObj->meterId); + pPool->commitInProcess = 1; + pthread_mutex_unlock(&pPool->vmutex); + int code = vnodeImportData(pObj, pImport); + if (pShell) { + pShell->code = code; + pShell->numOfTotalPoints += pImport->importedRows; } } - return code; + vnodeClearMeterState(pObj, TSDB_METER_STATE_IMPORTING); + + pVnode->version++; + + // send response back to shell + if (pShell) { + pShell->count--; + if (pShell->count <= 0) vnodeSendShellSubmitRspMsg(pImport->pShell, pShell->code, pShell->numOfTotalPoints); + } + + pImport->signature = NULL; + free(pImport->opayload); + free(pImport); } int vnodeImportPoints(SMeterObj *pObj, char *cont, int contLen, char source, void *param, int sversion, int *pNumOfPoints, TSKEY now) { SSubmitMsg *pSubmit = (SSubmitMsg *)cont; - SVnodeObj *pVnode = &vnodeList[pObj->vnode]; + SVnodeObj * pVnode = vnodeList + pObj->vnode; int rows; - char *payload; + char * payload; int code = TSDB_CODE_ACTION_IN_PROGRESS; - SCachePool *pPool = (SCachePool *)pVnode->pCachePool; - SShellObj *pShell = (SShellObj *)param; + SCachePool *pPool = (SCachePool *)(pVnode->pCachePool); + SShellObj * pShell = (SShellObj *)param; int pointsImported = 0; + TSKEY minKey, maxKey; rows = htons(pSubmit->numOfRows); int expectedLen = rows * pObj->bytesPerPoint + sizeof(pSubmit->numOfRows); @@ -870,37 +246,35 @@ int vnodeImportPoints(SMeterObj *pObj, char *cont, int contLen, char source, voi return TSDB_CODE_WRONG_MSG_SIZE; } + // FIXME: check sversion here should not be here (Take import convert to insert case into consideration) if (sversion != pObj->sversion) { dError("vid:%d sid:%d id:%s, invalid sversion, expected:%d received:%d", pObj->vnode, pObj->sid, pObj->meterId, pObj->sversion, sversion); return TSDB_CODE_OTHERS; } + // Check timestamp context. payload = pSubmit->payLoad; - TSKEY firstKey = *(TSKEY *)payload; - TSKEY lastKey = *(TSKEY *)(payload + pObj->bytesPerPoint*(rows-1)); - int cfid = now/pVnode->cfg.daysPerFile/tsMsPerDay[pVnode->cfg.precision]; - TSKEY minAllowedKey = (cfid - pVnode->maxFiles + 1)*pVnode->cfg.daysPerFile*tsMsPerDay[pVnode->cfg.precision]; - TSKEY maxAllowedKey = (cfid + 2)*pVnode->cfg.daysPerFile*tsMsPerDay[pVnode->cfg.precision] - 1; - if (firstKey < minAllowedKey || firstKey > maxAllowedKey || lastKey < minAllowedKey || lastKey > maxAllowedKey) { - dError("vid:%d sid:%d id:%s, vnode lastKeyOnFile:%lld, data is out of range, rows:%d firstKey:%lld lastKey:%lld minAllowedKey:%lld maxAllowedKey:%lld", - pObj->vnode, pObj->sid, pObj->meterId, pVnode->lastKeyOnFile, rows, firstKey, lastKey, minAllowedKey, maxAllowedKey); + TSKEY firstKey = KEY_AT_INDEX(payload, pObj->bytesPerPoint, 0); + TSKEY lastKey = KEY_AT_INDEX(payload, pObj->bytesPerPoint, rows - 1); + assert(firstKey <= lastKey); + vnodeGetValidDataRange(pObj->vnode, now, &minKey, &maxKey); + if (firstKey < minKey || firstKey > maxKey || lastKey < minKey || lastKey > maxKey) { + dError( + "vid:%d sid:%d id:%s, invalid timestamp to import, rows:%d firstKey: %ld lastKey: %ld minAllowedKey:%ld " + "maxAllowedKey:%ld", + pObj->vnode, pObj->sid, pObj->meterId, rows, firstKey, lastKey, minKey, maxKey); return TSDB_CODE_TIMESTAMP_OUT_OF_RANGE; } - // forward to peers - if (pShell && pVnode->cfg.replications > 1) { - code = vnodeForwardToPeer(pObj, cont, contLen, TSDB_ACTION_IMPORT, sversion); - if (code != 0) return code; - } - + // FIXME: Commit log here is invalid (Take retry into consideration) if (pVnode->cfg.commitLog && source != TSDB_DATA_SOURCE_LOG) { if (pVnode->logFd < 0) return TSDB_CODE_INVALID_COMMIT_LOG; code = vnodeWriteToCommitLog(pObj, TSDB_ACTION_IMPORT, cont, contLen, sversion); if (code != 0) return code; } - if (*((TSKEY *)(pSubmit->payLoad + (rows - 1) * pObj->bytesPerPoint)) > pObj->lastKey) { + if (firstKey > pObj->lastKey) { // Just call insert vnodeClearMeterState(pObj, TSDB_METER_STATE_IMPORTING); vnodeSetMeterState(pObj, TSDB_METER_STATE_INSERT); code = vnodeInsertPoints(pObj, cont, contLen, TSDB_DATA_SOURCE_LOG, NULL, pObj->sversion, &pointsImported, now); @@ -911,18 +285,21 @@ int vnodeImportPoints(SMeterObj *pObj, char *cont, int contLen, char source, voi } vnodeClearMeterState(pObj, TSDB_METER_STATE_INSERT); - } else { + } else { // trigger import SImportInfo *pNew, import; - dTrace("vid:%d sid:%d id:%s, import %d rows data", pObj->vnode, pObj->sid, pObj->meterId, rows); + dTrace("vid:%d sid:%d id:%s, try to import %d rows data, firstKey:%ld, lastKey:%ld, object lastKey:%ld", + pObj->vnode, pObj->sid, pObj->meterId, rows, firstKey, lastKey, pObj->lastKey); memset(&import, 0, sizeof(import)); - import.firstKey = *((TSKEY *)(payload)); - import.lastKey = *((TSKEY *)(pSubmit->payLoad + (rows - 1) * pObj->bytesPerPoint)); + import.firstKey = firstKey; + import.lastKey = lastKey; import.pObj = pObj; import.pShell = pShell; import.payload = payload; import.rows = rows; + // FIXME: mutex here seems meaningless and num here still can + // be changed int32_t num = 0; pthread_mutex_lock(&pVnode->vmutex); num = pObj->numOfQueries; @@ -931,7 +308,8 @@ int vnodeImportPoints(SMeterObj *pObj, char *cont, int contLen, char source, voi int32_t commitInProcess = 0; pthread_mutex_lock(&pPool->vmutex); - if (((commitInProcess = pPool->commitInProcess) == 1) || num > 0) { + if (((commitInProcess = pPool->commitInProcess) == 1) || + num > 0) { // mutual exclusion with read (need to change here) pthread_mutex_unlock(&pPool->vmutex); pNew = (SImportInfo *)malloc(sizeof(SImportInfo)); @@ -958,6 +336,7 @@ int vnodeImportPoints(SMeterObj *pObj, char *cont, int contLen, char source, voi } } + // How about the retry? Will this also cause vnode version++? pVnode->version++; if (pShell) { @@ -968,23 +347,1309 @@ int vnodeImportPoints(SMeterObj *pObj, char *cont, int contLen, char source, voi return 0; } -//todo abort from the procedure if the meter is going to be dropped -int vnodeImportData(SMeterObj *pObj, SImportInfo *pImport) { - int code = 0; +/* Function to search keys in a range + * + * Assumption: keys in payload are in ascending order + * + * @payload: data records, key in ascending order + * @step: bytes each record takes + * @rows: number of data records + * @skey: range start (included) + * @ekey: range end (included) + * @srows: rtype, start index of records + * @nrows: rtype, number of records in range + * + * @rtype: 0 means find data in the range + * -1 means find no data in the range + */ +static int vnodeSearchKeyInRange(char *payload, int step, int rows, TSKEY skey, TSKEY ekey, int *srow, int *nrows) { + if (rows <= 0 || KEY_AT_INDEX(payload, step, 0) > ekey || KEY_AT_INDEX(payload, step, rows - 1) < skey || skey > ekey) + return -1; - if (pImport->lastKey > pObj->lastKeyOnFile) { - code = vnodeImportWholeToCache(pImport, pImport->payload, pImport->rows); - } else if (pImport->lastKey < pObj->lastKeyOnFile) { - code = vnodeImportWholeToFile(pImport, pImport->payload, pImport->rows); - } else { // lastKey == pObj->lastkeyOnFile - code = vnodeImportStartToFile(pImport, pImport->payload, pImport->rows); + int left = 0; + int right = rows - 1; + int mid; + + // Binary search the first key in payload >= skey + do { + mid = (left + right) / 2; + if (skey < KEY_AT_INDEX(payload, step, mid)) { + right = mid; + } else if (skey > KEY_AT_INDEX(payload, step, mid)) { + left = mid + 1; + } else { + break; + } + } while (left < right); + + if (skey <= KEY_AT_INDEX(payload, step, mid)) { + *srow = mid; + } else { + if (mid + 1 >= rows) { + return -1; + } else { + *srow = mid + 1; + } } - SVnodeObj *pVnode = &vnodeList[pObj->vnode]; - SCachePool *pPool = (SCachePool *)pVnode->pCachePool; - pPool->commitInProcess = 0; + assert(skey <= KEY_AT_INDEX(payload, step, *srow)); - if (pImport->commit) vnodeProcessCommitTimer(pVnode, NULL); + *nrows = 0; + for (int i = *srow; i < rows; i++) { + if (KEY_AT_INDEX(payload, step, i) <= ekey) { + (*nrows)++; + } else { + break; + } + } + + if (*nrows == 0) return -1; + + return 0; +} + +int vnodeOpenMinFilesForImport(int vnode, int fid) { + char dname[TSDB_FILENAME_LEN] = "\0"; + SVnodeObj * pVnode = vnodeList + vnode; + struct stat filestat; + int minFileSize; + + minFileSize = TSDB_FILE_HEADER_LEN + sizeof(SCompHeader) * pVnode->cfg.maxSessions + sizeof(TSCKSUM); + + vnodeGetHeadDataLname(pVnode->cfn, dname, pVnode->lfn, vnode, fid); + + // Open .head file + pVnode->hfd = open(pVnode->cfn, O_RDONLY); + if (pVnode->hfd < 0) { + dError("vid:%d, failed to open head file:%s, reason:%s", vnode, pVnode->cfn, strerror(errno)); + taosLogError("vid:%d, failed to open head file:%s, reason:%s", vnode, pVnode->cfn, strerror(errno)); + goto _error_open; + } + + fstat(pVnode->hfd, &filestat); + if (filestat.st_size < minFileSize) { + dError("vid:%d, head file:%s is corrupted", vnode, pVnode->cfn); + taosLogError("vid:%d, head file:%s corrupted", vnode, pVnode->cfn); + goto _error_open; + } + + // Open .data file + pVnode->dfd = open(dname, O_RDWR); + if (pVnode->dfd < 0) { + dError("vid:%d, failed to open data file:%s, reason:%s", vnode, dname, strerror(errno)); + taosLogError("vid:%d, failed to open data file:%s, reason:%s", vnode, dname, strerror(errno)); + goto _error_open; + } + + fstat(pVnode->dfd, &filestat); + if (filestat.st_size < TSDB_FILE_HEADER_LEN) { + dError("vid:%d, data file:%s corrupted", vnode, dname); + taosLogError("vid:%d, data file:%s corrupted", vnode, dname); + goto _error_open; + } + + // Open .last file + pVnode->lfd = open(pVnode->lfn, O_RDWR); + if (pVnode->lfd < 0) { + dError("vid:%d, failed to open last file:%s, reason:%s", vnode, pVnode->lfn, strerror(errno)); + taosLogError("vid:%d, failed to open last file:%s, reason:%s", vnode, pVnode->lfn, strerror(errno)); + goto _error_open; + } + + fstat(pVnode->lfd, &filestat); + if (filestat.st_size < TSDB_FILE_HEADER_LEN) { + dError("vid:%d, last file:%s corrupted", vnode, pVnode->lfn); + taosLogError("vid:%d, last file:%s corrupted", vnode, pVnode->lfn); + goto _error_open; + } + + return 0; + +_error_open: + if (pVnode->hfd > 0) close(pVnode->hfd); + pVnode->hfd = 0; + + if (pVnode->dfd > 0) close(pVnode->dfd); + pVnode->dfd = 0; + + if (pVnode->lfd > 0) close(pVnode->lfd); + pVnode->lfd = 0; + + return -1; +} + +/* Function to open .t file and sendfile the first part + */ +int vnodeOpenTempFilesForImport(SImportHandle *pHandle, SMeterObj *pObj, int fid) { + char dHeadName[TSDB_FILENAME_LEN] = "\0"; + SVnodeObj * pVnode = vnodeList + pObj->vnode; + struct stat filestat; + int sid; + + // cfn: .head + if (readlink(pVnode->cfn, dHeadName, TSDB_FILENAME_LEN) < 0) return -1; + + size_t len = strlen(dHeadName); + // switch head name + switch (dHeadName[len - 1]) { + case '0': + dHeadName[len - 1] = '1'; + break; + case '1': + dHeadName[len - 1] = '0'; + break; + default: + dError("vid: %d, fid: %d, head target filename not end with 0 or 1", pVnode->vnode, fid); + return -1; + } + + vnodeGetHeadTname(pVnode->nfn, NULL, pVnode->vnode, fid); + symlink(dHeadName, pVnode->nfn); + + pVnode->nfd = open(pVnode->nfn, O_RDWR | O_CREAT | O_TRUNC, S_IRWXU | S_IRWXG | S_IRWXO); + if (pVnode->nfd < 0) { + dError("vid:%d, failed to open new head file:%s, reason:%s", pVnode->vnode, pVnode->nfn, strerror(errno)); + taosLogError("vid:%d, failed to open new head file:%s, reason:%s", pVnode->vnode, pVnode->nfn, strerror(errno)); + return -1; + } + + fstat(pVnode->hfd, &filestat); + pHandle->hfSize = filestat.st_size; + + // Find the next sid whose compInfoOffset > 0 + for (sid = pObj->sid + 1; sid < pVnode->cfg.maxSessions; sid++) { + if (pHandle->pHeader[sid].compInfoOffset > 0) break; + } + + pHandle->nextNo0Offset = (sid == pVnode->cfg.maxSessions) ? pHandle->hfSize : pHandle->pHeader[sid].compInfoOffset; + + // FIXME: sendfile the original part + // TODO: Here, we need to take the deleted table case in consideration, this function + // just assume the case is handled before calling this function + if (pHandle->pHeader[pObj->sid].compInfoOffset > 0) { + pHandle->compInfoOffset = pHandle->pHeader[pObj->sid].compInfoOffset; + } else { + pHandle->compInfoOffset = pHandle->nextNo0Offset; + } + + assert(pHandle->compInfoOffset <= pHandle->hfSize); + + lseek(pVnode->hfd, 0, SEEK_SET); + lseek(pVnode->nfd, 0, SEEK_SET); + if (tsendfile(pVnode->nfd, pVnode->hfd, NULL, pHandle->compInfoOffset) < 0) { + // TODO : deal with ERROR here + } + + // Leave a SCompInfo space here + lseek(pVnode->nfd, sizeof(SCompInfo), SEEK_CUR); + + return 0; +} + +typedef enum { DATA_LOAD_TIMESTAMP = 0x1, DATA_LOAD_OTHER_DATA = 0x2 } DataLoadMod; + +/* Function to load a block data at the requirement of mod + */ +static int vnodeLoadNeededBlockData(SMeterObj *pObj, SImportHandle *pHandle, int blockId, uint8_t loadMod) { + size_t size; + int code = 0; + SCompBlock *pBlock = pHandle->pBlocks + blockId; + + assert(pBlock->sversion == pObj->sversion); + + SVnodeObj *pVnode = vnodeList + pObj->vnode; + + int dfd = pBlock->last ? pVnode->lfd : pVnode->dfd; + + if (pHandle->blockId != blockId) { + pHandle->blockId = blockId; + pHandle->blockLoadState = 0; + } + + if (pHandle->blockLoadState == 0){ // Reload pField + size = sizeof(SField) * pBlock->numOfCols + sizeof(TSCKSUM); + if (pHandle->pFieldSize < size) { + pHandle->pField = (SField *)realloc((void *)(pHandle->pField), size); + if (pHandle->pField == NULL) { + dError("vid: %d, sid: %d, meterId: %s, failed to allocate memory, size: %ul", pObj->vnode, pObj->sid, + pObj->meterId, size); + return -1; + } + pHandle->pFieldSize = size; + } + + lseek(dfd, pBlock->offset, SEEK_SET); + if (read(dfd, (void *)(pHandle->pField), pHandle->pFieldSize) < 0) { + dError("vid:%d sid:%d meterId:%s, failed to read data file, size:%ld reason:%s", pVnode->vnode, pObj->sid, + pObj->meterId, pHandle->pFieldSize, strerror(errno)); + return -1; + } + + if (!taosCheckChecksumWhole((uint8_t *)(pHandle->pField), pHandle->pFieldSize)) { + dError("vid:%d sid:%d meterId:%s, data file %s is broken since checksum mismatch", pVnode->vnode, pObj->sid, + pObj->meterId, pVnode->lfn); + return -1; + } + } + + { // Allocate necessary buffer + size = pObj->bytesPerPoint * pObj->pointsPerFileBlock + (sizeof(SData) + EXTRA_BYTES) * pObj->numOfColumns; + if (pHandle->buffer == NULL) { + pHandle->buffer = malloc(size); + if (pHandle->buffer == NULL) { + dError("vid: %d, sid: %d, meterId: %s, failed to allocate memory, size: %ul", pObj->vnode, pObj->sid, + pObj->meterId, size); + return -1; + } + + // TODO: Init data + pHandle->data[0] = (SData *)(pHandle->buffer); + for (int col = 1; col < pObj->numOfColumns; col++) { + pHandle->data[col] = (SData *)((char *)(pHandle->data[col - 1]) + sizeof(SData) + EXTRA_BYTES + + pObj->pointsPerFileBlock * pObj->schema[col - 1].bytes); + } + } + + if (pHandle->temp == NULL) { + pHandle->temp = malloc(size); + if (pHandle->temp == NULL) { + dError("vid: %d, sid: %d, meterId: %s, failed to allocate memory, size: %ul", pObj->vnode, pObj->sid, + pObj->meterId, size); + return -1; + } + } + + if (pHandle->tempBuffer == NULL) { + pHandle->tempBufferSize = pObj->maxBytes + EXTRA_BYTES; + pHandle->tempBuffer = malloc(pHandle->tempBufferSize); + if (pHandle->tempBuffer == NULL) { + dError("vid: %d, sid: %d, meterId: %s, failed to allocate memory, size: %ul", pObj->vnode, pObj->sid, + pObj->meterId, pHandle->tempBufferSize); + return -1; + } + } + } + + if ((loadMod & DATA_LOAD_TIMESTAMP) && + (~(pHandle->blockLoadState & DATA_LOAD_TIMESTAMP))) { // load only timestamp part + code = + vnodeReadColumnToMem(dfd, pBlock, &(pHandle->pField), PRIMARYKEY_TIMESTAMP_COL_INDEX, + pHandle->data[PRIMARYKEY_TIMESTAMP_COL_INDEX]->data, sizeof(TSKEY) * pBlock->numOfPoints, + pHandle->temp, pHandle->tempBuffer, pHandle->tempBufferSize); + + if (code != 0) return -1; + pHandle->blockLoadState |= DATA_LOAD_TIMESTAMP; + } + + if ((loadMod & DATA_LOAD_OTHER_DATA) && (~(pHandle->blockLoadState & DATA_LOAD_OTHER_DATA))) { // load other columns + for (int col = 1; col < pBlock->numOfCols; col++) { + code = vnodeReadColumnToMem(dfd, pBlock, &(pHandle->pField), col, pHandle->data[col]->data, + pBlock->numOfPoints * pObj->schema[col].bytes, pHandle->temp, pHandle->tempBuffer, + pHandle->tempBufferSize); + if (code != 0) return -1; + } + + pHandle->blockLoadState |= DATA_LOAD_OTHER_DATA; + } + + return 0; +} + +static int vnodeCloseImportFiles(SMeterObj *pObj, SImportHandle *pHandle) { + SVnodeObj *pVnode = vnodeList + pObj->vnode; + char dpath[TSDB_FILENAME_LEN] = "\0"; + SCompInfo compInfo; + __off_t offset = 0; + + if (pVnode->nfd > 0) { + offset = lseek(pVnode->nfd, 0, SEEK_CUR); + assert(offset == pHandle->nextNo0Offset + pHandle->driftOffset); + + { // Write the SCompInfo part + compInfo.uid = pObj->uid; + compInfo.last = pHandle->last; + compInfo.numOfBlocks = pHandle->newNumOfBlocks + pHandle->oldNumOfBlocks; + compInfo.delimiter = TSDB_VNODE_DELIMITER; + taosCalcChecksumAppend(0, (uint8_t *)(&compInfo), sizeof(SCompInfo)); + + lseek(pVnode->nfd, pHandle->compInfoOffset, SEEK_SET); + if (twrite(pVnode->nfd, (void *)(&compInfo), sizeof(SCompInfo)) < 0) { + dError("vid:%d sid:%d meterId:%s, failed to wirte SCompInfo, reason:%s", pObj->vnode, pObj->sid, pObj->meterId, + strerror(errno)); + return -1; + } + } + + // Write the rest of the SCompBlock part + if (pHandle->hfSize > pHandle->nextNo0Offset) { + lseek(pVnode->nfd, 0, SEEK_END); + lseek(pVnode->hfd, pHandle->nextNo0Offset, SEEK_SET); + if (tsendfile(pVnode->nfd, pVnode->hfd, NULL, pHandle->hfSize - pHandle->nextNo0Offset) < 0) { + dError("vid:%d sid:%d meterId:%s, failed to sendfile, size:%ld, reason:%s", pObj->vnode, pObj->sid, + pObj->meterId, pHandle->hfSize - pHandle->nextNo0Offset, strerror(errno)); + return -1; + } + } + + // Write SCompHeader part + pHandle->pHeader[pObj->sid].compInfoOffset = pHandle->compInfoOffset; + for (int sid = pObj->sid + 1; sid < pVnode->cfg.maxSessions; ++sid) { + if (pHandle->pHeader[sid].compInfoOffset > 0) { + pHandle->pHeader[sid].compInfoOffset += pHandle->driftOffset; + } + } + + taosCalcChecksumAppend(0, (uint8_t *)(pHandle->pHeader), pHandle->pHeaderSize); + lseek(pVnode->nfd, TSDB_FILE_HEADER_LEN, SEEK_SET); + if (twrite(pVnode->nfd, (void *)(pHandle->pHeader), pHandle->pHeaderSize) < 0) { + dError("vid:%d sid:%d meterId:%s, failed to wirte SCompHeader part, size:%ld, reason:%s", pObj->vnode, pObj->sid, + pObj->meterId, pHandle->pHeaderSize, strerror(errno)); + return -1; + } + } + + // Close opened files + close(pVnode->dfd); + pVnode->dfd = 0; + + close(pVnode->hfd); + pVnode->hfd = 0; + + close(pVnode->lfd); + pVnode->lfd = 0; + + if (pVnode->nfd > 0) { + close(pVnode->nfd); + pVnode->nfd = 0; + + readlink(pVnode->cfn, dpath, TSDB_FILENAME_LEN); + rename(pVnode->nfn, pVnode->cfn); + remove(dpath); + } + + return 0; +} + +void vnodeConvertRowsToCols(SMeterObj *pObj, const char *payload, int rows, SData *data[], int rowOffset) { + int sdataRow; + int offset; + + for (int row = 0; row < rows; ++row) { + sdataRow = row + rowOffset; + offset = 0; + for (int col = 0; col < pObj->numOfColumns; ++col) { + memcpy(data[col]->data + sdataRow * pObj->schema[col].bytes, payload + pObj->bytesPerPoint * row + offset, + pObj->schema[col].bytes); + + offset += pObj->schema[col].bytes; + } + } +} + +// TODO : Check the correctness +int vnodeCreateNeccessaryFiles(SVnodeObj *pVnode) { + int numOfFiles = 0, fileId, filesAdded = 0; + int vnode = pVnode->vnode; + SVnodeCfg *pCfg = &(pVnode->cfg); + + if (pVnode->lastKeyOnFile == 0) { + if (pCfg->daysPerFile == 0) pCfg->daysPerFile = 10; + pVnode->fileId = pVnode->firstKey / tsMsPerDay[pVnode->cfg.precision] / pCfg->daysPerFile; + pVnode->lastKeyOnFile = (long)(pVnode->fileId + 1) * pCfg->daysPerFile * tsMsPerDay[pVnode->cfg.precision] - 1; + pVnode->numOfFiles = 1; + if (vnodeCreateEmptyCompFile(vnode, pVnode->fileId) < 0) return -1; + } + + numOfFiles = (pVnode->lastKeyOnFile - pVnode->commitFirstKey) / tsMsPerDay[pVnode->cfg.precision] / pCfg->daysPerFile; + if (pVnode->commitFirstKey > pVnode->lastKeyOnFile) numOfFiles = -1; + + dTrace("vid:%d, commitFirstKey:%ld lastKeyOnFile:%ld numOfFiles:%d fileId:%d vnodeNumOfFiles:%d", pVnode->vnode, + pVnode->commitFirstKey, pVnode->lastKeyOnFile, numOfFiles, pVnode->fileId, pVnode->numOfFiles); + + if (numOfFiles >= pVnode->numOfFiles) { + // create empty header files backward + filesAdded = numOfFiles - pVnode->numOfFiles + 1; + for (int i = 0; i < filesAdded; ++i) { + fileId = pVnode->fileId - pVnode->numOfFiles - i; + if (vnodeCreateEmptyCompFile(vnode, fileId) < 0) return -1; + } + } else if (numOfFiles < 0) { + // create empty header files forward + pVnode->fileId++; + if (vnodeCreateEmptyCompFile(vnode, pVnode->fileId) < 0) return -1; + pVnode->lastKeyOnFile += (long)tsMsPerDay[pVnode->cfg.precision] * pCfg->daysPerFile; + filesAdded = 1; + numOfFiles = 0; // hacker way + } + + fileId = pVnode->fileId - numOfFiles; + pVnode->commitLastKey = + pVnode->lastKeyOnFile - (long)numOfFiles * tsMsPerDay[pVnode->cfg.precision] * pCfg->daysPerFile; + pVnode->commitFirstKey = pVnode->commitLastKey - (long)tsMsPerDay[pVnode->cfg.precision] * pCfg->daysPerFile + 1; + pVnode->commitFileId = fileId; + pVnode->numOfFiles = pVnode->numOfFiles + filesAdded; + + return 0; +} + +static int vnodeMergeDataIntoFile(SImportInfo *pImport, const char *payload, int rows, int fid) { + SMeterObj * pObj = (SMeterObj *)(pImport->pObj); + SVnodeObj * pVnode = vnodeList + pObj->vnode; + SImportHandle importHandle; + size_t size = 0; + SData * data[TSDB_MAX_COLUMNS]; + char * buffer = NULL; + SData * cdata[TSDB_MAX_COLUMNS]; + char * cbuffer = NULL; + SCompBlock compBlock; + TSCKSUM checksum = 0; + int pointsImported = 0; + + TSKEY delta = pVnode->cfg.daysPerFile * tsMsPerDay[pVnode->cfg.precision]; + TSKEY minFileKey = fid * delta; + TSKEY maxFileKey = minFileKey + delta - 1; + TSKEY firstKey = KEY_AT_INDEX(payload, pObj->bytesPerPoint, 0); + TSKEY lastKey = KEY_AT_INDEX(payload, pObj->bytesPerPoint, rows - 1); + + assert(firstKey >= minFileKey && firstKey <= maxFileKey && lastKey >= minFileKey && lastKey <= maxFileKey); + + // create neccessary files + pVnode->commitFirstKey = firstKey; + if (vnodeCreateNeccessaryFiles(pVnode) < 0) return -1; + + assert(pVnode->commitFileId == fid); + + // Open least files to import .head(hfd) .data(dfd) .last(lfd) + if (vnodeOpenMinFilesForImport(pObj->vnode, fid) < 0) return -1; + + memset(&importHandle, 0, sizeof(SImportHandle)); + + { // Load SCompHeader part from .head file + importHandle.pHeaderSize = sizeof(SCompHeader) * pVnode->cfg.maxSessions + sizeof(TSCKSUM); + importHandle.pHeader = (SCompHeader *)malloc(importHandle.pHeaderSize); + if (importHandle.pHeader == NULL) { + dError("vid: %d, sid: %d, meterId: %s, failed to allocate memory, size: %ul", pObj->vnode, pObj->sid, + pObj->meterId, importHandle.pHeaderSize); + goto _error_merge; + } + + lseek(pVnode->hfd, TSDB_FILE_HEADER_LEN, SEEK_SET); + if (read(pVnode->hfd, (void *)(importHandle.pHeader), importHandle.pHeaderSize) < importHandle.pHeaderSize) { + dError("vid: %d, sid: %d, meterId: %s, fid: %d failed to read SCompHeader part, reason:%s", pObj->vnode, + pObj->sid, pObj->meterId, fid, strerror(errno)); + goto _error_merge; + } + + if (!taosCheckChecksumWhole((uint8_t *)(importHandle.pHeader), importHandle.pHeaderSize)) { + dError("vid: %d, sid: %d, meterId: %s, fid: %d SCompHeader part is broken", pObj->vnode, pObj->sid, pObj->meterId, + fid); + goto _error_merge; + } + } + + { // Initialize data[] and cdata[], which is used to hold data to write to data file + size = pObj->bytesPerPoint * pVnode->cfg.rowsInFileBlock + (sizeof(SData) + EXTRA_BYTES) * pObj->numOfColumns; + + buffer = (char *)malloc(size); + if (buffer == NULL) { + dError("vid: %d, sid: %d, meterId: %s, failed to allocate memory, size: %ul", pObj->vnode, pObj->sid, + pObj->meterId, size); + goto _error_merge; + } + + cbuffer = (char *)malloc(size); + if (cbuffer == NULL) { + dError("vid: %d, sid: %d, meterId: %s, failed to allocate memory, size: %ul", pObj->vnode, pObj->sid, + pObj->meterId, size); + goto _error_merge; + } + + data[0] = (SData *)buffer; + cdata[0] = (SData *)cbuffer; + + for (int col = 1; col < pObj->numOfColumns; col++) { + data[col] = (SData *)((char *)data[col - 1] + sizeof(SData) + EXTRA_BYTES + + pObj->pointsPerFileBlock * pObj->schema[col - 1].bytes); + cdata[col] = (SData *)((char *)cdata[col - 1] + sizeof(SData) + EXTRA_BYTES + + pObj->pointsPerFileBlock * pObj->schema[col - 1].bytes); + } + } + + if (importHandle.pHeader[pObj->sid].compInfoOffset == 0) { // No data in this file, just write it + _write_empty_point: + if (vnodeOpenTempFilesForImport(&importHandle, pObj, fid) < 0) { + goto _error_merge; + } + importHandle.oldNumOfBlocks = 0; + importHandle.driftOffset += sizeof(SCompInfo); + + for (int rowsWritten = 0; rowsWritten < rows;) { + int rowsToWrite = MIN(pVnode->cfg.rowsInFileBlock, (rows - rowsWritten) /* the rows left */); + vnodeConvertRowsToCols(pObj, payload + rowsWritten * pObj->bytesPerPoint, rowsToWrite, data, 0); + pointsImported += rowsToWrite; + + // TODO : Write the block to the file + compBlock.last = 1; + if (vnodeWriteBlockToFile(pObj, &compBlock, data, cdata, rowsToWrite) < 0) { + // TODO: deal with ERROR here + } + + importHandle.last = compBlock.last; + + checksum = taosCalcChecksum(checksum, (uint8_t *)(&compBlock), sizeof(SCompBlock)); + twrite(pVnode->nfd, &compBlock, sizeof(SCompBlock)); + importHandle.newNumOfBlocks++; + importHandle.driftOffset += sizeof(SCompBlock); + + rowsWritten += rowsToWrite; + } + twrite(pVnode->nfd, &checksum, sizeof(TSCKSUM)); + importHandle.driftOffset += sizeof(TSCKSUM); + } else { // Else if there are old data in this file. + { // load SCompInfo and SCompBlock part + lseek(pVnode->hfd, importHandle.pHeader[pObj->sid].compInfoOffset, SEEK_SET); + if (read(pVnode->hfd, (void *)(&(importHandle.compInfo)), sizeof(SCompInfo)) < sizeof(SCompInfo)) { + dError("vid:%d sid:%d meterId:%s, failed to read .head file, reason:%s", pVnode->vnode, pObj->sid, + pObj->meterId, strerror(errno)); + goto _error_merge; + } + + if ((importHandle.compInfo.delimiter != TSDB_VNODE_DELIMITER) || + (!taosCheckChecksumWhole((uint8_t *)(&(importHandle.compInfo)), sizeof(SCompInfo)))) { + dError("vid:%d sid:%d meterId:%s, .head file %s is broken, delemeter:%x", pVnode->vnode, pObj->sid, + pObj->meterId, pVnode->cfn, importHandle.compInfo.delimiter); + goto _error_merge; + } + + { // Check the context of SCompInfo part + if (importHandle.compInfo.uid != pObj->uid) { // The data belongs to the other meter + goto _write_empty_point; + } + } + + importHandle.oldNumOfBlocks = importHandle.compInfo.numOfBlocks; + importHandle.last = importHandle.compInfo.last; + + size = sizeof(SCompBlock) * importHandle.compInfo.numOfBlocks + sizeof(TSCKSUM); + importHandle.pBlocks = (SCompBlock *)malloc(size); + if (importHandle.pBlocks == NULL) { + dError("vid:%d sid:%d meterId:%s, failed to allocate importHandle.pBlock, size:%ul", pVnode->vnode, pObj->sid, + pObj->meterId, size); + goto _error_merge; + } + + if (read(pVnode->hfd, (void *)(importHandle.pBlocks), size) < size) { + dError("vid:%d sid:%d meterId:%s, failed to read importHandle.pBlock, reason:%s", pVnode->vnode, pObj->sid, + pObj->meterId, strerror(errno)); + goto _error_merge; + } + + if (!taosCheckChecksumWhole((uint8_t *)(importHandle.pBlocks), size)) { + dError("vid:%d sid:%d meterId:%s, pBlock part is broken in %s", pVnode->vnode, pObj->sid, pObj->meterId, + pVnode->cfn); + goto _error_merge; + } + } + + /* Now we have _payload_, we have _importHandle.pBlocks_, just merge payload into the importHandle.pBlocks + * + * Input: payload, pObj->bytesPerBlock, rows, importHandle.pBlocks + */ + { + int payloadIter = 0; + SBlockIter blockIter = {0, 0, 0, 0}; + + while (1) { + if (payloadIter >= rows) { // payload end, break + // write the remaining blocks to the file + if (pVnode->nfd > 0) { + int blocksLeft = importHandle.compInfo.numOfBlocks - blockIter.oslot; + if (blocksLeft > 0) { + checksum = taosCalcChecksum(checksum, (uint8_t *)(importHandle.pBlocks + blockIter.oslot), + sizeof(SCompBlock) * blocksLeft); + if (twrite(pVnode->nfd, (void *)(importHandle.pBlocks + blockIter.oslot), + sizeof(SCompBlock) * blocksLeft) < 0) { + dError("vid:%d sid:%d meterId:%s, failed to write %s file, size:%ul, reason:%s", pVnode->vnode, + pObj->sid, pObj->meterId, pVnode->nfn, sizeof(SCompBlock) * blocksLeft, strerror(errno)); + goto _error_merge; + } + } + + if (twrite(pVnode->nfd, (void *)(&checksum), sizeof(TSCKSUM)) < 0) { + dError("vid:%d sid:%d meterId:%s, failed to write %s file, size:%ul, reason:%s", pVnode->vnode, pObj->sid, + pObj->meterId, pVnode->nfn, sizeof(TSCKSUM), strerror(errno)); + goto _error_merge; + } + } + break; + } + + if (blockIter.slot >= importHandle.compInfo.numOfBlocks) { // blocks end, break + assert(false); + + // Should never come here + int rowsLeft = rows - payloadIter; + if (pVnode->nfd > 0 && rowsLeft > 0) { + // TODO : Convert into while here + vnodeConvertRowsToCols(pObj, payload + pObj->bytesPerPoint * payloadIter, rowsLeft, data, 0); + pointsImported++; + + assert(importHandle.last == 0); + + compBlock.last = 1; + if (vnodeWriteBlockToFile(pObj, &compBlock, data, cdata, rows - payloadIter) < 0) { + // TODO : + } + + checksum = taosCalcChecksum(checksum, (uint8_t *)(&compBlock), sizeof(SCompBlock)); + importHandle.newNumOfBlocks++; + importHandle.driftOffset += sizeof(SCompBlock); + importHandle.last = compBlock.last; + twrite(pVnode->nfd, (void *)(&compBlock), sizeof(SCompBlock)); + twrite(pVnode->nfd, (void *)(&checksum), sizeof(TSCKSUM)); + } + break; + } + + TSKEY key = KEY_AT_INDEX(payload, pObj->bytesPerPoint, payloadIter); + + { // Binary search the (slot, pos) which is >= key as well as nextKey + int left = blockIter.slot; + int right = importHandle.compInfo.numOfBlocks - 1; + TSKEY minKey = importHandle.pBlocks[left].keyFirst; + TSKEY maxKey = importHandle.pBlocks[right].keyLast; + + assert(minKey <= maxKey); + + if (key < minKey) { // Case 1. write just ahead the blockIter.slot + blockIter.slot = left; + blockIter.pos = 0; + blockIter.nextKey = minKey; + } else if (key > maxKey) { // Case 2. write to the end + if (importHandle.pBlocks[right].last) { // Case 2.1 last block in .last file, need to merge + assert(importHandle.last != 0); + importHandle.last = 0; + blockIter.slot = right; + blockIter.pos = importHandle.pBlocks[right].numOfPoints; + } else { // Case 2.2 just write after the last block + blockIter.slot = right + 1; + blockIter.pos = 0; + } + blockIter.nextKey = maxFileKey + 1; + } else { // Case 3. need to search the block for slot and pos + if (key == minKey || key == maxKey) { + payloadIter++; + continue; + } + + // Here: minKey < key < maxKey + + int mid; + TSKEY blockMinKey; + TSKEY blockMaxKey; + + // Binary search the slot + do { + mid = (left + right) / 2; + blockMinKey = importHandle.pBlocks[mid].keyFirst; + blockMaxKey = importHandle.pBlocks[mid].keyLast; + + assert(blockMinKey <= blockMaxKey); + + if (key < blockMinKey) { + right = mid; + } else if (key > blockMaxKey) { + left = mid + 1; + } else { /* blockMinKey <= key <= blockMaxKey */ + break; + } + } while (left < right); + + if (key == blockMinKey || key == blockMaxKey) { // duplicate key + payloadIter++; + continue; + } + + // Get the slot + if (key > blockMaxKey) { /* pos = 0 or pos = ? */ + blockIter.slot = mid + 1; + } else { /* key < blockMinKey (pos = 0) || (key > blockMinKey && key < blockMaxKey) (pos=?) */ + blockIter.slot = mid; + } + + // Get the pos + assert(blockIter.slot < importHandle.compInfo.numOfBlocks); + + if (key == importHandle.pBlocks[blockIter.slot].keyFirst || + key == importHandle.pBlocks[blockIter.slot].keyLast) { + payloadIter++; + continue; + } + + assert(key < importHandle.pBlocks[blockIter.slot].keyLast); + + /* */ + if (key < importHandle.pBlocks[blockIter.slot].keyFirst) { + blockIter.pos = 0; + blockIter.nextKey = importHandle.pBlocks[blockIter.slot].keyFirst; + } else { + SCompBlock *pBlock = importHandle.pBlocks + blockIter.slot; + if (pBlock->sversion != pObj->sversion) { /*TODO*/ + } + if (vnodeLoadNeededBlockData(pObj, &importHandle, blockIter.slot, DATA_LOAD_TIMESTAMP) < 0) { + } + int pos = (*vnodeSearchKeyFunc[pObj->searchAlgorithm])( + importHandle.data[PRIMARYKEY_TIMESTAMP_COL_INDEX]->data, pBlock->numOfPoints, key, TSQL_SO_ASC); + assert(pos != 0); + if (KEY_AT_INDEX(importHandle.data[PRIMARYKEY_TIMESTAMP_COL_INDEX]->data, sizeof(TSKEY), pos) == key) { + payloadIter++; + continue; + } + + blockIter.pos = pos; + blockIter.nextKey = (blockIter.slot + 1 < importHandle.compInfo.numOfBlocks) + ? importHandle.pBlocks[blockIter.slot + 1].keyFirst + : maxFileKey + 1; + // Need to merge with this block + if (importHandle.pBlocks[blockIter.slot].last) { // this is to merge with the last block + assert((blockIter.slot == (importHandle.compInfo.numOfBlocks - 1))); + importHandle.last = 0; + } + } + } + } + + // Open the new .t file if not opened yet. + if (pVnode->nfd <= 0) { + if (vnodeOpenTempFilesForImport(&importHandle, pObj, fid) < 0) { + goto _error_merge; + } + } + + if (blockIter.slot > blockIter.oslot) { // write blocks in range [blockIter.oslot, blockIter.slot) to .t file + checksum = taosCalcChecksum(checksum, (uint8_t *)(importHandle.pBlocks + blockIter.oslot), + sizeof(SCompBlock) * (blockIter.slot - blockIter.oslot)); + if (twrite(pVnode->nfd, (void *)(importHandle.pBlocks + blockIter.oslot), + sizeof(SCompBlock) * (blockIter.slot - blockIter.oslot)) < 0) { + dError("vid:%d sid:%d meterId:%s, failed to write %s file, size:%ul, reason:%s", pVnode->vnode, pObj->sid, + pObj->meterId, pVnode->nfn, sizeof(SCompBlock) * (blockIter.slot - blockIter.oslot), + strerror(errno)); + goto _error_merge; + } + + blockIter.oslot = blockIter.slot; + } + + if (blockIter.pos == 0) { // No need to merge + // copy payload part to data + int rowOffset = 0; + for (; payloadIter < rows; rowOffset++) { + if (KEY_AT_INDEX(payload, pObj->bytesPerPoint, payloadIter) >= blockIter.nextKey) break; + + vnodeConvertRowsToCols(pObj, payload + pObj->bytesPerPoint * payloadIter, 1, data, rowOffset); + pointsImported++; + payloadIter++; + } + + // write directly to .data file + compBlock.last = 0; + if (vnodeWriteBlockToFile(pObj, &compBlock, data, cdata, rowOffset) < 0) { + // TODO: Deal with the ERROR here + } + + checksum = taosCalcChecksum(checksum, (uint8_t *)(&compBlock), sizeof(SCompBlock)); + if (twrite(pVnode->nfd, &compBlock, sizeof(SCompBlock)) < 0) { + // TODO : deal with the ERROR here + } + importHandle.newNumOfBlocks++; + importHandle.driftOffset += sizeof(SCompBlock); + } else { // Merge block and payload from payloadIter + + if (vnodeLoadNeededBlockData(pObj, &importHandle, blockIter.slot, + DATA_LOAD_TIMESTAMP | DATA_LOAD_OTHER_DATA) < 0) { // Load neccessary blocks + goto _error_merge; + } + + importHandle.oldNumOfBlocks--; + importHandle.driftOffset -= sizeof(SCompBlock); + + int rowOffset = blockIter.pos; // counter for data + + // Copy the front part + for (int col = 0; col < pObj->numOfColumns; col++) { + memcpy((void *)(data[col]->data), (void *)(importHandle.data[col]->data), + pObj->schema[col].bytes * blockIter.pos); + } + + // Merge part + while (1) { + if (rowOffset >= pVnode->cfg.rowsInFileBlock) { // data full in a block to commit + compBlock.last = 0; + if (vnodeWriteBlockToFile(pObj, &compBlock, data, cdata, rowOffset) < 0) { + // TODO : deal with the ERROR here + } + + checksum = taosCalcChecksum(checksum, (uint8_t *)(&compBlock), sizeof(SCompBlock)); + if (twrite(pVnode->nfd, (void *)(&compBlock), sizeof(SCompBlock)) < 0) { + dError("vid:%d sid:%d meterId:%s, failed to write %s file, size:%ul, reason:%s", pVnode->vnode, + pObj->sid, pObj->meterId, pVnode->nfn, sizeof(SCompBlock), strerror(errno)); + goto _error_merge; + } + importHandle.newNumOfBlocks++; + importHandle.driftOffset += sizeof(SCompBlock); + rowOffset = 0; + } + + if ((payloadIter >= rows || KEY_AT_INDEX(payload, pObj->bytesPerPoint, payloadIter) >= blockIter.nextKey) && + blockIter.pos >= importHandle.pBlocks[blockIter.slot].numOfPoints) + break; + + if (payloadIter >= rows || + KEY_AT_INDEX(payload, pObj->bytesPerPoint, payloadIter) >= blockIter.nextKey) { // payload end + for (int col = 0; col < pObj->numOfColumns; col++) { + memcpy(data[col]->data + rowOffset * pObj->schema[col].bytes, + importHandle.data[col]->data + pObj->schema[col].bytes * blockIter.pos, pObj->schema[col].bytes); + } + blockIter.pos++; + rowOffset++; + } else if (blockIter.pos >= importHandle.pBlocks[blockIter.slot].numOfPoints) { // block end + vnodeConvertRowsToCols(pObj, payload + pObj->bytesPerPoint * payloadIter, 1, data, rowOffset); + pointsImported++; + payloadIter++; + rowOffset++; + } else { + if (KEY_AT_INDEX(payload, pObj->bytesPerPoint, payloadIter) == + KEY_AT_INDEX(importHandle.data[PRIMARYKEY_TIMESTAMP_COL_INDEX]->data, sizeof(TSKEY), + blockIter.pos)) { // duplicate key + payloadIter++; + continue; + } else if (KEY_AT_INDEX(payload, pObj->bytesPerPoint, payloadIter) < + KEY_AT_INDEX(importHandle.data[PRIMARYKEY_TIMESTAMP_COL_INDEX]->data, sizeof(TSKEY), + blockIter.pos)) { + vnodeConvertRowsToCols(pObj, payload + pObj->bytesPerPoint * payloadIter, 1, data, rowOffset); + pointsImported++; + payloadIter++; + rowOffset++; + } else { + for (int col = 0; col < pObj->numOfColumns; col++) { + memcpy(data[col]->data + rowOffset * pObj->schema[col].bytes, + importHandle.data[col]->data + pObj->schema[col].bytes * blockIter.pos, + pObj->schema[col].bytes); + } + blockIter.pos++; + rowOffset++; + } + } + } + if (rowOffset > 0) { // data full in a block to commit + compBlock.last = 0; + if (vnodeWriteBlockToFile(pObj, &compBlock, data, cdata, rowOffset) < 0) { + // TODO : deal with the ERROR here + } + + checksum = taosCalcChecksum(checksum, (uint8_t *)(&compBlock), sizeof(SCompBlock)); + if (twrite(pVnode->nfd, (void *)(&compBlock), sizeof(SCompBlock)) < 0) { + dError("vid:%d sid:%d meterId:%s, failed to write %s file, size:%ul, reason:%s", pVnode->vnode, pObj->sid, + pObj->meterId, pVnode->nfn, sizeof(SCompBlock), strerror(errno)); + goto _error_merge; + } + importHandle.newNumOfBlocks++; + importHandle.driftOffset += sizeof(SCompBlock); + rowOffset = 0; + } + + blockIter.slot++; + blockIter.oslot = blockIter.slot; + } + } + } + } + + // Write the SCompInfo part + if (vnodeCloseImportFiles(pObj, &importHandle) < 0) { + goto _error_merge; + } + + pImport->importedRows += pointsImported; + + // TODO: free the allocated memory + tfree(buffer); + tfree(cbuffer); + tfree(importHandle.pHeader); + tfree(importHandle.pBlocks); + tfree(importHandle.pField); + tfree(importHandle.buffer); + tfree(importHandle.temp); + tfree(importHandle.tempBuffer); + + return 0; + +_error_merge: + tfree(buffer); + tfree(cbuffer); + tfree(importHandle.pHeader); + tfree(importHandle.pBlocks); + tfree(importHandle.pField); + tfree(importHandle.buffer); + tfree(importHandle.temp); + tfree(importHandle.tempBuffer); + + close(pVnode->dfd); + pVnode->dfd = 0; + + close(pVnode->hfd); + pVnode->hfd = 0; + + close(pVnode->lfd); + pVnode->lfd = 0; + + if (pVnode->nfd > 0) { + close(pVnode->nfd); + pVnode->nfd = 0; + remove(pVnode->nfn); + } + + return -1; +} + +#define FORWARD_ITER(iter, step, slotLimit, posLimit) \ + { \ + if ((iter.pos) + (step) < (posLimit)) { \ + (iter.pos) = (iter.pos) + (step); \ + } else { \ + (iter.pos) = 0; \ + (iter.slot) = ((iter.slot) + 1) % (slotLimit); \ + } \ + } + +int isCacheEnd(SBlockIter iter, SMeterObj *pMeter) { + SCacheInfo *pInfo = (SCacheInfo *)(pMeter->pCache); + int slot = 0; + int pos = 0; + + if (pInfo->cacheBlocks[pInfo->currentSlot]->numOfPoints == pMeter->pointsPerBlock) { + slot = (pInfo->currentSlot + 1) % (pInfo->maxBlocks); + pos = 0; + } else { + slot = pInfo->currentSlot; + pos = pInfo->cacheBlocks[pInfo->currentSlot]->numOfPoints; + } + return ((iter.slot == slot) && (iter.pos == pos)); +} + +int vnodeImportDataToCache(SImportInfo *pImport, const char *payload, const int rows) { + SMeterObj * pObj = pImport->pObj; + SVnodeObj * pVnode = vnodeList + pObj->vnode; + int code = -1; + SCacheInfo * pInfo = (SCacheInfo *)(pObj->pCache); + int payloadIter; + SCachePool * pPool = pVnode->pCachePool; + int isCacheIterEnd = 0; + int spayloadIter = 0; + int isAppendData = 0; + int rowsImported = 0; + int totalRows = 0; + size_t size = 0; + SMergeBuffer *pBuffer = NULL; + + TSKEY firstKey = KEY_AT_INDEX(payload, pObj->bytesPerPoint, 0); + TSKEY lastKey = KEY_AT_INDEX(payload, pObj->bytesPerPoint, rows - 1); + + assert(firstKey <= lastKey && firstKey > pObj->lastKeyOnFile); + + // TODO: make this condition less strict + if (pObj->freePoints < rows || pObj->freePoints < (pObj->pointsPerBlock << 1)) { // No free room to hold the data + dError("vid:%d sid:%d id:%s, import failed, cache is full, freePoints:%d", pObj->vnode, pObj->sid, pObj->meterId, + pObj->freePoints); + pImport->importedRows = 0; + pImport->commit = 1; + code = TSDB_CODE_ACTION_IN_PROGRESS; + return code; + } + + if (pInfo->numOfBlocks == 0) { + if (vnodeAllocateCacheBlock(pObj) < 0) { + // TODO: deal with the ERROR here + } + } + + // Find the first importable record from payload + pImport->lastKey = lastKey; + for (payloadIter = 0; payloadIter < rows; payloadIter++) { + TSKEY key = KEY_AT_INDEX(payload, pObj->bytesPerPoint, payloadIter); + if (key == pObj->lastKey) continue; + if (key > pObj->lastKey) { // Just as insert + pImport->slot = pInfo->currentSlot; + pImport->pos = pInfo->cacheBlocks[pImport->slot]->numOfPoints; + isCacheIterEnd = 1; + break; + } else { + pImport->firstKey = key; + if (vnodeFindKeyInCache(pImport, 1) < 0) { + goto _exit; + } + + if (pImport->firstKey != pImport->key) break; + } + } + + if (payloadIter == rows) { + pImport->importedRows = 0; + code = 0; + goto _exit; + } + + spayloadIter = payloadIter; + if (pImport->pos == pObj->pointsPerBlock) assert(isCacheIterEnd); + + // Allocate a new merge buffer work as buffer + totalRows = pObj->pointsPerBlock + rows - payloadIter + 1; + size = sizeof(SMergeBuffer) + sizeof(char *) * pObj->numOfColumns + pObj->bytesPerPoint * totalRows; + pBuffer = (SMergeBuffer *)malloc(size); + if (pBuffer == NULL) { + dError("vid:%d sid:%d meterId:%s, failed to allocate memory, size:%d", pObj->vnode, pObj->sid, pObj->meterId, size); + return code; + } + pBuffer->spos = 0; + pBuffer->epos = 0; + pBuffer->totalRows = totalRows; + pBuffer->offset[0] = (char *)pBuffer + sizeof(SMergeBuffer) + sizeof(char *) * pObj->numOfColumns; + for (int col = 1; col < pObj->numOfColumns; col++) { + pBuffer->offset[col] = pBuffer->offset[col - 1] + pObj->schema[col - 1].bytes * totalRows; + } + + // TODO: take pImport->pos = pObj->pointsPerBlock into consideration + { // Do the merge staff + SBlockIter cacheIter = {pImport->slot, pImport->pos, 0, 0}; // Iter to traverse old cache data + SBlockIter writeIter = {pImport->slot, pImport->pos, 0, 0}; // Iter to write data to cache + int availPoints = pObj->pointsPerBlock - pInfo->cacheBlocks[pInfo->currentSlot]->numOfPoints; + + assert(availPoints >= 0); + + while (1) { + if ((payloadIter >= rows) && isCacheIterEnd) break; + + if ((pBuffer->epos + 1) % pBuffer->totalRows == pBuffer->spos) { // merge buffer is full, flush + if (writeIter.pos == pObj->pointsPerBlock) { + writeIter.pos = 0; + writeIter.slot = (writeIter.slot + 1) % pInfo->maxBlocks; + } + + while (pBuffer->spos != pBuffer->epos) { + if (writeIter.slot == cacheIter.slot && writeIter.pos == cacheIter.pos) break; + for (int col = 0; col < pObj->numOfColumns; col++) { + memcpy(pInfo->cacheBlocks[writeIter.slot]->offset[col] + pObj->schema[col].bytes * writeIter.pos, + pBuffer->offset[col] + pObj->schema[col].bytes * pBuffer->spos, pObj->schema[col].bytes); + } + + if (writeIter.pos + 1 < pObj->pointsPerBlock) { + writeIter.pos++; + } else { + pInfo->cacheBlocks[writeIter.slot]->numOfPoints = writeIter.pos + 1; + writeIter.slot = (writeIter.slot + 1) % pInfo->maxBlocks; + writeIter.pos = 0; + } + + pBuffer->spos = (pBuffer->spos + 1) % pBuffer->totalRows; + } + } + + if ((payloadIter >= rows) || + ((!isCacheIterEnd) && + (KEY_AT_INDEX(payload, pObj->bytesPerPoint, payloadIter) > + KEY_AT_INDEX(pInfo->cacheBlocks[cacheIter.slot]->offset[0], sizeof(TSKEY), + cacheIter.pos)))) { // if (payload end || (cacheIter not end && payloadKey > blockKey)) + for (int col = 0; col < pObj->numOfColumns; col++) { + memcpy(pBuffer->offset[col] + pObj->schema[col].bytes * pBuffer->epos, + pInfo->cacheBlocks[cacheIter.slot]->offset[col] + pObj->schema[col].bytes * cacheIter.pos, + pObj->schema[col].bytes); + } + FORWARD_ITER(cacheIter, 1, pInfo->maxBlocks, pObj->pointsPerBlock); + isCacheIterEnd = isCacheEnd(cacheIter, pObj); + } else if ((isCacheIterEnd) || + ((payloadIter < rows) && + (KEY_AT_INDEX(payload, pObj->bytesPerPoint, payloadIter) < + KEY_AT_INDEX(pInfo->cacheBlocks[cacheIter.slot]->offset[0], sizeof(TSKEY), + cacheIter.pos)))) { // cacheIter end || (payloadIter not end && payloadKey < blockKey) + if (availPoints == 0) { // Need to allocate a new cache block + pthread_mutex_lock(&(pPool->vmutex)); + SCacheBlock *pNewBlock = vnodeGetFreeCacheBlock(pVnode); + if (pNewBlock == NULL) { // Failed to allocate a new cache block + pthread_mutex_unlock(&(pPool->vmutex)); + payloadIter = rows; + code = TSDB_CODE_ACTION_IN_PROGRESS; + pImport->commit = 1; + continue; + } + + pNewBlock->pMeterObj = pObj; + pNewBlock->offset[0] = (char *)pNewBlock + sizeof(SCacheBlock) + sizeof(char *) * pObj->numOfColumns; + for (int col = 1; col < pObj->numOfColumns; col++) + pNewBlock->offset[col] = pNewBlock->offset[col - 1] + pObj->schema[col - 1].bytes * pObj->pointsPerBlock; + + int newSlot = (writeIter.slot + 1) % pInfo->maxBlocks; + pInfo->blocks++; + int tblockId = pInfo->blocks; + + if (writeIter.slot != pInfo->currentSlot) { + for (int tslot = pInfo->currentSlot; tslot != writeIter.slot;) { + int nextSlot = (tslot + 1) % pInfo->maxBlocks; + pInfo->cacheBlocks[nextSlot] = pInfo->cacheBlocks[tslot]; + pInfo->cacheBlocks[nextSlot]->slot = nextSlot; + pInfo->cacheBlocks[nextSlot]->blockId = tblockId--; + tslot = (tslot - 1 + pInfo->maxBlocks) % pInfo->maxBlocks; + } + } + + int index = pNewBlock->index; + if (cacheIter.slot == writeIter.slot) { + pNewBlock->numOfPoints = pInfo->cacheBlocks[cacheIter.slot]->numOfPoints; + int pointsLeft = pInfo->cacheBlocks[cacheIter.slot]->numOfPoints - cacheIter.pos; + if (pointsLeft > 0) { + for (int col = 0; col < pObj->numOfColumns; col++) { + memcpy((void *)(pNewBlock->offset[col]), + pInfo->cacheBlocks[cacheIter.slot]->offset[col] + pObj->schema[col].bytes * cacheIter.pos, + pObj->schema[col].bytes * pointsLeft); + } + } + } + pNewBlock->blockId = tblockId; + pNewBlock->slot = newSlot; + pNewBlock->index = index; + pInfo->cacheBlocks[newSlot] = pNewBlock; + pInfo->numOfBlocks++; + pInfo->unCommittedBlocks++; + pInfo->currentSlot = (pInfo->currentSlot + 1) % pInfo->maxBlocks; + pthread_mutex_unlock(&(pPool->vmutex)); + cacheIter.slot = (cacheIter.slot + 1) % pInfo->maxBlocks; + // move a cache of data forward + availPoints = pObj->pointsPerBlock; + } + + int offset = 0; + for (int col = 0; col < pObj->numOfColumns; col++) { + memcpy(pBuffer->offset[col] + pObj->schema[col].bytes * pBuffer->epos, + payload + pObj->bytesPerPoint * payloadIter + offset, pObj->schema[col].bytes); + offset += pObj->schema[col].bytes; + } + if (spayloadIter == payloadIter) {// update pVnode->firstKey + pthread_mutex_lock(&(pVnode->vmutex)); + if (KEY_AT_INDEX(payload, pObj->bytesPerPoint, payloadIter) < pVnode->firstKey) pVnode->firstKey = firstKey; + pthread_mutex_unlock(&(pVnode->vmutex)); + } + if (isCacheIterEnd) { + pObj->lastKey = KEY_AT_INDEX(payload, pObj->bytesPerPoint, payloadIter); + if (!isAppendData) isAppendData = 1; + } + + rowsImported++; + availPoints--; + payloadIter++; + + } else { + payloadIter++; + continue; + } + pBuffer->epos = (pBuffer->epos + 1) % pBuffer->totalRows; + } + + if (pBuffer->spos != pBuffer->epos) { + if (writeIter.pos == pObj->pointsPerBlock) { + writeIter.pos = 0; + writeIter.slot = (writeIter.slot + 1) % pInfo->maxBlocks; + } + while (pBuffer->spos != pBuffer->epos) { + for (int col = 0; col < pObj->numOfColumns; col++) { + memcpy(pInfo->cacheBlocks[writeIter.slot]->offset[col] + pObj->schema[col].bytes * writeIter.pos, + pBuffer->offset[col] + pObj->schema[col].bytes * pBuffer->spos, pObj->schema[col].bytes); + } + + if (writeIter.pos + 1 < pObj->pointsPerBlock) { + writeIter.pos++; + } else { + pInfo->cacheBlocks[writeIter.slot]->numOfPoints = writeIter.pos + 1; + writeIter.slot = (writeIter.slot + 1) % pInfo->maxBlocks; + writeIter.pos = 0; + } + + pBuffer->spos = (pBuffer->spos + 1) % pBuffer->totalRows; + } + + if (writeIter.pos != 0) pInfo->cacheBlocks[writeIter.slot]->numOfPoints = writeIter.pos; + } + + if (isAppendData) { + pthread_mutex_lock(&(pVnode->vmutex)); + if (pObj->lastKey > pVnode->lastKey) pVnode->lastKey = pObj->lastKey; + pthread_mutex_unlock(&(pVnode->vmutex)); + } + } + pImport->importedRows += rowsImported; + + code = 0; + +_exit: + tfree(pBuffer); + return code; +} + +int vnodeImportDataToFiles(SImportInfo *pImport, char *payload, const int rows) { + int code = 0; + // TODO : Check the correctness of pObj and pVnode + SMeterObj *pObj = (SMeterObj *)(pImport->pObj); + SVnodeObj *pVnode = vnodeList + pObj->vnode; + + int64_t delta = pVnode->cfg.daysPerFile * tsMsPerDay[pVnode->cfg.precision]; + int sfid = KEY_AT_INDEX(payload, pObj->bytesPerPoint, 0) / delta; + int efid = KEY_AT_INDEX(payload, pObj->bytesPerPoint, rows - 1) / delta; + + for (int fid = sfid; fid <= efid; fid++) { + TSKEY skey = fid * delta; + TSKEY ekey = skey + delta - 1; + int srow = 0, nrows = 0; + + if (vnodeSearchKeyInRange(payload, pObj->bytesPerPoint, rows, skey, ekey, &srow, &nrows) < 0) continue; + + assert(nrows > 0); + + dTrace("vid:%d sid:%d meterId:%s, %d rows of data will be imported to file %d, srow:%d firstKey:%ld lastKey:%ld", + pObj->vnode, pObj->sid, pObj->meterId, nrows, fid, srow, KEY_AT_INDEX(payload, pObj->bytesPerPoint, srow), + KEY_AT_INDEX(payload, pObj->bytesPerPoint, (srow + nrows - 1))); + + code = vnodeMergeDataIntoFile(pImport, payload + (srow * pObj->bytesPerPoint), nrows, fid); + if (code != 0) break; + } + + return code; +} + +// TODO : add offset in pShell to make it avoid repeatedly deal with messages +int vnodeImportData(SMeterObj *pObj, SImportInfo *pImport) { + int code = 0; + int srow = 0, nrows = 0; + SVnodeObj * pVnode = vnodeList + pObj->vnode; + SCachePool *pPool = (SCachePool *)(pVnode->pCachePool); + + // 1. import data in range (pObj->lastKeyOnFile, INT64_MAX) into cache + if (vnodeSearchKeyInRange(pImport->payload, pObj->bytesPerPoint, pImport->rows, pObj->lastKeyOnFile + 1, INT64_MAX, + &srow, &nrows) >= 0) { + code = vnodeImportDataToCache(pImport, pImport->payload + pObj->bytesPerPoint * srow, nrows); + if (pImport->commit) { // Need to commit now + pPool->commitInProcess = 0; + vnodeProcessCommitTimer(pVnode, NULL); + return code; + } + + if (code != 0) return code; + } + + // 2. import data (0, pObj->lastKeyOnFile) into files + if (vnodeSearchKeyInRange(pImport->payload, pObj->bytesPerPoint, pImport->rows, 0, pObj->lastKeyOnFile - 1, &srow, + &nrows) >= 0) { + code = vnodeImportDataToFiles(pImport, pImport->payload + pObj->bytesPerPoint * srow, nrows); + } + + pPool->commitInProcess = 0; return code; } From 9cdbc4848a24132d5925df225bb2f72b55efa233 Mon Sep 17 00:00:00 2001 From: Hongze Cheng Date: Mon, 11 Nov 2019 14:32:07 +0800 Subject: [PATCH 02/20] fix merge import bug in JIRA TBASE-1097 --- src/system/detail/src/vnodeImport.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/system/detail/src/vnodeImport.c b/src/system/detail/src/vnodeImport.c index 96aeb99e20..ab5fe66c95 100644 --- a/src/system/detail/src/vnodeImport.c +++ b/src/system/detail/src/vnodeImport.c @@ -1479,6 +1479,7 @@ int vnodeImportDataToCache(SImportInfo *pImport, const char *payload, const int payloadIter = rows; code = TSDB_CODE_ACTION_IN_PROGRESS; pImport->commit = 1; + // TODO: Fix here continue; } @@ -1507,7 +1508,7 @@ int vnodeImportDataToCache(SImportInfo *pImport, const char *payload, const int int pointsLeft = pInfo->cacheBlocks[cacheIter.slot]->numOfPoints - cacheIter.pos; if (pointsLeft > 0) { for (int col = 0; col < pObj->numOfColumns; col++) { - memcpy((void *)(pNewBlock->offset[col]), + memcpy((void *)(pNewBlock->offset[col] + pObj->schema[col].bytes*cacheIter.pos), pInfo->cacheBlocks[cacheIter.slot]->offset[col] + pObj->schema[col].bytes * cacheIter.pos, pObj->schema[col].bytes * pointsLeft); } From 1b1b71dd249cdb1147f213c76537baa4ec68ecba Mon Sep 17 00:00:00 2001 From: Hongze Cheng Date: Mon, 11 Nov 2019 16:50:35 +0800 Subject: [PATCH 03/20] Fix problem JIRA TBASE-1100 --- src/system/detail/src/vnodeImport.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/system/detail/src/vnodeImport.c b/src/system/detail/src/vnodeImport.c index ab5fe66c95..bc1cbd14f7 100644 --- a/src/system/detail/src/vnodeImport.c +++ b/src/system/detail/src/vnodeImport.c @@ -620,7 +620,7 @@ static int vnodeLoadNeededBlockData(SMeterObj *pObj, SImportHandle *pHandle, int } if (pHandle->tempBuffer == NULL) { - pHandle->tempBufferSize = pObj->maxBytes + EXTRA_BYTES; + pHandle->tempBufferSize = pObj->maxBytes * pObj->pointsPerFileBlock + EXTRA_BYTES; pHandle->tempBuffer = malloc(pHandle->tempBufferSize); if (pHandle->tempBuffer == NULL) { dError("vid: %d, sid: %d, meterId: %s, failed to allocate memory, size: %ul", pObj->vnode, pObj->sid, From 6e48ca27d3735aba7fb18d20a89ac93b39e2b854 Mon Sep 17 00:00:00 2001 From: Hongze Cheng Date: Tue, 12 Nov 2019 15:11:36 +0800 Subject: [PATCH 04/20] Fix several bug --- src/system/detail/src/vnodeImport.c | 87 ++++++++++++++++------------- 1 file changed, 47 insertions(+), 40 deletions(-) diff --git a/src/system/detail/src/vnodeImport.c b/src/system/detail/src/vnodeImport.c index bc1cbd14f7..d774a55f38 100644 --- a/src/system/detail/src/vnodeImport.c +++ b/src/system/detail/src/vnodeImport.c @@ -230,54 +230,21 @@ int vnodeImportPoints(SMeterObj *pObj, char *cont, int contLen, char source, voi int *pNumOfPoints, TSKEY now) { SSubmitMsg *pSubmit = (SSubmitMsg *)cont; SVnodeObj * pVnode = vnodeList + pObj->vnode; - int rows; - char * payload; + int rows = 0; + char * payload = NULL; int code = TSDB_CODE_ACTION_IN_PROGRESS; SCachePool *pPool = (SCachePool *)(pVnode->pCachePool); SShellObj * pShell = (SShellObj *)param; int pointsImported = 0; - TSKEY minKey, maxKey; + TSKEY firstKey, lastKey; - rows = htons(pSubmit->numOfRows); - int expectedLen = rows * pObj->bytesPerPoint + sizeof(pSubmit->numOfRows); - if (expectedLen != contLen) { - dError("vid:%d sid:%d id:%s, invalid import, expected:%d, contLen:%d", pObj->vnode, pObj->sid, pObj->meterId, - expectedLen, contLen); - return TSDB_CODE_WRONG_MSG_SIZE; - } - - // FIXME: check sversion here should not be here (Take import convert to insert case into consideration) - if (sversion != pObj->sversion) { - dError("vid:%d sid:%d id:%s, invalid sversion, expected:%d received:%d", pObj->vnode, pObj->sid, pObj->meterId, - pObj->sversion, sversion); - return TSDB_CODE_OTHERS; - } - - // Check timestamp context. payload = pSubmit->payLoad; - TSKEY firstKey = KEY_AT_INDEX(payload, pObj->bytesPerPoint, 0); - TSKEY lastKey = KEY_AT_INDEX(payload, pObj->bytesPerPoint, rows - 1); - assert(firstKey <= lastKey); - vnodeGetValidDataRange(pObj->vnode, now, &minKey, &maxKey); - if (firstKey < minKey || firstKey > maxKey || lastKey < minKey || lastKey > maxKey) { - dError( - "vid:%d sid:%d id:%s, invalid timestamp to import, rows:%d firstKey: %ld lastKey: %ld minAllowedKey:%ld " - "maxAllowedKey:%ld", - pObj->vnode, pObj->sid, pObj->meterId, rows, firstKey, lastKey, minKey, maxKey); - return TSDB_CODE_TIMESTAMP_OUT_OF_RANGE; - } - - // FIXME: Commit log here is invalid (Take retry into consideration) - if (pVnode->cfg.commitLog && source != TSDB_DATA_SOURCE_LOG) { - if (pVnode->logFd < 0) return TSDB_CODE_INVALID_COMMIT_LOG; - code = vnodeWriteToCommitLog(pObj, TSDB_ACTION_IMPORT, cont, contLen, sversion); - if (code != 0) return code; - } + firstKey = KEY_AT_INDEX(payload, pObj->bytesPerPoint, 0); if (firstKey > pObj->lastKey) { // Just call insert vnodeClearMeterState(pObj, TSDB_METER_STATE_IMPORTING); vnodeSetMeterState(pObj, TSDB_METER_STATE_INSERT); - code = vnodeInsertPoints(pObj, cont, contLen, TSDB_DATA_SOURCE_LOG, NULL, pObj->sversion, &pointsImported, now); + code = vnodeInsertPoints(pObj, cont, contLen, TSDB_DATA_SOURCE_LOG, NULL, sversion, &pointsImported, now); if (pShell) { pShell->code = code; @@ -286,6 +253,43 @@ int vnodeImportPoints(SMeterObj *pObj, char *cont, int contLen, char source, voi vnodeClearMeterState(pObj, TSDB_METER_STATE_INSERT); } else { // trigger import + { + rows = htons(pSubmit->numOfRows); + assert(rows > 0); + int expectedLen = rows * pObj->bytesPerPoint + sizeof(pSubmit->numOfRows); + if (expectedLen != contLen) { + dError("vid:%d sid:%d id:%s, invalid import, expected:%d, contLen:%d", pObj->vnode, pObj->sid, pObj->meterId, + expectedLen, contLen); + return TSDB_CODE_WRONG_MSG_SIZE; + } + + if (sversion != pObj->sversion) { + dError("vid:%d sid:%d id:%s, invalid sversion, expected:%d received:%d", pObj->vnode, pObj->sid, pObj->meterId, + pObj->sversion, sversion); + return TSDB_CODE_OTHERS; + } + + // Check timestamp context. + TSKEY minKey = 0, maxKey = 0; + lastKey = KEY_AT_INDEX(payload, pObj->bytesPerPoint, rows - 1); + assert(firstKey <= lastKey); + vnodeGetValidDataRange(pObj->vnode, now, &minKey, &maxKey); + if (firstKey < minKey || firstKey > maxKey || lastKey < minKey || lastKey > maxKey) { + dError( + "vid:%d sid:%d id:%s, invalid timestamp to import, rows:%d firstKey: %ld lastKey: %ld minAllowedKey:%ld " + "maxAllowedKey:%ld", + pObj->vnode, pObj->sid, pObj->meterId, rows, firstKey, lastKey, minKey, maxKey); + return TSDB_CODE_TIMESTAMP_OUT_OF_RANGE; + } + + // TODO: Retry here will cause duplicate commit log written + if (pVnode->cfg.commitLog && source != TSDB_DATA_SOURCE_LOG) { + if (pVnode->logFd < 0) return TSDB_CODE_INVALID_COMMIT_LOG; + code = vnodeWriteToCommitLog(pObj, TSDB_ACTION_IMPORT, cont, contLen, sversion); + if (code != 0) return code; + } + } + SImportInfo *pNew, import; dTrace("vid:%d sid:%d id:%s, try to import %d rows data, firstKey:%ld, lastKey:%ld, object lastKey:%ld", @@ -1343,7 +1347,7 @@ int vnodeImportDataToCache(SImportInfo *pImport, const char *payload, const int int code = -1; SCacheInfo * pInfo = (SCacheInfo *)(pObj->pCache); int payloadIter; - SCachePool * pPool = pVnode->pCachePool; + SCachePool * pPool = (SCachePool *)(pVnode->pCachePool); int isCacheIterEnd = 0; int spayloadIter = 0; int isAppendData = 0; @@ -1369,7 +1373,10 @@ int vnodeImportDataToCache(SImportInfo *pImport, const char *payload, const int if (pInfo->numOfBlocks == 0) { if (vnodeAllocateCacheBlock(pObj) < 0) { - // TODO: deal with the ERROR here + pImport->importedRows = 0; + pImport->commit = 1; + code = TSDB_CODE_ACTION_IN_PROGRESS; + return code; } } From 1c82fe86bdd5a9e09c5a6ad1c0234549d6fb6abf Mon Sep 17 00:00:00 2001 From: Hongze Cheng Date: Tue, 12 Nov 2019 15:51:49 +0800 Subject: [PATCH 05/20] Fix more bugs --- src/system/detail/src/vnodeImport.c | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/src/system/detail/src/vnodeImport.c b/src/system/detail/src/vnodeImport.c index d774a55f38..21e30a5917 100644 --- a/src/system/detail/src/vnodeImport.c +++ b/src/system/detail/src/vnodeImport.c @@ -1465,7 +1465,7 @@ int vnodeImportDataToCache(SImportInfo *pImport, const char *payload, const int ((!isCacheIterEnd) && (KEY_AT_INDEX(payload, pObj->bytesPerPoint, payloadIter) > KEY_AT_INDEX(pInfo->cacheBlocks[cacheIter.slot]->offset[0], sizeof(TSKEY), - cacheIter.pos)))) { // if (payload end || (cacheIter not end && payloadKey > blockKey)) + cacheIter.pos)))) { // if (payload end || (cacheIter not end && payloadKey > blockKey)), consume cache for (int col = 0; col < pObj->numOfColumns; col++) { memcpy(pBuffer->offset[col] + pObj->schema[col].bytes * pBuffer->epos, pInfo->cacheBlocks[cacheIter.slot]->offset[col] + pObj->schema[col].bytes * cacheIter.pos, @@ -1477,18 +1477,23 @@ int vnodeImportDataToCache(SImportInfo *pImport, const char *payload, const int ((payloadIter < rows) && (KEY_AT_INDEX(payload, pObj->bytesPerPoint, payloadIter) < KEY_AT_INDEX(pInfo->cacheBlocks[cacheIter.slot]->offset[0], sizeof(TSKEY), - cacheIter.pos)))) { // cacheIter end || (payloadIter not end && payloadKey < blockKey) + cacheIter.pos)))) { // cacheIter end || (payloadIter not end && payloadKey < blockKey), consume payload if (availPoints == 0) { // Need to allocate a new cache block pthread_mutex_lock(&(pPool->vmutex)); + // TODO: Need to check if there are enough slots to hold a new one SCacheBlock *pNewBlock = vnodeGetFreeCacheBlock(pVnode); - if (pNewBlock == NULL) { // Failed to allocate a new cache block + if (pNewBlock == NULL) { // Failed to allocate a new cache block, need to commit and loop over the remaining cache records pthread_mutex_unlock(&(pPool->vmutex)); payloadIter = rows; code = TSDB_CODE_ACTION_IN_PROGRESS; pImport->commit = 1; - // TODO: Fix here continue; } + + assert(pInfo->numOfBlocks <= pInfo->maxBlocks); + if (pInfo->numOfBlocks == pInfo->maxBlocks) { + vnodeFreeCacheBlock(pInfo->cacheBlocks[(pInfo->currentSlot + 1) % pInfo->maxBlocks]); + } pNewBlock->pMeterObj = pObj; pNewBlock->offset[0] = (char *)pNewBlock + sizeof(SCacheBlock) + sizeof(char *) * pObj->numOfColumns; @@ -1593,6 +1598,7 @@ int vnodeImportDataToCache(SImportInfo *pImport, const char *payload, const int } } pImport->importedRows += rowsImported; + __sync_fetch_and_sub(&(pObj->freePoints), rowsImported); code = 0; From 7279c3dd903049a654cf4910b88a05c951d9b9ef Mon Sep 17 00:00:00 2001 From: Hongze Cheng Date: Fri, 15 Nov 2019 10:33:02 +0800 Subject: [PATCH 06/20] extend memory --- src/system/detail/src/vnodeImport.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/system/detail/src/vnodeImport.c b/src/system/detail/src/vnodeImport.c index 21e30a5917..d154ed5770 100644 --- a/src/system/detail/src/vnodeImport.c +++ b/src/system/detail/src/vnodeImport.c @@ -597,7 +597,8 @@ static int vnodeLoadNeededBlockData(SMeterObj *pObj, SImportHandle *pHandle, int } { // Allocate necessary buffer - size = pObj->bytesPerPoint * pObj->pointsPerFileBlock + (sizeof(SData) + EXTRA_BYTES) * pObj->numOfColumns; + size = pObj->bytesPerPoint * pObj->pointsPerFileBlock + + (sizeof(SData) + EXTRA_BYTES + sizeof(TSCKSUM)) * pObj->numOfColumns; if (pHandle->buffer == NULL) { pHandle->buffer = malloc(size); if (pHandle->buffer == NULL) { @@ -610,7 +611,7 @@ static int vnodeLoadNeededBlockData(SMeterObj *pObj, SImportHandle *pHandle, int pHandle->data[0] = (SData *)(pHandle->buffer); for (int col = 1; col < pObj->numOfColumns; col++) { pHandle->data[col] = (SData *)((char *)(pHandle->data[col - 1]) + sizeof(SData) + EXTRA_BYTES + - pObj->pointsPerFileBlock * pObj->schema[col - 1].bytes); + sizeof(TSCKSUM) + pObj->pointsPerFileBlock * pObj->schema[col - 1].bytes); } } From 23c7e0458ab3880a619c28afc80aee1ee8769b27 Mon Sep 17 00:00:00 2001 From: Hongze Cheng Date: Thu, 21 Nov 2019 15:37:46 +0800 Subject: [PATCH 07/20] Fix one possible data overwritten --- src/system/detail/src/vnodeImport.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/system/detail/src/vnodeImport.c b/src/system/detail/src/vnodeImport.c index d154ed5770..609422e5d6 100644 --- a/src/system/detail/src/vnodeImport.c +++ b/src/system/detail/src/vnodeImport.c @@ -853,7 +853,7 @@ static int vnodeMergeDataIntoFile(SImportInfo *pImport, const char *payload, int } { // Initialize data[] and cdata[], which is used to hold data to write to data file - size = pObj->bytesPerPoint * pVnode->cfg.rowsInFileBlock + (sizeof(SData) + EXTRA_BYTES) * pObj->numOfColumns; + size = pObj->bytesPerPoint * pVnode->cfg.rowsInFileBlock + (sizeof(SData) + EXTRA_BYTES + sizeof(TSCKSUM)) * pObj->numOfColumns; buffer = (char *)malloc(size); if (buffer == NULL) { @@ -873,9 +873,9 @@ static int vnodeMergeDataIntoFile(SImportInfo *pImport, const char *payload, int cdata[0] = (SData *)cbuffer; for (int col = 1; col < pObj->numOfColumns; col++) { - data[col] = (SData *)((char *)data[col - 1] + sizeof(SData) + EXTRA_BYTES + + data[col] = (SData *)((char *)data[col - 1] + sizeof(SData) + EXTRA_BYTES + sizeof(TSCKSUM) + pObj->pointsPerFileBlock * pObj->schema[col - 1].bytes); - cdata[col] = (SData *)((char *)cdata[col - 1] + sizeof(SData) + EXTRA_BYTES + + cdata[col] = (SData *)((char *)cdata[col - 1] + sizeof(SData) + EXTRA_BYTES + sizeof(TSCKSUM) + pObj->pointsPerFileBlock * pObj->schema[col - 1].bytes); } } From 528b601287b785bb09190ec7c87ae42dcc4a24fb Mon Sep 17 00:00:00 2001 From: Hongze Cheng Date: Sat, 23 Nov 2019 17:07:43 +0800 Subject: [PATCH 08/20] Fix TBASE-1114 --- src/system/detail/src/vnodeImport.c | 115 ++--------------- src/system/detail/src/vnodeShell.c | 186 +++++++++++++++++++++------- 2 files changed, 153 insertions(+), 148 deletions(-) diff --git a/src/system/detail/src/vnodeImport.c b/src/system/detail/src/vnodeImport.c index 609422e5d6..94de72f86e 100644 --- a/src/system/detail/src/vnodeImport.c +++ b/src/system/detail/src/vnodeImport.c @@ -156,83 +156,13 @@ void vnodeGetValidDataRange(int vnode, TSKEY now, TSKEY *minKey, TSKEY *maxKey) return; } -void vnodeProcessImportTimer(void *param, void *tmrId) { - SImportInfo *pImport = (SImportInfo *)param; - if (pImport == NULL || pImport->signature != param) { - dError("import timer is messed up, signature:%p", pImport); - return; - } - - SMeterObj * pObj = pImport->pObj; - SVnodeObj * pVnode = &vnodeList[pObj->vnode]; - SCachePool *pPool = (SCachePool *)pVnode->pCachePool; - SShellObj * pShell = pImport->pShell; - - pImport->retry++; - - // slow query will block the import operation - int32_t state = vnodeSetMeterState(pObj, TSDB_METER_STATE_IMPORTING); - if (state >= TSDB_METER_STATE_DELETING) { - dError("vid:%d sid:%d id:%s, meter is deleted, failed to import, state:%d", pObj->vnode, pObj->sid, pObj->meterId, - state); - return; - } - - int32_t num = 0; - pthread_mutex_lock(&pVnode->vmutex); - num = pObj->numOfQueries; - pthread_mutex_unlock(&pVnode->vmutex); - - // if the num == 0, it will never be increased before state is set to TSDB_METER_STATE_READY - int32_t commitInProcess = 0; - pthread_mutex_lock(&pPool->vmutex); - if (((commitInProcess = pPool->commitInProcess) == 1) || num > 0 || state != TSDB_METER_STATE_READY) { - pthread_mutex_unlock(&pPool->vmutex); - vnodeClearMeterState(pObj, TSDB_METER_STATE_IMPORTING); - - if (pImport->retry < 1000) { - dTrace( - "vid:%d sid:%d id:%s, import failed, retry later. commit in process or queries on it, or not ready." - "commitInProcess:%d, numOfQueries:%d, state:%d", - pObj->vnode, pObj->sid, pObj->meterId, commitInProcess, num, state); - - taosTmrStart(vnodeProcessImportTimer, 10, pImport, vnodeTmrCtrl); - return; - } else { - pShell->code = TSDB_CODE_TOO_SLOW; - } - } else { - pPool->commitInProcess = 1; - pthread_mutex_unlock(&pPool->vmutex); - int code = vnodeImportData(pObj, pImport); - if (pShell) { - pShell->code = code; - pShell->numOfTotalPoints += pImport->importedRows; - } - } - - vnodeClearMeterState(pObj, TSDB_METER_STATE_IMPORTING); - - pVnode->version++; - - // send response back to shell - if (pShell) { - pShell->count--; - if (pShell->count <= 0) vnodeSendShellSubmitRspMsg(pImport->pShell, pShell->code, pShell->numOfTotalPoints); - } - - pImport->signature = NULL; - free(pImport->opayload); - free(pImport); -} - int vnodeImportPoints(SMeterObj *pObj, char *cont, int contLen, char source, void *param, int sversion, int *pNumOfPoints, TSKEY now) { SSubmitMsg *pSubmit = (SSubmitMsg *)cont; SVnodeObj * pVnode = vnodeList + pObj->vnode; int rows = 0; char * payload = NULL; - int code = TSDB_CODE_ACTION_IN_PROGRESS; + int code = TSDB_CODE_SUCCESS; SCachePool *pPool = (SCachePool *)(pVnode->pCachePool); SShellObj * pShell = (SShellObj *)param; int pointsImported = 0; @@ -243,14 +173,10 @@ int vnodeImportPoints(SMeterObj *pObj, char *cont, int contLen, char source, voi if (firstKey > pObj->lastKey) { // Just call insert vnodeClearMeterState(pObj, TSDB_METER_STATE_IMPORTING); + // TODO: Here may fail to set the state, add error handling. vnodeSetMeterState(pObj, TSDB_METER_STATE_INSERT); - code = vnodeInsertPoints(pObj, cont, contLen, TSDB_DATA_SOURCE_LOG, NULL, sversion, &pointsImported, now); - - if (pShell) { - pShell->code = code; - pShell->numOfTotalPoints += pointsImported; - } - + code = vnodeInsertPoints(pObj, cont, contLen, TSDB_DATA_SOURCE_LOG, NULL, sversion, pNumOfPoints, now); + // TODO: outside clear state function is invalid for this structure vnodeClearMeterState(pObj, TSDB_METER_STATE_INSERT); } else { // trigger import { @@ -290,7 +216,7 @@ int vnodeImportPoints(SMeterObj *pObj, char *cont, int contLen, char source, voi } } - SImportInfo *pNew, import; + SImportInfo import; dTrace("vid:%d sid:%d id:%s, try to import %d rows data, firstKey:%ld, lastKey:%ld, object lastKey:%ld", pObj->vnode, pObj->sid, pObj->meterId, rows, firstKey, lastKey, pObj->lastKey); @@ -315,40 +241,17 @@ int vnodeImportPoints(SMeterObj *pObj, char *cont, int contLen, char source, voi if (((commitInProcess = pPool->commitInProcess) == 1) || num > 0) { // mutual exclusion with read (need to change here) pthread_mutex_unlock(&pPool->vmutex); - - pNew = (SImportInfo *)malloc(sizeof(SImportInfo)); - memcpy(pNew, &import, sizeof(SImportInfo)); - pNew->signature = pNew; - int payloadLen = contLen - sizeof(SSubmitMsg); - pNew->payload = malloc(payloadLen); - pNew->opayload = pNew->payload; - memcpy(pNew->payload, payload, payloadLen); - - dTrace("vid:%d sid:%d id:%s, import later, commit in process:%d, numOfQueries:%d", pObj->vnode, pObj->sid, - pObj->meterId, commitInProcess, pObj->numOfQueries); - - taosTmrStart(vnodeProcessImportTimer, 10, pNew, vnodeTmrCtrl); - return 0; + return TSDB_CODE_ACTION_IN_PROGRESS; } else { pPool->commitInProcess = 1; pthread_mutex_unlock(&pPool->vmutex); int code = vnodeImportData(pObj, &import); - if (pShell) { - pShell->code = code; - pShell->numOfTotalPoints += import.importedRows; - } + *pNumOfPoints = import.importedRows; } + pVnode->version++; } - // How about the retry? Will this also cause vnode version++? - pVnode->version++; - - if (pShell) { - pShell->count--; - if (pShell->count <= 0) vnodeSendShellSubmitRspMsg(pShell, pShell->code, pShell->numOfTotalPoints); - } - - return 0; + return code; } /* Function to search keys in a range diff --git a/src/system/detail/src/vnodeShell.c b/src/system/detail/src/vnodeShell.c index a5f5259887..fcf4cdd231 100644 --- a/src/system/detail/src/vnodeShell.c +++ b/src/system/detail/src/vnodeShell.c @@ -38,10 +38,21 @@ SShellObj **shellList = NULL; int vnodeProcessRetrieveRequest(char *pMsg, int msgLen, SShellObj *pObj); int vnodeProcessQueryRequest(char *pMsg, int msgLen, SShellObj *pObj); int vnodeProcessShellSubmitRequest(char *pMsg, int msgLen, SShellObj *pObj); +static void vnodeProcessBatchImportTimer(void *param, void *tmrId); int vnodeSelectReqNum = 0; int vnodeInsertReqNum = 0; +typedef struct { + int32_t import; + int32_t vnode; + int32_t numOfSid; + int32_t ssid; // Start sid + SShellObj *pObj; + int64_t offset; // offset relative the blks + char blks[]; +} SBatchImportInfo; + void *vnodeProcessMsgFromShell(char *msg, void *ahandle, void *thandle) { int sid, vnode; SShellObj *pObj = (SShellObj *)ahandle; @@ -242,6 +253,7 @@ int vnodeSendShellSubmitRspMsg(SShellObj *pObj, int code, int numOfPoints) { char *pMsg, *pStart; int msgLen; + dTrace("code:%d numOfTotalPoints:%d", code, numOfPoints); pStart = taosBuildRspMsgWithSize(pObj->thandle, TSDB_MSG_TYPE_SUBMIT_RSP, 128); if (pStart == NULL) return -1; pMsg = pStart; @@ -273,6 +285,7 @@ int vnodeProcessQueryRequest(char *pMsg, int msgLen, SShellObj *pObj) { } if (pQueryMsg->numOfSids <= 0) { + dError("Invalid number of meters to query, numOfSids:%d", pQueryMsg->numOfSids); code = TSDB_CODE_INVALID_QUERY_MSG; goto _query_over; } @@ -482,10 +495,37 @@ int vnodeProcessRetrieveRequest(char *pMsg, int msgLen, SShellObj *pObj) { return msgLen; } +static int vnodeCheckSubmitBlockContext(SShellSubmitBlock *pBlocks, SVnodeObj *pVnode) { + int32_t sid = htonl(pBlocks->sid); + uint64_t uid = htobe64(pBlocks->uid); + + if (sid >= pVnode->cfg.maxSessions || sid <= 0) { + dError("sid:%d is out of range", sid); + return TSDB_CODE_INVALID_TABLE_ID; + } + + SMeterObj *pMeterObj = pVnode->meterList[sid]; + if (pMeterObj == NULL) { + dError("vid:%d sid:%d, no active table", pVnode->vnode, sid); + vnodeSendMeterCfgMsg(pVnode->vnode, sid); + return TSDB_CODE_NOT_ACTIVE_TABLE; + } + + if (pMeterObj->uid != uid) { + dError("vid:%d sid:%d, meterId:%s, uid:%lld, uid in msg:%lld, uid mismatch", pVnode->vnode, sid, pMeterObj->meterId, + pMeterObj->uid, uid); + return TSDB_CODE_INVALID_SUBMIT_MSG; + } + + return TSDB_CODE_SUCCESS; +} + int vnodeProcessShellSubmitRequest(char *pMsg, int msgLen, SShellObj *pObj) { int code = 0, ret = 0; + int32_t i = 0; SShellSubmitMsg shellSubmit = *(SShellSubmitMsg *)pMsg; SShellSubmitMsg *pSubmit = &shellSubmit; + SShellSubmitBlock *pBlocks = NULL; pSubmit->vnode = htons(pSubmit->vnode); pSubmit->numOfSid = htonl(pSubmit->numOfSid); @@ -524,67 +564,41 @@ int vnodeProcessShellSubmitRequest(char *pMsg, int msgLen, SShellObj *pObj) { pObj->count = pSubmit->numOfSid; // for import pObj->code = 0; // for import pObj->numOfTotalPoints = 0; // for import - SShellSubmitBlock *pBlocks = (SShellSubmitBlock *)(pMsg + sizeof(SShellSubmitMsg)); int32_t numOfPoints = 0; int32_t numOfTotalPoints = 0; // We take current time here to avoid it in the for loop. TSKEY now = taosGetTimestamp(pVnode->cfg.precision); - for (int32_t i = 0; i < pSubmit->numOfSid; ++i) { + pBlocks = (SShellSubmitBlock *)(pMsg + sizeof(SShellSubmitMsg)); + for (i = 0; i < pSubmit->numOfSid; ++i) { numOfPoints = 0; - pBlocks->sid = htonl(pBlocks->sid); - pBlocks->uid = htobe64(pBlocks->uid); - - if (pBlocks->sid >= pVnode->cfg.maxSessions || pBlocks->sid <= 0) { - dTrace("sid:%d is out of range", pBlocks->sid); - code = TSDB_CODE_INVALID_TABLE_ID; - goto _submit_over; - } - - int vnode = pSubmit->vnode; - int sid = pBlocks->sid; - - SMeterObj *pMeterObj = vnodeList[vnode].meterList[sid]; - if (pMeterObj == NULL) { - dError("vid:%d sid:%d, no active table", vnode, sid); - vnodeSendMeterCfgMsg(vnode, sid); - code = TSDB_CODE_NOT_ACTIVE_TABLE; - goto _submit_over; - } - - if (pMeterObj->uid != pBlocks->uid) { - dError("vid:%d sid:%d, meterId:%s, uid:%lld, uid in msg:%lld, uid mismatch", vnode, sid, pMeterObj->meterId, - pMeterObj->uid, pBlocks->uid); - code = TSDB_CODE_INVALID_SUBMIT_MSG; - goto _submit_over; - } + code = vnodeCheckSubmitBlockContext(pBlocks, pVnode); + if (code != TSDB_CODE_SUCCESS) break; + SMeterObj *pMeterObj = (SMeterObj *)(pVnode->meterList[htonl(pBlocks->sid)]); // dont include sid, vid - int subMsgLen = sizeof(pBlocks->numOfRows) + htons(pBlocks->numOfRows) * pMeterObj->bytesPerPoint; - int sversion = htonl(pBlocks->sversion); + int32_t subMsgLen = sizeof(pBlocks->numOfRows) + htons(pBlocks->numOfRows) * pMeterObj->bytesPerPoint; + int32_t sversion = htonl(pBlocks->sversion); int32_t state = TSDB_METER_STATE_READY; - if (pSubmit->import) { - state = vnodeSetMeterState(pMeterObj, TSDB_METER_STATE_IMPORTING); - } else { - state = vnodeSetMeterState(pMeterObj, TSDB_METER_STATE_INSERT); - } + state = vnodeSetMeterState(pMeterObj, (pSubmit->import ? TSDB_METER_STATE_IMPORTING : TSDB_METER_STATE_INSERT)); - if (state == TSDB_METER_STATE_READY) { - // meter status is ready for insert/import + if (state == TSDB_METER_STATE_READY) { // meter status is ready for insert/import if (pSubmit->import) { code = vnodeImportPoints(pMeterObj, (char *) &(pBlocks->numOfRows), subMsgLen, TSDB_DATA_SOURCE_SHELL, pObj, sversion, &numOfPoints, now); vnodeClearMeterState(pMeterObj, TSDB_METER_STATE_IMPORTING); + pObj->numOfTotalPoints += numOfPoints; + if (code == TSDB_CODE_SUCCESS) pObj->count--; } else { code = vnodeInsertPoints(pMeterObj, (char *) &(pBlocks->numOfRows), subMsgLen, TSDB_DATA_SOURCE_SHELL, NULL, sversion, &numOfPoints, now); vnodeClearMeterState(pMeterObj, TSDB_METER_STATE_INSERT); + numOfTotalPoints += numOfPoints; } - - if (code != TSDB_CODE_SUCCESS) {break;} + if (code != TSDB_CODE_SUCCESS) break; } else { if (vnodeIsMeterState(pMeterObj, TSDB_METER_STATE_DELETING)) { dTrace("vid:%d sid:%d id:%s, it is removed, state:%d", pMeterObj->vnode, pMeterObj->sid, pMeterObj->meterId, @@ -600,15 +614,103 @@ int vnodeProcessShellSubmitRequest(char *pMsg, int msgLen, SShellObj *pObj) { } } - numOfTotalPoints += numOfPoints; pBlocks = (SShellSubmitBlock *)((char *)pBlocks + sizeof(SShellSubmitBlock) + htons(pBlocks->numOfRows) * pMeterObj->bytesPerPoint); } _submit_over: - // for import, send the submit response only when return code is not zero - if (pSubmit->import == 0 || code != 0) ret = vnodeSendShellSubmitRspMsg(pObj, code, numOfTotalPoints); + ret = 0; + if (pSubmit->import) { // Import case + if (code == TSDB_CODE_ACTION_IN_PROGRESS) { + + SBatchImportInfo *pImportInfo = + (SBatchImportInfo *)calloc(1, sizeof(SBatchImportInfo) + msgLen - sizeof(SShellSubmitMsg)); + if (pImportInfo == NULL) { + code = TSDB_CODE_SERV_OUT_OF_MEMORY; + ret = vnodeSendShellSubmitRspMsg(pObj, code, pObj->numOfTotalPoints); + } else { // Start a timer to process the next part of request + pImportInfo->import = 1; + pImportInfo->vnode = pSubmit->vnode; + pImportInfo->numOfSid = pSubmit->numOfSid; + pImportInfo->ssid = i; + pImportInfo->pObj = pObj; + pImportInfo->offset = ((char *)pBlocks) - (pMsg + sizeof(SShellSubmitMsg)); + assert(pImportInfo->offset >= 0); + memcpy((void *)(pImportInfo->blks), (void *)(pMsg + sizeof(SShellSubmitMsg)), msgLen - sizeof(SShellSubmitMsg)); + taosTmrStart(vnodeProcessBatchImportTimer, 10, (void *)pImportInfo, vnodeTmrCtrl); + } + } else { + if (code == TSDB_CODE_SUCCESS) assert(pObj->count == 0); + ret = vnodeSendShellSubmitRspMsg(pObj, code, pObj->numOfTotalPoints); + } + } else { // Insert case + ret = vnodeSendShellSubmitRspMsg(pObj, code, numOfTotalPoints); + } atomic_fetch_add_32(&vnodeInsertReqNum, 1); return ret; } + +static void vnodeProcessBatchImportTimer(void *param, void *tmrId) { + SBatchImportInfo *pImportInfo = (SBatchImportInfo *)param; + assert(pImportInfo != NULL && pImportInfo->import); + + int32_t i = 0, numOfPoints = 0, numOfTotalPoints = 0; + int32_t code = TSDB_CODE_SUCCESS; + + SShellObj * pShell = pImportInfo->pObj; + SVnodeObj * pVnode = &vnodeList[pImportInfo->vnode]; + SShellSubmitBlock *pBlocks = (SShellSubmitBlock *)(pImportInfo->blks + pImportInfo->offset); + TSKEY now = taosGetTimestamp(pVnode->cfg.precision); + + for (i = pImportInfo->ssid; i < pImportInfo->numOfSid; i++) { + numOfPoints = 0; + + code = vnodeCheckSubmitBlockContext(pBlocks, pVnode); + if (code != TSDB_CODE_SUCCESS) break; + + SMeterObj *pMeterObj = (SMeterObj *)(pVnode->meterList[htonl(pBlocks->sid)]); + // dont include sid, vid + int32_t subMsgLen = sizeof(pBlocks->numOfRows) + htons(pBlocks->numOfRows) * pMeterObj->bytesPerPoint; + int32_t sversion = htonl(pBlocks->sversion); + + int32_t state = TSDB_METER_STATE_READY; + state = vnodeSetMeterState(pMeterObj, TSDB_METER_STATE_IMPORTING); + + if (state == TSDB_METER_STATE_READY) { // meter status is ready for insert/import + code = vnodeImportPoints(pMeterObj, (char *)&(pBlocks->numOfRows), subMsgLen, TSDB_DATA_SOURCE_SHELL, pShell, + sversion, &numOfPoints, now); + vnodeClearMeterState(pMeterObj, TSDB_METER_STATE_IMPORTING); + pShell->numOfTotalPoints += numOfPoints; + if (code != TSDB_CODE_SUCCESS) break; + pShell->count--; + } else { + if (vnodeIsMeterState(pMeterObj, TSDB_METER_STATE_DELETING)) { + dTrace("vid:%d sid:%d id:%s, it is removed, state:%d", pMeterObj->vnode, pMeterObj->sid, pMeterObj->meterId, + pMeterObj->state); + code = TSDB_CODE_NOT_ACTIVE_TABLE; + break; + } else { // waiting for 300ms by default and try again + dTrace("vid:%d sid:%d id:%s, try submit again since in state:%d", pMeterObj->vnode, pMeterObj->sid, + pMeterObj->meterId, pMeterObj->state); + + code = TSDB_CODE_ACTION_IN_PROGRESS; + break; + } + } + + pBlocks = (SShellSubmitBlock *)((char *)pBlocks + sizeof(SShellSubmitBlock) + + htons(pBlocks->numOfRows) * pMeterObj->bytesPerPoint); + } + + int ret = 0; + if (code == TSDB_CODE_ACTION_IN_PROGRESS) { + pImportInfo->ssid = i; + pImportInfo->offset = ((char *)pBlocks) - pImportInfo->blks; + taosTmrStart(vnodeProcessBatchImportTimer, 10, (void *)pImportInfo, vnodeTmrCtrl); + } else { + if (code == TSDB_CODE_SUCCESS) assert(pShell->count == 0); + tfree(param); + ret = vnodeSendShellSubmitRspMsg(pShell, code, pShell->numOfTotalPoints); + } +} From 534aa8b11dba17575d70339c7ea32eba1bc80743 Mon Sep 17 00:00:00 2001 From: Hongze Cheng Date: Mon, 25 Nov 2019 10:04:48 +0800 Subject: [PATCH 09/20] fix modification error --- src/system/detail/src/vnodeImport.c | 67 ++++++++++++++--------------- 1 file changed, 32 insertions(+), 35 deletions(-) diff --git a/src/system/detail/src/vnodeImport.c b/src/system/detail/src/vnodeImport.c index 94de72f86e..906e5daf4e 100644 --- a/src/system/detail/src/vnodeImport.c +++ b/src/system/detail/src/vnodeImport.c @@ -171,6 +171,34 @@ int vnodeImportPoints(SMeterObj *pObj, char *cont, int contLen, char source, voi payload = pSubmit->payLoad; firstKey = KEY_AT_INDEX(payload, pObj->bytesPerPoint, 0); + rows = htons(pSubmit->numOfRows); + assert(rows > 0); + int expectedLen = rows * pObj->bytesPerPoint + sizeof(pSubmit->numOfRows); + if (expectedLen != contLen) { + dError("vid:%d sid:%d id:%s, invalid import, expected:%d, contLen:%d", pObj->vnode, pObj->sid, pObj->meterId, + expectedLen, contLen); + return TSDB_CODE_WRONG_MSG_SIZE; + } + + // Check timestamp context. + TSKEY minKey = 0, maxKey = 0; + lastKey = KEY_AT_INDEX(payload, pObj->bytesPerPoint, rows - 1); + assert(firstKey <= lastKey); + vnodeGetValidDataRange(pObj->vnode, now, &minKey, &maxKey); + if (firstKey < minKey || firstKey > maxKey || lastKey < minKey || lastKey > maxKey) { + dError( + "vid:%d sid:%d id:%s, invalid timestamp to import, rows:%d firstKey: %ld lastKey: %ld minAllowedKey:%ld " + "maxAllowedKey:%ld", + pObj->vnode, pObj->sid, pObj->meterId, rows, firstKey, lastKey, minKey, maxKey); + return TSDB_CODE_TIMESTAMP_OUT_OF_RANGE; + } + + if (pVnode->cfg.commitLog && source != TSDB_DATA_SOURCE_LOG) { + if (pVnode->logFd < 0) return TSDB_CODE_INVALID_COMMIT_LOG; + code = vnodeWriteToCommitLog(pObj, TSDB_ACTION_IMPORT, cont, contLen, sversion); + if (code != 0) return code; + } + if (firstKey > pObj->lastKey) { // Just call insert vnodeClearMeterState(pObj, TSDB_METER_STATE_IMPORTING); // TODO: Here may fail to set the state, add error handling. @@ -179,41 +207,10 @@ int vnodeImportPoints(SMeterObj *pObj, char *cont, int contLen, char source, voi // TODO: outside clear state function is invalid for this structure vnodeClearMeterState(pObj, TSDB_METER_STATE_INSERT); } else { // trigger import - { - rows = htons(pSubmit->numOfRows); - assert(rows > 0); - int expectedLen = rows * pObj->bytesPerPoint + sizeof(pSubmit->numOfRows); - if (expectedLen != contLen) { - dError("vid:%d sid:%d id:%s, invalid import, expected:%d, contLen:%d", pObj->vnode, pObj->sid, pObj->meterId, - expectedLen, contLen); - return TSDB_CODE_WRONG_MSG_SIZE; - } - - if (sversion != pObj->sversion) { - dError("vid:%d sid:%d id:%s, invalid sversion, expected:%d received:%d", pObj->vnode, pObj->sid, pObj->meterId, - pObj->sversion, sversion); - return TSDB_CODE_OTHERS; - } - - // Check timestamp context. - TSKEY minKey = 0, maxKey = 0; - lastKey = KEY_AT_INDEX(payload, pObj->bytesPerPoint, rows - 1); - assert(firstKey <= lastKey); - vnodeGetValidDataRange(pObj->vnode, now, &minKey, &maxKey); - if (firstKey < minKey || firstKey > maxKey || lastKey < minKey || lastKey > maxKey) { - dError( - "vid:%d sid:%d id:%s, invalid timestamp to import, rows:%d firstKey: %ld lastKey: %ld minAllowedKey:%ld " - "maxAllowedKey:%ld", - pObj->vnode, pObj->sid, pObj->meterId, rows, firstKey, lastKey, minKey, maxKey); - return TSDB_CODE_TIMESTAMP_OUT_OF_RANGE; - } - - // TODO: Retry here will cause duplicate commit log written - if (pVnode->cfg.commitLog && source != TSDB_DATA_SOURCE_LOG) { - if (pVnode->logFd < 0) return TSDB_CODE_INVALID_COMMIT_LOG; - code = vnodeWriteToCommitLog(pObj, TSDB_ACTION_IMPORT, cont, contLen, sversion); - if (code != 0) return code; - } + if (sversion != pObj->sversion) { + dError("vid:%d sid:%d id:%s, invalid sversion, expected:%d received:%d", pObj->vnode, pObj->sid, pObj->meterId, + pObj->sversion, sversion); + return TSDB_CODE_OTHERS; } SImportInfo import; From 50c507c870e95c0a6a19e4736a45bd1475dc3810 Mon Sep 17 00:00:00 2001 From: Hongze Cheng Date: Mon, 25 Nov 2019 15:25:04 +0800 Subject: [PATCH 10/20] refactor error code return --- src/system/detail/src/vnodeFile.c | 47 ++++++---- src/system/detail/src/vnodeImport.c | 132 ++++++++++++---------------- 2 files changed, 86 insertions(+), 93 deletions(-) diff --git a/src/system/detail/src/vnodeFile.c b/src/system/detail/src/vnodeFile.c index 96b97f4059..c598be59ee 100644 --- a/src/system/detail/src/vnodeFile.c +++ b/src/system/detail/src/vnodeFile.c @@ -181,29 +181,24 @@ int vnodeCreateEmptyCompFile(int vnode, int fileId) { return 0; } -int vnodeOpenCommitFiles(SVnodeObj *pVnode, int noTempLast) { - char name[TSDB_FILENAME_LEN]; - char dHeadName[TSDB_FILENAME_LEN] = "\0"; - char dLastName[TSDB_FILENAME_LEN] = "\0"; - int len = 0; - struct stat filestat; - int vnode = pVnode->vnode; - int fileId, numOfFiles, filesAdded = 0; - SVnodeCfg * pCfg = &pVnode->cfg; +int vnodeCreateNeccessaryFiles(SVnodeObj *pVnode) { + int numOfFiles = 0, fileId, filesAdded = 0; + int vnode = pVnode->vnode; + SVnodeCfg *pCfg = &(pVnode->cfg); if (pVnode->lastKeyOnFile == 0) { if (pCfg->daysPerFile == 0) pCfg->daysPerFile = 10; pVnode->fileId = pVnode->firstKey / tsMsPerDay[pVnode->cfg.precision] / pCfg->daysPerFile; - pVnode->lastKeyOnFile = (int64_t)(pVnode->fileId + 1) * pCfg->daysPerFile * tsMsPerDay[pVnode->cfg.precision] - 1; + pVnode->lastKeyOnFile = (long)(pVnode->fileId + 1) * pCfg->daysPerFile * tsMsPerDay[pVnode->cfg.precision] - 1; pVnode->numOfFiles = 1; - vnodeCreateEmptyCompFile(vnode, pVnode->fileId); + if (vnodeCreateEmptyCompFile(vnode, pVnode->fileId) < 0) return -1; } numOfFiles = (pVnode->lastKeyOnFile - pVnode->commitFirstKey) / tsMsPerDay[pVnode->cfg.precision] / pCfg->daysPerFile; if (pVnode->commitFirstKey > pVnode->lastKeyOnFile) numOfFiles = -1; - dTrace("vid:%d, commitFirstKey:%ld lastKeyOnFile:%ld numOfFiles:%d fileId:%d vnodeNumOfFiles:%d", - vnode, pVnode->commitFirstKey, pVnode->lastKeyOnFile, numOfFiles, pVnode->fileId, pVnode->numOfFiles); + dTrace("vid:%d, commitFirstKey:%ld lastKeyOnFile:%ld numOfFiles:%d fileId:%d vnodeNumOfFiles:%d", pVnode->vnode, + pVnode->commitFirstKey, pVnode->lastKeyOnFile, numOfFiles, pVnode->fileId, pVnode->numOfFiles); if (numOfFiles >= pVnode->numOfFiles) { // create empty header files backward @@ -215,7 +210,7 @@ int vnodeOpenCommitFiles(SVnodeObj *pVnode, int noTempLast) { #ifdef CLUSTER return vnodeRecoverFromPeer(pVnode, fileId); #else - return -1; + return -1; #endif } } else if (numOfFiles < 0) { @@ -225,20 +220,36 @@ int vnodeOpenCommitFiles(SVnodeObj *pVnode, int noTempLast) { #ifdef CLUSTER return vnodeRecoverFromPeer(pVnode, pVnode->fileId); #else - return -1; + return -1; #endif - pVnode->lastKeyOnFile += (int64_t)tsMsPerDay[pVnode->cfg.precision] * pCfg->daysPerFile; + pVnode->lastKeyOnFile += (long)tsMsPerDay[pVnode->cfg.precision] * pCfg->daysPerFile; filesAdded = 1; numOfFiles = 0; // hacker way } fileId = pVnode->fileId - numOfFiles; pVnode->commitLastKey = - pVnode->lastKeyOnFile - (int64_t)numOfFiles * tsMsPerDay[pVnode->cfg.precision] * pCfg->daysPerFile; - pVnode->commitFirstKey = pVnode->commitLastKey - (int64_t)tsMsPerDay[pVnode->cfg.precision] * pCfg->daysPerFile + 1; + pVnode->lastKeyOnFile - (long)numOfFiles * tsMsPerDay[pVnode->cfg.precision] * pCfg->daysPerFile; + pVnode->commitFirstKey = pVnode->commitLastKey - (long)tsMsPerDay[pVnode->cfg.precision] * pCfg->daysPerFile + 1; pVnode->commitFileId = fileId; pVnode->numOfFiles = pVnode->numOfFiles + filesAdded; + return 0; +} + + +int vnodeOpenCommitFiles(SVnodeObj *pVnode, int noTempLast) { + char name[TSDB_FILENAME_LEN]; + char dHeadName[TSDB_FILENAME_LEN] = "\0"; + char dLastName[TSDB_FILENAME_LEN] = "\0"; + int len = 0; + struct stat filestat; + int vnode = pVnode->vnode; + int fileId, numOfFiles, filesAdded = 0; + SVnodeCfg * pCfg = &pVnode->cfg; + + if (vnodeCreateNeccessaryFiles(pVnode) < 0) return -1; + dTrace("vid:%d, commit fileId:%d, commitLastKey:%ld, vnodeLastKey:%ld, lastKeyOnFile:%ld numOfFiles:%d", vnode, fileId, pVnode->commitLastKey, pVnode->lastKey, pVnode->lastKeyOnFile, pVnode->numOfFiles); diff --git a/src/system/detail/src/vnodeImport.c b/src/system/detail/src/vnodeImport.c index 906e5daf4e..fa14a53c58 100644 --- a/src/system/detail/src/vnodeImport.c +++ b/src/system/detail/src/vnodeImport.c @@ -31,6 +31,7 @@ extern void vnodeGetHeadDataLname(char *headName, char *dataName, char * extern int vnodeCreateEmptyCompFile(int vnode, int fileId); extern int vnodeUpdateFreeSlot(SVnodeObj *pVnode); extern SCacheBlock *vnodeGetFreeCacheBlock(SVnodeObj *pVnode); +extern int vnodeCreateNeccessaryFiles(SVnodeObj *pVnode); #define KEY_AT_INDEX(payload, step, idx) (*(TSKEY *)((char *)(payload) + (step) * (idx))) typedef struct { @@ -169,7 +170,6 @@ int vnodeImportPoints(SMeterObj *pObj, char *cont, int contLen, char source, voi TSKEY firstKey, lastKey; payload = pSubmit->payLoad; - firstKey = KEY_AT_INDEX(payload, pObj->bytesPerPoint, 0); rows = htons(pSubmit->numOfRows); assert(rows > 0); @@ -182,6 +182,7 @@ int vnodeImportPoints(SMeterObj *pObj, char *cont, int contLen, char source, voi // Check timestamp context. TSKEY minKey = 0, maxKey = 0; + firstKey = KEY_AT_INDEX(payload, pObj->bytesPerPoint, 0); lastKey = KEY_AT_INDEX(payload, pObj->bytesPerPoint, rows - 1); assert(firstKey <= lastKey); vnodeGetValidDataRange(pObj->vnode, now, &minKey, &maxKey); @@ -242,7 +243,7 @@ int vnodeImportPoints(SMeterObj *pObj, char *cont, int contLen, char source, voi } else { pPool->commitInProcess = 1; pthread_mutex_unlock(&pPool->vmutex); - int code = vnodeImportData(pObj, &import); + code = vnodeImportData(pObj, &import); *pNumOfPoints = import.importedRows; } pVnode->version++; @@ -408,7 +409,7 @@ int vnodeOpenTempFilesForImport(SImportHandle *pHandle, SMeterObj *pObj, int fid } vnodeGetHeadTname(pVnode->nfn, NULL, pVnode->vnode, fid); - symlink(dHeadName, pVnode->nfn); + if (symlink(dHeadName, pVnode->nfn) < 0) return -1; pVnode->nfd = open(pVnode->nfn, O_RDWR | O_CREAT | O_TRUNC, S_IRWXU | S_IRWXG | S_IRWXO); if (pVnode->nfd < 0) { @@ -441,7 +442,7 @@ int vnodeOpenTempFilesForImport(SImportHandle *pHandle, SMeterObj *pObj, int fid lseek(pVnode->hfd, 0, SEEK_SET); lseek(pVnode->nfd, 0, SEEK_SET); if (tsendfile(pVnode->nfd, pVnode->hfd, NULL, pHandle->compInfoOffset) < 0) { - // TODO : deal with ERROR here + return -1; } // Leave a SCompInfo space here @@ -454,10 +455,10 @@ typedef enum { DATA_LOAD_TIMESTAMP = 0x1, DATA_LOAD_OTHER_DATA = 0x2 } DataLoadM /* Function to load a block data at the requirement of mod */ -static int vnodeLoadNeededBlockData(SMeterObj *pObj, SImportHandle *pHandle, int blockId, uint8_t loadMod) { +static int vnodeLoadNeededBlockData(SMeterObj *pObj, SImportHandle *pHandle, int blockId, uint8_t loadMod, int *code) { size_t size; - int code = 0; SCompBlock *pBlock = pHandle->pBlocks + blockId; + *code = TSDB_CODE_SUCCESS; assert(pBlock->sversion == pObj->sversion); @@ -477,6 +478,7 @@ static int vnodeLoadNeededBlockData(SMeterObj *pObj, SImportHandle *pHandle, int if (pHandle->pField == NULL) { dError("vid: %d, sid: %d, meterId: %s, failed to allocate memory, size: %ul", pObj->vnode, pObj->sid, pObj->meterId, size); + *code = TSDB_CODE_SERV_OUT_OF_MEMORY; return -1; } pHandle->pFieldSize = size; @@ -486,12 +488,14 @@ static int vnodeLoadNeededBlockData(SMeterObj *pObj, SImportHandle *pHandle, int if (read(dfd, (void *)(pHandle->pField), pHandle->pFieldSize) < 0) { dError("vid:%d sid:%d meterId:%s, failed to read data file, size:%ld reason:%s", pVnode->vnode, pObj->sid, pObj->meterId, pHandle->pFieldSize, strerror(errno)); + *code = TSDB_CODE_FILE_CORRUPTED; return -1; } if (!taosCheckChecksumWhole((uint8_t *)(pHandle->pField), pHandle->pFieldSize)) { dError("vid:%d sid:%d meterId:%s, data file %s is broken since checksum mismatch", pVnode->vnode, pObj->sid, pObj->meterId, pVnode->lfn); + *code = TSDB_CODE_FILE_CORRUPTED; return -1; } } @@ -504,6 +508,7 @@ static int vnodeLoadNeededBlockData(SMeterObj *pObj, SImportHandle *pHandle, int if (pHandle->buffer == NULL) { dError("vid: %d, sid: %d, meterId: %s, failed to allocate memory, size: %ul", pObj->vnode, pObj->sid, pObj->meterId, size); + *code = TSDB_CODE_SERV_OUT_OF_MEMORY; return -1; } @@ -520,16 +525,18 @@ static int vnodeLoadNeededBlockData(SMeterObj *pObj, SImportHandle *pHandle, int if (pHandle->temp == NULL) { dError("vid: %d, sid: %d, meterId: %s, failed to allocate memory, size: %ul", pObj->vnode, pObj->sid, pObj->meterId, size); + *code = TSDB_CODE_SERV_OUT_OF_MEMORY; return -1; } } if (pHandle->tempBuffer == NULL) { - pHandle->tempBufferSize = pObj->maxBytes * pObj->pointsPerFileBlock + EXTRA_BYTES; + pHandle->tempBufferSize = pObj->maxBytes * pObj->pointsPerFileBlock + EXTRA_BYTES + sizeof(TSCKSUM); pHandle->tempBuffer = malloc(pHandle->tempBufferSize); if (pHandle->tempBuffer == NULL) { dError("vid: %d, sid: %d, meterId: %s, failed to allocate memory, size: %ul", pObj->vnode, pObj->sid, pObj->meterId, pHandle->tempBufferSize); + *code = TSDB_CODE_SERV_OUT_OF_MEMORY; return -1; } } @@ -537,21 +544,24 @@ static int vnodeLoadNeededBlockData(SMeterObj *pObj, SImportHandle *pHandle, int if ((loadMod & DATA_LOAD_TIMESTAMP) && (~(pHandle->blockLoadState & DATA_LOAD_TIMESTAMP))) { // load only timestamp part - code = - vnodeReadColumnToMem(dfd, pBlock, &(pHandle->pField), PRIMARYKEY_TIMESTAMP_COL_INDEX, + if (vnodeReadColumnToMem(dfd, pBlock, &(pHandle->pField), PRIMARYKEY_TIMESTAMP_COL_INDEX, pHandle->data[PRIMARYKEY_TIMESTAMP_COL_INDEX]->data, sizeof(TSKEY) * pBlock->numOfPoints, - pHandle->temp, pHandle->tempBuffer, pHandle->tempBufferSize); + pHandle->temp, pHandle->tempBuffer, pHandle->tempBufferSize) < 0) { + *code = TSDB_CODE_FILE_CORRUPTED; + return -1; + } - if (code != 0) return -1; pHandle->blockLoadState |= DATA_LOAD_TIMESTAMP; } if ((loadMod & DATA_LOAD_OTHER_DATA) && (~(pHandle->blockLoadState & DATA_LOAD_OTHER_DATA))) { // load other columns for (int col = 1; col < pBlock->numOfCols; col++) { - code = vnodeReadColumnToMem(dfd, pBlock, &(pHandle->pField), col, pHandle->data[col]->data, - pBlock->numOfPoints * pObj->schema[col].bytes, pHandle->temp, pHandle->tempBuffer, - pHandle->tempBufferSize); - if (code != 0) return -1; + if (vnodeReadColumnToMem(dfd, pBlock, &(pHandle->pField), col, pHandle->data[col]->data, + pBlock->numOfPoints * pObj->schema[col].bytes, pHandle->temp, pHandle->tempBuffer, + pHandle->tempBufferSize) < 0) { + *code = TSDB_CODE_FILE_CORRUPTED; + return -1; + } } pHandle->blockLoadState |= DATA_LOAD_OTHER_DATA; @@ -651,52 +661,6 @@ void vnodeConvertRowsToCols(SMeterObj *pObj, const char *payload, int rows, SDat } } -// TODO : Check the correctness -int vnodeCreateNeccessaryFiles(SVnodeObj *pVnode) { - int numOfFiles = 0, fileId, filesAdded = 0; - int vnode = pVnode->vnode; - SVnodeCfg *pCfg = &(pVnode->cfg); - - if (pVnode->lastKeyOnFile == 0) { - if (pCfg->daysPerFile == 0) pCfg->daysPerFile = 10; - pVnode->fileId = pVnode->firstKey / tsMsPerDay[pVnode->cfg.precision] / pCfg->daysPerFile; - pVnode->lastKeyOnFile = (long)(pVnode->fileId + 1) * pCfg->daysPerFile * tsMsPerDay[pVnode->cfg.precision] - 1; - pVnode->numOfFiles = 1; - if (vnodeCreateEmptyCompFile(vnode, pVnode->fileId) < 0) return -1; - } - - numOfFiles = (pVnode->lastKeyOnFile - pVnode->commitFirstKey) / tsMsPerDay[pVnode->cfg.precision] / pCfg->daysPerFile; - if (pVnode->commitFirstKey > pVnode->lastKeyOnFile) numOfFiles = -1; - - dTrace("vid:%d, commitFirstKey:%ld lastKeyOnFile:%ld numOfFiles:%d fileId:%d vnodeNumOfFiles:%d", pVnode->vnode, - pVnode->commitFirstKey, pVnode->lastKeyOnFile, numOfFiles, pVnode->fileId, pVnode->numOfFiles); - - if (numOfFiles >= pVnode->numOfFiles) { - // create empty header files backward - filesAdded = numOfFiles - pVnode->numOfFiles + 1; - for (int i = 0; i < filesAdded; ++i) { - fileId = pVnode->fileId - pVnode->numOfFiles - i; - if (vnodeCreateEmptyCompFile(vnode, fileId) < 0) return -1; - } - } else if (numOfFiles < 0) { - // create empty header files forward - pVnode->fileId++; - if (vnodeCreateEmptyCompFile(vnode, pVnode->fileId) < 0) return -1; - pVnode->lastKeyOnFile += (long)tsMsPerDay[pVnode->cfg.precision] * pCfg->daysPerFile; - filesAdded = 1; - numOfFiles = 0; // hacker way - } - - fileId = pVnode->fileId - numOfFiles; - pVnode->commitLastKey = - pVnode->lastKeyOnFile - (long)numOfFiles * tsMsPerDay[pVnode->cfg.precision] * pCfg->daysPerFile; - pVnode->commitFirstKey = pVnode->commitLastKey - (long)tsMsPerDay[pVnode->cfg.precision] * pCfg->daysPerFile + 1; - pVnode->commitFileId = fileId; - pVnode->numOfFiles = pVnode->numOfFiles + filesAdded; - - return 0; -} - static int vnodeMergeDataIntoFile(SImportInfo *pImport, const char *payload, int rows, int fid) { SMeterObj * pObj = (SMeterObj *)(pImport->pObj); SVnodeObj * pVnode = vnodeList + pObj->vnode; @@ -709,6 +673,7 @@ static int vnodeMergeDataIntoFile(SImportInfo *pImport, const char *payload, int SCompBlock compBlock; TSCKSUM checksum = 0; int pointsImported = 0; + int code = TSDB_CODE_SUCCESS; TSKEY delta = pVnode->cfg.daysPerFile * tsMsPerDay[pVnode->cfg.precision]; TSKEY minFileKey = fid * delta; @@ -720,12 +685,12 @@ static int vnodeMergeDataIntoFile(SImportInfo *pImport, const char *payload, int // create neccessary files pVnode->commitFirstKey = firstKey; - if (vnodeCreateNeccessaryFiles(pVnode) < 0) return -1; + if (vnodeCreateNeccessaryFiles(pVnode) < 0) return TSDB_CODE_OTHERS; assert(pVnode->commitFileId == fid); // Open least files to import .head(hfd) .data(dfd) .last(lfd) - if (vnodeOpenMinFilesForImport(pObj->vnode, fid) < 0) return -1; + if (vnodeOpenMinFilesForImport(pObj->vnode, fid) < 0) return TSDB_CODE_OTHERS; memset(&importHandle, 0, sizeof(SImportHandle)); @@ -735,6 +700,7 @@ static int vnodeMergeDataIntoFile(SImportInfo *pImport, const char *payload, int if (importHandle.pHeader == NULL) { dError("vid: %d, sid: %d, meterId: %s, failed to allocate memory, size: %ul", pObj->vnode, pObj->sid, pObj->meterId, importHandle.pHeaderSize); + code = TSDB_CODE_SERV_OUT_OF_MEMORY; goto _error_merge; } @@ -742,12 +708,14 @@ static int vnodeMergeDataIntoFile(SImportInfo *pImport, const char *payload, int if (read(pVnode->hfd, (void *)(importHandle.pHeader), importHandle.pHeaderSize) < importHandle.pHeaderSize) { dError("vid: %d, sid: %d, meterId: %s, fid: %d failed to read SCompHeader part, reason:%s", pObj->vnode, pObj->sid, pObj->meterId, fid, strerror(errno)); + code = TSDB_CODE_OTHERS; goto _error_merge; } if (!taosCheckChecksumWhole((uint8_t *)(importHandle.pHeader), importHandle.pHeaderSize)) { dError("vid: %d, sid: %d, meterId: %s, fid: %d SCompHeader part is broken", pObj->vnode, pObj->sid, pObj->meterId, fid); + code = TSDB_CODE_FILE_CORRUPTED; goto _error_merge; } } @@ -759,6 +727,7 @@ static int vnodeMergeDataIntoFile(SImportInfo *pImport, const char *payload, int if (buffer == NULL) { dError("vid: %d, sid: %d, meterId: %s, failed to allocate memory, size: %ul", pObj->vnode, pObj->sid, pObj->meterId, size); + code = TSDB_CODE_SERV_OUT_OF_MEMORY; goto _error_merge; } @@ -766,6 +735,7 @@ static int vnodeMergeDataIntoFile(SImportInfo *pImport, const char *payload, int if (cbuffer == NULL) { dError("vid: %d, sid: %d, meterId: %s, failed to allocate memory, size: %ul", pObj->vnode, pObj->sid, pObj->meterId, size); + code = TSDB_CODE_SERV_OUT_OF_MEMORY; goto _error_merge; } @@ -783,6 +753,7 @@ static int vnodeMergeDataIntoFile(SImportInfo *pImport, const char *payload, int if (importHandle.pHeader[pObj->sid].compInfoOffset == 0) { // No data in this file, just write it _write_empty_point: if (vnodeOpenTempFilesForImport(&importHandle, pObj, fid) < 0) { + code = TSDB_CODE_OTHERS; goto _error_merge; } importHandle.oldNumOfBlocks = 0; @@ -793,7 +764,6 @@ static int vnodeMergeDataIntoFile(SImportInfo *pImport, const char *payload, int vnodeConvertRowsToCols(pObj, payload + rowsWritten * pObj->bytesPerPoint, rowsToWrite, data, 0); pointsImported += rowsToWrite; - // TODO : Write the block to the file compBlock.last = 1; if (vnodeWriteBlockToFile(pObj, &compBlock, data, cdata, rowsToWrite) < 0) { // TODO: deal with ERROR here @@ -816,6 +786,7 @@ static int vnodeMergeDataIntoFile(SImportInfo *pImport, const char *payload, int if (read(pVnode->hfd, (void *)(&(importHandle.compInfo)), sizeof(SCompInfo)) < sizeof(SCompInfo)) { dError("vid:%d sid:%d meterId:%s, failed to read .head file, reason:%s", pVnode->vnode, pObj->sid, pObj->meterId, strerror(errno)); + code = TSDB_CODE_FILE_CORRUPTED; goto _error_merge; } @@ -823,13 +794,13 @@ static int vnodeMergeDataIntoFile(SImportInfo *pImport, const char *payload, int (!taosCheckChecksumWhole((uint8_t *)(&(importHandle.compInfo)), sizeof(SCompInfo)))) { dError("vid:%d sid:%d meterId:%s, .head file %s is broken, delemeter:%x", pVnode->vnode, pObj->sid, pObj->meterId, pVnode->cfn, importHandle.compInfo.delimiter); + code = TSDB_CODE_FILE_CORRUPTED; goto _error_merge; } - { // Check the context of SCompInfo part - if (importHandle.compInfo.uid != pObj->uid) { // The data belongs to the other meter - goto _write_empty_point; - } + // Check the context of SCompInfo part + if (importHandle.compInfo.uid != pObj->uid) { // The data belongs to the other meter + goto _write_empty_point; } importHandle.oldNumOfBlocks = importHandle.compInfo.numOfBlocks; @@ -840,18 +811,21 @@ static int vnodeMergeDataIntoFile(SImportInfo *pImport, const char *payload, int if (importHandle.pBlocks == NULL) { dError("vid:%d sid:%d meterId:%s, failed to allocate importHandle.pBlock, size:%ul", pVnode->vnode, pObj->sid, pObj->meterId, size); + code = TSDB_CODE_SERV_OUT_OF_MEMORY; goto _error_merge; } if (read(pVnode->hfd, (void *)(importHandle.pBlocks), size) < size) { dError("vid:%d sid:%d meterId:%s, failed to read importHandle.pBlock, reason:%s", pVnode->vnode, pObj->sid, pObj->meterId, strerror(errno)); + code = TSDB_CODE_FILE_CORRUPTED; goto _error_merge; } if (!taosCheckChecksumWhole((uint8_t *)(importHandle.pBlocks), size)) { dError("vid:%d sid:%d meterId:%s, pBlock part is broken in %s", pVnode->vnode, pObj->sid, pObj->meterId, pVnode->cfn); + code = TSDB_CODE_FILE_CORRUPTED; goto _error_merge; } } @@ -876,6 +850,7 @@ static int vnodeMergeDataIntoFile(SImportInfo *pImport, const char *payload, int sizeof(SCompBlock) * blocksLeft) < 0) { dError("vid:%d sid:%d meterId:%s, failed to write %s file, size:%ul, reason:%s", pVnode->vnode, pObj->sid, pObj->meterId, pVnode->nfn, sizeof(SCompBlock) * blocksLeft, strerror(errno)); + code = TSDB_CODE_OTHERS; goto _error_merge; } } @@ -883,6 +858,7 @@ static int vnodeMergeDataIntoFile(SImportInfo *pImport, const char *payload, int if (twrite(pVnode->nfd, (void *)(&checksum), sizeof(TSCKSUM)) < 0) { dError("vid:%d sid:%d meterId:%s, failed to write %s file, size:%ul, reason:%s", pVnode->vnode, pObj->sid, pObj->meterId, pVnode->nfn, sizeof(TSCKSUM), strerror(errno)); + code = TSDB_CODE_OTHERS; goto _error_merge; } } @@ -1001,7 +977,8 @@ static int vnodeMergeDataIntoFile(SImportInfo *pImport, const char *payload, int SCompBlock *pBlock = importHandle.pBlocks + blockIter.slot; if (pBlock->sversion != pObj->sversion) { /*TODO*/ } - if (vnodeLoadNeededBlockData(pObj, &importHandle, blockIter.slot, DATA_LOAD_TIMESTAMP) < 0) { + if (vnodeLoadNeededBlockData(pObj, &importHandle, blockIter.slot, DATA_LOAD_TIMESTAMP, &code) < 0) { + goto _error_merge; } int pos = (*vnodeSearchKeyFunc[pObj->searchAlgorithm])( importHandle.data[PRIMARYKEY_TIMESTAMP_COL_INDEX]->data, pBlock->numOfPoints, key, TSQL_SO_ASC); @@ -1027,6 +1004,7 @@ static int vnodeMergeDataIntoFile(SImportInfo *pImport, const char *payload, int // Open the new .t file if not opened yet. if (pVnode->nfd <= 0) { if (vnodeOpenTempFilesForImport(&importHandle, pObj, fid) < 0) { + code = TSDB_CODE_OTHERS; goto _error_merge; } } @@ -1039,6 +1017,7 @@ static int vnodeMergeDataIntoFile(SImportInfo *pImport, const char *payload, int dError("vid:%d sid:%d meterId:%s, failed to write %s file, size:%ul, reason:%s", pVnode->vnode, pObj->sid, pObj->meterId, pVnode->nfn, sizeof(SCompBlock) * (blockIter.slot - blockIter.oslot), strerror(errno)); + code = TSDB_CODE_OTHERS; goto _error_merge; } @@ -1071,7 +1050,7 @@ static int vnodeMergeDataIntoFile(SImportInfo *pImport, const char *payload, int } else { // Merge block and payload from payloadIter if (vnodeLoadNeededBlockData(pObj, &importHandle, blockIter.slot, - DATA_LOAD_TIMESTAMP | DATA_LOAD_OTHER_DATA) < 0) { // Load neccessary blocks + DATA_LOAD_TIMESTAMP | DATA_LOAD_OTHER_DATA, &code) < 0) { // Load neccessary blocks goto _error_merge; } @@ -1172,6 +1151,7 @@ static int vnodeMergeDataIntoFile(SImportInfo *pImport, const char *payload, int // Write the SCompInfo part if (vnodeCloseImportFiles(pObj, &importHandle) < 0) { + code = TSDB_CODE_OTHERS; goto _error_merge; } @@ -1187,7 +1167,7 @@ static int vnodeMergeDataIntoFile(SImportInfo *pImport, const char *payload, int tfree(importHandle.temp); tfree(importHandle.tempBuffer); - return 0; + return code; _error_merge: tfree(buffer); @@ -1214,7 +1194,7 @@ _error_merge: remove(pVnode->nfn); } - return -1; + return code; } #define FORWARD_ITER(iter, step, slotLimit, posLimit) \ @@ -1316,7 +1296,7 @@ int vnodeImportDataToCache(SImportInfo *pImport, const char *payload, const int pBuffer = (SMergeBuffer *)malloc(size); if (pBuffer == NULL) { dError("vid:%d sid:%d meterId:%s, failed to allocate memory, size:%d", pObj->vnode, pObj->sid, pObj->meterId, size); - return code; + return TSDB_CODE_SERV_OUT_OF_MEMORY; } pBuffer->spos = 0; pBuffer->epos = 0; @@ -1532,7 +1512,7 @@ int vnodeImportDataToFiles(SImportInfo *pImport, char *payload, const int rows) KEY_AT_INDEX(payload, pObj->bytesPerPoint, (srow + nrows - 1))); code = vnodeMergeDataIntoFile(pImport, payload + (srow * pObj->bytesPerPoint), nrows, fid); - if (code != 0) break; + if (code != TSDB_CODE_SUCCESS) break; } return code; @@ -1548,6 +1528,7 @@ int vnodeImportData(SMeterObj *pObj, SImportInfo *pImport) { // 1. import data in range (pObj->lastKeyOnFile, INT64_MAX) into cache if (vnodeSearchKeyInRange(pImport->payload, pObj->bytesPerPoint, pImport->rows, pObj->lastKeyOnFile + 1, INT64_MAX, &srow, &nrows) >= 0) { + assert(nrows > 0); code = vnodeImportDataToCache(pImport, pImport->payload + pObj->bytesPerPoint * srow, nrows); if (pImport->commit) { // Need to commit now pPool->commitInProcess = 0; @@ -1555,12 +1536,13 @@ int vnodeImportData(SMeterObj *pObj, SImportInfo *pImport) { return code; } - if (code != 0) return code; + if (code != TSDB_CODE_SUCCESS) return code; } // 2. import data (0, pObj->lastKeyOnFile) into files if (vnodeSearchKeyInRange(pImport->payload, pObj->bytesPerPoint, pImport->rows, 0, pObj->lastKeyOnFile - 1, &srow, &nrows) >= 0) { + assert(nrows > 0); code = vnodeImportDataToFiles(pImport, pImport->payload + pObj->bytesPerPoint * srow, nrows); } From 7d1da2127fb56ebc38bf7a781bb5a42229cf57c1 Mon Sep 17 00:00:00 2001 From: Hongze Cheng Date: Mon, 25 Nov 2019 15:37:37 +0800 Subject: [PATCH 11/20] a quick fix --- src/system/detail/src/vnodeFile.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/system/detail/src/vnodeFile.c b/src/system/detail/src/vnodeFile.c index c598be59ee..15af05a28a 100644 --- a/src/system/detail/src/vnodeFile.c +++ b/src/system/detail/src/vnodeFile.c @@ -250,6 +250,8 @@ int vnodeOpenCommitFiles(SVnodeObj *pVnode, int noTempLast) { if (vnodeCreateNeccessaryFiles(pVnode) < 0) return -1; + fileId = pVnode->commitFileId; + dTrace("vid:%d, commit fileId:%d, commitLastKey:%ld, vnodeLastKey:%ld, lastKeyOnFile:%ld numOfFiles:%d", vnode, fileId, pVnode->commitLastKey, pVnode->lastKey, pVnode->lastKeyOnFile, pVnode->numOfFiles); From a3464134c85b946f4816428a648385be788dfd6f Mon Sep 17 00:00:00 2001 From: Hongze Cheng Date: Mon, 25 Nov 2019 17:42:39 +0800 Subject: [PATCH 12/20] fix small bug --- src/system/detail/src/vnodeImport.c | 30 +++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) diff --git a/src/system/detail/src/vnodeImport.c b/src/system/detail/src/vnodeImport.c index fa14a53c58..270f02b995 100644 --- a/src/system/detail/src/vnodeImport.c +++ b/src/system/detail/src/vnodeImport.c @@ -674,6 +674,9 @@ static int vnodeMergeDataIntoFile(SImportInfo *pImport, const char *payload, int TSCKSUM checksum = 0; int pointsImported = 0; int code = TSDB_CODE_SUCCESS; + SCachePool * pPool = (SCachePool *)pVnode->pCachePool; + SCacheInfo * pInfo = (SCacheInfo *)(pObj->pCache); + TSKEY lastKeyImported = 0; TSKEY delta = pVnode->cfg.daysPerFile * tsMsPerDay[pVnode->cfg.precision]; TSKEY minFileKey = fid * delta; @@ -758,6 +761,7 @@ static int vnodeMergeDataIntoFile(SImportInfo *pImport, const char *payload, int } importHandle.oldNumOfBlocks = 0; importHandle.driftOffset += sizeof(SCompInfo); + lastKeyImported = lastKey; for (int rowsWritten = 0; rowsWritten < rows;) { int rowsToWrite = MIN(pVnode->cfg.rowsInFileBlock, (rows - rowsWritten) /* the rows left */); @@ -874,6 +878,7 @@ static int vnodeMergeDataIntoFile(SImportInfo *pImport, const char *payload, int // TODO : Convert into while here vnodeConvertRowsToCols(pObj, payload + pObj->bytesPerPoint * payloadIter, rowsLeft, data, 0); pointsImported++; + lastKeyImported = KEY_AT_INDEX(payload, pObj->bytesPerPoint, payloadIter); assert(importHandle.last == 0); @@ -1032,6 +1037,7 @@ static int vnodeMergeDataIntoFile(SImportInfo *pImport, const char *payload, int vnodeConvertRowsToCols(pObj, payload + pObj->bytesPerPoint * payloadIter, 1, data, rowOffset); pointsImported++; + lastKeyImported = KEY_AT_INDEX(payload, pObj->bytesPerPoint, payloadIter); payloadIter++; } @@ -1099,6 +1105,7 @@ static int vnodeMergeDataIntoFile(SImportInfo *pImport, const char *payload, int } else if (blockIter.pos >= importHandle.pBlocks[blockIter.slot].numOfPoints) { // block end vnodeConvertRowsToCols(pObj, payload + pObj->bytesPerPoint * payloadIter, 1, data, rowOffset); pointsImported++; + lastKeyImported = KEY_AT_INDEX(payload, pObj->bytesPerPoint, payloadIter); payloadIter++; rowOffset++; } else { @@ -1112,6 +1119,7 @@ static int vnodeMergeDataIntoFile(SImportInfo *pImport, const char *payload, int blockIter.pos)) { vnodeConvertRowsToCols(pObj, payload + pObj->bytesPerPoint * payloadIter, 1, data, rowOffset); pointsImported++; + lastKeyImported = KEY_AT_INDEX(payload, pObj->bytesPerPoint, payloadIter); payloadIter++; rowOffset++; } else { @@ -1157,6 +1165,28 @@ static int vnodeMergeDataIntoFile(SImportInfo *pImport, const char *payload, int pImport->importedRows += pointsImported; + pthread_mutex_lock(&(pPool->vmutex)); + if (pInfo->numOfBlocks > 0) { + int slot = (pInfo->currentSlot - pInfo->numOfBlocks + 1 + pInfo->maxBlocks) % pInfo->maxBlocks; + TSKEY firstKeyInCache = *((TSKEY *)(pInfo->cacheBlocks[slot]->offset[0])); + + // data may be in commited cache, cache shall be released + if (lastKeyImported > firstKeyInCache) { + while (slot != pInfo->commitSlot) { + SCacheBlock *pCacheBlock = pInfo->cacheBlocks[slot]; + vnodeFreeCacheBlock(pCacheBlock); + slot = (slot + 1 + pInfo->maxBlocks) % pInfo->maxBlocks; + } + + if (pInfo->commitPoint == pObj->pointsPerBlock) { + if (pInfo->cacheBlocks[pInfo->commitSlot]->pMeterObj == pObj) { + vnodeFreeCacheBlock(pInfo->cacheBlocks[pInfo->commitSlot]); + } + } + } + } + pthread_mutex_unlock(&(pPool->vmutex)); + // TODO: free the allocated memory tfree(buffer); tfree(cbuffer); From 178f5771f6434ab7427d3051ad20a50cc482ed5e Mon Sep 17 00:00:00 2001 From: hjxilinx Date: Tue, 26 Nov 2019 17:27:30 +0800 Subject: [PATCH 13/20] [jira none] --- src/system/detail/src/vnodeImport.c | 33 +++++++------ src/system/detail/src/vnodeMeter.c | 10 ++-- src/system/detail/src/vnodeShell.c | 77 +++++++++-------------------- 3 files changed, 47 insertions(+), 73 deletions(-) diff --git a/src/system/detail/src/vnodeImport.c b/src/system/detail/src/vnodeImport.c index 270f02b995..5924caf6f4 100644 --- a/src/system/detail/src/vnodeImport.c +++ b/src/system/detail/src/vnodeImport.c @@ -166,7 +166,6 @@ int vnodeImportPoints(SMeterObj *pObj, char *cont, int contLen, char source, voi int code = TSDB_CODE_SUCCESS; SCachePool *pPool = (SCachePool *)(pVnode->pCachePool); SShellObj * pShell = (SShellObj *)param; - int pointsImported = 0; TSKEY firstKey, lastKey; payload = pSubmit->payLoad; @@ -200,25 +199,29 @@ int vnodeImportPoints(SMeterObj *pObj, char *cont, int contLen, char source, voi if (code != 0) return code; } + /* + * The timestamp of all records in a submit payload are always in ascending order, guaranteed by client, so here only + * the first key. + */ if (firstKey > pObj->lastKey) { // Just call insert - vnodeClearMeterState(pObj, TSDB_METER_STATE_IMPORTING); - // TODO: Here may fail to set the state, add error handling. - vnodeSetMeterState(pObj, TSDB_METER_STATE_INSERT); code = vnodeInsertPoints(pObj, cont, contLen, TSDB_DATA_SOURCE_LOG, NULL, sversion, pNumOfPoints, now); - // TODO: outside clear state function is invalid for this structure - vnodeClearMeterState(pObj, TSDB_METER_STATE_INSERT); } else { // trigger import if (sversion != pObj->sversion) { dError("vid:%d sid:%d id:%s, invalid sversion, expected:%d received:%d", pObj->vnode, pObj->sid, pObj->meterId, pObj->sversion, sversion); return TSDB_CODE_OTHERS; } - - SImportInfo import; + + // check the table status for perform import historical data + if ((code = vnodeSetMeterInsertImportStateEx(pObj, TSDB_METER_STATE_IMPORTING)) != TSDB_CODE_SUCCESS) { + return code; + } + + SImportInfo import = {0}; dTrace("vid:%d sid:%d id:%s, try to import %d rows data, firstKey:%ld, lastKey:%ld, object lastKey:%ld", pObj->vnode, pObj->sid, pObj->meterId, rows, firstKey, lastKey, pObj->lastKey); - memset(&import, 0, sizeof(import)); + import.firstKey = firstKey; import.lastKey = lastKey; import.pObj = pObj; @@ -226,8 +229,7 @@ int vnodeImportPoints(SMeterObj *pObj, char *cont, int contLen, char source, voi import.payload = payload; import.rows = rows; - // FIXME: mutex here seems meaningless and num here still can - // be changed + // FIXME: mutex here seems meaningless and num here still can be changed int32_t num = 0; pthread_mutex_lock(&pVnode->vmutex); num = pObj->numOfQueries; @@ -236,10 +238,12 @@ int vnodeImportPoints(SMeterObj *pObj, char *cont, int contLen, char source, voi int32_t commitInProcess = 0; pthread_mutex_lock(&pPool->vmutex); - if (((commitInProcess = pPool->commitInProcess) == 1) || - num > 0) { // mutual exclusion with read (need to change here) + if (((commitInProcess = pPool->commitInProcess) == 1) || num > 0) { + // mutual exclusion with read (need to change here) pthread_mutex_unlock(&pPool->vmutex); + vnodeClearMeterState(pObj, TSDB_METER_STATE_IMPORTING); return TSDB_CODE_ACTION_IN_PROGRESS; + } else { pPool->commitInProcess = 1; pthread_mutex_unlock(&pPool->vmutex); @@ -248,7 +252,8 @@ int vnodeImportPoints(SMeterObj *pObj, char *cont, int contLen, char source, voi } pVnode->version++; } - + + vnodeClearMeterState(pObj, TSDB_METER_STATE_IMPORTING); return code; } diff --git a/src/system/detail/src/vnodeMeter.c b/src/system/detail/src/vnodeMeter.c index f5619d72b8..77bafd50d4 100644 --- a/src/system/detail/src/vnodeMeter.c +++ b/src/system/detail/src/vnodeMeter.c @@ -584,12 +584,12 @@ int vnodeInsertPoints(SMeterObj *pObj, char *cont, int contLen, char source, voi if (pVnode->cfg.commitLog && source != TSDB_DATA_SOURCE_LOG) { if (pVnode->logFd < 0) return TSDB_CODE_INVALID_COMMIT_LOG; code = vnodeWriteToCommitLog(pObj, TSDB_ACTION_INSERT, cont, contLen, sversion); - if (code != 0) return code; + if (code != TSDB_CODE_SUCCESS) return code; } if (source == TSDB_DATA_SOURCE_SHELL && pVnode->cfg.replications > 1) { code = vnodeForwardToPeer(pObj, cont, contLen, TSDB_ACTION_INSERT, sversion); - if (code != 0) return code; + if (code != TSDB_CODE_SUCCESS) return code; } if (pObj->sversion < sversion) { @@ -601,11 +601,11 @@ int vnodeInsertPoints(SMeterObj *pObj, char *cont, int contLen, char source, voi } pData = pSubmit->payLoad; - code = TSDB_CODE_SUCCESS; TSKEY firstKey = *((TSKEY *)pData); TSKEY lastKey = *((TSKEY *)(pData + pObj->bytesPerPoint * (numOfPoints - 1))); int cfid = now/pVnode->cfg.daysPerFile/tsMsPerDay[pVnode->cfg.precision]; + TSKEY minAllowedKey = (cfid - pVnode->maxFiles + 1)*pVnode->cfg.daysPerFile*tsMsPerDay[pVnode->cfg.precision]; TSKEY maxAllowedKey = (cfid + 2)*pVnode->cfg.daysPerFile*tsMsPerDay[pVnode->cfg.precision] - 2; if (firstKey < minAllowedKey || firstKey > maxAllowedKey || lastKey < minAllowedKey || lastKey > maxAllowedKey) { @@ -619,7 +619,7 @@ int vnodeInsertPoints(SMeterObj *pObj, char *cont, int contLen, char source, voi } for (i = 0; i < numOfPoints; ++i) { // meter will be dropped, abort current insertion - if (pObj->state >= TSDB_METER_STATE_DELETING) { + if (vnodeIsMeterState(pObj, TSDB_METER_STATE_DELETING)) { dWarn("vid:%d sid:%d id:%s, meter is dropped, abort insert, state:%d", pObj->vnode, pObj->sid, pObj->meterId, pObj->state); @@ -648,6 +648,7 @@ int vnodeInsertPoints(SMeterObj *pObj, char *cont, int contLen, char source, voi pData += pObj->bytesPerPoint; points++; } + atomic_fetch_add_64(&(pVnode->vnodeStatistic.pointsWritten), points * (pObj->numOfColumns - 1)); atomic_fetch_add_64(&(pVnode->vnodeStatistic.totalStorage), points * pObj->bytesPerPoint); @@ -660,6 +661,7 @@ int vnodeInsertPoints(SMeterObj *pObj, char *cont, int contLen, char source, voi pVnode->version++; pthread_mutex_unlock(&(pVnode->vmutex)); + vnodeClearMeterState(pObj, TSDB_METER_STATE_INSERT); _over: diff --git a/src/system/detail/src/vnodeShell.c b/src/system/detail/src/vnodeShell.c index 0312fb07d6..164efb1198 100644 --- a/src/system/detail/src/vnodeShell.c +++ b/src/system/detail/src/vnodeShell.c @@ -509,7 +509,7 @@ static int vnodeCheckSubmitBlockContext(SShellSubmitBlock *pBlocks, SVnodeObj *p SMeterObj *pMeterObj = pVnode->meterList[sid]; if (pMeterObj == NULL) { - dError("vid:%d sid:%d, no active table", pVnode->vnode, sid); + dError("vid:%d sid:%d, not active table", pVnode->vnode, sid); vnodeSendMeterCfgMsg(pVnode->vnode, sid); return TSDB_CODE_NOT_ACTIVE_TABLE; } @@ -581,41 +581,27 @@ int vnodeProcessShellSubmitRequest(char *pMsg, int msgLen, SShellObj *pObj) { if (code != TSDB_CODE_SUCCESS) break; SMeterObj *pMeterObj = (SMeterObj *)(pVnode->meterList[htonl(pBlocks->sid)]); + // dont include sid, vid int32_t subMsgLen = sizeof(pBlocks->numOfRows) + htons(pBlocks->numOfRows) * pMeterObj->bytesPerPoint; int32_t sversion = htonl(pBlocks->sversion); - int32_t state = TSDB_METER_STATE_READY; - state = vnodeSetMeterState(pMeterObj, (pSubmit->import ? TSDB_METER_STATE_IMPORTING : TSDB_METER_STATE_INSERT)); - - if (state == TSDB_METER_STATE_READY) { // meter status is ready for insert/import - if (pSubmit->import) { - code = vnodeImportPoints(pMeterObj, (char *) &(pBlocks->numOfRows), subMsgLen, TSDB_DATA_SOURCE_SHELL, pObj, - sversion, &numOfPoints, now); - vnodeClearMeterState(pMeterObj, TSDB_METER_STATE_IMPORTING); - pObj->numOfTotalPoints += numOfPoints; - if (code == TSDB_CODE_SUCCESS) pObj->count--; - } else { - code = vnodeInsertPoints(pMeterObj, (char *) &(pBlocks->numOfRows), subMsgLen, TSDB_DATA_SOURCE_SHELL, NULL, - sversion, &numOfPoints, now); - vnodeClearMeterState(pMeterObj, TSDB_METER_STATE_INSERT); - numOfTotalPoints += numOfPoints; + if (pSubmit->import) { + code = vnodeImportPoints(pMeterObj, (char *) &(pBlocks->numOfRows), subMsgLen, TSDB_DATA_SOURCE_SHELL, pObj, + sversion, &numOfPoints, now); + pObj->numOfTotalPoints += numOfPoints; + + //records for one table should be consecutive located in the payload buffer, which is guaranteed by client + if (code == TSDB_CODE_SUCCESS) { + pObj->count--; } - if (code != TSDB_CODE_SUCCESS) break; } else { - if (vnodeIsMeterState(pMeterObj, TSDB_METER_STATE_DELETING)) { - dTrace("vid:%d sid:%d id:%s, it is removed, state:%d", pMeterObj->vnode, pMeterObj->sid, pMeterObj->meterId, - pMeterObj->state); - code = TSDB_CODE_NOT_ACTIVE_TABLE; - break; - } else {// waiting for 300ms by default and try again - dTrace("vid:%d sid:%d id:%s, try submit again since in state:%d", pMeterObj->vnode, pMeterObj->sid, - pMeterObj->meterId, pMeterObj->state); - - code = TSDB_CODE_ACTION_IN_PROGRESS; - break; - } + code = vnodeInsertPoints(pMeterObj, (char *) &(pBlocks->numOfRows), subMsgLen, TSDB_DATA_SOURCE_SHELL, NULL, + sversion, &numOfPoints, now); + numOfTotalPoints += numOfPoints; } + + if (code != TSDB_CODE_SUCCESS) break; pBlocks = (SShellSubmitBlock *)((char *)pBlocks + sizeof(SShellSubmitBlock) + htons(pBlocks->numOfRows) * pMeterObj->bytesPerPoint); @@ -635,7 +621,7 @@ _submit_over: pImportInfo->import = 1; pImportInfo->vnode = pSubmit->vnode; pImportInfo->numOfSid = pSubmit->numOfSid; - pImportInfo->ssid = i; + pImportInfo->ssid = i; // start from this position, not the initial position pImportInfo->pObj = pObj; pImportInfo->offset = ((char *)pBlocks) - (pMsg + sizeof(SShellSubmitMsg)); assert(pImportInfo->offset >= 0); @@ -658,7 +644,7 @@ static void vnodeProcessBatchImportTimer(void *param, void *tmrId) { SBatchImportInfo *pImportInfo = (SBatchImportInfo *)param; assert(pImportInfo != NULL && pImportInfo->import); - int32_t i = 0, numOfPoints = 0, numOfTotalPoints = 0; + int32_t i = 0, numOfPoints = 0; int32_t code = TSDB_CODE_SUCCESS; SShellObj * pShell = pImportInfo->pObj; @@ -677,30 +663,11 @@ static void vnodeProcessBatchImportTimer(void *param, void *tmrId) { int32_t subMsgLen = sizeof(pBlocks->numOfRows) + htons(pBlocks->numOfRows) * pMeterObj->bytesPerPoint; int32_t sversion = htonl(pBlocks->sversion); - int32_t state = TSDB_METER_STATE_READY; - state = vnodeSetMeterState(pMeterObj, TSDB_METER_STATE_IMPORTING); - - if (state == TSDB_METER_STATE_READY) { // meter status is ready for insert/import - code = vnodeImportPoints(pMeterObj, (char *)&(pBlocks->numOfRows), subMsgLen, TSDB_DATA_SOURCE_SHELL, pShell, - sversion, &numOfPoints, now); - vnodeClearMeterState(pMeterObj, TSDB_METER_STATE_IMPORTING); - pShell->numOfTotalPoints += numOfPoints; - if (code != TSDB_CODE_SUCCESS) break; - pShell->count--; - } else { - if (vnodeIsMeterState(pMeterObj, TSDB_METER_STATE_DELETING)) { - dTrace("vid:%d sid:%d id:%s, it is removed, state:%d", pMeterObj->vnode, pMeterObj->sid, pMeterObj->meterId, - pMeterObj->state); - code = TSDB_CODE_NOT_ACTIVE_TABLE; - break; - } else { // waiting for 300ms by default and try again - dTrace("vid:%d sid:%d id:%s, try submit again since in state:%d", pMeterObj->vnode, pMeterObj->sid, - pMeterObj->meterId, pMeterObj->state); - - code = TSDB_CODE_ACTION_IN_PROGRESS; - break; - } - } + code = vnodeImportPoints(pMeterObj, (char *)&(pBlocks->numOfRows), subMsgLen, TSDB_DATA_SOURCE_SHELL, pShell, + sversion, &numOfPoints, now); + pShell->numOfTotalPoints += numOfPoints; + if (code != TSDB_CODE_SUCCESS) break; + pShell->count--; pBlocks = (SShellSubmitBlock *)((char *)pBlocks + sizeof(SShellSubmitBlock) + htons(pBlocks->numOfRows) * pMeterObj->bytesPerPoint); From ed253f2a2a57af0605891c0bc3c3beccc3867ed7 Mon Sep 17 00:00:00 2001 From: Hongze Cheng Date: Tue, 26 Nov 2019 17:47:29 +0800 Subject: [PATCH 14/20] fix a small bug --- src/system/detail/src/vnodeImport.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/system/detail/src/vnodeImport.c b/src/system/detail/src/vnodeImport.c index 5924caf6f4..4d1d671096 100644 --- a/src/system/detail/src/vnodeImport.c +++ b/src/system/detail/src/vnodeImport.c @@ -243,7 +243,7 @@ int vnodeImportPoints(SMeterObj *pObj, char *cont, int contLen, char source, voi pthread_mutex_unlock(&pPool->vmutex); vnodeClearMeterState(pObj, TSDB_METER_STATE_IMPORTING); return TSDB_CODE_ACTION_IN_PROGRESS; - + } else { pPool->commitInProcess = 1; pthread_mutex_unlock(&pPool->vmutex); @@ -251,9 +251,9 @@ int vnodeImportPoints(SMeterObj *pObj, char *cont, int contLen, char source, voi *pNumOfPoints = import.importedRows; } pVnode->version++; + vnodeClearMeterState(pObj, TSDB_METER_STATE_IMPORTING); } - vnodeClearMeterState(pObj, TSDB_METER_STATE_IMPORTING); return code; } From 4aa9c7adc9d91af12ee0375bc05e049501bc794f Mon Sep 17 00:00:00 2001 From: Hongze Cheng Date: Thu, 28 Nov 2019 17:07:28 +0800 Subject: [PATCH 15/20] change long back to int64_t --- src/system/detail/src/vnodeFile.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/system/detail/src/vnodeFile.c b/src/system/detail/src/vnodeFile.c index 7d7e5242d5..fc4b435169 100644 --- a/src/system/detail/src/vnodeFile.c +++ b/src/system/detail/src/vnodeFile.c @@ -188,7 +188,7 @@ int vnodeCreateNeccessaryFiles(SVnodeObj *pVnode) { if (pVnode->lastKeyOnFile == 0) { if (pCfg->daysPerFile == 0) pCfg->daysPerFile = 10; pVnode->fileId = pVnode->firstKey / tsMsPerDay[pVnode->cfg.precision] / pCfg->daysPerFile; - pVnode->lastKeyOnFile = (long)(pVnode->fileId + 1) * pCfg->daysPerFile * tsMsPerDay[pVnode->cfg.precision] - 1; + pVnode->lastKeyOnFile = (int64_t)(pVnode->fileId + 1) * pCfg->daysPerFile * tsMsPerDay[pVnode->cfg.precision] - 1; pVnode->numOfFiles = 1; if (vnodeCreateEmptyCompFile(vnode, pVnode->fileId) < 0) return -1; } @@ -221,15 +221,15 @@ int vnodeCreateNeccessaryFiles(SVnodeObj *pVnode) { #else return -1; #endif - pVnode->lastKeyOnFile += (long)tsMsPerDay[pVnode->cfg.precision] * pCfg->daysPerFile; + pVnode->lastKeyOnFile += (int64_t)tsMsPerDay[pVnode->cfg.precision] * pCfg->daysPerFile; filesAdded = 1; numOfFiles = 0; // hacker way } fileId = pVnode->fileId - numOfFiles; pVnode->commitLastKey = - pVnode->lastKeyOnFile - (long)numOfFiles * tsMsPerDay[pVnode->cfg.precision] * pCfg->daysPerFile; - pVnode->commitFirstKey = pVnode->commitLastKey - (long)tsMsPerDay[pVnode->cfg.precision] * pCfg->daysPerFile + 1; + pVnode->lastKeyOnFile - (int64_t)numOfFiles * tsMsPerDay[pVnode->cfg.precision] * pCfg->daysPerFile; + pVnode->commitFirstKey = pVnode->commitLastKey - (int64_t)tsMsPerDay[pVnode->cfg.precision] * pCfg->daysPerFile + 1; pVnode->commitFileId = fileId; pVnode->numOfFiles = pVnode->numOfFiles + filesAdded; From 6fabe650af4bea2bb61b7220282291213b76487b Mon Sep 17 00:00:00 2001 From: Hongze Cheng Date: Fri, 29 Nov 2019 09:39:18 +0800 Subject: [PATCH 16/20] change var name --- src/system/detail/src/vnodeShell.c | 44 +++++++++++++++--------------- 1 file changed, 22 insertions(+), 22 deletions(-) diff --git a/src/system/detail/src/vnodeShell.c b/src/system/detail/src/vnodeShell.c index da26c04c6c..b1a9dbd3ef 100644 --- a/src/system/detail/src/vnodeShell.c +++ b/src/system/detail/src/vnodeShell.c @@ -52,7 +52,7 @@ typedef struct { SShellObj *pObj; int64_t offset; // offset relative the blks char blks[]; -} SBatchImportInfo; +} SBatchSubmitInfo; void *vnodeProcessMsgFromShell(char *msg, void *ahandle, void *thandle) { int sid, vnode; @@ -613,21 +613,21 @@ _submit_over: if (pSubmit->import) { // Import case if (code == TSDB_CODE_ACTION_IN_PROGRESS) { - SBatchImportInfo *pImportInfo = - (SBatchImportInfo *)calloc(1, sizeof(SBatchImportInfo) + msgLen - sizeof(SShellSubmitMsg)); - if (pImportInfo == NULL) { + SBatchSubmitInfo *pSubmitInfo = + (SBatchSubmitInfo *)calloc(1, sizeof(SBatchSubmitInfo) + msgLen - sizeof(SShellSubmitMsg)); + if (pSubmitInfo == NULL) { code = TSDB_CODE_SERV_OUT_OF_MEMORY; ret = vnodeSendShellSubmitRspMsg(pObj, code, pObj->numOfTotalPoints); } else { // Start a timer to process the next part of request - pImportInfo->import = 1; - pImportInfo->vnode = pSubmit->vnode; - pImportInfo->numOfSid = pSubmit->numOfSid; - pImportInfo->ssid = i; // start from this position, not the initial position - pImportInfo->pObj = pObj; - pImportInfo->offset = ((char *)pBlocks) - (pMsg + sizeof(SShellSubmitMsg)); - assert(pImportInfo->offset >= 0); - memcpy((void *)(pImportInfo->blks), (void *)(pMsg + sizeof(SShellSubmitMsg)), msgLen - sizeof(SShellSubmitMsg)); - taosTmrStart(vnodeProcessBatchImportTimer, 10, (void *)pImportInfo, vnodeTmrCtrl); + pSubmitInfo->import = 1; + pSubmitInfo->vnode = pSubmit->vnode; + pSubmitInfo->numOfSid = pSubmit->numOfSid; + pSubmitInfo->ssid = i; // start from this position, not the initial position + pSubmitInfo->pObj = pObj; + pSubmitInfo->offset = ((char *)pBlocks) - (pMsg + sizeof(SShellSubmitMsg)); + assert(pSubmitInfo->offset >= 0); + memcpy((void *)(pSubmitInfo->blks), (void *)(pMsg + sizeof(SShellSubmitMsg)), msgLen - sizeof(SShellSubmitMsg)); + taosTmrStart(vnodeProcessBatchImportTimer, 10, (void *)pSubmitInfo, vnodeTmrCtrl); } } else { if (code == TSDB_CODE_SUCCESS) assert(pObj->count == 0); @@ -642,18 +642,18 @@ _submit_over: } static void vnodeProcessBatchImportTimer(void *param, void *tmrId) { - SBatchImportInfo *pImportInfo = (SBatchImportInfo *)param; - assert(pImportInfo != NULL && pImportInfo->import); + SBatchSubmitInfo *pSubmitInfo = (SBatchSubmitInfo *)param; + assert(pSubmitInfo != NULL && pSubmitInfo->import); int32_t i = 0, numOfPoints = 0; int32_t code = TSDB_CODE_SUCCESS; - SShellObj * pShell = pImportInfo->pObj; - SVnodeObj * pVnode = &vnodeList[pImportInfo->vnode]; - SShellSubmitBlock *pBlocks = (SShellSubmitBlock *)(pImportInfo->blks + pImportInfo->offset); + SShellObj * pShell = pSubmitInfo->pObj; + SVnodeObj * pVnode = &vnodeList[pSubmitInfo->vnode]; + SShellSubmitBlock *pBlocks = (SShellSubmitBlock *)(pSubmitInfo->blks + pSubmitInfo->offset); TSKEY now = taosGetTimestamp(pVnode->cfg.precision); - for (i = pImportInfo->ssid; i < pImportInfo->numOfSid; i++) { + for (i = pSubmitInfo->ssid; i < pSubmitInfo->numOfSid; i++) { numOfPoints = 0; code = vnodeCheckSubmitBlockContext(pBlocks, pVnode); @@ -676,9 +676,9 @@ static void vnodeProcessBatchImportTimer(void *param, void *tmrId) { int ret = 0; if (code == TSDB_CODE_ACTION_IN_PROGRESS) { - pImportInfo->ssid = i; - pImportInfo->offset = ((char *)pBlocks) - pImportInfo->blks; - taosTmrStart(vnodeProcessBatchImportTimer, 10, (void *)pImportInfo, vnodeTmrCtrl); + pSubmitInfo->ssid = i; + pSubmitInfo->offset = ((char *)pBlocks) - pSubmitInfo->blks; + taosTmrStart(vnodeProcessBatchImportTimer, 10, (void *)pSubmitInfo, vnodeTmrCtrl); } else { if (code == TSDB_CODE_SUCCESS) assert(pShell->count == 0); tfree(param); From 56bb3121ad080396533893bd8e010082f111e7f1 Mon Sep 17 00:00:00 2001 From: Hongze Cheng Date: Fri, 29 Nov 2019 10:19:08 +0800 Subject: [PATCH 17/20] refactor part of code --- src/system/detail/src/vnodeShell.c | 123 ++++++++++++++--------------- 1 file changed, 58 insertions(+), 65 deletions(-) diff --git a/src/system/detail/src/vnodeShell.c b/src/system/detail/src/vnodeShell.c index b1a9dbd3ef..8d33b43503 100644 --- a/src/system/detail/src/vnodeShell.c +++ b/src/system/detail/src/vnodeShell.c @@ -39,7 +39,7 @@ SShellObj **shellList = NULL; int vnodeProcessRetrieveRequest(char *pMsg, int msgLen, SShellObj *pObj); int vnodeProcessQueryRequest(char *pMsg, int msgLen, SShellObj *pObj); int vnodeProcessShellSubmitRequest(char *pMsg, int msgLen, SShellObj *pObj); -static void vnodeProcessBatchImportTimer(void *param, void *tmrId); +static void vnodeProcessBatchSubmitTimer(void *param, void *tmrId); int vnodeSelectReqNum = 0; int vnodeInsertReqNum = 0; @@ -523,6 +523,52 @@ static int vnodeCheckSubmitBlockContext(SShellSubmitBlock *pBlocks, SVnodeObj *p return TSDB_CODE_SUCCESS; } +static int vnodeDoSubmitJob(SVnodeObj *pVnode, int import, int32_t *ssid, int32_t esid, SShellSubmitBlock **ppBlocks, + TSKEY now, SShellObj *pObj) { + SShellSubmitBlock *pBlocks = *ppBlocks; + int code = TSDB_CODE_SUCCESS; + int32_t numOfPoints = 0; + int32_t i = 0; + + for (i = *ssid; i < esid; i++) { + numOfPoints = 0; + + code = vnodeCheckSubmitBlockContext(pBlocks, pVnode); + if (code != TSDB_CODE_SUCCESS) break; + + SMeterObj *pMeterObj = (SMeterObj *)(pVnode->meterList[htonl(pBlocks->sid)]); + + // dont include sid, vid + int32_t subMsgLen = sizeof(pBlocks->numOfRows) + htons(pBlocks->numOfRows) * pMeterObj->bytesPerPoint; + int32_t sversion = htonl(pBlocks->sversion); + + if (import) { + code = vnodeImportPoints(pMeterObj, (char *)&(pBlocks->numOfRows), subMsgLen, TSDB_DATA_SOURCE_SHELL, pObj, + sversion, &numOfPoints, now); + pObj->numOfTotalPoints += numOfPoints; + + // records for one table should be consecutive located in the payload buffer, which is guaranteed by client + if (code == TSDB_CODE_SUCCESS) { + pObj->count--; + } + } else { + code = vnodeInsertPoints(pMeterObj, (char *)&(pBlocks->numOfRows), subMsgLen, TSDB_DATA_SOURCE_SHELL, NULL, + sversion, &numOfPoints, now); + pObj->numOfTotalPoints += numOfPoints; + } + + if (code != TSDB_CODE_SUCCESS) break; + + pBlocks = (SShellSubmitBlock *)((char *)pBlocks + sizeof(SShellSubmitBlock) + + htons(pBlocks->numOfRows) * pMeterObj->bytesPerPoint); + } + + *ssid = i; + *ppBlocks = pBlocks; + + return code; +} + int vnodeProcessShellSubmitRequest(char *pMsg, int msgLen, SShellObj *pObj) { int code = 0, ret = 0; int32_t i = 0; @@ -566,47 +612,13 @@ int vnodeProcessShellSubmitRequest(char *pMsg, int msgLen, SShellObj *pObj) { pObj->count = pSubmit->numOfSid; // for import pObj->code = 0; // for import - pObj->numOfTotalPoints = 0; // for import + pObj->numOfTotalPoints = 0; - int32_t numOfPoints = 0; - int32_t numOfTotalPoints = 0; - // We take current time here to avoid it in the for loop. TSKEY now = taosGetTimestamp(pVnode->cfg.precision); pBlocks = (SShellSubmitBlock *)(pMsg + sizeof(SShellSubmitMsg)); - for (i = 0; i < pSubmit->numOfSid; ++i) { - numOfPoints = 0; - - code = vnodeCheckSubmitBlockContext(pBlocks, pVnode); - if (code != TSDB_CODE_SUCCESS) break; - - SMeterObj *pMeterObj = (SMeterObj *)(pVnode->meterList[htonl(pBlocks->sid)]); - - // dont include sid, vid - int32_t subMsgLen = sizeof(pBlocks->numOfRows) + htons(pBlocks->numOfRows) * pMeterObj->bytesPerPoint; - int32_t sversion = htonl(pBlocks->sversion); - - if (pSubmit->import) { - dTrace("start to import data"); - code = vnodeImportPoints(pMeterObj, (char *) &(pBlocks->numOfRows), subMsgLen, TSDB_DATA_SOURCE_SHELL, pObj, - sversion, &numOfPoints, now); - pObj->numOfTotalPoints += numOfPoints; - - //records for one table should be consecutive located in the payload buffer, which is guaranteed by client - if (code == TSDB_CODE_SUCCESS) { - pObj->count--; - } - } else { - code = vnodeInsertPoints(pMeterObj, (char *) &(pBlocks->numOfRows), subMsgLen, TSDB_DATA_SOURCE_SHELL, NULL, - sversion, &numOfPoints, now); - numOfTotalPoints += numOfPoints; - } - - if (code != TSDB_CODE_SUCCESS) break; - - pBlocks = (SShellSubmitBlock *)((char *)pBlocks + sizeof(SShellSubmitBlock) + - htons(pBlocks->numOfRows) * pMeterObj->bytesPerPoint); - } + i = 0; + code = vnodeDoSubmitJob(pVnode, pSubmit->import, &i, pSubmit->numOfSid, &pBlocks, now, pObj); _submit_over: ret = 0; @@ -627,61 +639,42 @@ _submit_over: pSubmitInfo->offset = ((char *)pBlocks) - (pMsg + sizeof(SShellSubmitMsg)); assert(pSubmitInfo->offset >= 0); memcpy((void *)(pSubmitInfo->blks), (void *)(pMsg + sizeof(SShellSubmitMsg)), msgLen - sizeof(SShellSubmitMsg)); - taosTmrStart(vnodeProcessBatchImportTimer, 10, (void *)pSubmitInfo, vnodeTmrCtrl); + taosTmrStart(vnodeProcessBatchSubmitTimer, 10, (void *)pSubmitInfo, vnodeTmrCtrl); } } else { if (code == TSDB_CODE_SUCCESS) assert(pObj->count == 0); ret = vnodeSendShellSubmitRspMsg(pObj, code, pObj->numOfTotalPoints); } } else { // Insert case - ret = vnodeSendShellSubmitRspMsg(pObj, code, numOfTotalPoints); + ret = vnodeSendShellSubmitRspMsg(pObj, code, pObj->numOfTotalPoints); } atomic_fetch_add_32(&vnodeInsertReqNum, 1); return ret; } -static void vnodeProcessBatchImportTimer(void *param, void *tmrId) { +static void vnodeProcessBatchSubmitTimer(void *param, void *tmrId) { SBatchSubmitInfo *pSubmitInfo = (SBatchSubmitInfo *)param; assert(pSubmitInfo != NULL && pSubmitInfo->import); - int32_t i = 0, numOfPoints = 0; + int32_t i = 0; int32_t code = TSDB_CODE_SUCCESS; SShellObj * pShell = pSubmitInfo->pObj; SVnodeObj * pVnode = &vnodeList[pSubmitInfo->vnode]; SShellSubmitBlock *pBlocks = (SShellSubmitBlock *)(pSubmitInfo->blks + pSubmitInfo->offset); TSKEY now = taosGetTimestamp(pVnode->cfg.precision); + i = pSubmitInfo->ssid; - for (i = pSubmitInfo->ssid; i < pSubmitInfo->numOfSid; i++) { - numOfPoints = 0; + code = vnodeDoSubmitJob(pVnode, pSubmitInfo->import, &i, pSubmitInfo->numOfSid, &pBlocks, now, pShell); - code = vnodeCheckSubmitBlockContext(pBlocks, pVnode); - if (code != TSDB_CODE_SUCCESS) break; - - SMeterObj *pMeterObj = (SMeterObj *)(pVnode->meterList[htonl(pBlocks->sid)]); - // dont include sid, vid - int32_t subMsgLen = sizeof(pBlocks->numOfRows) + htons(pBlocks->numOfRows) * pMeterObj->bytesPerPoint; - int32_t sversion = htonl(pBlocks->sversion); - - code = vnodeImportPoints(pMeterObj, (char *)&(pBlocks->numOfRows), subMsgLen, TSDB_DATA_SOURCE_SHELL, pShell, - sversion, &numOfPoints, now); - pShell->numOfTotalPoints += numOfPoints; - if (code != TSDB_CODE_SUCCESS) break; - pShell->count--; - - pBlocks = (SShellSubmitBlock *)((char *)pBlocks + sizeof(SShellSubmitBlock) + - htons(pBlocks->numOfRows) * pMeterObj->bytesPerPoint); - } - - int ret = 0; if (code == TSDB_CODE_ACTION_IN_PROGRESS) { pSubmitInfo->ssid = i; pSubmitInfo->offset = ((char *)pBlocks) - pSubmitInfo->blks; - taosTmrStart(vnodeProcessBatchImportTimer, 10, (void *)pSubmitInfo, vnodeTmrCtrl); + taosTmrStart(vnodeProcessBatchSubmitTimer, 10, (void *)pSubmitInfo, vnodeTmrCtrl); } else { if (code == TSDB_CODE_SUCCESS) assert(pShell->count == 0); tfree(param); - ret = vnodeSendShellSubmitRspMsg(pShell, code, pShell->numOfTotalPoints); + vnodeSendShellSubmitRspMsg(pShell, code, pShell->numOfTotalPoints); } } From a27fcc52b8e9fcca4a0aace0e8fda9d917ab7905 Mon Sep 17 00:00:00 2001 From: Hongze Cheng Date: Fri, 29 Nov 2019 11:50:15 +0800 Subject: [PATCH 18/20] refactor part of code --- src/system/detail/src/vnodeImport.c | 101 ++++++++++++---------------- 1 file changed, 44 insertions(+), 57 deletions(-) diff --git a/src/system/detail/src/vnodeImport.c b/src/system/detail/src/vnodeImport.c index 4d1d671096..07dd7e237f 100644 --- a/src/system/detail/src/vnodeImport.c +++ b/src/system/detail/src/vnodeImport.c @@ -1257,6 +1257,37 @@ int isCacheEnd(SBlockIter iter, SMeterObj *pMeter) { return ((iter.slot == slot) && (iter.pos == pos)); } +static void vnodeFlushMergeBuffer(SMergeBuffer *pBuffer, SBlockIter *pWriteIter, SBlockIter *pCacheIter, + SMeterObj *pObj, SCacheInfo *pInfo, int checkBound) { + // Function to flush the merge buffer data to cache + if (pWriteIter->pos == pObj->pointsPerBlock) { + pWriteIter->pos = 0; + pWriteIter->slot = (pWriteIter->slot + 1) % pInfo->maxBlocks; + } + + while (pBuffer->spos != pBuffer->epos) { + if (checkBound && pWriteIter->slot == pCacheIter->slot && pWriteIter->pos == pCacheIter->pos) break; + for (int col = 0; col < pObj->numOfColumns; col++) { + memcpy(pInfo->cacheBlocks[pWriteIter->slot]->offset[col] + pObj->schema[col].bytes * pWriteIter->pos, + pBuffer->offset[col] + pObj->schema[col].bytes * pBuffer->spos, pObj->schema[col].bytes); + } + + if (pWriteIter->pos + 1 < pObj->pointsPerBlock) { + (pWriteIter->pos)++; + } else { + pInfo->cacheBlocks[pWriteIter->slot]->numOfPoints = pWriteIter->pos + 1; + pWriteIter->slot = (pWriteIter->slot + 1) % pInfo->maxBlocks; + pWriteIter->pos = 0; + } + + pBuffer->spos = (pBuffer->spos + 1) % pBuffer->totalRows; + } + + if ((!checkBound) && pWriteIter->pos != 0) { + pInfo->cacheBlocks[pWriteIter->slot]->numOfPoints = pWriteIter->pos; + } +} + int vnodeImportDataToCache(SImportInfo *pImport, const char *payload, const int rows) { SMeterObj * pObj = pImport->pObj; SVnodeObj * pVnode = vnodeList + pObj->vnode; @@ -1353,35 +1384,13 @@ int vnodeImportDataToCache(SImportInfo *pImport, const char *payload, const int if ((payloadIter >= rows) && isCacheIterEnd) break; if ((pBuffer->epos + 1) % pBuffer->totalRows == pBuffer->spos) { // merge buffer is full, flush - if (writeIter.pos == pObj->pointsPerBlock) { - writeIter.pos = 0; - writeIter.slot = (writeIter.slot + 1) % pInfo->maxBlocks; - } - - while (pBuffer->spos != pBuffer->epos) { - if (writeIter.slot == cacheIter.slot && writeIter.pos == cacheIter.pos) break; - for (int col = 0; col < pObj->numOfColumns; col++) { - memcpy(pInfo->cacheBlocks[writeIter.slot]->offset[col] + pObj->schema[col].bytes * writeIter.pos, - pBuffer->offset[col] + pObj->schema[col].bytes * pBuffer->spos, pObj->schema[col].bytes); - } - - if (writeIter.pos + 1 < pObj->pointsPerBlock) { - writeIter.pos++; - } else { - pInfo->cacheBlocks[writeIter.slot]->numOfPoints = writeIter.pos + 1; - writeIter.slot = (writeIter.slot + 1) % pInfo->maxBlocks; - writeIter.pos = 0; - } - - pBuffer->spos = (pBuffer->spos + 1) % pBuffer->totalRows; - } + vnodeFlushMergeBuffer(pBuffer, &writeIter, &cacheIter, pObj, pInfo, 1); } - if ((payloadIter >= rows) || - ((!isCacheIterEnd) && - (KEY_AT_INDEX(payload, pObj->bytesPerPoint, payloadIter) > - KEY_AT_INDEX(pInfo->cacheBlocks[cacheIter.slot]->offset[0], sizeof(TSKEY), - cacheIter.pos)))) { // if (payload end || (cacheIter not end && payloadKey > blockKey)), consume cache + TSKEY payloadKey = (payloadIter < rows) ? KEY_AT_INDEX(payload, pObj->bytesPerPoint, payloadIter) : INT64_MAX; + TSKEY cacheKey = (isCacheIterEnd) ? INT64_MAX : KEY_AT_INDEX(pInfo->cacheBlocks[cacheIter.slot]->offset[0], sizeof(TSKEY), cacheIter.pos); + + if (cacheKey < payloadKey) { // if (payload end || (cacheIter not end && payloadKey > blockKey)), consume cache for (int col = 0; col < pObj->numOfColumns; col++) { memcpy(pBuffer->offset[col] + pObj->schema[col].bytes * pBuffer->epos, pInfo->cacheBlocks[cacheIter.slot]->offset[col] + pObj->schema[col].bytes * cacheIter.pos, @@ -1389,11 +1398,7 @@ int vnodeImportDataToCache(SImportInfo *pImport, const char *payload, const int } FORWARD_ITER(cacheIter, 1, pInfo->maxBlocks, pObj->pointsPerBlock); isCacheIterEnd = isCacheEnd(cacheIter, pObj); - } else if ((isCacheIterEnd) || - ((payloadIter < rows) && - (KEY_AT_INDEX(payload, pObj->bytesPerPoint, payloadIter) < - KEY_AT_INDEX(pInfo->cacheBlocks[cacheIter.slot]->offset[0], sizeof(TSKEY), - cacheIter.pos)))) { // cacheIter end || (payloadIter not end && payloadKey < blockKey), consume payload + } else if (cacheKey > payloadKey) { // cacheIter end || (payloadIter not end && payloadKey < blockKey), consume payload if (availPoints == 0) { // Need to allocate a new cache block pthread_mutex_lock(&(pPool->vmutex)); // TODO: Need to check if there are enough slots to hold a new one @@ -1482,29 +1487,11 @@ int vnodeImportDataToCache(SImportInfo *pImport, const char *payload, const int pBuffer->epos = (pBuffer->epos + 1) % pBuffer->totalRows; } - if (pBuffer->spos != pBuffer->epos) { - if (writeIter.pos == pObj->pointsPerBlock) { - writeIter.pos = 0; - writeIter.slot = (writeIter.slot + 1) % pInfo->maxBlocks; - } - while (pBuffer->spos != pBuffer->epos) { - for (int col = 0; col < pObj->numOfColumns; col++) { - memcpy(pInfo->cacheBlocks[writeIter.slot]->offset[col] + pObj->schema[col].bytes * writeIter.pos, - pBuffer->offset[col] + pObj->schema[col].bytes * pBuffer->spos, pObj->schema[col].bytes); - } - - if (writeIter.pos + 1 < pObj->pointsPerBlock) { - writeIter.pos++; - } else { - pInfo->cacheBlocks[writeIter.slot]->numOfPoints = writeIter.pos + 1; - writeIter.slot = (writeIter.slot + 1) % pInfo->maxBlocks; - writeIter.pos = 0; - } - - pBuffer->spos = (pBuffer->spos + 1) % pBuffer->totalRows; - } - - if (writeIter.pos != 0) pInfo->cacheBlocks[writeIter.slot]->numOfPoints = writeIter.pos; + if (pBuffer->spos != pBuffer->epos) { // Flush the remaining data in the merge buffer + vnodeFlushMergeBuffer(pBuffer, &writeIter, &cacheIter, pObj, pInfo, 0); + } else { + // Should never come here + assert(false); } if (isAppendData) { @@ -1514,9 +1501,9 @@ int vnodeImportDataToCache(SImportInfo *pImport, const char *payload, const int } } pImport->importedRows += rowsImported; - __sync_fetch_and_sub(&(pObj->freePoints), rowsImported); + atomic_fetch_sub_32(&(pObj->freePoints), rowsImported); - code = 0; + code = TSDB_CODE_SUCCESS; _exit: tfree(pBuffer); From a51d109adbc78ec563dd53b985087151481500bf Mon Sep 17 00:00:00 2001 From: Hongze Cheng Date: Fri, 29 Nov 2019 14:28:03 +0800 Subject: [PATCH 19/20] refact part of code --- src/system/detail/src/vnodeImport.c | 31 ++++------------------------- 1 file changed, 4 insertions(+), 27 deletions(-) diff --git a/src/system/detail/src/vnodeImport.c b/src/system/detail/src/vnodeImport.c index 07dd7e237f..1fff201eb5 100644 --- a/src/system/detail/src/vnodeImport.c +++ b/src/system/detail/src/vnodeImport.c @@ -650,7 +650,7 @@ static int vnodeCloseImportFiles(SMeterObj *pObj, SImportHandle *pHandle) { return 0; } -void vnodeConvertRowsToCols(SMeterObj *pObj, const char *payload, int rows, SData *data[], int rowOffset) { +static void vnodeConvertRowsToCols(SMeterObj *pObj, const char *payload, int rows, SData *data[], int rowOffset) { int sdataRow; int offset; @@ -698,7 +698,7 @@ static int vnodeMergeDataIntoFile(SImportInfo *pImport, const char *payload, int assert(pVnode->commitFileId == fid); // Open least files to import .head(hfd) .data(dfd) .last(lfd) - if (vnodeOpenMinFilesForImport(pObj->vnode, fid) < 0) return TSDB_CODE_OTHERS; + if (vnodeOpenMinFilesForImport(pObj->vnode, fid) < 0) return TSDB_CODE_FILE_CORRUPTED; memset(&importHandle, 0, sizeof(SImportHandle)); @@ -716,7 +716,7 @@ static int vnodeMergeDataIntoFile(SImportInfo *pImport, const char *payload, int if (read(pVnode->hfd, (void *)(importHandle.pHeader), importHandle.pHeaderSize) < importHandle.pHeaderSize) { dError("vid: %d, sid: %d, meterId: %s, fid: %d failed to read SCompHeader part, reason:%s", pObj->vnode, pObj->sid, pObj->meterId, fid, strerror(errno)); - code = TSDB_CODE_OTHERS; + code = TSDB_CODE_FILE_CORRUPTED; goto _error_merge; } @@ -875,31 +875,8 @@ static int vnodeMergeDataIntoFile(SImportInfo *pImport, const char *payload, int } if (blockIter.slot >= importHandle.compInfo.numOfBlocks) { // blocks end, break - assert(false); - // Should never come here - int rowsLeft = rows - payloadIter; - if (pVnode->nfd > 0 && rowsLeft > 0) { - // TODO : Convert into while here - vnodeConvertRowsToCols(pObj, payload + pObj->bytesPerPoint * payloadIter, rowsLeft, data, 0); - pointsImported++; - lastKeyImported = KEY_AT_INDEX(payload, pObj->bytesPerPoint, payloadIter); - - assert(importHandle.last == 0); - - compBlock.last = 1; - if (vnodeWriteBlockToFile(pObj, &compBlock, data, cdata, rows - payloadIter) < 0) { - // TODO : - } - - checksum = taosCalcChecksum(checksum, (uint8_t *)(&compBlock), sizeof(SCompBlock)); - importHandle.newNumOfBlocks++; - importHandle.driftOffset += sizeof(SCompBlock); - importHandle.last = compBlock.last; - twrite(pVnode->nfd, (void *)(&compBlock), sizeof(SCompBlock)); - twrite(pVnode->nfd, (void *)(&checksum), sizeof(TSCKSUM)); - } - break; + assert(false); } TSKEY key = KEY_AT_INDEX(payload, pObj->bytesPerPoint, payloadIter); From 92e5e239f653e6b467949e318db6bf8c88efc809 Mon Sep 17 00:00:00 2001 From: Hongze Cheng Date: Fri, 29 Nov 2019 15:15:28 +0800 Subject: [PATCH 20/20] refact more --- src/system/detail/src/vnodeImport.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/system/detail/src/vnodeImport.c b/src/system/detail/src/vnodeImport.c index 1fff201eb5..7b53aaed06 100644 --- a/src/system/detail/src/vnodeImport.c +++ b/src/system/detail/src/vnodeImport.c @@ -192,6 +192,11 @@ int vnodeImportPoints(SMeterObj *pObj, char *cont, int contLen, char source, voi pObj->vnode, pObj->sid, pObj->meterId, rows, firstKey, lastKey, minKey, maxKey); return TSDB_CODE_TIMESTAMP_OUT_OF_RANGE; } + // forward to peers + if (pShell && pVnode->cfg.replications > 1) { + code = vnodeForwardToPeer(pObj, cont, contLen, TSDB_ACTION_IMPORT, sversion); + if (code != 0) return code; + } if (pVnode->cfg.commitLog && source != TSDB_DATA_SOURCE_LOG) { if (pVnode->logFd < 0) return TSDB_CODE_INVALID_COMMIT_LOG;