192 lines
5.2 KiB
C
192 lines
5.2 KiB
C
/*
|
|
* Copyright (c) 2019 TAOS Data, Inc. <jhtao@taosdata.com>
|
|
*
|
|
* This program is free software: you can use, redistribute, and/or modify
|
|
* it under the terms of the GNU Affero General Public License, version 3
|
|
* or later ("AGPL"), as published by the Free Software Foundation.
|
|
*
|
|
* This program is distributed in the hope that it will be useful, but WITHOUT
|
|
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
|
* FITNESS FOR A PARTICULAR PURPOSE.
|
|
*
|
|
* You should have received a copy of the GNU Affero General Public License
|
|
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
|
*/
|
|
|
|
#define _DEFAULT_SOURCE
|
|
#include "mndSync.h"
|
|
#include "mndTrans.h"
|
|
|
|
static int32_t mndInitWal(SMnode *pMnode) {
|
|
SSyncMgmt *pMgmt = &pMnode->syncMgmt;
|
|
|
|
char path[PATH_MAX] = {0};
|
|
snprintf(path, sizeof(path), "%s%swal", pMnode->path, TD_DIRSEP);
|
|
SWalCfg cfg = {.vgId = 1,
|
|
.fsyncPeriod = 0,
|
|
.rollPeriod = -1,
|
|
.segSize = -1,
|
|
.retentionPeriod = -1,
|
|
.retentionSize = -1,
|
|
.level = TAOS_WAL_FSYNC};
|
|
pMgmt->pWal = walOpen(path, &cfg);
|
|
if (pMgmt->pWal == NULL) return -1;
|
|
|
|
return 0;
|
|
}
|
|
|
|
static void mndCloseWal(SMnode *pMnode) {
|
|
SSyncMgmt *pMgmt = &pMnode->syncMgmt;
|
|
if (pMgmt->pWal != NULL) {
|
|
walClose(pMgmt->pWal);
|
|
pMgmt->pWal = NULL;
|
|
}
|
|
}
|
|
|
|
static int32_t mndRestoreWal(SMnode *pMnode) {
|
|
SWal *pWal = pMnode->syncMgmt.pWal;
|
|
SSdb *pSdb = pMnode->pSdb;
|
|
int64_t lastSdbVer = sdbUpdateVer(pSdb, 0);
|
|
int32_t code = -1;
|
|
|
|
SWalReadHandle *pHandle = walOpenReadHandle(pWal);
|
|
if (pHandle == NULL) return -1;
|
|
|
|
int64_t first = walGetFirstVer(pWal);
|
|
int64_t last = walGetLastVer(pWal);
|
|
mDebug("start to restore sdb wal, sdb ver:%" PRId64 ", wal first:%" PRId64 " last:%" PRId64, lastSdbVer, first, last);
|
|
|
|
first = TMAX(lastSdbVer + 1, first);
|
|
for (int64_t ver = first; ver >= 0 && ver <= last; ++ver) {
|
|
if (walReadWithHandle(pHandle, ver) < 0) {
|
|
mError("failed to read by wal handle since %s, ver:%" PRId64, terrstr(), ver);
|
|
goto WAL_RESTORE_OVER;
|
|
}
|
|
|
|
SWalHead *pHead = pHandle->pHead;
|
|
int64_t sdbVer = sdbUpdateVer(pSdb, 0);
|
|
if (sdbVer + 1 != ver) {
|
|
terrno = TSDB_CODE_SDB_INVALID_WAl_VER;
|
|
mError("failed to read wal from sdb, sdbVer:%" PRId64 " inconsistent with ver:%" PRId64, sdbVer, ver);
|
|
goto WAL_RESTORE_OVER;
|
|
}
|
|
|
|
mTrace("wal:%" PRId64 ", will be restored, content:%p", ver, pHead->head.body);
|
|
if (sdbWriteNotFree(pSdb, (void *)pHead->head.body) < 0) {
|
|
mError("failed to read wal from sdb since %s, ver:%" PRId64, terrstr(), ver);
|
|
goto WAL_RESTORE_OVER;
|
|
}
|
|
|
|
sdbUpdateVer(pSdb, 1);
|
|
mDebug("wal:%" PRId64 ", is restored", ver);
|
|
}
|
|
|
|
int64_t sdbVer = sdbUpdateVer(pSdb, 0);
|
|
mDebug("restore sdb wal finished, sdb ver:%" PRId64, sdbVer);
|
|
|
|
mndTransPullup(pMnode);
|
|
sdbVer = sdbUpdateVer(pSdb, 0);
|
|
mDebug("pullup trans finished, sdb ver:%" PRId64, sdbVer);
|
|
|
|
if (sdbVer != lastSdbVer) {
|
|
mInfo("sdb restored from %" PRId64 " to %" PRId64 ", write file", lastSdbVer, sdbVer);
|
|
if (sdbWriteFile(pSdb) != 0) {
|
|
goto WAL_RESTORE_OVER;
|
|
}
|
|
|
|
if (walCommit(pWal, sdbVer) != 0) {
|
|
goto WAL_RESTORE_OVER;
|
|
}
|
|
|
|
if (walBeginSnapshot(pWal, sdbVer) < 0) {
|
|
goto WAL_RESTORE_OVER;
|
|
}
|
|
|
|
if (walEndSnapshot(pWal) < 0) {
|
|
goto WAL_RESTORE_OVER;
|
|
}
|
|
|
|
}
|
|
|
|
code = 0;
|
|
|
|
WAL_RESTORE_OVER:
|
|
walCloseReadHandle(pHandle);
|
|
return code;
|
|
}
|
|
|
|
int32_t mndInitSync(SMnode *pMnode) {
|
|
SSyncMgmt *pMgmt = &pMnode->syncMgmt;
|
|
tsem_init(&pMgmt->syncSem, 0, 0);
|
|
|
|
if (mndInitWal(pMnode) < 0) {
|
|
mError("failed to open wal since %s", terrstr());
|
|
return -1;
|
|
}
|
|
|
|
if (mndRestoreWal(pMnode) < 0) {
|
|
mError("failed to restore wal since %s", terrstr());
|
|
return -1;
|
|
}
|
|
|
|
pMgmt->state = TAOS_SYNC_STATE_LEADER;
|
|
pMgmt->pSyncNode = NULL;
|
|
return 0;
|
|
}
|
|
|
|
void mndCleanupSync(SMnode *pMnode) {
|
|
SSyncMgmt *pMgmt = &pMnode->syncMgmt;
|
|
tsem_destroy(&pMgmt->syncSem);
|
|
mndCloseWal(pMnode);
|
|
}
|
|
|
|
static int32_t mndSyncApplyCb(struct SSyncFSM *fsm, SyncIndex index, const SSyncBuffer *buf, void *pData) {
|
|
SMnode *pMnode = pData;
|
|
SSyncMgmt *pMgmt = &pMnode->syncMgmt;
|
|
|
|
pMgmt->errCode = 0;
|
|
tsem_post(&pMgmt->syncSem);
|
|
|
|
return 0;
|
|
}
|
|
|
|
int32_t mndSyncPropose(SMnode *pMnode, SSdbRaw *pRaw) {
|
|
SWal *pWal = pMnode->syncMgmt.pWal;
|
|
SSdb *pSdb = pMnode->pSdb;
|
|
|
|
int64_t ver = sdbUpdateVer(pSdb, 1);
|
|
if (walWrite(pWal, ver, 1, pRaw, sdbGetRawTotalSize(pRaw)) < 0) {
|
|
sdbUpdateVer(pSdb, -1);
|
|
mError("failed to write raw:%p since %s, ver:%" PRId64, pRaw, terrstr(), ver);
|
|
return -1;
|
|
}
|
|
|
|
mTrace("raw:%p, write to wal, ver:%" PRId64, pRaw, ver);
|
|
walCommit(pWal, ver);
|
|
walFsync(pWal, true);
|
|
|
|
#if 1
|
|
return 0;
|
|
#else
|
|
if (pMnode->replica == 1) return 0;
|
|
|
|
SSyncMgmt *pMgmt = &pMnode->syncMgmt;
|
|
pMgmt->errCode = 0;
|
|
|
|
SSyncBuffer buf = {.data = pRaw, .len = sdbGetRawTotalSize(pRaw)};
|
|
|
|
bool isWeak = false;
|
|
int32_t code = syncPropose(pMgmt->pSyncNode, &buf, pMnode, isWeak);
|
|
|
|
if (code != 0) return code;
|
|
|
|
tsem_wait(&pMgmt->syncSem);
|
|
return pMgmt->errCode;
|
|
#endif
|
|
}
|
|
|
|
bool mndIsMaster(SMnode *pMnode) {
|
|
SSyncMgmt *pMgmt = &pMnode->syncMgmt;
|
|
return pMgmt->state == TAOS_SYNC_STATE_LEADER;
|
|
}
|