fix: restart failed after altering wal level from 0 to 1 or 2

This commit is contained in:
xiao-77 2024-11-06 11:05:59 +08:00
parent f2cea6a843
commit 4f9fa3bb3e
5 changed files with 224 additions and 18 deletions

View File

@ -424,9 +424,6 @@ static void printFileSet(int32_t vgId, SArray* fileSet, const char* str) {
int32_t walCheckAndRepairMeta(SWal* pWal) {
// load log files, get first/snapshot/last version info
if (pWal->cfg.level == TAOS_WAL_SKIP) {
return TSDB_CODE_SUCCESS;
}
int32_t code = 0;
const char* logPattern = "^[0-9]+.log$";
const char* idxPattern = "^[0-9]+.idx$";

View File

@ -90,6 +90,29 @@ static int32_t walInitLock(SWal *pWal) {
return 0;
}
/*
 * Open the idx and log files for first version 0 and install them as the
 * WAL's current write files. walOpen calls this instead of the meta/idx
 * repair steps when the configured level is TAOS_WAL_SKIP.
 *
 * Both files are opened with CREATE | WRITE | APPEND. On success
 * pWal->pIdxFile, pWal->pLogFile and pWal->writeCur are updated; on failure
 * pWal is left untouched and no file handle opened here stays open.
 *
 * Returns TSDB_CODE_SUCCESS, or the terrno reported by the failing open.
 */
int32_t walInitWriteFileForSkip(SWal *pWal) {
  TdFilePtr pIdxTFile = NULL;
  TdFilePtr pLogTFile = NULL;
  int64_t   fileFirstVer = 0;
  int32_t   code = TSDB_CODE_SUCCESS;
  char      fnameStr[WAL_FILE_LEN];

  walBuildIdxName(pWal, fileFirstVer, fnameStr);
  pIdxTFile = taosOpenFile(fnameStr, TD_FILE_CREATE | TD_FILE_WRITE | TD_FILE_APPEND);
  if (pIdxTFile == NULL) {
    TAOS_RETURN(terrno);
  }

  walBuildLogName(pWal, fileFirstVer, fnameStr);
  pLogTFile = taosOpenFile(fnameStr, TD_FILE_CREATE | TD_FILE_WRITE | TD_FILE_APPEND);
  if (pLogTFile == NULL) {
    // Save the open error first, in case closing the idx file overwrites terrno.
    code = terrno;
    // The idx file was opened successfully above; release it so the handle is not leaked.
    (void)taosCloseFile(&pIdxTFile);
    TAOS_RETURN(code);
  }

  // switch file
  pWal->pIdxFile = pIdxTFile;
  pWal->pLogFile = pLogTFile;
  pWal->writeCur = taosArrayGetSize(pWal->fileInfoSet) - 1;

  TAOS_RETURN(TSDB_CODE_SUCCESS);
}
SWal *walOpen(const char *path, SWalCfg *pCfg) {
int32_t code = 0;
SWal *pWal = taosMemoryCalloc(1, sizeof(SWal));
@ -165,17 +188,24 @@ SWal *walOpen(const char *path, SWalCfg *pCfg) {
if (code < 0) {
wWarn("vgId:%d, failed to load meta since %s", pWal->cfg.vgId, tstrerror(code));
}
if (pWal->cfg.level != TAOS_WAL_SKIP) {
code = walCheckAndRepairMeta(pWal);
if (code < 0) {
wError("vgId:%d, cannot open wal since repair meta file failed since %s", pWal->cfg.vgId, tstrerror(code));
goto _err;
}
code = walCheckAndRepairMeta(pWal);
if (code < 0) {
wError("vgId:%d, cannot open wal since repair meta file failed since %s", pWal->cfg.vgId, tstrerror(code));
goto _err;
}
code = walCheckAndRepairIdx(pWal);
if (code < 0) {
wError("vgId:%d, cannot open wal since repair idx file failed since %s", pWal->cfg.vgId, tstrerror(code));
goto _err;
code = walCheckAndRepairIdx(pWal);
if (code < 0) {
wError("vgId:%d, cannot open wal since repair idx file failed since %s", pWal->cfg.vgId, tstrerror(code));
goto _err;
}
} else {
code = walInitWriteFileForSkip(pWal);
if (code < 0) {
wError("vgId:%d, cannot open wal since init write file for wal_level = 0 failed since %s", pWal->cfg.vgId, tstrerror(code));
goto _err;
}
}
// add ref
@ -217,6 +247,14 @@ int32_t walAlter(SWal *pWal, SWalCfg *pCfg) {
pWal->cfg.vgId, pWal->cfg.level, pWal->cfg.fsyncPeriod, pWal->cfg.retentionPeriod, pWal->cfg.retentionSize,
pCfg->level, pCfg->fsyncPeriod, pCfg->retentionPeriod, pCfg->retentionSize);
if (pWal->cfg.level == TAOS_WAL_SKIP && pCfg->level != TAOS_WAL_SKIP) {
wInfo("vgId:%d, remove all wals, path:%s", pWal->cfg.vgId, pWal->path);
taosRemoveDir(pWal->path);
if (taosMkDir(pWal->path) != 0) {
wError("vgId:%d, path:%s, failed to create directory since %s", pWal->cfg.vgId, pWal->path, tstrerror(terrno));
}
}
pWal->cfg.level = pCfg->level;
pWal->cfg.fsyncPeriod = pCfg->fsyncPeriod;
pWal->cfg.retentionPeriod = pCfg->retentionPeriod;

View File

@ -294,9 +294,9 @@ int32_t walRollback(SWal *pWal, int64_t ver) {
static int32_t walRollImpl(SWal *pWal) {
int32_t code = 0, lino = 0;
if (pWal->cfg.level == TAOS_WAL_SKIP && pWal->pIdxFile != NULL && pWal->pLogFile != NULL) {
TAOS_RETURN(TSDB_CODE_SUCCESS);
}
// if (pWal->cfg.level == TAOS_WAL_SKIP && pWal->pIdxFile != NULL && pWal->pLogFile != NULL) {
// TAOS_RETURN(TSDB_CODE_SUCCESS);
// }
if (pWal->pIdxFile != NULL) {
if ((code = taosFsyncFile(pWal->pIdxFile)) != 0) {
TAOS_CHECK_GOTO(terrno, &lino, _exit);

View File

@ -594,8 +594,7 @@ class TDDnode:
def forcestop(self):
if self.asan:
stopCmd = "%s -s stop -n dnode%d -x SIGKILL" + \
(self.execPath, self.index)
stopCmd = "%s -s stop -n dnode%d -x SIGKILL" % (self.execPath, self.index)
tdLog.info("execute script: " + stopCmd)
os.system(stopCmd)
return

View File

@ -0,0 +1,172 @@
import sys
import taos
import os
from util.log import *
from util.cases import *
from util.sql import *
from util.dnodes import *
class TDTestCase:
    """Restart test for a database whose wal_level is altered from 0 to 1 or 2.

    Each scenario creates database ``db0`` with ``wal_level 0``, writes a few
    rows, alters the level to 1 or 2 and restarts dnode 1. The scenarios vary
    whether the dnode is stopped or force-stopped, and whether the restart
    happens before or after the ALTER.
    """

    def init(self, conn, logSql, replicaVar=1):
        """Bind the shared ``tdSql`` helper to a cursor of ``conn``."""
        tdLog.debug(f"start to excute {__file__}")
        tdSql.init(conn.cursor())

    def getBuildPath(self):
        """Return the build root that contains ``build/bin/taosd``.

        The search walks the project directory, taken as the part of this
        file's real path that precedes ``community`` (or ``tests`` when
        ``community`` is absent). Directories whose parent path contains
        ``packaging`` are ignored.

        Returns:
            The directory holding the binary with the trailing
            ``/build/bin`` cut off, or ``""`` when no binary is found.
        """
        selfPath = os.path.dirname(os.path.realpath(__file__))
        marker = "community" if "community" in selfPath else "tests"
        projPath = selfPath[:selfPath.find(marker)]

        # Initialise up front so a missing binary yields "" rather than an
        # UnboundLocalError at the return statement.
        buildPath = ""
        for root, dirs, files in os.walk(projPath):
            if "taosd" in files or "taosd.exe" in files:
                rootRealPath = os.path.dirname(os.path.realpath(root))
                if "packaging" not in rootRealPath:
                    buildPath = root[:len(root) - len("/build/bin")]
                    break
        return buildPath

    def preData(self):
        """Recreate ``db0`` with ``wal_level 0``, its tables, and seed rows."""
        tdSql.execute("drop database if exists db0;")
        tdSql.execute("create database db0 KEEP 30 vgroups 1 buffer 3 wal_level 0;")
        tdSql.execute("create table if not exists db0.stb (ts timestamp, c1 int, c2 float, c3 double) tags (t1 int unsigned);")
        tdSql.execute("create table db0.ct1 using db0.stb tags(1000);")
        tdSql.execute("create table db0.ct2 using db0.stb tags(2000);")
        tdSql.execute("create table if not exists db0.ntb (ts timestamp, c1 int, c2 float, c3 double) ;")
        tdSql.query("show db0.stables;")
        self.insertData()

    def insertData(self):
        """Insert four rows into ``db0.ct1`` and one row into ``db0.ntb``."""
        tdSql.execute("insert into db0.ct1 values(now+0s, 10, 2.0, 3.0);")
        tdSql.execute("insert into db0.ct1 values(now+1s, 11, 2.1, 3.1)(now+2s, 12, 2.2, 3.2)(now+3s, 13, 2.3, 3.3);")
        tdSql.execute("insert into db0.ntb values(now+2s, 10, 2.0, 3.0);")

    def createSubTableAndInsertData(self):
        """Create ``ct1``, ``ct2`` and ``ntb`` in ``db0`` and insert rows."""
        tdSql.execute("create table db0.ct1 using db0.stb tags(1000);")
        tdSql.execute("create table db0.ct2 using db0.stb tags(2000);")
        tdSql.execute("create table if not exists db0.ntb (ts timestamp, c1 int, c2 float, c3 double) ;")
        self.insertData()

    def alterWalLevel(self, level):
        """Run ``alter database db0 wal_level <level>``."""
        tdSql.execute("alter database db0 wal_level %d;" % level)

    def _shutdown(self, kill):
        """Take dnode 1 down: force-stop when ``kill`` is true, else stop."""
        if kill:
            tdDnodes.forcestop(1)
        else:
            tdDnodes.stop(1)

    def _restartThenAlter(self, level, killFirst, insertAfterAlter, killLast):
        """Scenario: restart with wal_level 0, alter the level, restart again.

        Args:
            level: target wal_level passed to ``alterWalLevel``.
            killFirst: force-stop (instead of stop) before the ALTER.
            insertAfterAlter: write rows after the ALTER, before going down.
            killLast: force-stop (instead of stop) after the ALTER.
        """
        tdLog.info("create database wal_level = 0 and insert data")
        self.preData()
        self._shutdown(killFirst)
        time.sleep(2)
        tdLog.info("restart taosd")
        tdDnodes.start(1)

        tdLog.info(" alter wal level from 0 to %d" % level)
        self.alterWalLevel(level)
        if insertAfterAlter:
            self.insertData()
        self._shutdown(killLast)
        tdDnodes.start(1)

    def _alterThenRestart(self, level, kill):
        """Scenario: alter the level on the running dnode, write, restart.

        Args:
            level: target wal_level passed to ``alterWalLevel``.
            kill: force-stop (instead of stop) before the restart.
        """
        tdLog.info("create database wal_level = 0 and insert data")
        self.preData()
        tdLog.info(" alter wal level from 0 to %d" % level)
        self.alterWalLevel(level)
        time.sleep(1)
        self.insertData()
        self._shutdown(kill)
        time.sleep(2)
        tdLog.info("restart taosd")
        tdDnodes.start(1)

    def run(self):
        """Run all stop/kill x before/after-alter scenarios for levels 1 and 2."""
        tdSql.prepare()

        tdLog.info("-----------test for stop taosd before alter wal level-----------")
        self._restartThenAlter(1, killFirst=False, insertAfterAlter=True, killLast=False)
        # NOTE(review): the level-2 variant ends with a force-stop rather than
        # a graceful stop; confirm this asymmetry is intended.
        self._restartThenAlter(2, killFirst=False, insertAfterAlter=True, killLast=True)

        tdLog.info("-----------test for kill taosd before alter wal level-----------")
        for level in (1, 2):
            self._restartThenAlter(level, killFirst=True, insertAfterAlter=False, killLast=True)

        tdLog.info("-----------test for stop taosd after alter wal level-----------")
        for level in (1, 2):
            self._alterThenRestart(level, kill=False)

        tdLog.info("-----------test for kill taosd after alter wal level-----------")
        for level in (1, 2):
            self._alterThenRestart(level, kill=True)

    def stop(self):
        """Close the SQL helper and report success."""
        tdSql.close()
        tdLog.success(f"{__file__} successfully executed")
# Register this case with the test framework for both Linux and Windows;
# each registration receives its own TDTestCase instance.
tdCases.addLinux(__file__, TDTestCase())
tdCases.addWindows(__file__, TDTestCase())