Merge pull request #28688 from taosdata/fix/main/TD-32824

Fix: restart failed after altering wal level from 0 to 1 or 2
This commit is contained in:
Shengliang Guan 2024-11-13 14:01:25 +08:00 committed by GitHub
commit 902dbfbabc
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
5 changed files with 237 additions and 15 deletions

View File

@ -424,9 +424,6 @@ static void printFileSet(int32_t vgId, SArray* fileSet, const char* str) {
int32_t walCheckAndRepairMeta(SWal* pWal) {
// load log files, get first/snapshot/last version info
if (pWal->cfg.level == TAOS_WAL_SKIP) {
return TSDB_CODE_SUCCESS;
}
int32_t code = 0;
const char* logPattern = "^[0-9]+.log$";
const char* idxPattern = "^[0-9]+.idx$";

View File

@ -90,6 +90,45 @@ static int32_t walInitLock(SWal *pWal) {
return 0;
}
/**
 * Open the index/log file pair used when the WAL runs with wal_level = 0
 * (TAOS_WAL_SKIP) and register a placeholder file-info entry for it.
 *
 * The file names are built by walBuildIdxName()/walBuildLogName() with a
 * first version of 0. Both files are opened with CREATE | WRITE | APPEND.
 *
 * The handles are stored into pWal only after every step has succeeded, so a
 * failure never leaves pWal->pIdxFile / pWal->pLogFile pointing at files that
 * the error path below has already closed.
 *
 * @param pWal  WAL instance to initialise; its fileInfoSet receives one entry.
 * @return TSDB_CODE_SUCCESS on success, otherwise the terrno value captured
 *         at the failing step.
 */
int32_t walInitWriteFileForSkip(SWal *pWal) {
  TdFilePtr    pIdxTFile = NULL, pLogTFile = NULL;
  int64_t      fileFirstVer = 0;
  int32_t      code = 0;
  char         fnameStr[WAL_FILE_LEN];
  SWalFileInfo fileInfo;

  walBuildIdxName(pWal, fileFirstVer, fnameStr);
  pIdxTFile = taosOpenFile(fnameStr, TD_FILE_CREATE | TD_FILE_WRITE | TD_FILE_APPEND);
  if (pIdxTFile == NULL) {
    // Capture terrno before logging so the reported and returned codes agree.
    code = terrno;
    wError("vgId:%d, failed to open file since %s", pWal->cfg.vgId, tstrerror(code));
    goto _exit;
  }

  walBuildLogName(pWal, fileFirstVer, fnameStr);
  pLogTFile = taosOpenFile(fnameStr, TD_FILE_CREATE | TD_FILE_WRITE | TD_FILE_APPEND);
  if (pLogTFile == NULL) {
    code = terrno;
    wError("vgId:%d, failed to open file since %s", pWal->cfg.vgId, tstrerror(code));
    goto _exit;
  }

  // Placeholder entry: every byte of the struct is set to 0xFF.
  (void)memset(&fileInfo, -1, sizeof(SWalFileInfo));
  if (!taosArrayPush(pWal->fileInfoSet, &fileInfo)) {
    code = terrno;
    wError("vgId:%d, failed to push fileInfo into array since %s", pWal->cfg.vgId, tstrerror(code));
    goto _exit;
  }

  // switch file: publish the handles only once nothing else can fail
  pWal->pIdxFile = pIdxTFile;
  pWal->pLogFile = pLogTFile;
  pWal->writeCur = 0;

_exit:
  if (code != TSDB_CODE_SUCCESS) {
    // Nothing was published to pWal, so closing the local handles is enough.
    (void)taosCloseFile(&pIdxTFile);
    (void)taosCloseFile(&pLogTFile);
  }
  TAOS_RETURN(code);
}
SWal *walOpen(const char *path, SWalCfg *pCfg) {
int32_t code = 0;
SWal *pWal = taosMemoryCalloc(1, sizeof(SWal));
@ -165,7 +204,7 @@ SWal *walOpen(const char *path, SWalCfg *pCfg) {
if (code < 0) {
wWarn("vgId:%d, failed to load meta since %s", pWal->cfg.vgId, tstrerror(code));
}
if (pWal->cfg.level != TAOS_WAL_SKIP) {
code = walCheckAndRepairMeta(pWal);
if (code < 0) {
wError("vgId:%d, cannot open wal since repair meta file failed since %s", pWal->cfg.vgId, tstrerror(code));
@ -177,6 +216,14 @@ SWal *walOpen(const char *path, SWalCfg *pCfg) {
wError("vgId:%d, cannot open wal since repair idx file failed since %s", pWal->cfg.vgId, tstrerror(code));
goto _err;
}
} else {
code = walInitWriteFileForSkip(pWal);
if (code < 0) {
wError("vgId:%d, cannot open wal since init write file for wal_level = 0 failed since %s", pWal->cfg.vgId,
tstrerror(code));
goto _err;
}
}
// add ref
pWal->refId = taosAddRef(tsWal.refSetId, pWal);
@ -217,6 +264,14 @@ int32_t walAlter(SWal *pWal, SWalCfg *pCfg) {
pWal->cfg.vgId, pWal->cfg.level, pWal->cfg.fsyncPeriod, pWal->cfg.retentionPeriod, pWal->cfg.retentionSize,
pCfg->level, pCfg->fsyncPeriod, pCfg->retentionPeriod, pCfg->retentionSize);
if (pWal->cfg.level == TAOS_WAL_SKIP && pCfg->level != TAOS_WAL_SKIP) {
wInfo("vgId:%d, remove all wals, path:%s", pWal->cfg.vgId, pWal->path);
taosRemoveDir(pWal->path);
if (taosMkDir(pWal->path) != 0) {
wError("vgId:%d, path:%s, failed to create directory since %s", pWal->cfg.vgId, pWal->path, tstrerror(terrno));
}
}
pWal->cfg.level = pCfg->level;
pWal->cfg.fsyncPeriod = pCfg->fsyncPeriod;
pWal->cfg.retentionPeriod = pCfg->retentionPeriod;

View File

@ -361,6 +361,7 @@
,,n,system-test,python3 ./test.py -f 0-others/tag_index_basic.py
,,n,system-test,python3 ./test.py -f 0-others/udfpy_main.py
,,n,system-test,python3 ./test.py -N 3 -f 0-others/walRetention.py
,,n,system-test,python3 ./test.py -f 0-others/wal_level_skip.py
,,y,system-test,./pytest.sh python3 ./test.py -f 0-others/splitVGroup.py -N 3 -n 1
,,y,system-test,./pytest.sh python3 ./test.py -f 0-others/splitVGroupWal.py -N 3 -n 1
,,y,system-test,./pytest.sh python3 ./test.py -f 0-others/splitVGroup.py -N 3 -n 3

View File

@ -594,8 +594,7 @@ class TDDnode:
def forcestop(self):
if self.asan:
stopCmd = "%s -s stop -n dnode%d -x SIGKILL" + \
(self.execPath, self.index)
stopCmd = "%s -s stop -n dnode%d -x SIGKILL" % (self.execPath, self.index)
tdLog.info("execute script: " + stopCmd)
os.system(stopCmd)
return

View File

@ -0,0 +1,170 @@
import sys
import taos
import os
from util.log import *
from util.cases import *
from util.sql import *
from util.dnodes import *
class TDTestCase:
    """System test: alter a database from wal_level 0 to 1 or 2 and restart taosd.

    Each scenario recreates ``db0`` with ``wal_level 0``, writes a few rows,
    changes the WAL level and stops/starts dnode 1 in a different order.
    The test issues no result checks of its own; it relies on the SQL and
    dnode helpers to fail if a statement or a restart does not succeed.
    """

    def init(self, conn, logSql, replicaVar=1):
        """Attach the shared SQL helper to a cursor of ``conn``."""
        tdLog.debug(f"start to excute {__file__}")
        tdSql.init(conn.cursor())

    @staticmethod
    def _findBuildPath(projPath):
        """Return the build root below ``projPath`` that holds a taosd binary.

        Walks ``projPath`` and, for the first directory that contains
        ``taosd`` or ``taosd.exe`` and is not under a "packaging" path,
        returns that directory with a trailing ``/build/bin``-sized suffix
        removed. Returns an empty string when no such directory exists,
        instead of failing on an unassigned local.
        """
        for root, dirs, files in os.walk(projPath):
            if "taosd" in files or "taosd.exe" in files:
                rootRealPath = os.path.dirname(os.path.realpath(root))
                if "packaging" not in rootRealPath:
                    return root[:len(root) - len("/build/bin")]
        return ""

    def getBuildPath(self):
        """Locate the build directory relative to this test file.

        The project root is the part of this file's real path that precedes
        "community" when present, otherwise the part that precedes "tests".

        Returns:
            The build path, or "" when no taosd binary was found.
        """
        selfPath = os.path.dirname(os.path.realpath(__file__))
        marker = "community" if "community" in selfPath else "tests"
        projPath = selfPath[:selfPath.find(marker)]
        return self._findBuildPath(projPath)

    def preData(self):
        """Recreate ``db0`` with wal_level 0, create its tables and insert rows."""
        tdSql.execute("drop database if exists db0;")
        tdSql.execute("create database db0 KEEP 30 vgroups 1 buffer 3 wal_level 0;")
        tdSql.execute("create table if not exists db0.stb (ts timestamp, c1 int, c2 float, c3 double) tags (t1 int unsigned);")
        tdSql.execute("create table db0.ct1 using db0.stb tags(1000);")
        tdSql.execute("create table db0.ct2 using db0.stb tags(2000);")
        tdSql.execute("create table if not exists db0.ntb (ts timestamp, c1 int, c2 float, c3 double) ;")
        tdSql.query("show db0.stables;")
        self.insertData()

    def insertData(self):
        """Insert four rows into ``db0.ct1`` and one row into ``db0.ntb``."""
        tdSql.execute("insert into db0.ct1 values(now+0s, 10, 2.0, 3.0);")
        tdSql.execute("insert into db0.ct1 values(now+1s, 11, 2.1, 3.1)(now+2s, 12, 2.2, 3.2)(now+3s, 13, 2.3, 3.3);")
        tdSql.execute("insert into db0.ntb values(now+2s, 10, 2.0, 3.0);")

    def createSubTableAndInsertData(self):
        """Create the child tables and the normal table, then insert rows."""
        tdSql.execute("create table db0.ct1 using db0.stb tags(1000);")
        tdSql.execute("create table db0.ct2 using db0.stb tags(2000);")
        tdSql.execute("create table if not exists db0.ntb (ts timestamp, c1 int, c2 float, c3 double) ;")
        self.insertData()

    def alterWalLevel(self, level):
        """Alter ``db0`` to the given integer ``wal_level``."""
        tdSql.execute("alter database db0 wal_level %d;" % level)

    def _restartThenAlter(self, level, stopBefore, stopAfter, insertAfterAlter):
        """Restart dnode 1 first, then alter the WAL level and restart again.

        Args:
            level: target wal_level (1 or 2 in this test).
            stopBefore: callable taking the dnode index, used to stop the
                node after the initial data load.
            stopAfter: callable taking the dnode index, used to stop the
                node after the alter.
            insertAfterAlter: whether to insert rows after the alter.
        """
        tdLog.info("create database wal_level = 0 and insert data")
        self.preData()
        stopBefore(1)
        time.sleep(2)
        tdLog.info("restart taosd")
        tdDnodes.start(1)
        tdLog.info(" alter wal level from 0 to %d" % level)
        self.alterWalLevel(level)
        if insertAfterAlter:
            self.insertData()
        stopAfter(1)
        tdDnodes.start(1)

    def _alterThenRestart(self, level, stopNode):
        """Alter the WAL level on a running dnode, insert, then restart it.

        Args:
            level: target wal_level (1 or 2 in this test).
            stopNode: callable taking the dnode index, used to stop the node.
        """
        tdLog.info("create database wal_level = 0 and insert data")
        self.preData()
        tdLog.info(" alter wal level from 0 to %d" % level)
        self.alterWalLevel(level)
        time.sleep(1)
        self.insertData()
        stopNode(1)
        time.sleep(2)
        tdLog.info("restart taosd")
        tdDnodes.start(1)

    def run(self):
        """Run all eight alter/restart scenarios in their original order."""
        tdSql.prepare()

        tdLog.info("-----------test for stop taosd before alter wal level-----------")
        self._restartThenAlter(1, tdDnodes.stop, tdDnodes.stop, True)
        # NOTE(review): the level-2 case ends with forcestop, unlike the
        # level-1 case above; kept exactly as the original sequence.
        self._restartThenAlter(2, tdDnodes.stop, tdDnodes.forcestop, True)

        tdLog.info("-----------test for kill taosd before alter wal level-----------")
        # These two scenarios do not insert after the alter.
        self._restartThenAlter(1, tdDnodes.forcestop, tdDnodes.forcestop, False)
        self._restartThenAlter(2, tdDnodes.forcestop, tdDnodes.forcestop, False)

        tdLog.info("-----------test for stop taosd after alter wal level-----------")
        self._alterThenRestart(1, tdDnodes.stop)
        self._alterThenRestart(2, tdDnodes.stop)

        tdLog.info("-----------test for kill taosd after alter wal level-----------")
        self._alterThenRestart(1, tdDnodes.forcestop)
        self._alterThenRestart(2, tdDnodes.forcestop)

    def stop(self):
        """Close the SQL helper and log that the case finished."""
        tdSql.close()
        tdLog.success(f"{__file__} successfully executed")
# Register one TDTestCase instance per platform list (Linux and Windows),
# keyed by this file's path.
tdCases.addLinux(__file__, TDTestCase())
tdCases.addWindows(__file__, TDTestCase())