Merge pull request #28688 from taosdata/fix/main/TD-32824
Fix: restart failed after altering WAL level from 0 to 1 or 2
This commit is contained in:
commit
902dbfbabc
|
@ -424,9 +424,6 @@ static void printFileSet(int32_t vgId, SArray* fileSet, const char* str) {
|
||||||
|
|
||||||
int32_t walCheckAndRepairMeta(SWal* pWal) {
|
int32_t walCheckAndRepairMeta(SWal* pWal) {
|
||||||
// load log files, get first/snapshot/last version info
|
// load log files, get first/snapshot/last version info
|
||||||
if (pWal->cfg.level == TAOS_WAL_SKIP) {
|
|
||||||
return TSDB_CODE_SUCCESS;
|
|
||||||
}
|
|
||||||
int32_t code = 0;
|
int32_t code = 0;
|
||||||
const char* logPattern = "^[0-9]+.log$";
|
const char* logPattern = "^[0-9]+.log$";
|
||||||
const char* idxPattern = "^[0-9]+.idx$";
|
const char* idxPattern = "^[0-9]+.idx$";
|
||||||
|
|
|
@ -90,6 +90,45 @@ static int32_t walInitLock(SWal *pWal) {
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Prepare the write files of a WAL that is opened with wal_level = 0.
 *
 * Opens (creating if necessary, in append mode) the idx file and the log file
 * whose first version is 0, pushes one SWalFileInfo entry whose bytes are all
 * 0xFF into pWal->fileInfoSet, and selects that entry as the current write
 * file (writeCur = 0).
 *
 * The WAL object is only modified once every step has succeeded. On failure
 * both locally opened files are closed and pWal->pIdxFile / pWal->pLogFile are
 * left untouched, so the caller never sees a handle that was already closed.
 *
 * @param pWal WAL instance; pWal->fileInfoSet must already be initialised.
 * @return 0 on success, otherwise the terrno value reported by the failing call.
 */
int32_t walInitWriteFileForSkip(SWal *pWal) {
  TdFilePtr pIdxTFile = NULL, pLogTFile = NULL;
  int64_t   fileFirstVer = 0;
  int32_t   code = 0;

  char fnameStr[WAL_FILE_LEN];
  walBuildIdxName(pWal, fileFirstVer, fnameStr);
  pIdxTFile = taosOpenFile(fnameStr, TD_FILE_CREATE | TD_FILE_WRITE | TD_FILE_APPEND);
  if (pIdxTFile == NULL) {
    // read terrno before logging so the logger cannot overwrite the cause
    code = terrno;
    wError("vgId:%d, failed to open file since %s", pWal->cfg.vgId, tstrerror(code));
    goto _exit;
  }

  walBuildLogName(pWal, fileFirstVer, fnameStr);
  pLogTFile = taosOpenFile(fnameStr, TD_FILE_CREATE | TD_FILE_WRITE | TD_FILE_APPEND);
  if (pLogTFile == NULL) {
    code = terrno;
    wError("vgId:%d, failed to open file since %s", pWal->cfg.vgId, tstrerror(code));
    goto _exit;
  }

  // placeholder entry: every byte of the file info is set to 0xFF
  SWalFileInfo fileInfo;
  (void)memset(&fileInfo, -1, sizeof(SWalFileInfo));
  if (!taosArrayPush(pWal->fileInfoSet, &fileInfo)) {
    code = terrno;
    wError("vgId:%d, failed to push fileInfo into array since %s", pWal->cfg.vgId, tstrerror(code));
    goto _exit;
  }

  // switch file: publish the handles only after nothing else can fail
  pWal->pIdxFile = pIdxTFile;
  pWal->pLogFile = pLogTFile;
  pWal->writeCur = 0;

_exit:
  if (code != TSDB_CODE_SUCCESS) {
    // the handles were never stored in pWal on this path, so closing them is safe
    (void)taosCloseFile(&pIdxTFile);
    (void)taosCloseFile(&pLogTFile);
  }
  TAOS_RETURN(code);
}
|
||||||
|
|
||||||
SWal *walOpen(const char *path, SWalCfg *pCfg) {
|
SWal *walOpen(const char *path, SWalCfg *pCfg) {
|
||||||
int32_t code = 0;
|
int32_t code = 0;
|
||||||
SWal *pWal = taosMemoryCalloc(1, sizeof(SWal));
|
SWal *pWal = taosMemoryCalloc(1, sizeof(SWal));
|
||||||
|
@ -165,17 +204,25 @@ SWal *walOpen(const char *path, SWalCfg *pCfg) {
|
||||||
if (code < 0) {
|
if (code < 0) {
|
||||||
wWarn("vgId:%d, failed to load meta since %s", pWal->cfg.vgId, tstrerror(code));
|
wWarn("vgId:%d, failed to load meta since %s", pWal->cfg.vgId, tstrerror(code));
|
||||||
}
|
}
|
||||||
|
if (pWal->cfg.level != TAOS_WAL_SKIP) {
|
||||||
|
code = walCheckAndRepairMeta(pWal);
|
||||||
|
if (code < 0) {
|
||||||
|
wError("vgId:%d, cannot open wal since repair meta file failed since %s", pWal->cfg.vgId, tstrerror(code));
|
||||||
|
goto _err;
|
||||||
|
}
|
||||||
|
|
||||||
code = walCheckAndRepairMeta(pWal);
|
code = walCheckAndRepairIdx(pWal);
|
||||||
if (code < 0) {
|
if (code < 0) {
|
||||||
wError("vgId:%d, cannot open wal since repair meta file failed since %s", pWal->cfg.vgId, tstrerror(code));
|
wError("vgId:%d, cannot open wal since repair idx file failed since %s", pWal->cfg.vgId, tstrerror(code));
|
||||||
goto _err;
|
goto _err;
|
||||||
}
|
}
|
||||||
|
} else {
|
||||||
code = walCheckAndRepairIdx(pWal);
|
code = walInitWriteFileForSkip(pWal);
|
||||||
if (code < 0) {
|
if (code < 0) {
|
||||||
wError("vgId:%d, cannot open wal since repair idx file failed since %s", pWal->cfg.vgId, tstrerror(code));
|
wError("vgId:%d, cannot open wal since init write file for wal_level = 0 failed since %s", pWal->cfg.vgId,
|
||||||
goto _err;
|
tstrerror(code));
|
||||||
|
goto _err;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// add ref
|
// add ref
|
||||||
|
@ -217,6 +264,14 @@ int32_t walAlter(SWal *pWal, SWalCfg *pCfg) {
|
||||||
pWal->cfg.vgId, pWal->cfg.level, pWal->cfg.fsyncPeriod, pWal->cfg.retentionPeriod, pWal->cfg.retentionSize,
|
pWal->cfg.vgId, pWal->cfg.level, pWal->cfg.fsyncPeriod, pWal->cfg.retentionPeriod, pWal->cfg.retentionSize,
|
||||||
pCfg->level, pCfg->fsyncPeriod, pCfg->retentionPeriod, pCfg->retentionSize);
|
pCfg->level, pCfg->fsyncPeriod, pCfg->retentionPeriod, pCfg->retentionSize);
|
||||||
|
|
||||||
|
if (pWal->cfg.level == TAOS_WAL_SKIP && pCfg->level != TAOS_WAL_SKIP) {
|
||||||
|
wInfo("vgId:%d, remove all wals, path:%s", pWal->cfg.vgId, pWal->path);
|
||||||
|
taosRemoveDir(pWal->path);
|
||||||
|
if (taosMkDir(pWal->path) != 0) {
|
||||||
|
wError("vgId:%d, path:%s, failed to create directory since %s", pWal->cfg.vgId, pWal->path, tstrerror(terrno));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
pWal->cfg.level = pCfg->level;
|
pWal->cfg.level = pCfg->level;
|
||||||
pWal->cfg.fsyncPeriod = pCfg->fsyncPeriod;
|
pWal->cfg.fsyncPeriod = pCfg->fsyncPeriod;
|
||||||
pWal->cfg.retentionPeriod = pCfg->retentionPeriod;
|
pWal->cfg.retentionPeriod = pCfg->retentionPeriod;
|
||||||
|
|
|
@ -361,6 +361,7 @@
|
||||||
,,n,system-test,python3 ./test.py -f 0-others/tag_index_basic.py
|
,,n,system-test,python3 ./test.py -f 0-others/tag_index_basic.py
|
||||||
,,n,system-test,python3 ./test.py -f 0-others/udfpy_main.py
|
,,n,system-test,python3 ./test.py -f 0-others/udfpy_main.py
|
||||||
,,n,system-test,python3 ./test.py -N 3 -f 0-others/walRetention.py
|
,,n,system-test,python3 ./test.py -N 3 -f 0-others/walRetention.py
|
||||||
|
,,n,system-test,python3 ./test.py -f 0-others/wal_level_skip.py
|
||||||
,,y,system-test,./pytest.sh python3 ./test.py -f 0-others/splitVGroup.py -N 3 -n 1
|
,,y,system-test,./pytest.sh python3 ./test.py -f 0-others/splitVGroup.py -N 3 -n 1
|
||||||
,,y,system-test,./pytest.sh python3 ./test.py -f 0-others/splitVGroupWal.py -N 3 -n 1
|
,,y,system-test,./pytest.sh python3 ./test.py -f 0-others/splitVGroupWal.py -N 3 -n 1
|
||||||
,,y,system-test,./pytest.sh python3 ./test.py -f 0-others/splitVGroup.py -N 3 -n 3
|
,,y,system-test,./pytest.sh python3 ./test.py -f 0-others/splitVGroup.py -N 3 -n 3
|
||||||
|
|
|
@ -594,8 +594,7 @@ class TDDnode:
|
||||||
|
|
||||||
def forcestop(self):
|
def forcestop(self):
|
||||||
if self.asan:
|
if self.asan:
|
||||||
stopCmd = "%s -s stop -n dnode%d -x SIGKILL" + \
|
stopCmd = "%s -s stop -n dnode%d -x SIGKILL" % (self.execPath, self.index)
|
||||||
(self.execPath, self.index)
|
|
||||||
tdLog.info("execute script: " + stopCmd)
|
tdLog.info("execute script: " + stopCmd)
|
||||||
os.system(stopCmd)
|
os.system(stopCmd)
|
||||||
return
|
return
|
||||||
|
|
|
@ -0,0 +1,170 @@
|
||||||
|
import sys
|
||||||
|
import taos
|
||||||
|
import os
|
||||||
|
from util.log import *
|
||||||
|
from util.cases import *
|
||||||
|
from util.sql import *
|
||||||
|
from util.dnodes import *
|
||||||
|
|
||||||
|
class TDTestCase:
    """System test: alter a database from wal_level 0 to 1 or 2 and restart taosd.

    Each scenario recreates database ``db0`` with ``wal_level 0``, changes the
    WAL level, and stops (``tdDnodes.stop``) or kills (``tdDnodes.forcestop``)
    dnode 1 before starting it again. The scenarios contain no explicit result
    checks; they only issue the statements and restart the dnode.
    """

    def init(self, conn, logSql,replicaVar=1):
        """Log the start of the case and bind ``tdSql`` to a cursor of ``conn``."""
        tdLog.debug(f"start to excute {__file__}")
        tdSql.init(conn.cursor())

    def getBuildPath(self):
        """Return the build directory that contains ``build/bin/taosd``.

        Walks the project tree (everything before ``community`` or ``tests`` in
        this file's path) and returns the first directory holding ``taosd`` or
        ``taosd.exe`` that is not under a ``packaging`` path, with the trailing
        ``/build/bin`` stripped. Returns ``""`` when nothing matches.
        """
        # Default result, so a tree without taosd no longer raises UnboundLocalError.
        buildPath = ""
        selfPath = os.path.dirname(os.path.realpath(__file__))

        if ("community" in selfPath):
            projPath = selfPath[:selfPath.find("community")]
        else:
            projPath = selfPath[:selfPath.find("tests")]

        for root, dirs, files in os.walk(projPath):
            if ("taosd" in files or "taosd.exe" in files):
                rootRealPath = os.path.dirname(os.path.realpath(root))
                if ("packaging" not in rootRealPath):
                    buildPath = root[:len(root) - len("/build/bin")]
                    break
        return buildPath

    def preData(self):
        """Recreate ``db0`` with ``wal_level 0``, create its tables and insert rows."""
        tdSql.execute("drop database if exists db0;")
        tdSql.execute("create database db0 KEEP 30 vgroups 1 buffer 3 wal_level 0;")
        tdSql.execute("create table if not exists db0.stb (ts timestamp, c1 int, c2 float, c3 double) tags (t1 int unsigned);")
        tdSql.execute("create table db0.ct1 using db0.stb tags(1000);")
        tdSql.execute("create table db0.ct2 using db0.stb tags(2000);")
        tdSql.execute("create table if not exists db0.ntb (ts timestamp, c1 int, c2 float, c3 double) ;")
        tdSql.query("show db0.stables;")
        tdSql.execute("insert into db0.ct1 values(now+0s, 10, 2.0, 3.0);")
        tdSql.execute("insert into db0.ct1 values(now+1s, 11, 2.1, 3.1)(now+2s, 12, 2.2, 3.2)(now+3s, 13, 2.3, 3.3);")
        tdSql.execute("insert into db0.ntb values(now+2s, 10, 2.0, 3.0);")

    def insertData(self):
        """Insert four rows into ``db0.ct1`` and one row into ``db0.ntb``."""
        tdSql.execute("insert into db0.ct1 values(now+0s, 10, 2.0, 3.0);")
        tdSql.execute("insert into db0.ct1 values(now+1s, 11, 2.1, 3.1)(now+2s, 12, 2.2, 3.2)(now+3s, 13, 2.3, 3.3);")
        tdSql.execute("insert into db0.ntb values(now+2s, 10, 2.0, 3.0);")

    def createSubTableAndInsertData(self):
        """Create ``ct1``/``ct2``/``ntb`` in ``db0`` and insert the same rows as ``insertData``."""
        tdSql.execute("create table db0.ct1 using db0.stb tags(1000);")
        tdSql.execute("create table db0.ct2 using db0.stb tags(2000);")
        tdSql.execute("create table if not exists db0.ntb (ts timestamp, c1 int, c2 float, c3 double) ;")
        tdSql.execute("insert into db0.ct1 values(now+0s, 10, 2.0, 3.0);")
        tdSql.execute("insert into db0.ct1 values(now+1s, 11, 2.1, 3.1)(now+2s, 12, 2.2, 3.2)(now+3s, 13, 2.3, 3.3);")
        tdSql.execute("insert into db0.ntb values(now+2s, 10, 2.0, 3.0);")

    def alterWalLevel(self,level):
        """Issue ``alter database db0 wal_level <level>``."""
        tdSql.execute("alter database db0 wal_level %d;"%level)

    def _restartThenAlter(self, level, firstStop, secondStop, insertAfterAlter):
        """Scenario: restart taosd while wal_level is 0, then alter and restart again.

        ``firstStop`` / ``secondStop`` are the ``tdDnodes`` callables used to bring
        dnode 1 down before and after the alter. ``insertAfterAlter`` selects
        whether rows are written between the alter and the second shutdown.
        """
        # Imported here because the file itself never imports time explicitly.
        import time

        tdLog.info("create database wal_level = 0 and insert data")
        self.preData()
        firstStop(1)
        time.sleep(2)
        tdLog.info("restart taosd")
        tdDnodes.start(1)

        tdLog.info(" alter wal level from 0 to %d" % level)
        self.alterWalLevel(level)
        if insertAfterAlter:
            self.insertData()
        secondStop(1)
        tdDnodes.start(1)

    def _alterThenRestart(self, level, stopDnode):
        """Scenario: alter wal_level right after creating the data, insert, then restart.

        ``stopDnode`` is the ``tdDnodes`` callable used to bring dnode 1 down.
        """
        # Imported here because the file itself never imports time explicitly.
        import time

        tdLog.info("create database wal_level = 0 and insert data")
        self.preData()
        tdLog.info(" alter wal level from 0 to %d" % level)
        self.alterWalLevel(level)
        time.sleep(1)
        self.insertData()
        stopDnode(1)
        time.sleep(2)
        tdLog.info("restart taosd")
        tdDnodes.start(1)

    def run(self):
        """Run all eight scenarios in the original order."""
        tdSql.prepare()

        tdLog.info("-----------test for stop taosd before alter wal level-----------")
        self._restartThenAlter(1, tdDnodes.stop, tdDnodes.stop, True)
        # The second case of this section ends with a forced stop, as in the original.
        self._restartThenAlter(2, tdDnodes.stop, tdDnodes.forcestop, True)

        tdLog.info("-----------test for kill taosd before alter wal level-----------")
        # These two cases do not insert data after the alter.
        self._restartThenAlter(1, tdDnodes.forcestop, tdDnodes.forcestop, False)
        self._restartThenAlter(2, tdDnodes.forcestop, tdDnodes.forcestop, False)

        tdLog.info("-----------test for stop taosd after alter wal level-----------")
        self._alterThenRestart(1, tdDnodes.stop)
        self._alterThenRestart(2, tdDnodes.stop)

        tdLog.info("-----------test for kill taosd after alter wal level-----------")
        self._alterThenRestart(1, tdDnodes.forcestop)
        self._alterThenRestart(2, tdDnodes.forcestop)

    def stop(self):
        """Close the SQL cursor and report success."""
        tdSql.close()
        tdLog.success(f"{__file__} successfully executed")
|
||||||
|
|
||||||
|
tdCases.addLinux(__file__, TDTestCase())
|
||||||
|
tdCases.addWindows(__file__, TDTestCase())
|
Loading…
Reference in New Issue