Merge pull request #23465 from taosdata/fix/TD-26189-last-tier-write

enh(tsdb/retention): enable last tier write
This commit is contained in:
Hongze Cheng 2023-11-02 21:21:38 -05:00 committed by GitHub
commit 1ff3f96b08
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
12 changed files with 396 additions and 70 deletions

View File

@ -280,8 +280,10 @@ int8_t tsS3Enabled = false;
int8_t tsS3Https = true; int8_t tsS3Https = true;
char tsS3Hostname[TSDB_FQDN_LEN] = "<hostname>"; char tsS3Hostname[TSDB_FQDN_LEN] = "<hostname>";
int32_t tsS3BlockSize = 4096; // number of tsdb pages int32_t tsS3BlockSize = -1; // number of tsdb pages (4096)
int32_t tsS3BlockCacheSize = 16; // number of blocks int32_t tsS3BlockCacheSize = 16; // number of blocks
int32_t tsS3PageCacheSize = 4096; // number of pages
int32_t tsS3UploadDelaySec = 60 * 60;
#ifndef _STORAGE #ifndef _STORAGE
int32_t taosSetTfsCfg(SConfig *pCfg) { int32_t taosSetTfsCfg(SConfig *pCfg) {
@ -460,7 +462,8 @@ static int32_t taosAddClientCfg(SConfig *pCfg) {
if (cfgAddBool(pCfg, "queryUseNodeAllocator", tsQueryUseNodeAllocator, CFG_SCOPE_CLIENT) != 0) return -1; if (cfgAddBool(pCfg, "queryUseNodeAllocator", tsQueryUseNodeAllocator, CFG_SCOPE_CLIENT) != 0) return -1;
if (cfgAddBool(pCfg, "keepColumnName", tsKeepColumnName, CFG_SCOPE_CLIENT) != 0) return -1; if (cfgAddBool(pCfg, "keepColumnName", tsKeepColumnName, CFG_SCOPE_CLIENT) != 0) return -1;
if (cfgAddString(pCfg, "smlChildTableName", tsSmlChildTableName, CFG_SCOPE_CLIENT) != 0) return -1; if (cfgAddString(pCfg, "smlChildTableName", tsSmlChildTableName, CFG_SCOPE_CLIENT) != 0) return -1;
if (cfgAddString(pCfg, "smlAutoChildTableNameDelimiter", tsSmlAutoChildTableNameDelimiter, CFG_SCOPE_CLIENT) != 0) return -1; if (cfgAddString(pCfg, "smlAutoChildTableNameDelimiter", tsSmlAutoChildTableNameDelimiter, CFG_SCOPE_CLIENT) != 0)
return -1;
if (cfgAddString(pCfg, "smlTagName", tsSmlTagName, CFG_SCOPE_CLIENT) != 0) return -1; if (cfgAddString(pCfg, "smlTagName", tsSmlTagName, CFG_SCOPE_CLIENT) != 0) return -1;
if (cfgAddString(pCfg, "smlTsDefaultName", tsSmlTsDefaultName, CFG_SCOPE_CLIENT) != 0) return -1; if (cfgAddString(pCfg, "smlTsDefaultName", tsSmlTsDefaultName, CFG_SCOPE_CLIENT) != 0) return -1;
if (cfgAddBool(pCfg, "smlDot2Underline", tsSmlDot2Underline, CFG_SCOPE_CLIENT) != 0) return -1; if (cfgAddBool(pCfg, "smlDot2Underline", tsSmlDot2Underline, CFG_SCOPE_CLIENT) != 0) return -1;
@ -675,7 +678,8 @@ static int32_t taosAddServerCfg(SConfig *pCfg) {
if (cfgAddInt64(pCfg, "checkpointInterval", tsStreamCheckpointInterval, 60, 1200, CFG_SCOPE_SERVER) != 0) return -1; if (cfgAddInt64(pCfg, "checkpointInterval", tsStreamCheckpointInterval, 60, 1200, CFG_SCOPE_SERVER) != 0) return -1;
if (cfgAddFloat(pCfg, "streamSinkDataRate", tsSinkDataRate, 0.1, 5, CFG_SCOPE_SERVER) != 0) return -1; if (cfgAddFloat(pCfg, "streamSinkDataRate", tsSinkDataRate, 0.1, 5, CFG_SCOPE_SERVER) != 0) return -1;
if (cfgAddInt32(pCfg, "cacheLazyLoadThreshold", tsCacheLazyLoadThreshold, 0, 100000, CFG_SCOPE_SERVER) != 0) return -1; if (cfgAddInt32(pCfg, "cacheLazyLoadThreshold", tsCacheLazyLoadThreshold, 0, 100000, CFG_SCOPE_SERVER) != 0)
return -1;
if (cfgAddString(pCfg, "lossyColumns", tsLossyColumns, CFG_SCOPE_SERVER) != 0) return -1; if (cfgAddString(pCfg, "lossyColumns", tsLossyColumns, CFG_SCOPE_SERVER) != 0) return -1;
if (cfgAddFloat(pCfg, "fPrecision", tsFPrecision, 0.0f, 100000.0f, CFG_SCOPE_SERVER) != 0) return -1; if (cfgAddFloat(pCfg, "fPrecision", tsFPrecision, 0.0f, 100000.0f, CFG_SCOPE_SERVER) != 0) return -1;
@ -693,8 +697,11 @@ static int32_t taosAddServerCfg(SConfig *pCfg) {
if (cfgAddString(pCfg, "s3Accesskey", tsS3AccessKey, CFG_SCOPE_SERVER) != 0) return -1; if (cfgAddString(pCfg, "s3Accesskey", tsS3AccessKey, CFG_SCOPE_SERVER) != 0) return -1;
if (cfgAddString(pCfg, "s3Endpoint", tsS3Endpoint, CFG_SCOPE_SERVER) != 0) return -1; if (cfgAddString(pCfg, "s3Endpoint", tsS3Endpoint, CFG_SCOPE_SERVER) != 0) return -1;
if (cfgAddString(pCfg, "s3BucketName", tsS3BucketName, CFG_SCOPE_SERVER) != 0) return -1; if (cfgAddString(pCfg, "s3BucketName", tsS3BucketName, CFG_SCOPE_SERVER) != 0) return -1;
if (cfgAddInt32(pCfg, "s3BlockSize", tsS3BlockSize, 2048, 1024 * 1024, CFG_SCOPE_SERVER) != 0) return -1; if (cfgAddInt32(pCfg, "s3BlockSize", tsS3BlockSize, -100, 1024 * 1024, CFG_SCOPE_SERVER) != 0) return -1;
if (cfgAddInt32(pCfg, "s3BlockCacheSize", tsS3BlockCacheSize, 4, 1024 * 1024, CFG_SCOPE_SERVER) != 0) return -1; if (cfgAddInt32(pCfg, "s3BlockCacheSize", tsS3BlockCacheSize, 4, 1024 * 1024, CFG_SCOPE_SERVER) != 0) return -1;
if (cfgAddInt32(pCfg, "s3PageCacheSize", tsS3PageCacheSize, 4, 1024 * 1024 * 1024, CFG_SCOPE_SERVER) != 0) return -1;
if (cfgAddInt32(pCfg, "s3UploadDelaySec", tsS3UploadDelaySec, 60 * 10, 60 * 60 * 24 * 30, CFG_SCOPE_SERVER) != 0)
return -1;
// min free disk space used to check if the disk is full [50MB, 1GB] // min free disk space used to check if the disk is full [50MB, 1GB]
if (cfgAddInt64(pCfg, "minDiskFreeSize", tsMinDiskFreeSize, TFS_MIN_DISK_FREE_SIZE, 1024 * 1024 * 1024, if (cfgAddInt64(pCfg, "minDiskFreeSize", tsMinDiskFreeSize, TFS_MIN_DISK_FREE_SIZE, 1024 * 1024 * 1024,
@ -953,7 +960,8 @@ static int32_t taosSetClientCfg(SConfig *pCfg) {
return -1; return -1;
} }
tstrncpy(tsSmlAutoChildTableNameDelimiter, cfgGetItem(pCfg, "smlAutoChildTableNameDelimiter")->str, TSDB_TABLE_NAME_LEN); tstrncpy(tsSmlAutoChildTableNameDelimiter, cfgGetItem(pCfg, "smlAutoChildTableNameDelimiter")->str,
TSDB_TABLE_NAME_LEN);
tstrncpy(tsSmlChildTableName, cfgGetItem(pCfg, "smlChildTableName")->str, TSDB_TABLE_NAME_LEN); tstrncpy(tsSmlChildTableName, cfgGetItem(pCfg, "smlChildTableName")->str, TSDB_TABLE_NAME_LEN);
tstrncpy(tsSmlTagName, cfgGetItem(pCfg, "smlTagName")->str, TSDB_COL_NAME_LEN); tstrncpy(tsSmlTagName, cfgGetItem(pCfg, "smlTagName")->str, TSDB_COL_NAME_LEN);
tstrncpy(tsSmlTsDefaultName, cfgGetItem(pCfg, "smlTsDefaultName")->str, TSDB_COL_NAME_LEN); tstrncpy(tsSmlTsDefaultName, cfgGetItem(pCfg, "smlTsDefaultName")->str, TSDB_COL_NAME_LEN);
@ -1125,6 +1133,8 @@ static int32_t taosSetServerCfg(SConfig *pCfg) {
tsS3BlockSize = cfgGetItem(pCfg, "s3BlockSize")->i32; tsS3BlockSize = cfgGetItem(pCfg, "s3BlockSize")->i32;
tsS3BlockCacheSize = cfgGetItem(pCfg, "s3BlockCacheSize")->i32; tsS3BlockCacheSize = cfgGetItem(pCfg, "s3BlockCacheSize")->i32;
tsS3PageCacheSize = cfgGetItem(pCfg, "s3PageCacheSize")->i32;
tsS3UploadDelaySec = cfgGetItem(pCfg, "s3UploadDelaySec")->i32;
GRANT_CFG_GET; GRANT_CFG_GET;
return 0; return 0;
@ -1422,7 +1432,8 @@ int32_t taosApplyLocalCfg(SConfig *pCfg, char *name) {
} else if (strcasecmp("smlChildTableName", name) == 0) { } else if (strcasecmp("smlChildTableName", name) == 0) {
tstrncpy(tsSmlChildTableName, cfgGetItem(pCfg, "smlChildTableName")->str, TSDB_TABLE_NAME_LEN); tstrncpy(tsSmlChildTableName, cfgGetItem(pCfg, "smlChildTableName")->str, TSDB_TABLE_NAME_LEN);
} else if (strcasecmp("smlAutoChildTableNameDelimiter", name) == 0) { } else if (strcasecmp("smlAutoChildTableNameDelimiter", name) == 0) {
tstrncpy(tsSmlAutoChildTableNameDelimiter, cfgGetItem(pCfg, "smlAutoChildTableNameDelimiter")->str, TSDB_TABLE_NAME_LEN); tstrncpy(tsSmlAutoChildTableNameDelimiter, cfgGetItem(pCfg, "smlAutoChildTableNameDelimiter")->str,
TSDB_TABLE_NAME_LEN);
} else if (strcasecmp("smlTagName", name) == 0) { } else if (strcasecmp("smlTagName", name) == 0) {
tstrncpy(tsSmlTagName, cfgGetItem(pCfg, "smlTagName")->str, TSDB_COL_NAME_LEN); tstrncpy(tsSmlTagName, cfgGetItem(pCfg, "smlTagName")->str, TSDB_COL_NAME_LEN);
// } else if (strcasecmp("smlDataFormat", name) == 0) { // } else if (strcasecmp("smlDataFormat", name) == 0) {
@ -1717,6 +1728,20 @@ void taosCfgDynamicOptions(const char *option, const char *value) {
return; return;
} }
if (strcasecmp(option, "s3PageCacheSize") == 0) {
int32_t newS3PageCacheSize = atoi(value);
uInfo("s3PageCacheSize set from %d to %d", tsS3PageCacheSize, newS3PageCacheSize);
tsS3PageCacheSize = newS3PageCacheSize;
return;
}
if (strcasecmp(option, "s3UploadDelaySec") == 0) {
int32_t newS3UploadDelaysec = atoi(value);
uInfo("s3UploadDelaySec set from %d to %d", tsS3UploadDelaySec, newS3UploadDelaysec);
tsS3UploadDelaySec = newS3UploadDelaysec;
return;
}
if (strcasecmp(option, "ttlPushInterval") == 0) { if (strcasecmp(option, "ttlPushInterval") == 0) {
int32_t newTtlPushInterval = atoi(value); int32_t newTtlPushInterval = atoi(value);
uInfo("ttlPushInterval set from %d to %d", tsTtlPushIntervalSec, newTtlPushInterval); uInfo("ttlPushInterval set from %d to %d", tsTtlPushIntervalSec, newTtlPushInterval);

View File

@ -1228,6 +1228,70 @@ static int32_t mndProcessConfigDnodeReq(SRpcMsg *pReq) {
strcpy(dcfgReq.config, "monitor"); strcpy(dcfgReq.config, "monitor");
snprintf(dcfgReq.value, TSDB_DNODE_VALUE_LEN, "%d", flag); snprintf(dcfgReq.value, TSDB_DNODE_VALUE_LEN, "%d", flag);
} else if (strncasecmp(cfgReq.config, "s3blocksize", 11) == 0) {
int32_t optLen = strlen("s3blocksize");
int32_t flag = -1;
int32_t code = mndMCfgGetValInt32(&cfgReq, optLen, &flag);
if (code < 0) return code;
if (flag > 1024 * 1024) {
mError("dnode:%d, failed to config s3blocksize since value:%d. Valid range: [4, 1024 * 1024]", cfgReq.dnodeId,
flag);
terrno = TSDB_CODE_INVALID_CFG;
tFreeSMCfgDnodeReq(&cfgReq);
return -1;
}
strcpy(dcfgReq.config, "s3blocksize");
snprintf(dcfgReq.value, TSDB_DNODE_VALUE_LEN, "%d", flag);
} else if (strncasecmp(cfgReq.config, "s3blockcachesize", 16) == 0) {
int32_t optLen = strlen("s3blockcachesize");
int32_t flag = -1;
int32_t code = mndMCfgGetValInt32(&cfgReq, optLen, &flag);
if (code < 0) return code;
if (flag < 4 || flag > 1024 * 1024) {
mError("dnode:%d, failed to config s3BlockCacheSize since value:%d. Valid range: [4, 1024 * 1024]",
cfgReq.dnodeId, flag);
terrno = TSDB_CODE_INVALID_CFG;
tFreeSMCfgDnodeReq(&cfgReq);
return -1;
}
strcpy(dcfgReq.config, "s3blockcachesize");
snprintf(dcfgReq.value, TSDB_DNODE_VALUE_LEN, "%d", flag);
} else if (strncasecmp(cfgReq.config, "s3pagecachesize", 16) == 0) {
int32_t optLen = strlen("s3pagecachesize");
int32_t flag = -1;
int32_t code = mndMCfgGetValInt32(&cfgReq, optLen, &flag);
if (code < 0) return code;
if (flag < 4 || flag > 1024 * 1024 * 1024) {
mError("dnode:%d, failed to config s3PageCacheSize since value:%d. Valid range: [4, 1024 * 1024]", cfgReq.dnodeId,
flag);
terrno = TSDB_CODE_INVALID_CFG;
tFreeSMCfgDnodeReq(&cfgReq);
return -1;
}
strcpy(dcfgReq.config, "s3pagecachesize");
snprintf(dcfgReq.value, TSDB_DNODE_VALUE_LEN, "%d", flag);
} else if (strncasecmp(cfgReq.config, "s3uploaddelaysec", 16) == 0) {
int32_t optLen = strlen("s3uploaddelaysec");
int32_t flag = -1;
int32_t code = mndMCfgGetValInt32(&cfgReq, optLen, &flag);
if (code < 0) return code;
if (flag < 600 || flag > 60 * 60 * 24 * 30) {
mError("dnode:%d, failed to config s3UploadDelaySec since value:%d. Valid range: [600, 60 * 60 * 24 * 30]",
cfgReq.dnodeId, flag);
terrno = TSDB_CODE_INVALID_CFG;
tFreeSMCfgDnodeReq(&cfgReq);
return -1;
}
strcpy(dcfgReq.config, "s3uploaddelaysec");
snprintf(dcfgReq.value, TSDB_DNODE_VALUE_LEN, "%d", flag);
} else if (strncasecmp(cfgReq.config, "ttlpushinterval", 14) == 0) { } else if (strncasecmp(cfgReq.config, "ttlpushinterval", 14) == 0) {
int32_t optLen = strlen("ttlpushinterval"); int32_t optLen = strlen("ttlpushinterval");
int32_t flag = -1; int32_t flag = -1;

View File

@ -382,6 +382,8 @@ struct STsdb {
TdThreadMutex biMutex; TdThreadMutex biMutex;
SLRUCache *bCache; SLRUCache *bCache;
TdThreadMutex bMutex; TdThreadMutex bMutex;
SLRUCache *pgCache;
TdThreadMutex pgMutex;
struct STFileSystem *pFS; // new struct STFileSystem *pFS; // new
SRocksCache rCache; SRocksCache rCache;
}; };
@ -909,7 +911,9 @@ int32_t tsdbCacheGetBlockIdx(SLRUCache *pCache, SDataFReader *pFileReader, LRUHa
int32_t tsdbBICacheRelease(SLRUCache *pCache, LRUHandle *h); int32_t tsdbBICacheRelease(SLRUCache *pCache, LRUHandle *h);
int32_t tsdbCacheGetBlockS3(SLRUCache *pCache, STsdbFD *pFD, LRUHandle **handle); int32_t tsdbCacheGetBlockS3(SLRUCache *pCache, STsdbFD *pFD, LRUHandle **handle);
int32_t tsdbBCacheRelease(SLRUCache *pCache, LRUHandle *h); int32_t tsdbCacheGetPageS3(SLRUCache *pCache, STsdbFD *pFD, int64_t pgno, LRUHandle **handle);
int32_t tsdbCacheSetPageS3(SLRUCache *pCache, STsdbFD *pFD, int64_t pgno, uint8_t *pPage);
int32_t tsdbCacheRelease(SLRUCache *pCache, LRUHandle *h);
int32_t tsdbCacheDeleteLastrow(SLRUCache *pCache, tb_uid_t uid, TSKEY eKey); int32_t tsdbCacheDeleteLastrow(SLRUCache *pCache, tb_uid_t uid, TSKEY eKey);
int32_t tsdbCacheDeleteLast(SLRUCache *pCache, tb_uid_t uid, TSKEY eKey); int32_t tsdbCacheDeleteLast(SLRUCache *pCache, tb_uid_t uid, TSKEY eKey);

View File

@ -27,6 +27,8 @@ extern "C" {
extern int8_t tsS3Enabled; extern int8_t tsS3Enabled;
extern int32_t tsS3BlockSize; extern int32_t tsS3BlockSize;
extern int32_t tsS3BlockCacheSize; extern int32_t tsS3BlockCacheSize;
extern int32_t tsS3PageCacheSize;
extern int32_t tsS3UploadDelaySec;
int32_t s3Init(); int32_t s3Init();
void s3CleanUp(); void s3CleanUp();

View File

@ -87,6 +87,41 @@ static void tsdbCloseBCache(STsdb *pTsdb) {
} }
} }
static int32_t tsdbOpenPgCache(STsdb *pTsdb) {
int32_t code = 0;
// SLRUCache *pCache = taosLRUCacheInit(10 * 1024 * 1024, 0, .5);
int32_t szPage = pTsdb->pVnode->config.tsdbPageSize;
SLRUCache *pCache = taosLRUCacheInit((int64_t)tsS3PageCacheSize * szPage, 0, .5);
if (pCache == NULL) {
code = TSDB_CODE_OUT_OF_MEMORY;
goto _err;
}
taosLRUCacheSetStrictCapacity(pCache, false);
taosThreadMutexInit(&pTsdb->pgMutex, NULL);
_err:
pTsdb->pgCache = pCache;
return code;
}
static void tsdbClosePgCache(STsdb *pTsdb) {
SLRUCache *pCache = pTsdb->pgCache;
if (pCache) {
int32_t elems = taosLRUCacheGetElems(pCache);
tsdbTrace("vgId:%d, elems: %d", TD_VID(pTsdb->pVnode), elems);
taosLRUCacheEraseUnrefEntries(pCache);
elems = taosLRUCacheGetElems(pCache);
tsdbTrace("vgId:%d, elems: %d", TD_VID(pTsdb->pVnode), elems);
taosLRUCacheCleanup(pCache);
taosThreadMutexDestroy(&pTsdb->bMutex);
}
}
#define ROCKS_KEY_LEN (sizeof(tb_uid_t) + sizeof(int16_t) + sizeof(int8_t)) #define ROCKS_KEY_LEN (sizeof(tb_uid_t) + sizeof(int16_t) + sizeof(int8_t))
typedef struct { typedef struct {
@ -1191,6 +1226,12 @@ int32_t tsdbOpenCache(STsdb *pTsdb) {
goto _err; goto _err;
} }
code = tsdbOpenPgCache(pTsdb);
if (code != TSDB_CODE_SUCCESS) {
code = TSDB_CODE_OUT_OF_MEMORY;
goto _err;
}
code = tsdbOpenRocksCache(pTsdb); code = tsdbOpenRocksCache(pTsdb);
if (code != TSDB_CODE_SUCCESS) { if (code != TSDB_CODE_SUCCESS) {
code = TSDB_CODE_OUT_OF_MEMORY; code = TSDB_CODE_OUT_OF_MEMORY;
@ -1221,6 +1262,7 @@ void tsdbCloseCache(STsdb *pTsdb) {
tsdbCloseBICache(pTsdb); tsdbCloseBICache(pTsdb);
tsdbCloseBCache(pTsdb); tsdbCloseBCache(pTsdb);
tsdbClosePgCache(pTsdb);
tsdbCloseRocksCache(pTsdb); tsdbCloseRocksCache(pTsdb);
} }
@ -3057,7 +3099,6 @@ static int32_t tsdbCacheLoadBlockS3(STsdbFD *pFD, uint8_t **ppBlock) {
} }
*/ */
int64_t block_offset = (pFD->blkno - 1) * tsS3BlockSize * pFD->szPage; int64_t block_offset = (pFD->blkno - 1) * tsS3BlockSize * pFD->szPage;
// int64_t size = 4096;
code = s3GetObjectBlock(pFD->objName, block_offset, tsS3BlockSize * pFD->szPage, ppBlock); code = s3GetObjectBlock(pFD->objName, block_offset, tsS3BlockSize * pFD->szPage, ppBlock);
if (code != TSDB_CODE_SUCCESS) { if (code != TSDB_CODE_SUCCESS) {
// taosMemoryFree(pBlock); // taosMemoryFree(pBlock);
@ -3123,10 +3164,42 @@ int32_t tsdbCacheGetBlockS3(SLRUCache *pCache, STsdbFD *pFD, LRUHandle **handle)
return code; return code;
} }
int32_t tsdbBCacheRelease(SLRUCache *pCache, LRUHandle *h) { int32_t tsdbCacheGetPageS3(SLRUCache *pCache, STsdbFD *pFD, int64_t pgno, LRUHandle **handle) {
int32_t code = 0; int32_t code = 0;
char key[128] = {0};
int keyLen = 0;
taosLRUCacheRelease(pCache, h, false); getBCacheKey(pFD->fid, pFD->cid, pgno, key, &keyLen);
*handle = taosLRUCacheLookup(pCache, key, keyLen);
return code;
}
int32_t tsdbCacheSetPageS3(SLRUCache *pCache, STsdbFD *pFD, int64_t pgno, uint8_t *pPage) {
int32_t code = 0;
char key[128] = {0};
int keyLen = 0;
LRUHandle *handle = NULL;
getBCacheKey(pFD->fid, pFD->cid, pgno, key, &keyLen);
taosThreadMutexLock(&pFD->pTsdb->pgMutex);
handle = taosLRUCacheLookup(pFD->pTsdb->pgCache, key, keyLen);
if (!handle) {
size_t charge = pFD->szPage;
_taos_lru_deleter_t deleter = deleteBCache;
uint8_t *pPg = taosMemoryMalloc(charge);
memcpy(pPg, pPage, charge);
LRUStatus status =
taosLRUCacheInsert(pCache, key, keyLen, pPg, charge, deleter, &handle, TAOS_LRU_PRIORITY_LOW, NULL);
if (status != TAOS_LRU_STATUS_OK) {
// ignore cache updating if not ok
// code = TSDB_CODE_OUT_OF_MEMORY;
}
}
taosThreadMutexUnlock(&pFD->pTsdb->pgMutex);
tsdbCacheRelease(pFD->pTsdb->pgCache, handle);
return code; return code;
} }

View File

@ -46,6 +46,7 @@ typedef struct {
STFileSet *fset; STFileSet *fset;
TABLEID tbid[1]; TABLEID tbid[1];
bool hasTSData; bool hasTSData;
bool skipTsRow;
} ctx[1]; } ctx[1];
// reader // reader
@ -127,18 +128,18 @@ static int32_t tsdbCommitTSData(SCommitter2 *committer) {
continue; continue;
} }
} }
/*
extern int8_t tsS3Enabled; extern int8_t tsS3Enabled;
int32_t nlevel = tfsGetLevel(committer->tsdb->pVnode->pTfs); int32_t nlevel = tfsGetLevel(committer->tsdb->pVnode->pTfs);
bool skipRow = false; committer->ctx->skipTsRow = false;
if (tsS3Enabled && nlevel > 1 && committer->ctx->did.level == nlevel - 1) { if (tsS3Enabled && nlevel > 1 && committer->ctx->did.level == nlevel - 1) {
skipRow = true; committer->ctx->skipTsRow = true;
} }
*/
int64_t ts = TSDBROW_TS(&row->row); int64_t ts = TSDBROW_TS(&row->row);
if (skipRow && ts <= committer->ctx->maxKey) { if (committer->ctx->skipTsRow && ts <= committer->ctx->maxKey) {
ts = committer->ctx->maxKey + 1; ts = committer->ctx->maxKey + 1;
} }
@ -402,6 +403,32 @@ static int32_t tsdbCommitFileSetBegin(SCommitter2 *committer) {
// reset nextKey // reset nextKey
committer->ctx->nextKey = TSKEY_MAX; committer->ctx->nextKey = TSKEY_MAX;
committer->ctx->skipTsRow = false;
extern int8_t tsS3Enabled;
extern int32_t tsS3UploadDelaySec;
long s3Size(const char *object_name);
int32_t nlevel = tfsGetLevel(committer->tsdb->pVnode->pTfs);
committer->ctx->skipTsRow = false;
if (tsS3Enabled && nlevel > 1 && committer->ctx->fset) {
STFileObj *fobj = committer->ctx->fset->farr[TSDB_FTYPE_DATA];
if (fobj && fobj->f->did.level == nlevel - 1) {
// if exists on s3 or local mtime < committer->ctx->now - tsS3UploadDelay
const char *object_name = taosDirEntryBaseName((char *)fobj->fname);
if (taosCheckExistFile(fobj->fname)) {
int32_t mtime = 0;
taosStatFile(fobj->fname, NULL, &mtime, NULL);
if (mtime < committer->ctx->now - tsS3UploadDelaySec) {
committer->ctx->skipTsRow = true;
}
} else if (s3Size(object_name) > 0) {
committer->ctx->skipTsRow = true;
}
}
// new fset can be written with ts data
}
_exit: _exit:
if (code) { if (code) {
TSDB_ERROR_LOG(TD_VID(tsdb->pVnode), lino, code); TSDB_ERROR_LOG(TD_VID(tsdb->pVnode), lino, code);

View File

@ -303,6 +303,7 @@ bool tsdbIsSameTFile(const STFile *f1, const STFile *f2) {
if (f1->did.id != f2->did.id) return false; if (f1->did.id != f2->did.id) return false;
if (f1->fid != f2->fid) return false; if (f1->fid != f2->fid) return false;
if (f1->cid != f2->cid) return false; if (f1->cid != f2->cid) return false;
if (f1->s3flag != f2->s3flag) return false;
return true; return true;
} }

View File

@ -58,6 +58,7 @@ int32_t tsdbTFileObjCmpr(const STFileObj **fobj1, const STFileObj **fobj2);
struct STFile { struct STFile {
tsdb_ftype_t type; tsdb_ftype_t type;
SDiskID did; // disk id SDiskID did; // disk id
int32_t s3flag;
int32_t fid; // file id int32_t fid; // file id
int64_t cid; // commit id int64_t cid; // commit id
int64_t size; int64_t size;

View File

@ -131,6 +131,7 @@ static int32_t tsdbWriteFilePage(STsdbFD *pFD) {
} }
if (pFD->s3File) { if (pFD->s3File) {
tsdbWarn("%s file: %s", __func__, pFD->path);
return code; return code;
} }
if (pFD->pgno > 0) { if (pFD->pgno > 0) {
@ -177,7 +178,7 @@ static int32_t tsdbReadFilePage(STsdbFD *pFD, int64_t pgno) {
pFD->blkno = (pgno + tsS3BlockSize - 1) / tsS3BlockSize; pFD->blkno = (pgno + tsS3BlockSize - 1) / tsS3BlockSize;
code = tsdbCacheGetBlockS3(pFD->pTsdb->bCache, pFD, &handle); code = tsdbCacheGetBlockS3(pFD->pTsdb->bCache, pFD, &handle);
if (code != TSDB_CODE_SUCCESS || handle == NULL) { if (code != TSDB_CODE_SUCCESS || handle == NULL) {
tsdbBCacheRelease(pFD->pTsdb->bCache, handle); tsdbCacheRelease(pFD->pTsdb->bCache, handle);
if (code == TSDB_CODE_SUCCESS && !handle) { if (code == TSDB_CODE_SUCCESS && !handle) {
code = TSDB_CODE_OUT_OF_MEMORY; code = TSDB_CODE_OUT_OF_MEMORY;
} }
@ -189,7 +190,7 @@ static int32_t tsdbReadFilePage(STsdbFD *pFD, int64_t pgno) {
int64_t blk_offset = (pFD->blkno - 1) * tsS3BlockSize * pFD->szPage; int64_t blk_offset = (pFD->blkno - 1) * tsS3BlockSize * pFD->szPage;
memcpy(pFD->pBuf, pBlock + (offset - blk_offset), pFD->szPage); memcpy(pFD->pBuf, pBlock + (offset - blk_offset), pFD->szPage);
tsdbBCacheRelease(pFD->pTsdb->bCache, handle); tsdbCacheRelease(pFD->pTsdb->bCache, handle);
} else { } else {
// seek // seek
int64_t n = taosLSeekFile(pFD->pFD, offset, SEEK_SET); int64_t n = taosLSeekFile(pFD->pFD, offset, SEEK_SET);
@ -253,7 +254,7 @@ _exit:
return code; return code;
} }
int32_t tsdbReadFile(STsdbFD *pFD, int64_t offset, uint8_t *pBuf, int64_t size) { static int32_t tsdbReadFileImp(STsdbFD *pFD, int64_t offset, uint8_t *pBuf, int64_t size) {
int32_t code = 0; int32_t code = 0;
int64_t n = 0; int64_t n = 0;
int64_t fOffset = LOGIC_TO_FILE_OFFSET(offset, pFD->szPage); int64_t fOffset = LOGIC_TO_FILE_OFFSET(offset, pFD->szPage);
@ -282,10 +283,122 @@ _exit:
return code; return code;
} }
static int32_t tsdbReadFileS3(STsdbFD *pFD, int64_t offset, uint8_t *pBuf, int64_t size) {
int32_t code = 0;
int64_t n = 0;
int32_t szPgCont = PAGE_CONTENT_SIZE(pFD->szPage);
int64_t fOffset = LOGIC_TO_FILE_OFFSET(offset, pFD->szPage);
int64_t pgno = OFFSET_PGNO(fOffset, pFD->szPage);
int64_t bOffset = fOffset % pFD->szPage;
ASSERT(bOffset < szPgCont);
// 1, find pgnoStart & pgnoEnd to fetch from s3, if all pgs are local, no need to fetch
// 2, fetch pgnoStart ~ pgnoEnd from s3
// 3, store pgs to pcache & last pg to pFD->pBuf
// 4, deliver pgs to [pBuf, pBuf + size)
while (n < size) {
if (pFD->pgno != pgno) {
LRUHandle *handle = NULL;
code = tsdbCacheGetPageS3(pFD->pTsdb->pgCache, pFD, pgno, &handle);
if (code != TSDB_CODE_SUCCESS) {
if (handle) {
tsdbCacheRelease(pFD->pTsdb->pgCache, handle);
}
goto _exit;
}
if (!handle) {
break;
}
uint8_t *pPage = (uint8_t *)taosLRUCacheValue(pFD->pTsdb->pgCache, handle);
memcpy(pFD->pBuf, pPage, pFD->szPage);
tsdbCacheRelease(pFD->pTsdb->pgCache, handle);
// check
if (pgno > 1 && !taosCheckChecksumWhole(pFD->pBuf, pFD->szPage)) {
code = TSDB_CODE_FILE_CORRUPTED;
goto _exit;
}
pFD->pgno = pgno;
}
int64_t nRead = TMIN(szPgCont - bOffset, size - n);
memcpy(pBuf + n, pFD->pBuf + bOffset, nRead);
n += nRead;
pgno++;
bOffset = 0;
}
if (n < size) {
// 2, retrieve pgs from s3
uint8_t *pBlock = NULL;
int64_t retrieve_offset = PAGE_OFFSET(pgno, pFD->szPage);
int64_t pgnoEnd = pgno - 1 + (size - n + szPgCont - 1) / szPgCont;
int64_t retrieve_size = (pgnoEnd - pgno + 1) * pFD->szPage;
code = s3GetObjectBlock(pFD->objName, retrieve_offset, retrieve_size, &pBlock);
if (code != TSDB_CODE_SUCCESS) {
goto _exit;
}
// 3, Store Pages in Cache
int nPage = pgnoEnd - pgno + 1;
for (int i = 0; i < nPage; ++i) {
tsdbCacheSetPageS3(pFD->pTsdb->pgCache, pFD, pgno, pBlock + i * pFD->szPage);
memcpy(pFD->pBuf, pBlock + i * pFD->szPage, pFD->szPage);
// check
if (pgno > 1 && !taosCheckChecksumWhole(pFD->pBuf, pFD->szPage)) {
code = TSDB_CODE_FILE_CORRUPTED;
goto _exit;
}
pFD->pgno = pgno;
int64_t nRead = TMIN(szPgCont - bOffset, size - n);
memcpy(pBuf + n, pFD->pBuf + bOffset, nRead);
n += nRead;
pgno++;
bOffset = 0;
}
taosMemoryFree(pBlock);
}
_exit:
return code;
}
int32_t tsdbReadFile(STsdbFD *pFD, int64_t offset, uint8_t *pBuf, int64_t size) {
int32_t code = 0;
if (!pFD->pFD) {
code = tsdbOpenFileImpl(pFD);
if (code) {
goto _exit;
}
}
if (pFD->s3File && tsS3BlockSize < 0) {
return tsdbReadFileS3(pFD, offset, pBuf, size);
} else {
return tsdbReadFileImp(pFD, offset, pBuf, size);
}
_exit:
return code;
}
int32_t tsdbFsyncFile(STsdbFD *pFD) { int32_t tsdbFsyncFile(STsdbFD *pFD) {
int32_t code = 0; int32_t code = 0;
if (pFD->s3File) { if (pFD->s3File) {
tsdbWarn("%s file: %s", __func__, pFD->path);
return code; return code;
} }
code = tsdbWriteFilePage(pFD); code = tsdbWriteFilePage(pFD);

View File

@ -206,6 +206,8 @@ static int32_t tsdbMigrateDataFileS3(SRTNer *rtner, const STFileObj *fobj, const
}, },
}; };
op.nf.s3flag = true;
code = TARRAY2_APPEND(rtner->fopArr, op); code = TARRAY2_APPEND(rtner->fopArr, op);
TSDB_CHECK_CODE(code, lino, _exit); TSDB_CHECK_CODE(code, lino, _exit);
@ -322,27 +324,40 @@ static int32_t tsdbDoRetentionOnFileSet(SRTNer *rtner, STFileSet *fset) {
for (int32_t ftype = 0; ftype < TSDB_FTYPE_MAX && (fobj = fset->farr[ftype], 1); ++ftype) { for (int32_t ftype = 0; ftype < TSDB_FTYPE_MAX && (fobj = fset->farr[ftype], 1); ++ftype) {
if (fobj == NULL) continue; if (fobj == NULL) continue;
if (fobj->f->did.level == did.level) continue;
int32_t nlevel = tfsGetLevel(rtner->tsdb->pVnode->pTfs); int32_t nlevel = tfsGetLevel(rtner->tsdb->pVnode->pTfs);
if (fobj->f->did.level == did.level) {
if (tsS3Enabled && nlevel > 1 && TSDB_FTYPE_DATA == ftype && did.level == nlevel - 1 &&
taosCheckExistFile(fobj->fname)) {
int32_t mtime = 0;
taosStatFile(fobj->fname, NULL, &mtime, NULL);
if (mtime < rtner->now - tsS3UploadDelaySec) {
code = tsdbMigrateDataFileS3(rtner, fobj, &did);
TSDB_CHECK_CODE(code, lino, _exit);
}
}
continue;
}
/*
if (tsS3Enabled && nlevel > 1 && TSDB_FTYPE_DATA == ftype && did.level == nlevel - 1) { if (tsS3Enabled && nlevel > 1 && TSDB_FTYPE_DATA == ftype && did.level == nlevel - 1) {
code = tsdbMigrateDataFileS3(rtner, fobj, &did); code = tsdbMigrateDataFileS3(rtner, fobj, &did);
TSDB_CHECK_CODE(code, lino, _exit); TSDB_CHECK_CODE(code, lino, _exit);
} else { } else {
if (tsS3Enabled) { if (tsS3Enabled) {
int64_t fsize = 0; int64_t fsize = 0;
if (taosStatFile(fobj->fname, &fsize, NULL, NULL) < 0) { if (taosStatFile(fobj->fname, &fsize, NULL, NULL) < 0) {
code = TAOS_SYSTEM_ERROR(terrno); code = TAOS_SYSTEM_ERROR(terrno);
tsdbError("vgId:%d %s failed since file:%s stat failed, reason:%s", TD_VID(rtner->tsdb->pVnode), __func__, tsdbError("vgId:%d %s failed since file:%s stat failed, reason:%s", TD_VID(rtner->tsdb->pVnode),
fobj->fname, tstrerror(code)); __func__, fobj->fname, tstrerror(code)); TSDB_CHECK_CODE(code, lino, _exit);
TSDB_CHECK_CODE(code, lino, _exit);
} }
s3EvictCache(fobj->fname, fsize * 2); s3EvictCache(fobj->fname, fsize * 2);
} }
*/
code = tsdbDoMigrateFileObj(rtner, fobj, &did); code = tsdbDoMigrateFileObj(rtner, fobj, &did);
TSDB_CHECK_CODE(code, lino, _exit); TSDB_CHECK_CODE(code, lino, _exit);
} //}
} }
// stt // stt

View File

@ -80,8 +80,8 @@ int tsdbScanAndConvertSubmitMsg(STsdb *pTsdb, SSubmitReq2 *pMsg) {
TSKEY minKey = now - tsTickPerMin[pCfg->precision] * pCfg->keep2; TSKEY minKey = now - tsTickPerMin[pCfg->precision] * pCfg->keep2;
TSKEY maxKey = tsMaxKeyByPrecision[pCfg->precision]; TSKEY maxKey = tsMaxKeyByPrecision[pCfg->precision];
int32_t size = taosArrayGetSize(pMsg->aSubmitTbData); int32_t size = taosArrayGetSize(pMsg->aSubmitTbData);
/*
int32_t nlevel = tfsGetLevel(pTsdb->pVnode->pTfs); int32_t nlevel = tfsGetLevel(pTsdb->pVnode->pTfs);
if (nlevel > 1 && tsS3Enabled) { if (nlevel > 1 && tsS3Enabled) {
if (nlevel == 3) { if (nlevel == 3) {
minKey = now - tsTickPerMin[pCfg->precision] * pCfg->keep1; minKey = now - tsTickPerMin[pCfg->precision] * pCfg->keep1;
@ -89,7 +89,7 @@ int tsdbScanAndConvertSubmitMsg(STsdb *pTsdb, SSubmitReq2 *pMsg) {
minKey = now - tsTickPerMin[pCfg->precision] * pCfg->keep0; minKey = now - tsTickPerMin[pCfg->precision] * pCfg->keep0;
} }
} }
*/
for (int32_t i = 0; i < size; ++i) { for (int32_t i = 0; i < size; ++i) {
SSubmitTbData *pData = TARRAY_GET_ELEM(pMsg->aSubmitTbData, i); SSubmitTbData *pData = TARRAY_GET_ELEM(pMsg->aSubmitTbData, i);
if (pData->flags & SUBMIT_REQ_COLUMN_DATA_FORMAT) { if (pData->flags & SUBMIT_REQ_COLUMN_DATA_FORMAT) {

View File

@ -13,14 +13,14 @@
* along with this program. If not, see <http://www.gnu.org/licenses/>. * along with this program. If not, see <http://www.gnu.org/licenses/>.
*/ */
#include "audit.h"
#include "tencode.h" #include "tencode.h"
#include "tmsg.h" #include "tmsg.h"
#include "tstrbuild.h"
#include "vnd.h" #include "vnd.h"
#include "vndCos.h" #include "vndCos.h"
#include "vnode.h" #include "vnode.h"
#include "vnodeInt.h" #include "vnodeInt.h"
#include "audit.h"
#include "tstrbuild.h"
static int32_t vnodeProcessCreateStbReq(SVnode *pVnode, int64_t ver, void *pReq, int32_t len, SRpcMsg *pRsp); static int32_t vnodeProcessCreateStbReq(SVnode *pVnode, int64_t ver, void *pReq, int32_t len, SRpcMsg *pRsp);
static int32_t vnodeProcessAlterStbReq(SVnode *pVnode, int64_t ver, void *pReq, int32_t len, SRpcMsg *pRsp); static int32_t vnodeProcessAlterStbReq(SVnode *pVnode, int64_t ver, void *pReq, int32_t len, SRpcMsg *pRsp);
@ -263,8 +263,9 @@ static int32_t vnodePreProcessSubmitTbData(SVnode *pVnode, SDecoder *pCoder, int
now *= 1000000; now *= 1000000;
} }
int32_t nlevel = tfsGetLevel(pVnode->pTfs);
int32_t keep = pVnode->config.tsdbCfg.keep2; int32_t keep = pVnode->config.tsdbCfg.keep2;
/*
int32_t nlevel = tfsGetLevel(pVnode->pTfs);
if (nlevel > 1 && tsS3Enabled) { if (nlevel > 1 && tsS3Enabled) {
if (nlevel == 3) { if (nlevel == 3) {
keep = pVnode->config.tsdbCfg.keep1; keep = pVnode->config.tsdbCfg.keep1;
@ -272,6 +273,7 @@ static int32_t vnodePreProcessSubmitTbData(SVnode *pVnode, SDecoder *pCoder, int
keep = pVnode->config.tsdbCfg.keep0; keep = pVnode->config.tsdbCfg.keep0;
} }
} }
*/
TSKEY minKey = now - tsTickPerMin[pVnode->config.tsdbCfg.precision] * keep; TSKEY minKey = now - tsTickPerMin[pVnode->config.tsdbCfg.precision] * keep;
TSKEY maxKey = tsMaxKeyByPrecision[pVnode->config.tsdbCfg.precision]; TSKEY maxKey = tsMaxKeyByPrecision[pVnode->config.tsdbCfg.precision];
@ -1518,7 +1520,6 @@ static int32_t vnodeProcessSubmitReq(SVnode *pVnode, int64_t ver, void *pReq, in
int32_t nRow = TARRAY_SIZE(pSubmitTbData->aRowP); int32_t nRow = TARRAY_SIZE(pSubmitTbData->aRowP);
SRow **aRow = (SRow **)TARRAY_DATA(pSubmitTbData->aRowP); SRow **aRow = (SRow **)TARRAY_DATA(pSubmitTbData->aRowP);
for (int32_t iRow = 0; iRow < nRow; ++iRow) { for (int32_t iRow = 0; iRow < nRow; ++iRow) {
if (aRow[iRow]->ts < minKey || aRow[iRow]->ts > maxKey || (iRow > 0 && aRow[iRow]->ts <= aRow[iRow - 1]->ts)) { if (aRow[iRow]->ts < minKey || aRow[iRow]->ts > maxKey || (iRow > 0 && aRow[iRow]->ts <= aRow[iRow - 1]->ts)) {
code = TSDB_CODE_INVALID_MSG; code = TSDB_CODE_INVALID_MSG;
vError("vgId:%d %s failed since %s, version:%" PRId64, TD_VID(pVnode), __func__, tstrerror(code), ver); vError("vgId:%d %s failed since %s, version:%" PRId64, TD_VID(pVnode), __func__, tstrerror(code), ver);