Merge pull request #23341 from taosdata/enh/refactorBackend

Enh/refactor backend
This commit is contained in:
Haojun Liao 2023-11-30 13:48:18 +08:00 committed by GitHub
commit e9ead3bd28
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
33 changed files with 3320 additions and 1655 deletions

View File

@ -151,6 +151,7 @@ IF(${BUILD_S3})
IF(${BUILD_WITH_S3})
add_definitions(-DUSE_S3)
option(BUILD_WITH_COS "If build with cos" OFF)
ELSE ()

View File

@ -43,6 +43,7 @@ int32_t s3GetObjectBlock(const char *object_name, int64_t offset, int64_t size,
int32_t s3GetObjectsByPrefix(const char *prefix, const char *path);
void s3EvictCache(const char *path, long object_size);
long s3Size(const char *object_name);
int32_t s3GetObjectToFile(const char *object_name, char *fileName);
#ifdef __cplusplus
}

View File

@ -75,12 +75,15 @@ extern int32_t tsElectInterval;
extern int32_t tsHeartbeatInterval;
extern int32_t tsHeartbeatTimeout;
// vnode
extern int64_t tsVndCommitMaxIntervalMs;
// snode
extern int32_t tsRsyncPort;
extern char tsCheckpointBackupDir[];
extern char tsCheckpointBackupDir[];
// vnode checkpoint
extern char tsSnodeAddress[]; //127.0.0.1:873
extern char tsSnodeAddress[]; // 127.0.0.1:873
// mnode
extern int64_t tsMndSdbWriteDelta;
@ -104,8 +107,8 @@ extern int32_t tsMonitorMaxLogs;
extern bool tsMonitorComp;
// audit
extern bool tsEnableAudit;
extern bool tsEnableAuditCreateTable;
extern bool tsEnableAudit;
extern bool tsEnableAuditCreateTable;
// telem
extern bool tsEnableTelem;
@ -113,9 +116,9 @@ extern int32_t tsTelemInterval;
extern char tsTelemServer[];
extern uint16_t tsTelemPort;
extern bool tsEnableCrashReport;
extern char *tsTelemUri;
extern char *tsClientCrashReportUri;
extern char *tsSvrCrashReportUri;
extern char * tsTelemUri;
extern char * tsClientCrashReportUri;
extern char * tsSvrCrashReportUri;
// query buffer management
extern int32_t tsQueryBufferSize; // maximum allowed usage buffer size in MB for each data node during query processing

View File

@ -186,6 +186,7 @@ enum { // WARN: new msg should be appended to segment tail
TD_DEF_MSG_TYPE(TDMT_MND_RESUME_STREAM, "resume-stream", NULL, NULL)
TD_DEF_MSG_TYPE(TDMT_MND_STREAM_CHECKPOINT_TIMER, "stream-checkpoint-tmr", NULL, NULL)
TD_DEF_MSG_TYPE(TDMT_MND_STREAM_BEGIN_CHECKPOINT, "stream-begin-checkpoint", NULL, NULL)
TD_DEF_MSG_TYPE(TDMT_MND_STREAM_CHECKPOINT_CANDIDITATE, "stream-checkpoint-remain", NULL, NULL)
TD_DEF_MSG_TYPE(TDMT_MND_STREAM_NODECHANGE_CHECK, "stream-nodechange-check", NULL, NULL)
TD_DEF_MSG_TYPE(TDMT_MND_TRIM_DB_TIMER, "trim-db-tmr", NULL, NULL)
TD_DEF_MSG_TYPE(TDMT_MND_GRANT_NOTIFY, "grant-notify", NULL, NULL)

View File

@ -35,6 +35,7 @@ int32_t streamStateBegin(SStreamState* pState);
int32_t streamStateCommit(SStreamState* pState);
void streamStateDestroy(SStreamState* pState, bool remove);
int32_t streamStateDeleteCheckPoint(SStreamState* pState, TSKEY mark);
int32_t streamStateDelTaskDb(SStreamState* pState);
int32_t streamStateFuncPut(SStreamState* pState, const SWinKey* key, const void* value, int32_t vLen);
int32_t streamStateFuncGet(SStreamState* pState, const SWinKey* key, void** ppVal, int32_t* pVLen);
@ -133,4 +134,4 @@ char* streamStateIntervalDump(SStreamState* pState);
}
#endif
#endif /* ifndef _STREAM_STATE_H_ */
#endif /* ifndef _STREAM_STATE_H_ */

View File

@ -58,7 +58,9 @@ typedef struct SStreamTask SStreamTask;
typedef struct SStreamQueue SStreamQueue;
typedef struct SStreamTaskSM SStreamTaskSM;
#define SSTREAM_TASK_VER 2
#define SSTREAM_TASK_VER 2
#define SSTREAM_TASK_INCOMPATIBLE_VER 1
#define SSTREAM_TASK_NEED_CONVERT_VER 2
enum {
STREAM_STATUS__NORMAL = 0,
@ -110,6 +112,7 @@ typedef enum {
TASK_LEVEL__SOURCE = 1,
TASK_LEVEL__AGG,
TASK_LEVEL__SINK,
TASK_LEVEL_SMA,
} ETASK_LEVEL;
enum {
@ -304,11 +307,16 @@ typedef struct SStreamTaskId {
typedef struct SCheckpointInfo {
int64_t startTs;
int64_t checkpointId;
int64_t checkpointVer; // latest checkpointId version
int64_t processedVer; // already processed ver, that has generated results version.
int64_t checkpointVer; // latest checkpointId version
int64_t processedVer;
int64_t nextProcessVer; // current offset in WAL, not serialize it
int64_t failedId; // record the latest failed checkpoint id
int64_t checkpointingId;
int32_t downstreamAlignNum;
int32_t checkpointNotReadyTasks;
bool dispatchCheckpointTrigger;
int64_t msgVer;
} SCheckpointInfo;
typedef struct SStreamStatus {
@ -447,12 +455,11 @@ struct SStreamTask {
int64_t checkReqId;
SArray* checkReqIds; // shuffle
int32_t refCnt;
int64_t checkpointingId;
int32_t checkpointAlignCnt;
int32_t checkpointNotReadyTasks;
int32_t transferStateAlignCnt;
struct SStreamMeta* pMeta;
SSHashObj* pNameMap;
void* pBackend;
int64_t backendRefId;
char reserve[256];
};
@ -490,20 +497,25 @@ typedef struct SStreamMeta {
int32_t walScanCounter;
void* streamBackend;
int64_t streamBackendRid;
SHashObj* pTaskBackendUnique;
SHashObj* pTaskDbUnique;
TdThreadMutex backendMutex;
SMetaHbInfo* pHbInfo;
STaskUpdateInfo updateInfo;
SHashObj* pUpdateTaskSet;
int32_t numOfStreamTasks; // this value should be increased when a new task is added into the meta
int32_t numOfPausedTasks;
int32_t chkptNotReadyTasks;
int64_t rid;
int64_t chkpId;
int32_t chkpCap;
SArray* chkpSaved;
SArray* chkpInUse;
int32_t chkpCap;
SRWLatch chkpDirLock;
void* qHandle;
int32_t pauseTaskNum;
void* bkdChkptMgt;
} SStreamMeta;
int32_t tEncodeStreamEpInfo(SEncoder* pEncoder, const SStreamChildEpInfo* pInfo);
@ -659,7 +671,7 @@ int32_t tDecodeStreamCheckpointReadyMsg(SDecoder* pDecoder, SStreamCheckpointRea
typedef struct STaskStatusEntry {
STaskId id;
int32_t status;
int32_t statusLastDuration; // to record the last duration of current status
int32_t statusLastDuration; // to record the last duration of current status
int64_t stage;
int32_t nodeId;
int64_t verStart; // start version in WAL, only valid for source task
@ -672,8 +684,8 @@ typedef struct STaskStatusEntry {
int64_t inputQUnchangeCounter;
double inputQUsed; // in MiB
double inputRate;
double sinkQuota; // existed quota size for sink task
double sinkDataSize; // sink to dst data size
double sinkQuota; // existed quota size for sink task
double sinkDataSize; // sink to dst data size
} STaskStatusEntry;
typedef struct SStreamHbMsg {
@ -834,8 +846,10 @@ int32_t streamMetaReopen(SStreamMeta* pMeta);
void streamMetaInitBackend(SStreamMeta* pMeta);
int32_t streamMetaCommit(SStreamMeta* pMeta);
int32_t streamMetaLoadAllTasks(SStreamMeta* pMeta);
int32_t streamMetaReloadAllTasks(SStreamMeta* pMeta);
int64_t streamMetaGetLatestCheckpointId(SStreamMeta* pMeta);
void streamMetaNotifyClose(SStreamMeta* pMeta);
int32_t streamTaskSetDb(SStreamMeta* pMeta, void* pTask, char* key);
void streamMetaStartHb(SStreamMeta* pMeta);
bool streamMetaTaskInTimer(SStreamMeta* pMeta);
int32_t streamMetaUpdateTaskDownstreamStatus(SStreamTask* pTask, int64_t startTs, int64_t endTs, bool succ);
@ -857,8 +871,10 @@ int32_t streamAddCheckpointSourceRspMsg(SStreamCheckpointSourceReq* pReq, SRpcHa
int32_t buildCheckpointSourceRsp(SStreamCheckpointSourceReq* pReq, SRpcHandleInfo* pRpcInfo, SRpcMsg* pMsg,
int8_t isSucceed);
SStreamTaskSM* streamCreateStateMachine(SStreamTask* pTask);
void* streamDestroyStateMachine(SStreamTaskSM* pSM);
#ifdef __cplusplus
}
#endif
#endif /* ifndef _STREAM_H_ */
#endif /* ifndef _STREAM_H_ */

View File

@ -86,7 +86,7 @@ typedef struct {
char err_msg[128];
S3Status status;
uint64_t content_length;
char *buf;
char * buf;
int64_t buf_pos;
} TS3SizeCBD;
@ -270,7 +270,7 @@ typedef struct list_parts_callback_data {
typedef struct MultipartPartData {
put_object_callback_data put_object_data;
int seq;
UploadManager *manager;
UploadManager * manager;
} MultipartPartData;
static int putObjectDataCallback(int bufferSize, char *buffer, void *callbackData) {
@ -317,7 +317,7 @@ S3Status MultipartResponseProperiesCallback(const S3ResponseProperties *properti
MultipartPartData *data = (MultipartPartData *)callbackData;
int seq = data->seq;
const char *etag = properties->eTag;
const char * etag = properties->eTag;
data->manager->etags[seq - 1] = strdup(etag);
data->manager->next_etags_pos = seq;
return S3StatusOK;
@ -450,10 +450,10 @@ int32_t s3PutObjectFromFile2(const char *file, const char *object) {
int32_t code = 0;
const char *key = object;
// const char *uploadId = 0;
const char *filename = 0;
const char * filename = 0;
uint64_t contentLength = 0;
const char *cacheControl = 0, *contentType = 0, *md5 = 0;
const char *contentDispositionFilename = 0, *contentEncoding = 0;
const char * cacheControl = 0, *contentType = 0, *md5 = 0;
const char * contentDispositionFilename = 0, *contentEncoding = 0;
int64_t expires = -1;
S3CannedAcl cannedAcl = S3CannedAclPrivate;
int metaPropertiesCount = 0;
@ -467,6 +467,7 @@ int32_t s3PutObjectFromFile2(const char *file, const char *object) {
// data.infileFD = NULL;
// data.noStatus = noStatus;
// uError("ERROR: %s stat file %s: ", __func__, file);
if (taosStatFile(file, &contentLength, NULL, NULL) < 0) {
uError("ERROR: %s Failed to stat file %s: ", __func__, file);
code = TAOS_SYSTEM_ERROR(errno);
@ -647,7 +648,7 @@ typedef struct list_bucket_callback_data {
char nextMarker[1024];
int keyCount;
int allDetails;
SArray *objectArray;
SArray * objectArray;
} list_bucket_callback_data;
static S3Status listBucketCallback(int isTruncated, const char *nextMarker, int contentsCount,
@ -692,11 +693,11 @@ static void s3FreeObjectKey(void *pItem) {
static SArray *getListByPrefix(const char *prefix) {
S3BucketContext bucketContext = {0, tsS3BucketName, protocolG, uriStyleG, tsS3AccessKeyId, tsS3AccessKeySecret,
0, awsRegionG};
0, awsRegionG};
S3ListBucketHandler listBucketHandler = {{&responsePropertiesCallbackNull, &responseCompleteCallback},
&listBucketCallback};
const char *marker = 0, *delimiter = 0;
const char * marker = 0, *delimiter = 0;
int maxkeys = 0, allDetails = 0;
list_bucket_callback_data data;
data.objectArray = taosArrayInit(32, sizeof(void *));
@ -737,7 +738,7 @@ static SArray *getListByPrefix(const char *prefix) {
void s3DeleteObjects(const char *object_name[], int nobject) {
S3BucketContext bucketContext = {0, tsS3BucketName, protocolG, uriStyleG, tsS3AccessKeyId, tsS3AccessKeySecret,
0, awsRegionG};
0, awsRegionG};
S3ResponseHandler responseHandler = {0, &responseCompleteCallback};
for (int i = 0; i < nobject; ++i) {
@ -788,7 +789,7 @@ int32_t s3GetObjectBlock(const char *object_name, int64_t offset, int64_t size,
const char *ifMatch = 0, *ifNotMatch = 0;
S3BucketContext bucketContext = {0, tsS3BucketName, protocolG, uriStyleG, tsS3AccessKeyId, tsS3AccessKeySecret,
0, awsRegionG};
0, awsRegionG};
S3GetConditions getConditions = {ifModifiedSince, ifNotModifiedSince, ifMatch, ifNotMatch};
S3GetObjectHandler getObjectHandler = {{&responsePropertiesCallback, &responseCompleteCallback},
&getObjectDataCallback};
@ -826,7 +827,7 @@ int32_t s3GetObjectToFile(const char *object_name, char *fileName) {
const char *ifMatch = 0, *ifNotMatch = 0;
S3BucketContext bucketContext = {0, tsS3BucketName, protocolG, uriStyleG, tsS3AccessKeyId, tsS3AccessKeySecret,
0, awsRegionG};
0, awsRegionG};
S3GetConditions getConditions = {ifModifiedSince, ifNotModifiedSince, ifMatch, ifNotMatch};
S3GetObjectHandler getObjectHandler = {{&responsePropertiesCallbackNull, &responseCompleteCallback},
&getObjectCallback};
@ -857,7 +858,7 @@ int32_t s3GetObjectsByPrefix(const char *prefix, const char *path) {
if (objectArray == NULL) return -1;
for (size_t i = 0; i < taosArrayGetSize(objectArray); i++) {
char *object = taosArrayGetP(objectArray, i);
char * object = taosArrayGetP(objectArray, i);
const char *tmp = strchr(object, '/');
tmp = (tmp == NULL) ? object : tmp + 1;
char fileName[PATH_MAX] = {0};
@ -948,12 +949,12 @@ static void s3InitRequestOptions(cos_request_options_t *options, int is_cname) {
int32_t s3PutObjectFromFile(const char *file_str, const char *object_str) {
int32_t code = 0;
cos_pool_t *p = NULL;
cos_pool_t * p = NULL;
int is_cname = 0;
cos_status_t *s = NULL;
cos_status_t * s = NULL;
cos_request_options_t *options = NULL;
cos_string_t bucket, object, file;
cos_table_t *resp_headers;
cos_table_t * resp_headers;
// int traffic_limit = 0;
cos_pool_create(&p, NULL);
@ -984,14 +985,14 @@ int32_t s3PutObjectFromFile(const char *file_str, const char *object_str) {
int32_t s3PutObjectFromFile2(const char *file_str, const char *object_str) {
int32_t code = 0;
cos_pool_t *p = NULL;
cos_pool_t * p = NULL;
int is_cname = 0;
cos_status_t *s = NULL;
cos_request_options_t *options = NULL;
cos_status_t * s = NULL;
cos_request_options_t * options = NULL;
cos_string_t bucket, object, file;
cos_table_t *resp_headers;
cos_table_t * resp_headers;
int traffic_limit = 0;
cos_table_t *headers = NULL;
cos_table_t * headers = NULL;
cos_resumable_clt_params_t *clt_params = NULL;
cos_pool_create(&p, NULL);
@ -1024,11 +1025,11 @@ int32_t s3PutObjectFromFile2(const char *file_str, const char *object_str) {
}
void s3DeleteObjectsByPrefix(const char *prefix_str) {
cos_pool_t *p = NULL;
cos_pool_t * p = NULL;
cos_request_options_t *options = NULL;
int is_cname = 0;
cos_string_t bucket;
cos_status_t *s = NULL;
cos_status_t * s = NULL;
cos_string_t prefix;
cos_pool_create(&p, NULL);
@ -1043,10 +1044,10 @@ void s3DeleteObjectsByPrefix(const char *prefix_str) {
}
void s3DeleteObjects(const char *object_name[], int nobject) {
cos_pool_t *p = NULL;
cos_pool_t * p = NULL;
int is_cname = 0;
cos_string_t bucket;
cos_table_t *resp_headers = NULL;
cos_table_t * resp_headers = NULL;
cos_request_options_t *options = NULL;
cos_list_t object_list;
cos_list_t deleted_object_list;
@ -1080,14 +1081,14 @@ void s3DeleteObjects(const char *object_name[], int nobject) {
bool s3Exists(const char *object_name) {
bool ret = false;
cos_pool_t *p = NULL;
cos_pool_t * p = NULL;
int is_cname = 0;
cos_status_t *s = NULL;
cos_request_options_t *options = NULL;
cos_status_t * s = NULL;
cos_request_options_t * options = NULL;
cos_string_t bucket;
cos_string_t object;
cos_table_t *resp_headers;
cos_table_t *headers = NULL;
cos_table_t * resp_headers;
cos_table_t * headers = NULL;
cos_object_exist_status_e object_exist;
cos_pool_create(&p, NULL);
@ -1114,15 +1115,15 @@ bool s3Exists(const char *object_name) {
bool s3Get(const char *object_name, const char *path) {
bool ret = false;
cos_pool_t *p = NULL;
cos_pool_t * p = NULL;
int is_cname = 0;
cos_status_t *s = NULL;
cos_status_t * s = NULL;
cos_request_options_t *options = NULL;
cos_string_t bucket;
cos_string_t object;
cos_string_t file;
cos_table_t *resp_headers = NULL;
cos_table_t *headers = NULL;
cos_table_t * resp_headers = NULL;
cos_table_t * headers = NULL;
int traffic_limit = 0;
// create the memory pool
@ -1158,15 +1159,15 @@ bool s3Get(const char *object_name, const char *path) {
int32_t s3GetObjectBlock(const char *object_name, int64_t offset, int64_t block_size, bool check, uint8_t **ppBlock) {
(void)check;
int32_t code = 0;
cos_pool_t *p = NULL;
cos_pool_t * p = NULL;
int is_cname = 0;
cos_status_t *s = NULL;
cos_status_t * s = NULL;
cos_request_options_t *options = NULL;
cos_string_t bucket;
cos_string_t object;
cos_table_t *resp_headers;
cos_table_t *headers = NULL;
cos_buf_t *content = NULL;
cos_table_t * resp_headers;
cos_table_t * headers = NULL;
cos_buf_t * content = NULL;
// cos_string_t file;
// int traffic_limit = 0;
char range_buf[64];
@ -1260,7 +1261,7 @@ void s3EvictCache(const char *path, long object_size) {
terrno = TAOS_SYSTEM_ERROR(errno);
vError("failed to open %s since %s", dir_name, terrstr());
}
SArray *evict_files = taosArrayInit(16, sizeof(SEvictFile));
SArray * evict_files = taosArrayInit(16, sizeof(SEvictFile));
tdbDirEntryPtr pDirEntry;
while ((pDirEntry = taosReadDir(pDir)) != NULL) {
char *name = taosGetDirEntryName(pDirEntry);
@ -1302,13 +1303,13 @@ void s3EvictCache(const char *path, long object_size) {
long s3Size(const char *object_name) {
long size = 0;
cos_pool_t *p = NULL;
cos_pool_t * p = NULL;
int is_cname = 0;
cos_status_t *s = NULL;
cos_status_t * s = NULL;
cos_request_options_t *options = NULL;
cos_string_t bucket;
cos_string_t object;
cos_table_t *resp_headers = NULL;
cos_table_t * resp_headers = NULL;
// create the memory pool
cos_pool_create(&p, NULL);
@ -1354,5 +1355,6 @@ int32_t s3GetObjectBlock(const char *object_name, int64_t offset, int64_t size,
// No-op variant of s3EvictCache: takes the same arguments and does nothing.
// NOTE(review): presumably the fallback used when S3 support is compiled out — the enclosing #if is outside this view.
void s3EvictCache(const char *path, long object_size) {
  (void)path;
  (void)object_size;
}
// No-op variant of s3Size: ignores the object name and always reports a size of 0.
// NOTE(review): presumably the fallback used when S3 support is compiled out — the enclosing #if is outside this view.
long s3Size(const char *object_name) {
  (void)object_name;
  return 0L;
}
// No-op variant of s3GetObjectsByPrefix: ignores both arguments and always returns 0.
// NOTE(review): presumably the fallback used when S3 support is compiled out — the enclosing #if is outside this view.
int32_t s3GetObjectsByPrefix(const char *prefix, const char *path) {
  (void)prefix;
  (void)path;
  return 0;
}
// No-op variant of s3GetObjectToFile: nothing is fetched or written, and 0 is always returned.
// NOTE(review): presumably the fallback used when S3 support is compiled out — the enclosing #if is outside this view.
int32_t s3GetObjectToFile(const char *object_name, char *fileName) {
  (void)object_name;
  (void)fileName;
  return 0;
}
#endif

View File

@ -107,7 +107,7 @@ bool tsEnableTelem = true;
int32_t tsTelemInterval = 43200;
char tsTelemServer[TSDB_FQDN_LEN] = "telemetry.tdengine.com";
uint16_t tsTelemPort = 80;
char *tsTelemUri = "/report";
char * tsTelemUri = "/report";
#ifdef TD_ENTERPRISE
bool tsEnableCrashReport = false;
@ -354,16 +354,24 @@ static int32_t taosLoadCfg(SConfig *pCfg, const char **envCmd, const char *input
char cfgFile[PATH_MAX + 100] = {0};
taosExpandDir(inputCfgDir, cfgDir, PATH_MAX);
char lastC = cfgDir[strlen(cfgDir) - 1];
char lastC = cfgDir[strlen(cfgDir) - 1];
char *tdDirsep = TD_DIRSEP;
if (lastC == '\\' || lastC == '/') {
tdDirsep = "";
}
if (taosIsDir(cfgDir)) {
#ifdef CUS_PROMPT
snprintf(cfgFile, sizeof(cfgFile), "%s" "%s" "%s.cfg", cfgDir, tdDirsep, CUS_PROMPT);
snprintf(cfgFile, sizeof(cfgFile),
"%s"
"%s"
"%s.cfg",
cfgDir, tdDirsep, CUS_PROMPT);
#else
snprintf(cfgFile, sizeof(cfgFile), "%s" "%s" "taos.cfg", cfgDir, tdDirsep);
snprintf(cfgFile, sizeof(cfgFile),
"%s"
"%s"
"taos.cfg",
cfgDir, tdDirsep);
#endif
} else {
tstrncpy(cfgFile, cfgDir, sizeof(cfgDir));
@ -729,6 +737,7 @@ static int32_t taosAddServerCfg(SConfig *pCfg) {
if (cfgAddBool(pCfg, "disableStream", tsDisableStream, CFG_SCOPE_SERVER, CFG_DYN_ENT_SERVER) != 0) return -1;
if (cfgAddInt64(pCfg, "streamBufferSize", tsStreamBufferSize, 0, INT64_MAX, CFG_SCOPE_SERVER, CFG_DYN_NONE) != 0)
return -1;
if (cfgAddInt32(pCfg, "checkpointInterval", tsStreamCheckpointInterval, 60, 1200, CFG_SCOPE_SERVER,
CFG_DYN_ENT_SERVER) != 0)
return -1;
@ -1347,7 +1356,7 @@ void taosCleanupCfg() {
typedef struct {
const char *optionName;
void *optionVar;
void * optionVar;
} OptionNameAndVar;
static int32_t taosCfgSetOption(OptionNameAndVar *pOptions, int32_t optionSize, SConfigItem *pItem, bool isDebugflag) {
@ -1360,7 +1369,7 @@ static int32_t taosCfgSetOption(OptionNameAndVar *pOptions, int32_t optionSize,
switch (pItem->dtype) {
case CFG_DTYPE_BOOL: {
int32_t flag = pItem->i32;
bool *pVar = pOptions[d].optionVar;
bool * pVar = pOptions[d].optionVar;
uInfo("%s set from %d to %d", optName, *pVar, flag);
*pVar = flag;
terrno = TSDB_CODE_SUCCESS;

View File

@ -700,6 +700,11 @@ typedef struct {
} SStreamObj;
// A named 64-bit sequence value together with a read/write latch.
// NOTE(review): presumably the row object of the SDB_STREAM_SEQ sdb table — confirm against the table registration.
typedef struct SStreamSeq {
char name[24];  // fixed-size name buffer (24 bytes)
uint64_t seq;  // sequence value; NOTE(review): who advances it is not visible here — confirm
SRWLatch lock;  // read/write latch stored with the entry
} SStreamSeq;
int32_t tEncodeSStreamObj(SEncoder* pEncoder, const SStreamObj* pObj);
int32_t tDecodeSStreamObj(SDecoder* pDecoder, SStreamObj* pObj, int32_t sver);
void tFreeStreamObj(SStreamObj* pObj);
@ -731,14 +736,13 @@ typedef struct {
int8_t type;
int32_t numOfCols;
SSchema* pSchema;
SRWLatch lock;
SRWLatch lock;
} SViewObj;
int32_t tEncodeSViewObj(SEncoder* pEncoder, const SViewObj* pObj);
int32_t tDecodeSViewObj(SDecoder* pDecoder, SViewObj* pObj, int32_t sver);
void tFreeSViewObj(SViewObj* pObj);
#ifdef __cplusplus
}
#endif

View File

@ -28,18 +28,28 @@ typedef struct SStreamTransInfo {
const char *name;
} SStreamTransInfo;
// checkpointTs is the time at which the checkpoint was generated; if now() - checkpointTs >= tsCheckpointInterval,
// the checkpoint will be discarded to avoid too many checkpoints for a task in the waiting list
typedef struct SCheckpointCandEntry {
char * pName;  // NOTE(review): presumably the stream name; ownership is not visible here — confirm
int64_t streamId;  // id of the stream the pending checkpoint belongs to
int64_t checkpointTs;  // generation time of the checkpoint, compared against tsCheckpointInterval (see above)
int64_t checkpointId;  // id of the pending checkpoint
} SCheckpointCandEntry;
// Hash tables used to manage stream-related transactions.
typedef struct SStreamTransMgmt {
SHashObj *pDBTrans;  // NOTE(review): presumably tracks the registered transactions per database — confirm
SHashObj *pWaitingList; // streams whose timed checkpoint could not be issued because of a trans conflict
} SStreamTransMgmt;
typedef struct SStreamExecInfo {
SArray *pNodeList;
int64_t ts; // snapshot ts
SStreamTransMgmt transMgmt;
int64_t activeCheckpoint; // active check point id
SHashObj * pTaskMap;
SArray * pTaskList;
TdThreadMutex lock;
SArray * pNodeList;
int64_t ts; // snapshot ts
SStreamTransMgmt transMgmt;
int64_t activeCheckpoint; // active check point id
SHashObj * pTaskMap;
SArray * pTaskList;
TdThreadMutex lock;
} SStreamExecInfo;
extern SStreamExecInfo execInfo;
@ -51,7 +61,8 @@ void mndReleaseStream(SMnode *pMnode, SStreamObj *pStream);
int32_t mndDropStreamByDb(SMnode *pMnode, STrans *pTrans, SDbObj *pDb);
int32_t mndPersistStream(SMnode *pMnode, STrans *pTrans, SStreamObj *pStream);
int32_t mndStreamRegisterTrans(STrans* pTrans, const char* pName, const char* pSrcDb, const char* pDstDb);
int32_t mndStreamRegisterTrans(STrans *pTrans, const char *pName, const char *pSrcDb, const char *pDstDb);
int32_t mndAddtoCheckpointWaitingList(SStreamObj *pStream, int64_t checkpointId);
bool streamTransConflictOtherTrans(SMnode *pMnode, const char *pSrcDb, const char *pDstDb, bool lock);
// for sma

View File

@ -146,6 +146,15 @@ static void mndStreamCheckpointTick(SMnode *pMnode, int64_t sec) {
}
}
// Builds a checkpoint tick message (second argument 0) and, if that succeeds, posts it to the
// mnode READ_QUEUE as a TDMT_MND_STREAM_CHECKPOINT_CANDIDITATE message. The enqueue result is not checked.
static void mndStreamCheckpointRemain(SMnode* pMnode) {
  int32_t msgLen = 0;
  void   *pCont = mndBuildCheckpointTickMsg(&msgLen, 0);
  if (pCont == NULL) {
    return;  // nothing was built, so there is nothing to enqueue
  }

  SRpcMsg msg = {
      .msgType = TDMT_MND_STREAM_CHECKPOINT_CANDIDITATE,
      .pCont = pCont,
      .contLen = msgLen,
  };
  tmsgPutToQueue(&pMnode->msgCb, READ_QUEUE, &msg);
}
static void mndStreamCheckNode(SMnode* pMnode) {
int32_t contLen = 0;
void *pReq = mndBuildTimerMsg(&contLen);
@ -286,6 +295,10 @@ static void *mndThreadFp(void *param) {
mndStreamCheckpointTick(pMnode, sec);
}
if (sec % 5 == 0) {
mndStreamCheckpointRemain(pMnode);
}
if (sec % tsStreamNodeCheckInterval == 0) {
mndStreamCheckNode(pMnode);
}

View File

@ -18,23 +18,21 @@
#include "mndDb.h"
#include "mndDnode.h"
#include "mndMnode.h"
#include "mndSnode.h"
#include "mndPrivilege.h"
#include "mndScheduler.h"
#include "mndShow.h"
#include "mndSnode.h"
#include "mndStb.h"
#include "mndTopic.h"
#include "mndTrans.h"
#include "mndUser.h"
#include "mndVgroup.h"
#include "osMemory.h"
#include "parser.h"
#include "tmisce.h"
#include "tname.h"
#define MND_STREAM_VER_NUMBER 4
#define MND_STREAM_RESERVE_SIZE 64
#define MND_STREAM_MAX_NUM 60
#define MND_STREAM_VER_NUMBER 4
#define MND_STREAM_RESERVE_SIZE 64
#define MND_STREAM_MAX_NUM 60
#define MND_STREAM_CHECKPOINT_NAME "stream-checkpoint"
#define MND_STREAM_PAUSE_NAME "stream-pause"
@ -65,6 +63,7 @@ static int32_t mndProcessCreateStreamReq(SRpcMsg *pReq);
static int32_t mndProcessDropStreamReq(SRpcMsg *pReq);
static int32_t mndProcessStreamCheckpointTmr(SRpcMsg *pReq);
static int32_t mndProcessStreamDoCheckpoint(SRpcMsg *pReq);
static int32_t mndProcessStreamCheckpointInCandid(SRpcMsg *pReq);
static int32_t mndProcessStreamHb(SRpcMsg *pReq);
static int32_t mndRetrieveStream(SRpcMsg *pReq, SShowObj *pShow, SSDataBlock *pBlock, int32_t rows);
static void mndCancelGetNextStream(SMnode *pMnode, void *pIter);
@ -83,7 +82,7 @@ static SStreamObj *mndGetStreamObj(SMnode *pMnode, int64_t streamId);
static SVgroupChangeInfo mndFindChangedNodeInfo(SMnode *pMnode, const SArray *pPrevNodeList, const SArray *pNodeList);
static STrans *doCreateTrans(SMnode *pMnode, SStreamObj *pStream, SRpcMsg *pReq, const char *name, const char* pMsg);
static STrans *doCreateTrans(SMnode *pMnode, SStreamObj *pStream, SRpcMsg *pReq, const char *name, const char *pMsg);
static int32_t mndPersistTransLog(SStreamObj *pStream, STrans *pTrans);
static void initTransAction(STransAction *pAction, void *pCont, int32_t contLen, int32_t msgType, const SEpSet *pEpset,
int32_t retryCode);
@ -91,12 +90,19 @@ static int32_t createStreamUpdateTrans(SStreamObj *pStream, SVgroupChangeInfo *p
static void removeStreamTasksInBuf(SStreamObj *pStream, SStreamExecInfo *pExecNode);
static void saveStreamTasksInfo(SStreamObj *pStream, SStreamExecInfo *pExecNode);
static int32_t removeExpirednodeEntryAndTask(SArray *pNodeSnapshot);
static int32_t killActiveCheckpointTrans(SMnode *pMnode, const char* pDbName, size_t len);
static int32_t killActiveCheckpointTrans(SMnode *pMnode, const char *pDbName, size_t len);
static int32_t setNodeEpsetExpiredFlag(const SArray *pNodeList);
static void freeCheckpointCandEntry(void *);
static SSdbRaw *mndStreamActionEncode(SStreamObj *pStream);
static SSdbRow *mndStreamActionDecode(SSdbRaw *pRaw);
SSdbRaw * mndStreamSeqActionEncode(SStreamObj *pStream);
SSdbRow * mndStreamSeqActionDecode(SSdbRaw *pRaw);
static int32_t mndStreamSeqActionInsert(SSdb *pSdb, SStreamSeq *pStream);
static int32_t mndStreamSeqActionDelete(SSdb *pSdb, SStreamSeq *pStream);
static int32_t mndStreamSeqActionUpdate(SSdb *pSdb, SStreamSeq *pOldStream, SStreamSeq *pNewStream);
int32_t mndInitStream(SMnode *pMnode) {
SSdbTable table = {
.sdbType = SDB_STREAM,
@ -107,6 +113,15 @@ int32_t mndInitStream(SMnode *pMnode) {
.updateFp = (SdbUpdateFp)mndStreamActionUpdate,
.deleteFp = (SdbDeleteFp)mndStreamActionDelete,
};
SSdbTable tableSeq = {
.sdbType = SDB_STREAM_SEQ,
.keyType = SDB_KEY_BINARY,
.encodeFp = (SdbEncodeFp)mndStreamSeqActionEncode,
.decodeFp = (SdbDecodeFp)mndStreamSeqActionDecode,
.insertFp = (SdbInsertFp)mndStreamSeqActionInsert,
.updateFp = (SdbUpdateFp)mndStreamSeqActionUpdate,
.deleteFp = (SdbDeleteFp)mndStreamSeqActionDelete,
};
mndSetMsgHandle(pMnode, TDMT_MND_CREATE_STREAM, mndProcessCreateStreamReq);
mndSetMsgHandle(pMnode, TDMT_MND_DROP_STREAM, mndProcessDropStreamReq);
@ -123,6 +138,7 @@ int32_t mndInitStream(SMnode *pMnode) {
mndSetMsgHandle(pMnode, TDMT_VND_STREAM_CHECK_POINT_SOURCE_RSP, mndTransProcessRsp);
mndSetMsgHandle(pMnode, TDMT_MND_STREAM_CHECKPOINT_TIMER, mndProcessStreamCheckpointTmr);
mndSetMsgHandle(pMnode, TDMT_MND_STREAM_BEGIN_CHECKPOINT, mndProcessStreamDoCheckpoint);
mndSetMsgHandle(pMnode, TDMT_MND_STREAM_CHECKPOINT_CANDIDITATE, mndProcessStreamCheckpointInCandid);
mndSetMsgHandle(pMnode, TDMT_MND_STREAM_HEARTBEAT, mndProcessStreamHb);
mndSetMsgHandle(pMnode, TDMT_STREAM_TASK_REPORT_CHECKPOINT, mndTransProcessRsp);
mndSetMsgHandle(pMnode, TDMT_MND_STREAM_NODECHANGE_CHECK, mndProcessNodeCheckReq);
@ -141,14 +157,23 @@ int32_t mndInitStream(SMnode *pMnode) {
execInfo.pTaskList = taosArrayInit(4, sizeof(STaskId));
execInfo.pTaskMap = taosHashInit(64, fn, true, HASH_NO_LOCK);
execInfo.transMgmt.pDBTrans = taosHashInit(32, fn, true, HASH_NO_LOCK);
execInfo.transMgmt.pWaitingList = taosHashInit(32, fn, true, HASH_NO_LOCK);
taosHashSetFreeFp(execInfo.transMgmt.pWaitingList, freeCheckpointCandEntry);
return sdbSetTable(pMnode->pSdb, table);
if (sdbSetTable(pMnode->pSdb, table) != 0) {
return -1;
}
if (sdbSetTable(pMnode->pSdb, tableSeq) != 0) {
return -1;
}
return 0;
}
// Releases the members of the global execInfo: the task list, the task map, both hash tables of
// the transaction manager, and finally the mutex. pMnode is not used.
void mndCleanupStream(SMnode *pMnode) {
  SStreamTransMgmt *pTransMgmt = &execInfo.transMgmt;

  taosArrayDestroy(execInfo.pTaskList);
  taosHashCleanup(execInfo.pTaskMap);

  taosHashCleanup(pTransMgmt->pDBTrans);
  taosHashCleanup(pTransMgmt->pWaitingList);

  taosThreadMutexDestroy(&execInfo.lock);
  mDebug("mnd stream exec info cleanup");
}
@ -195,7 +220,8 @@ STREAM_ENCODE_OVER:
return NULL;
}
mTrace("stream:%s, encode to raw:%p, row:%p", pStream->name, pRaw, pStream);
mTrace("stream:%s, encode to raw:%p, row:%p, checkpoint:%" PRId64 "", pStream->name, pRaw, pStream,
pStream->checkpointId);
return pRaw;
}
@ -248,7 +274,8 @@ STREAM_DECODE_OVER:
return NULL;
}
mTrace("stream:%s, decode from raw:%p, row:%p", pStream->name, pRaw, pStream);
mTrace("stream:%s, decode from raw:%p, row:%p, checkpoint:%" PRId64 "", pStream->name, pRaw, pStream,
pStream->checkpointId);
return pRow;
}
@ -274,6 +301,8 @@ static int32_t mndStreamActionUpdate(SSdb *pSdb, SStreamObj *pOldStream, SStream
pOldStream->status = pNewStream->status;
pOldStream->updateTime = pNewStream->updateTime;
pOldStream->checkpointId = pNewStream->checkpointId;
pOldStream->checkpointFreq = pNewStream->checkpointFreq;
taosWUnLockLatch(&pOldStream->lock);
return 0;
@ -308,6 +337,12 @@ static void mndShowStreamStatus(char *dst, SStreamObj *pStream) {
}
}
// Placeholder encoder registered for the SDB_STREAM_SEQ table: produces no raw record and always returns NULL.
SSdbRaw *mndStreamSeqActionEncode(SStreamObj *pStream) {
  (void)pStream;
  return NULL;
}
// Placeholder decoder registered for the SDB_STREAM_SEQ table: builds no row and always returns NULL.
SSdbRow *mndStreamSeqActionDecode(SSdbRaw *pRaw) {
  (void)pRaw;
  return NULL;
}
// Placeholder insert hook registered for the SDB_STREAM_SEQ table: does nothing and returns 0.
int32_t mndStreamSeqActionInsert(SSdb *pSdb, SStreamSeq *pStream) {
  (void)pSdb;
  (void)pStream;
  return 0;
}
// Placeholder delete hook registered for the SDB_STREAM_SEQ table: does nothing and returns 0.
int32_t mndStreamSeqActionDelete(SSdb *pSdb, SStreamSeq *pStream) {
  (void)pSdb;
  (void)pStream;
  return 0;
}
// Placeholder update hook registered for the SDB_STREAM_SEQ table: copies nothing from the new row and returns 0.
int32_t mndStreamSeqActionUpdate(SSdb *pSdb, SStreamSeq *pOldStream, SStreamSeq *pNewStream) {
  (void)pSdb;
  (void)pOldStream;
  (void)pNewStream;
  return 0;
}
static void mndShowStreamTrigger(char *dst, SStreamObj *pStream) {
int8_t trigger = pStream->conf.trigger;
if (trigger == STREAM_TRIGGER_AT_ONCE) {
@ -686,7 +721,7 @@ _OVER:
return -1;
}
static int32_t mndPersistTaskDropReq(SMnode* pMnode, STrans *pTrans, SStreamTask *pTask) {
static int32_t mndPersistTaskDropReq(SMnode *pMnode, STrans *pTrans, SStreamTask *pTask) {
SVDropStreamTaskReq *pReq = taosMemoryCalloc(1, sizeof(SVDropStreamTaskReq));
if (pReq == NULL) {
terrno = TSDB_CODE_OUT_OF_MEMORY;
@ -698,11 +733,11 @@ static int32_t mndPersistTaskDropReq(SMnode* pMnode, STrans *pTrans, SStreamTask
pReq->streamId = pTask->id.streamId;
STransAction action = {0};
SEpSet epset = {0};
if(pTask->info.nodeId == SNODE_HANDLE){
SSnodeObj* pObj = mndAcquireSnode(pMnode, pTask->info.nodeId);
SEpSet epset = {0};
if (pTask->info.nodeId == SNODE_HANDLE) {
SSnodeObj *pObj = mndAcquireSnode(pMnode, pTask->info.nodeId);
addEpIntoEpSet(&epset, pObj->pDnode->fqdn, pObj->pDnode->port);
}else{
} else {
SVgObj *pVgObj = mndAcquireVgroup(pMnode, pTask->info.nodeId);
epset = mndGetVgroupEpset(pMnode, pVgObj);
mndReleaseVgroup(pMnode, pVgObj);
@ -736,7 +771,7 @@ int32_t mndDropStreamTasks(SMnode *pMnode, STrans *pTrans, SStreamObj *pStream)
static int32_t checkForNumOfStreams(SMnode *pMnode, SStreamObj *pStreamObj) { // check for number of existed tasks
int32_t numOfStream = 0;
SStreamObj *pStream = NULL;
void *pIter = NULL;
void * pIter = NULL;
while ((pIter = sdbFetch(pMnode->pSdb, SDB_STREAM, pIter, (void **)&pStream)) != NULL) {
if (pStream->sourceDbUid == pStreamObj->sourceDbUid) {
@ -762,11 +797,11 @@ static int32_t checkForNumOfStreams(SMnode *pMnode, SStreamObj *pStreamObj) { /
}
static int32_t mndProcessCreateStreamReq(SRpcMsg *pReq) {
SMnode *pMnode = pReq->info.node;
SMnode * pMnode = pReq->info.node;
int32_t code = -1;
SStreamObj *pStream = NULL;
SStreamObj streamObj = {0};
char *sql = NULL;
char * sql = NULL;
int32_t sqlLen = 0;
SCMCreateStreamReq createStreamReq = {0};
@ -918,8 +953,11 @@ int64_t mndStreamGenChkpId(SMnode *pMnode) {
if (pIter == NULL) break;
maxChkpId = TMAX(maxChkpId, pStream->checkpointId);
mDebug("stream %p checkpoint %" PRId64 "", pStream, pStream->checkpointId);
sdbRelease(pSdb, pStream);
}
mDebug("generated checkpoint %" PRId64 "", maxChkpId + 1);
return maxChkpId + 1;
}
@ -939,6 +977,22 @@ static int32_t mndProcessStreamCheckpointTmr(SRpcMsg *pReq) {
return 0;
}
// If at least one stream exists in the sdb, posts a TDMT_MND_STREAM_CHECKPOINT_CANDIDITATE message
// with checkpointId = 0 to the mnode WRITE_QUEUE.
//
// pReq: incoming rpc message; only pReq->info.node (the mnode) is read.
// Returns 0 when there are no streams or once the message has been handed to tmsgPutToQueue
// (whose own result is not checked). Returns -1 with terrno = TSDB_CODE_OUT_OF_MEMORY when the
// message buffer cannot be allocated.
static int32_t mndProcessStreamRemainChkptTmr(SRpcMsg *pReq) {
  SMnode *pMnode = pReq->info.node;
  SSdb   *pSdb = pMnode->pSdb;
  if (sdbGetSize(pSdb, SDB_STREAM) <= 0) {
    return 0;
  }

  int32_t size = sizeof(SMStreamDoCheckpointMsg);

  SMStreamDoCheckpointMsg *pMsg = rpcMallocCont(size);
  if (pMsg == NULL) {
    // the buffer is written to right below, so an allocation failure must not fall through
    terrno = TSDB_CODE_OUT_OF_MEMORY;
    return -1;
  }
  pMsg->checkpointId = 0;

  SRpcMsg rpcMsg = {.msgType = TDMT_MND_STREAM_CHECKPOINT_CANDIDITATE, .pCont = pMsg, .contLen = size};
  tmsgPutToQueue(&pMnode->msgCb, WRITE_QUEUE, &rpcMsg);
  return 0;
}
static int32_t mndBuildStreamCheckpointSourceReq2(void **pBuf, int32_t *pLen, int32_t nodeId, int64_t checkpointId,
int64_t streamId, int32_t taskId) {
SStreamCheckpointSourceReq req = {0};
@ -981,107 +1035,104 @@ static int32_t mndBuildStreamCheckpointSourceReq2(void **pBuf, int32_t *pLen, in
return 0;
}
/**
 * Build and submit one checkpoint transaction for a single stream.
 *
 * The request is skipped with -1 when
 *   - the time since pStream->checkpointFreq is less than tsStreamCheckpointInterval * 1000
 *     (compared against a millisecond timestamp), or
 *   - streamTransConflictOtherTrans() reports a conflicting transaction on the source/target DB;
 *     in that case the stream is first queued through mndAddtoCheckpointWaitingList().
 *
 * Otherwise a TDMT_VND_STREAM_CHECK_POINT_SOURCE redo action is appended for every task of each
 * level whose first task is a source task, the checkpoint bookkeeping of pStream is updated under
 * pStream->lock, the stream is persisted into the transaction and the transaction is prepared.
 *
 * @param pMnode        mnode handle
 * @param pStream       stream to checkpoint; its lock is taken and released inside
 * @param checkpointId  id recorded in the stream and sent to the source tasks
 * @return 0 on success, non-zero otherwise
 */
static int32_t mndProcessStreamCheckpointTrans(SMnode *pMnode, SStreamObj *pStream, int64_t checkpointId) {
  int32_t code = -1;
  int64_t timestampMs = taosGetTimestampMs();
  if (timestampMs - pStream->checkpointFreq < tsStreamCheckpointInterval * 1000) {
    return -1;
  }

  bool conflict = streamTransConflictOtherTrans(pMnode, pStream->sourceDb, pStream->targetDb, true);
  if (conflict) {
    mndAddtoCheckpointWaitingList(pStream, checkpointId);
    mWarn("checkpoint conflict with other trans in %s, ignore the checkpoint for stream:%s %" PRIx64, pStream->sourceDb,
          pStream->name, pStream->uid);
    return -1;
  }

  STrans *pTrans = mndTransCreate(pMnode, TRN_POLICY_RETRY, TRN_CONFLICT_DB_INSIDE, NULL, MND_STREAM_CHECKPOINT_NAME);
  if (pTrans == NULL) {
    return -1;
  }

  mndStreamRegisterTrans(pTrans, MND_STREAM_CHECKPOINT_NAME, pStream->sourceDb, pStream->targetDb);

  mndTransSetDbName(pTrans, pStream->sourceDb, pStream->targetDb);
  // use the same conflict checker as the other transaction builders in this file
  if (mndTransCheckConflict(pMnode, pTrans) != 0) {
    mError("failed to checkpoint of stream name%s, checkpointId: %" PRId64 ", reason:%s", pStream->name, checkpointId,
           tstrerror(TSDB_CODE_MND_TRANS_CONFLICT));
    goto _ERR;
  }

  mDebug("start to trigger checkpoint for stream:%s, checkpoint: %" PRId64 "", pStream->name, checkpointId);

  taosWLockLatch(&pStream->lock);
  pStream->currentTick = 1;

  // 1. redo action: broadcast checkpoint source msg for all source vg
  int32_t totLevel = taosArrayGetSize(pStream->tasks);
  for (int32_t i = 0; i < totLevel; i++) {
    SArray      *pLevel = taosArrayGetP(pStream->tasks, i);
    SStreamTask *p = taosArrayGetP(pLevel, 0);

    if (p->info.taskLevel == TASK_LEVEL__SOURCE) {
      int32_t sz = taosArrayGetSize(pLevel);
      for (int32_t j = 0; j < sz; j++) {
        SStreamTask *pTask = taosArrayGetP(pLevel, j);
        SVgObj      *pVgObj = mndAcquireVgroup(pMnode, pTask->info.nodeId);
        if (pVgObj == NULL) {
          taosWUnLockLatch(&pStream->lock);
          goto _ERR;
        }

        void   *buf;
        int32_t tlen;
        if (mndBuildStreamCheckpointSourceReq2(&buf, &tlen, pTask->info.nodeId, checkpointId, pTask->id.streamId,
                                               pTask->id.taskId) < 0) {
          mndReleaseVgroup(pMnode, pVgObj);
          taosWUnLockLatch(&pStream->lock);
          goto _ERR;
        }

        STransAction act = {0};
        SEpSet       epset = mndGetVgroupEpset(pMnode, pVgObj);
        mndReleaseVgroup(pMnode, pVgObj);

        initTransAction(&act, buf, tlen, TDMT_VND_STREAM_CHECK_POINT_SOURCE, &epset, TSDB_CODE_SYN_PROPOSE_NOT_READY);
        if (mndTransAppendRedoAction(pTrans, &act) != 0) {
          // the request buffer was not taken over by the transaction, release it here
          taosMemoryFree(buf);
          taosWUnLockLatch(&pStream->lock);
          goto _ERR;
        }
      }
    }
  }

  // 2. reset tick
  pStream->checkpointId = checkpointId;
  pStream->checkpointFreq = taosGetTimestampMs();
  pStream->currentTick = 0;

  // 3. commit log: stream checkpoint info
  pStream->version = pStream->version + 1;
  taosWUnLockLatch(&pStream->lock);

  // NOTE(review): this path returns without reaching _ERR, so pTrans is not dropped here;
  // confirm that mndPersistTransLog() releases the transaction itself on failure.
  if ((code = mndPersistTransLog(pStream, pTrans)) != TSDB_CODE_SUCCESS) {
    return code;
  }

  if ((code = mndTransPrepare(pMnode, pTrans)) != TSDB_CODE_SUCCESS) {
    mError("failed to prepare trans rebalance since %s", terrstr());
    goto _ERR;
  }

  code = 0;

_ERR:
  mndTransDrop(pTrans);
  return code;
}
static int32_t mndAddStreamCheckpointToTrans(STrans *pTrans, SStreamObj *pStream, SMnode *pMnode, int64_t chkptId) {
taosWLockLatch(&pStream->lock);
@ -1157,23 +1208,7 @@ static int32_t mndAddStreamCheckpointToTrans(STrans *pTrans, SStreamObj *pStream
return 0;
}
/**
 * Return a copy (made with taosStrdup) of the source DB name of the first stream
 * yielded by the sdb iterator, or NULL when the sdb holds no stream.
 */
static const char *mndGetStreamDB(SMnode *pMnode) {
  SSdb       *pStore = pMnode->pSdb;
  SStreamObj *pFirst = NULL;

  void *pCursor = sdbFetch(pStore, SDB_STREAM, NULL, (void **)&pFirst);
  if (pCursor != NULL) {
    // copy the name while the stream object is still held
    const char *dbName = taosStrdup(pFirst->sourceDb);
    mndReleaseStream(pMnode, pFirst);
    sdbCancelFetch(pStore, pCursor);
    return dbName;
  }

  return NULL;
}
static int32_t initStreamNodeList(SMnode* pMnode) {
static int32_t initStreamNodeList(SMnode *pMnode) {
if (execInfo.pNodeList == NULL || (taosArrayGetSize(execInfo.pNodeList) == 0)) {
execInfo.pNodeList = taosArrayDestroy(execInfo.pNodeList);
execInfo.pNodeList = extractNodeListFromStream(pMnode);
@ -1182,11 +1217,11 @@ static int32_t initStreamNodeList(SMnode* pMnode) {
return taosArrayGetSize(execInfo.pNodeList);
}
static bool taskNodeIsUpdated(SMnode* pMnode) {
static bool taskNodeIsUpdated(SMnode *pMnode) {
// check if the node update happens or not
taosThreadMutexLock(&execInfo.lock);
int32_t numOfNodes = initStreamNodeList(pMnode);
int32_t numOfNodes = initStreamNodeList(pMnode);
if (numOfNodes == 0) {
mDebug("stream task node change checking done, no vgroups exist, do nothing");
execInfo.ts = taosGetTimestampSec();
@ -1226,6 +1261,38 @@ static bool taskNodeIsUpdated(SMnode* pMnode) {
return nodeUpdated;
}
/**
 * Decide whether a checkpoint may be issued right now.
 *
 * @return 0 when taskNodeIsUpdated() reports no node change and every task found in
 *         execInfo.pTaskMap is in TASK_STATUS__READY; -1 otherwise.
 */
static int32_t mndCheckNodeStatus(SMnode *pMnode) {
  bool    ready = true;
  int64_t ts = taosGetTimestampSec();
  if (taskNodeIsUpdated(pMnode)) {
    return -1;
  }

  taosThreadMutexLock(&execInfo.lock);
  if (taosArrayGetSize(execInfo.pNodeList) == 0) {
    mDebug("stream task node change checking done, no vgroups exist, do nothing");
    execInfo.ts = ts;
  }

  // the task list is not modified while execInfo.lock is held, so its size is read once
  int32_t numOfTasks = taosArrayGetSize(execInfo.pTaskList);
  for (int32_t i = 0; i < numOfTasks; ++i) {
    STaskId          *p = taosArrayGet(execInfo.pTaskList, i);
    STaskStatusEntry *pEntry = taosHashGet(execInfo.pTaskMap, p, sizeof(*p));
    if (pEntry == NULL) {
      continue;
    }

    if (pEntry->status != TASK_STATUS__READY) {
      // report the node the task actually runs on (a literal 0 was logged before)
      mDebug("s-task:0x%" PRIx64 "-0x%x (nodeId:%d) status:%s not ready, checkpoint msg not issued",
             pEntry->id.streamId, (int32_t)pEntry->id.taskId, pEntry->nodeId, streamTaskGetStatusStr(pEntry->status));
      ready = false;
      break;
    }
  }

  taosThreadMutexUnlock(&execInfo.lock);
  return ready ? 0 : -1;
}
static int32_t mndProcessStreamDoCheckpoint(SRpcMsg *pReq) {
SMnode * pMnode = pReq->info.node;
SSdb * pSdb = pMnode->pSdb;
@ -1233,90 +1300,65 @@ static int32_t mndProcessStreamDoCheckpoint(SRpcMsg *pReq) {
SStreamObj *pStream = NULL;
int32_t code = 0;
// check if the node update happens or not
bool updated = taskNodeIsUpdated(pMnode);
if (updated) {
mWarn("checkpoint ignore, stream task nodes update detected");
return -1;
}
{ // check if all tasks are in TASK_STATUS__READY status
bool ready = true;
taosThreadMutexLock(&execInfo.lock);
// no streams exists, abort
int32_t numOfTasks = taosArrayGetSize(execInfo.pTaskList);
if (numOfTasks <= 0) {
taosThreadMutexUnlock(&execInfo.lock);
return 0;
}
for (int32_t i = 0; i < taosArrayGetSize(execInfo.pTaskList); ++i) {
STaskId * p = taosArrayGet(execInfo.pTaskList, i);
STaskStatusEntry *pEntry = taosHashGet(execInfo.pTaskMap, p, sizeof(*p));
if (pEntry == NULL) {
continue;
}
if (pEntry->status != TASK_STATUS__READY) {
mDebug("s-task:0x%" PRIx64 "-0x%x (nodeId:%d) status:%s not ready, checkpoint msg not issued",
pEntry->id.streamId, (int32_t)pEntry->id.taskId, 0, streamTaskGetStatusStr(pEntry->status));
ready = false;
break;
}
}
taosThreadMutexUnlock(&execInfo.lock);
if (!ready) {
return 0;
}
if ((code = mndCheckNodeStatus(pMnode)) != 0) {
return code;
}
SMStreamDoCheckpointMsg *pMsg = (SMStreamDoCheckpointMsg *)pReq->pCont;
int64_t checkpointId = pMsg->checkpointId;
STrans *pTrans = mndTransCreate(pMnode, TRN_POLICY_RETRY, TRN_CONFLICT_DB_INSIDE, NULL, MND_STREAM_CHECKPOINT_NAME);
if (pTrans == NULL) {
mError("failed to trigger checkpoint, reason: %s", tstrerror(TSDB_CODE_OUT_OF_MEMORY));
return -1;
}
mDebug("start to trigger checkpoint, checkpointId: %" PRId64, checkpointId);
const char *pDb = mndGetStreamDB(pMnode);
mndTransSetDbName(pTrans, pDb, pDb);
mndStreamRegisterTrans(pTrans, MND_STREAM_CHECKPOINT_NAME, pDb, pDb);
taosMemoryFree((void *)pDb);
if (mndTransCheckConflict(pMnode, pTrans) != 0) {
mError("failed to trigger checkpoint, checkpointId: %" PRId64 ", reason:%s", checkpointId,
tstrerror(TSDB_CODE_MND_TRANS_CONFLICT));
mndTransDrop(pTrans);
return -1;
}
while (1) {
pIter = sdbFetch(pSdb, SDB_STREAM, pIter, (void **)&pStream);
if (pIter == NULL) break;
code = mndAddStreamCheckpointToTrans(pTrans, pStream, pMnode, checkpointId);
while ((pIter = sdbFetch(pSdb, SDB_STREAM, pIter, (void **)&pStream)) != NULL) {
code = mndProcessStreamCheckpointTrans(pMnode, pStream, pMsg->checkpointId);
sdbRelease(pSdb, pStream);
if (code == -1) {
break;
}
}
if (code == 0) {
if (mndTransPrepare(pMnode, pTrans) != 0) {
mError("failed to prepare trans rebalance since %s", terrstr());
return code;
}
/**
 * Retry the checkpoints parked in execInfo.transMgmt.pWaitingList.
 *
 * Nothing is done when the list is empty or mndCheckNodeStatus() fails. Otherwise a checkpoint
 * transaction is attempted for every entry whose stream can still be acquired by name; entries
 * whose transaction was created successfully are removed from the waiting list afterwards.
 *
 * @return 0 when the list was empty; the mndCheckNodeStatus() result when it is non-zero;
 *         -1 (terrno = TSDB_CODE_OUT_OF_MEMORY) when the scratch array cannot be allocated;
 *         otherwise the result of the last mndProcessStreamCheckpointTrans() call.
 */
static int32_t mndProcessStreamCheckpointInCandid(SRpcMsg *pReq) {
  SMnode *pMnode = pReq->info.node;
  void   *pIter = NULL;
  int32_t code = 0;

  taosThreadMutexLock(&execInfo.lock);
  int32_t num = taosHashGetSize(execInfo.transMgmt.pWaitingList);
  taosThreadMutexUnlock(&execInfo.lock);
  if (num == 0) {
    return code;
  }

  if ((code = mndCheckNodeStatus(pMnode)) != 0) {
    return code;
  }

  // stream ids whose checkpoint trans has been launched; removed from the waiting list after the scan
  SArray *pList = taosArrayInit(4, sizeof(int64_t));
  if (pList == NULL) {
    terrno = TSDB_CODE_OUT_OF_MEMORY;
    return -1;
  }

  while ((pIter = taosHashIterate(execInfo.transMgmt.pWaitingList, pIter)) != NULL) {
    SCheckpointCandEntry *pEntry = pIter;

    SStreamObj *ps = mndAcquireStream(pMnode, pEntry->pName);
    if (ps == NULL) {
      continue;
    }

    mDebug("start to launch checkpoint for stream:%s %" PRIx64 " in candidate list", pEntry->pName, pEntry->streamId);

    code = mndProcessStreamCheckpointTrans(pMnode, ps, pEntry->checkpointId);
    mndReleaseStream(pMnode, ps);

    if (code == TSDB_CODE_SUCCESS) {
      taosArrayPush(pList, &pEntry->streamId);
    }
  }

  for (int32_t i = 0; i < taosArrayGetSize(pList); ++i) {
    int64_t *pId = taosArrayGet(pList, i);
    taosHashRemove(execInfo.transMgmt.pWaitingList, pId, sizeof(*pId));
  }

  int32_t remain = taosHashGetSize(execInfo.transMgmt.pWaitingList);
  mDebug("%d in candidate list generated checkpoint, remaining:%d", (int32_t)taosArrayGetSize(pList), remain);
  taosArrayDestroy(pList);
  return code;
}
@ -1554,7 +1596,7 @@ static int32_t mndRetrieveStream(SRpcMsg *pReq, SShowObj *pShow, SSDataBlock *pB
char dstStr[20] = {0};
STR_TO_VARSTR(dstStr, sinkQuota)
pColInfo = taosArrayGet(pBlock->pDataBlock, cols++);
colDataSetVal(pColInfo, numOfRows, (const char*) dstStr, false);
colDataSetVal(pColInfo, numOfRows, (const char *)dstStr, false);
char scanHistoryIdle[20 + VARSTR_HEADER_SIZE] = {0};
strcpy(scanHistoryIdle, "100a");
@ -1562,7 +1604,7 @@ static int32_t mndRetrieveStream(SRpcMsg *pReq, SShowObj *pShow, SSDataBlock *pB
memset(dstStr, 0, tListLen(dstStr));
STR_TO_VARSTR(dstStr, scanHistoryIdle)
pColInfo = taosArrayGet(pBlock->pDataBlock, cols++);
colDataSetVal(pColInfo, numOfRows, (const char*) dstStr, false);
colDataSetVal(pColInfo, numOfRows, (const char *)dstStr, false);
numOfRows++;
sdbRelease(pSdb, pStream);
@ -1577,7 +1619,7 @@ static void mndCancelGetNextStream(SMnode *pMnode, void *pIter) {
sdbCancelFetch(pSdb, pIter);
}
static void setTaskAttrInResBlock(SStreamObj* pStream, SStreamTask* pTask, SSDataBlock* pBlock, int32_t numOfRows) {
static void setTaskAttrInResBlock(SStreamObj *pStream, SStreamTask *pTask, SSDataBlock *pBlock, int32_t numOfRows) {
SColumnInfoData *pColInfo;
int32_t cols = 0;
@ -1631,7 +1673,7 @@ static void setTaskAttrInResBlock(SStreamObj* pStream, SStreamTask* pTask, SSDat
colDataSetVal(pColInfo, numOfRows, (const char *)level, false);
// status
char status[20 + VARSTR_HEADER_SIZE] = {0};
char status[20 + VARSTR_HEADER_SIZE] = {0};
STaskId id = {.streamId = pTask->id.streamId, .taskId = pTask->id.taskId};
STaskStatusEntry *pe = taosHashGet(execInfo.pTaskMap, &id, sizeof(id));
@ -1682,7 +1724,7 @@ static void setTaskAttrInResBlock(SStreamObj* pStream, SStreamTask* pTask, SSDat
colDataSetVal(pColInfo, numOfRows, (const char *)vbuf, false);
}
static int32_t getNumOfTasks(SArray* pTaskList) {
static int32_t getNumOfTasks(SArray *pTaskList) {
int32_t numOfLevels = taosArrayGetSize(pTaskList);
int32_t count = 0;
@ -1740,7 +1782,7 @@ static void mndCancelGetNextStreamTask(SMnode *pMnode, void *pIter) {
sdbCancelFetch(pSdb, pIter);
}
static int32_t mndPauseStreamTask(SMnode* pMnode, STrans *pTrans, SStreamTask *pTask) {
static int32_t mndPauseStreamTask(SMnode *pMnode, STrans *pTrans, SStreamTask *pTask) {
SVPauseStreamTaskReq *pReq = taosMemoryCalloc(1, sizeof(SVPauseStreamTaskReq));
if (pReq == NULL) {
mError("failed to malloc in pause stream, size:%" PRIzu ", code:%s", sizeof(SVPauseStreamTaskReq),
@ -1766,7 +1808,7 @@ static int32_t mndPauseStreamTask(SMnode* pMnode, STrans *pTrans, SStreamTask *p
return 0;
}
int32_t mndPauseAllStreamTasks(SMnode* pMnode, STrans *pTrans, SStreamObj *pStream) {
int32_t mndPauseAllStreamTasks(SMnode *pMnode, STrans *pTrans, SStreamObj *pStream) {
SArray *tasks = pStream->tasks;
int32_t size = taosArrayGetSize(tasks);
@ -1894,7 +1936,7 @@ static int32_t mndProcessPauseStreamReq(SRpcMsg *pReq) {
return TSDB_CODE_ACTION_IN_PROGRESS;
}
static int32_t mndResumeStreamTask(STrans *pTrans, SMnode* pMnode, SStreamTask *pTask, int8_t igUntreated) {
static int32_t mndResumeStreamTask(STrans *pTrans, SMnode *pMnode, SStreamTask *pTask, int8_t igUntreated) {
SVResumeStreamTaskReq *pReq = taosMemoryCalloc(1, sizeof(SVResumeStreamTaskReq));
if (pReq == NULL) {
terrno = TSDB_CODE_OUT_OF_MEMORY;
@ -1918,7 +1960,7 @@ static int32_t mndResumeStreamTask(STrans *pTrans, SMnode* pMnode, SStreamTask *
return 0;
}
int32_t mndResumeAllStreamTasks(STrans *pTrans, SMnode* pMnode, SStreamObj *pStream, int8_t igUntreated) {
int32_t mndResumeAllStreamTasks(STrans *pTrans, SMnode *pMnode, SStreamObj *pStream, int8_t igUntreated) {
int32_t size = taosArrayGetSize(pStream->tasks);
for (int32_t i = 0; i < size; i++) {
SArray *pTasks = taosArrayGetP(pStream->tasks, i);
@ -2155,8 +2197,8 @@ static bool isNodeEpsetChanged(const SEpSet *pPrevEpset, const SEpSet *pCurrent)
// 1. increasing the replica does not affect the stream process.
// 2. decreasing the replica may affect the stream task execution, because one or more running stream tasks may
// reside on the replica that is about to be removed.
// 3. vgroup redistribution is a combination of first increasing the replica and then decreasing it, so it is
// handled as described in items 1 & 2.
static SVgroupChangeInfo mndFindChangedNodeInfo(SMnode *pMnode, const SArray *pPrevNodeList, const SArray *pNodeList) {
SVgroupChangeInfo info = {
.pUpdateNodeList = taosArrayInit(4, sizeof(SNodeUpdateInfo)),
@ -2184,10 +2226,8 @@ static SVgroupChangeInfo mndFindChangedNodeInfo(SMnode *pMnode, const SArray *pP
epsetAssign(&updateInfo.prevEp, &pPrevEntry->epset);
epsetAssign(&updateInfo.newEp, &pCurrent->epset);
taosArrayPush(info.pUpdateNodeList, &updateInfo);
}
if(pCurrent->nodeId != SNODE_HANDLE){
if (pCurrent->nodeId != SNODE_HANDLE) {
SVgObj *pVgroup = mndAcquireVgroup(pMnode, pCurrent->nodeId);
taosHashPut(info.pDBMap, pVgroup->dbName, strlen(pVgroup->dbName), NULL, 0);
mndReleaseVgroup(pMnode, pVgroup);
@ -2400,7 +2440,6 @@ static int32_t doRemoveTasks(SStreamExecInfo *pExecNode, STaskId *pRemovedId) {
if (p == NULL) {
return TSDB_CODE_SUCCESS;
}
taosHashRemove(pExecNode->pTaskMap, pRemovedId, sizeof(*pRemovedId));
for (int32_t k = 0; k < taosArrayGetSize(pExecNode->pTaskList); ++k) {
@ -2438,7 +2477,7 @@ int32_t removeExpirednodeEntryAndTask(SArray *pNodeSnapshot) {
STaskId * pId = taosArrayGet(execInfo.pTaskList, i);
STaskStatusEntry *pEntry = taosHashGet(execInfo.pTaskMap, pId, sizeof(*pId));
if(pEntry->nodeId == SNODE_HANDLE) continue;
if (pEntry->nodeId == SNODE_HANDLE) continue;
bool existed = taskNodeExists(pNodeSnapshot, pEntry->nodeId);
if (!existed) {
@ -2455,9 +2494,9 @@ int32_t removeExpirednodeEntryAndTask(SArray *pNodeSnapshot) {
(int32_t)taosArrayGetSize(execInfo.pTaskList));
int32_t size = taosArrayGetSize(pNodeSnapshot);
SArray* pValidNodeEntryList = taosArrayInit(4, sizeof(SNodeEntry));
for(int32_t i = 0; i < taosArrayGetSize(execInfo.pNodeList); ++i) {
SNodeEntry* p = taosArrayGet(execInfo.pNodeList, i);
SArray *pValidNodeEntryList = taosArrayInit(4, sizeof(SNodeEntry));
for (int32_t i = 0; i < taosArrayGetSize(execInfo.pNodeList); ++i) {
SNodeEntry *p = taosArrayGet(execInfo.pNodeList, i);
for (int32_t j = 0; j < size; ++j) {
SNodeEntry *pEntry = taosArrayGet(pNodeSnapshot, j);
@ -2476,13 +2515,14 @@ int32_t removeExpirednodeEntryAndTask(SArray *pNodeSnapshot) {
return 0;
}
/**
 * Kill the active checkpoint transaction of every DB recorded in pChangeInfo->pDBMap.
 * The DB name is the hash key of each entry, so it is recovered with taosHashGetKey().
 */
static void killAllCheckpointTrans(SMnode *pMnode, SVgroupChangeInfo *pChangeInfo) {
  void *pIter = NULL;

  // kill all trans in the dst DB
  while ((pIter = taosHashIterate(pChangeInfo->pDBMap, pIter)) != NULL) {
    char *pDb = (char *)pIter;

    size_t len = 0;
    void  *pKey = taosHashGetKey(pDb, &len);
    killActiveCheckpointTrans(pMnode, pKey, len);
  }
}
@ -2631,7 +2671,7 @@ void removeStreamTasksInBuf(SStreamObj *pStream, SStreamExecInfo *pExecNode) {
ASSERT(taosHashGetSize(pExecNode->pTaskMap) == taosArrayGetSize(pExecNode->pTaskList));
}
STrans *doCreateTrans(SMnode *pMnode, SStreamObj *pStream, SRpcMsg *pReq, const char *name, const char* pMsg) {
STrans *doCreateTrans(SMnode *pMnode, SStreamObj *pStream, SRpcMsg *pReq, const char *name, const char *pMsg) {
STrans *pTrans = mndTransCreate(pMnode, TRN_POLICY_RETRY, TRN_CONFLICT_DB_INSIDE, pReq, name);
if (pTrans == NULL) {
mError("failed to build trans:%s, reason: %s", name, tstrerror(TSDB_CODE_OUT_OF_MEMORY));
@ -2639,7 +2679,7 @@ STrans *doCreateTrans(SMnode *pMnode, SStreamObj *pStream, SRpcMsg *pReq, const
return NULL;
}
mDebug("s-task:0x%"PRIx64" start to build trans %s", pStream->uid, pMsg);
mDebug("s-task:0x%" PRIx64 " start to build trans %s", pStream->uid, pMsg);
mndTransSetDbName(pTrans, pStream->sourceDb, pStream->targetDb);
if (mndTransCheckConflict(pMnode, pTrans) != 0) {
@ -2675,6 +2715,7 @@ int32_t createStreamResetStatusTrans(SMnode *pMnode, SStreamObj *pStream) {
terrno = TSDB_CODE_OUT_OF_MEMORY;
mError("failed to malloc in reset stream, size:%" PRIzu ", code:%s", sizeof(SVResetStreamTaskReq),
tstrerror(TSDB_CODE_OUT_OF_MEMORY));
taosWUnLockLatch(&pStream->lock);
return terrno;
}
@ -2718,9 +2759,9 @@ int32_t createStreamResetStatusTrans(SMnode *pMnode, SStreamObj *pStream) {
return TSDB_CODE_ACTION_IN_PROGRESS;
}
int32_t killActiveCheckpointTrans(SMnode *pMnode, const char* pDBName, size_t len) {
int32_t killActiveCheckpointTrans(SMnode *pMnode, const char *pDBName, size_t len) {
// data in the hash table will be removed automatically, no need to remove it here.
SStreamTransInfo* pTransInfo = taosHashGet(execInfo.transMgmt.pDBTrans, pDBName, len);
SStreamTransInfo *pTransInfo = taosHashGet(execInfo.transMgmt.pDBTrans, pDBName, len);
if (pTransInfo == NULL) {
return TSDB_CODE_SUCCESS;
}
@ -2731,7 +2772,7 @@ int32_t killActiveCheckpointTrans(SMnode *pMnode, const char* pDBName, size_t le
return TSDB_CODE_SUCCESS;
}
STrans* pTrans = mndAcquireTrans(pMnode, pTransInfo->transId);
STrans *pTrans = mndAcquireTrans(pMnode, pTransInfo->transId);
if (pTrans != NULL) {
mInfo("kill checkpoint transId:%d in Db:%s", pTransInfo->transId, pDBName);
mndKillTrans(pMnode, pTrans);
@ -2742,7 +2783,7 @@ int32_t killActiveCheckpointTrans(SMnode *pMnode, const char* pDBName, size_t le
}
static int32_t mndResetStatusFromCheckpoint(SMnode *pMnode, int32_t transId) {
STrans* pTrans = mndAcquireTrans(pMnode, transId);
STrans *pTrans = mndAcquireTrans(pMnode, transId);
if (pTrans != NULL) {
mInfo("kill checkpoint transId:%d to reset task status", transId);
mndKillTrans(pMnode, pTrans);
@ -2761,8 +2802,8 @@ static int32_t mndResetStatusFromCheckpoint(SMnode *pMnode, int32_t transId) {
bool conflict = streamTransConflictOtherTrans(pMnode, pStream->sourceDb, pStream->targetDb, false);
if (conflict) {
mError("stream:%s other trans exists in DB:%s & %s failed to start reset-status trans",
pStream->name, pStream->sourceDb, pStream->targetDb);
mError("stream:%s other trans exists in DB:%s & %s failed to start reset-status trans", pStream->name,
pStream->sourceDb, pStream->targetDb);
continue;
}
@ -2773,11 +2814,10 @@ static int32_t mndResetStatusFromCheckpoint(SMnode *pMnode, int32_t transId) {
return code;
}
}
return 0;
}
static SStreamTask* mndGetStreamTask(STaskId* pId, SStreamObj* pStream) {
static SStreamTask *mndGetStreamTask(STaskId *pId, SStreamObj *pStream) {
for (int32_t i = 0; i < taosArrayGetSize(pStream->tasks); i++) {
SArray *pLevel = taosArrayGetP(pStream->tasks, i);
@ -2832,7 +2872,7 @@ static bool needDropRelatedFillhistoryTask(STaskStatusEntry *pTaskEntry, SStream
static int32_t mndDropRelatedFillhistoryTask(SMnode *pMnode, STaskStatusEntry *pTaskEntry, SStreamObj *pStream) {
SStreamTask *pTask = mndGetStreamTask(&pTaskEntry->id, pStream);
if (pTask == NULL) {
mError("failed to get the stream task:0x%x, may have been dropped", (int32_t) pTaskEntry->id.taskId);
mError("failed to get the stream task:0x%x, may have been dropped", (int32_t)pTaskEntry->id.taskId);
return -1;
}
@ -2867,12 +2907,12 @@ int32_t setNodeEpsetExpiredFlag(const SArray *pNodeList) {
mInfo("set node expired for %d nodes", num);
for (int k = 0; k < num; ++k) {
int32_t* pVgId = taosArrayGet(pNodeList, k);
int32_t *pVgId = taosArrayGet(pNodeList, k);
mInfo("set node expired for nodeId:%d, total:%d", *pVgId, num);
int32_t numOfNodes = taosArrayGetSize(execInfo.pNodeList);
for (int i = 0; i < numOfNodes; ++i) {
SNodeEntry* pNodeEntry = taosArrayGet(execInfo.pNodeList, i);
SNodeEntry *pNodeEntry = taosArrayGet(execInfo.pNodeList, i);
if (pNodeEntry->nodeId == *pVgId) {
mInfo("vgId:%d expired for some stream tasks, needs update nodeEp", *pVgId);
@ -2885,13 +2925,13 @@ int32_t setNodeEpsetExpiredFlag(const SArray *pNodeList) {
return TSDB_CODE_SUCCESS;
}
static void updateStageInfo(STaskStatusEntry* pTaskEntry, int64_t stage) {
static void updateStageInfo(STaskStatusEntry *pTaskEntry, int64_t stage) {
int32_t numOfNodes = taosArrayGetSize(execInfo.pNodeList);
for(int32_t j = 0; j < numOfNodes; ++j) {
SNodeEntry* pNodeEntry = taosArrayGet(execInfo.pNodeList, j);
for (int32_t j = 0; j < numOfNodes; ++j) {
SNodeEntry *pNodeEntry = taosArrayGet(execInfo.pNodeList, j);
if (pNodeEntry->nodeId == pTaskEntry->nodeId) {
mInfo("vgId:%d stage updated from %"PRId64 " to %"PRId64 ", nodeUpdate trigger by s-task:0x%" PRIx64, pTaskEntry->nodeId,
pTaskEntry->stage, stage, pTaskEntry->id.taskId);
mInfo("vgId:%d stage updated from %" PRId64 " to %" PRId64 ", nodeUpdate trigger by s-task:0x%" PRIx64,
pTaskEntry->nodeId, pTaskEntry->stage, stage, pTaskEntry->id.taskId);
pNodeEntry->stageUpdated = true;
pTaskEntry->stage = stage;
@ -2947,9 +2987,7 @@ int32_t mndProcessStreamHb(SRpcMsg *pReq) {
if (pTaskEntry->stage != p->stage && pTaskEntry->stage != -1) {
updateStageInfo(pTaskEntry, p->stage);
if(pTaskEntry->nodeId == SNODE_HANDLE) {
snodeChanged = true;
}
if (pTaskEntry->nodeId == SNODE_HANDLE) snodeChanged = true;
} else {
// task is idle for more than 50 sec.
if (fabs(pTaskEntry->inputQUsed - p->inputQUsed) <= DBL_EPSILON) {
@ -2989,7 +3027,7 @@ int32_t mndProcessStreamHb(SRpcMsg *pReq) {
if (p->status == TASK_STATUS__STREAM_SCAN_HISTORY) {
bool drop = needDropRelatedFillhistoryTask(pTaskEntry, &execInfo);
if(drop) {
if (drop) {
SStreamObj *pStreamObj = mndGetStreamObj(pMnode, pTaskEntry->id.streamId);
if (pStreamObj == NULL) {
mError("failed to acquire the streamObj:0x%" PRIx64 " it may have been dropped", pStreamObj->uid);
@ -3026,9 +3064,13 @@ int32_t mndProcessStreamHb(SRpcMsg *pReq) {
return TSDB_CODE_SUCCESS;
}
void freeCheckpointCandEntry(void *param) {
SCheckpointCandEntry *pEntry = param;
taosMemoryFreeClear(pEntry->pName);
}
SStreamObj *mndGetStreamObj(SMnode *pMnode, int64_t streamId) {
void *pIter = NULL;
SSdb *pSdb = pMnode->pSdb;
void * pIter = NULL;
SSdb * pSdb = pMnode->pSdb;
SStreamObj *pStream = NULL;
while ((pIter = sdbFetch(pSdb, SDB_STREAM, pIter, (void **)&pStream)) != NULL) {
@ -3039,4 +3081,4 @@ SStreamObj *mndGetStreamObj(SMnode *pMnode, int64_t streamId) {
}
return NULL;
}
}

View File

@ -13,8 +13,8 @@
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include "mndTrans.h"
#include "mndStream.h"
#include "mndTrans.h"
typedef struct SKeyInfo {
void* pKey;
@ -110,5 +110,24 @@ bool streamTransConflictOtherTrans(SMnode* pMnode, const char* pSrcDb, const cha
return false;
}
/**
 * Park a checkpoint request in execInfo.transMgmt.pWaitingList, keyed by the stream uid.
 * A stream that already has an entry is left untouched.
 *
 * @param pStream       stream whose checkpoint is postponed; its uid and name are copied
 * @param checkpointId  checkpoint id to use when the request is retried
 * @return TSDB_CODE_SUCCESS when the entry exists or was added,
 *         TSDB_CODE_OUT_OF_MEMORY when the name copy or the hash insertion fails.
 */
int32_t mndAddtoCheckpointWaitingList(SStreamObj* pStream, int64_t checkpointId) {
  SCheckpointCandEntry* pEntry = taosHashGet(execInfo.transMgmt.pWaitingList, &pStream->uid, sizeof(pStream->uid));
  if (pEntry != NULL) {
    mDebug("stream:%" PRIx64 " ts:%" PRId64 ", checkpointId:%" PRId64 " already in waiting list, no need to add into",
           pStream->uid, pEntry->checkpointTs, checkpointId);
    return TSDB_CODE_SUCCESS;
  }

  SCheckpointCandEntry entry = {.streamId = pStream->uid,
                                .checkpointTs = taosGetTimestampMs(),
                                .checkpointId = checkpointId,
                                .pName = taosStrdup(pStream->name)};
  if (entry.pName == NULL) {
    // an entry without a name could not be resolved back to its stream later
    terrno = TSDB_CODE_OUT_OF_MEMORY;
    return TSDB_CODE_OUT_OF_MEMORY;
  }

  if (taosHashPut(execInfo.transMgmt.pWaitingList, &pStream->uid, sizeof(pStream->uid), &entry, sizeof(entry)) != 0) {
    // the table did not take a copy of the entry, so the duplicated name is still owned here
    taosMemoryFree(entry.pName);
    terrno = TSDB_CODE_OUT_OF_MEMORY;
    return TSDB_CODE_OUT_OF_MEMORY;
  }

  int32_t size = taosHashGetSize(execInfo.transMgmt.pWaitingList);
  mDebug("stream:%" PRIx64 " add into waiting list due to conflict, ts:%" PRId64 " , checkpointId: %" PRId64
         ", total in waitingList:%d",
         pStream->uid, entry.checkpointTs, checkpointId, size);

  return TSDB_CODE_SUCCESS;
}

View File

@ -149,7 +149,8 @@ typedef enum {
SDB_FUNC = 20,
SDB_IDX = 21,
SDB_VIEW = 22,
SDB_MAX = 23
SDB_STREAM_SEQ = 23,
SDB_MAX = 24
} ESdbType;
typedef struct SSdbRaw {
@ -169,11 +170,11 @@ typedef struct SSdbRow {
} SSdbRow;
typedef struct SSdb {
SMnode *pMnode;
SWal *pWal;
SMnode * pMnode;
SWal * pWal;
int64_t sync;
char *currDir;
char *tmpDir;
char * currDir;
char * tmpDir;
int64_t commitIndex;
int64_t commitTerm;
int64_t commitConfig;
@ -183,7 +184,7 @@ typedef struct SSdb {
int64_t tableVer[SDB_MAX];
int64_t maxId[SDB_MAX];
EKeyType keyTypes[SDB_MAX];
SHashObj *hashObjs[SDB_MAX];
SHashObj * hashObjs[SDB_MAX];
TdThreadRwlock locks[SDB_MAX];
SdbInsertFp insertFps[SDB_MAX];
SdbUpdateFp updateFps[SDB_MAX];
@ -198,25 +199,25 @@ typedef struct SSdb {
// Iterator state for reading an sdb file: file handle, a running total and a name string.
// NOTE(review): the exact meaning of `total` and the ownership of `name` are not visible here.
typedef struct SSdbIter {
  TdFilePtr file;
  int64_t   total;
  char     *name;
} SSdbIter;
// Description of one sdb table: its type, its key type and the per-table callbacks
// (deploy / encode / decode / insert / update / delete / validate).
typedef struct {
  ESdbType      sdbType;
  EKeyType      keyType;
  SdbDeployFp   deployFp;
  SdbEncodeFp   encodeFp;
  SdbDecodeFp   decodeFp;
  SdbInsertFp   insertFp;
  SdbUpdateFp   updateFp;
  SdbDeleteFp   deleteFp;
  SdbValidateFp validateFp;
} SSdbTable;
// Options bundle for an sdb instance: a path, the owning mnode, a WAL handle and a sync id.
// NOTE(review): presumably consumed when the sdb is opened; confirm against the caller.
typedef struct SSdbOpt {
  const char *path;
  SMnode     *pMnode;
  SWal       *pWal;
  int64_t     sync;
} SSdbOpt;
@ -393,7 +394,7 @@ int32_t sdbGetRawSoftVer(SSdbRaw *pRaw, int8_t *sver);
int32_t sdbGetRawTotalSize(SSdbRaw *pRaw);
SSdbRow *sdbAllocRow(int32_t objSize);
void *sdbGetRowObj(SSdbRow *pRow);
void * sdbGetRowObj(SSdbRow *pRow);
void sdbFreeRow(SSdb *pSdb, SSdbRow *pRow, bool callFunc);
int32_t sdbStartRead(SSdb *pSdb, SSdbIter **ppIter, int64_t *index, int64_t *term, int64_t *config);

View File

@ -13,31 +13,31 @@
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include "rsync.h"
#include "executor.h"
#include "rsync.h"
#include "sndInt.h"
#include "tqCommon.h"
#include "tuuid.h"
// Snode logging helpers: each forwards to taosPrintLog() only when the matching
// level bit is set in sndDebugFlag. Wrapped in do { } while (0) so they behave as one statement.
#define sndError(...)                                                     \
  do {                                                                    \
    if (sndDebugFlag & DEBUG_ERROR) {                                     \
      taosPrintLog("SND ERROR ", DEBUG_ERROR, sndDebugFlag, __VA_ARGS__); \
    }                                                                     \
  } while (0)

#define sndInfo(...)                                                    \
  do {                                                                  \
    if (sndDebugFlag & DEBUG_INFO) {                                    \
      taosPrintLog("SND INFO ", DEBUG_INFO, sndDebugFlag, __VA_ARGS__); \
    }                                                                   \
  } while (0)

#define sndDebug(...)                                               \
  do {                                                              \
    if (sndDebugFlag & DEBUG_DEBUG) {                               \
      taosPrintLog("SND ", DEBUG_DEBUG, sndDebugFlag, __VA_ARGS__); \
    }                                                               \
  } while (0)
int32_t sndExpandTask(SSnode *pSnode, SStreamTask *pTask, int64_t nextProcessVer) {
@ -46,10 +46,11 @@ int32_t sndExpandTask(SSnode *pSnode, SStreamTask *pTask, int64_t nextProcessVer
if (code != TSDB_CODE_SUCCESS) {
return code;
}
pTask->pBackend = NULL;
streamTaskOpenAllUpstreamInput(pTask);
SStreamTask* pSateTask = pTask;
SStreamTask *pSateTask = pTask;
SStreamTask task = {0};
if (pTask->info.fillHistory) {
task.id.streamId = pTask->streamTaskId.streamId;
@ -84,7 +85,7 @@ int32_t sndExpandTask(SSnode *pSnode, SStreamTask *pTask, int64_t nextProcessVer
streamTaskResetUpstreamStageInfo(pTask);
streamSetupScheduleTrigger(pTask);
SCheckpointInfo* pChkInfo = &pTask->chkInfo;
SCheckpointInfo *pChkInfo = &pTask->chkInfo;
// checkpoint ver is the kept version, handled data should be the next version.
if (pTask->chkInfo.checkpointId != 0) {
pTask->chkInfo.nextProcessVer = pTask->chkInfo.checkpointVer + 1;
@ -92,7 +93,7 @@ int32_t sndExpandTask(SSnode *pSnode, SStreamTask *pTask, int64_t nextProcessVer
pChkInfo->checkpointId, pChkInfo->checkpointVer, pChkInfo->nextProcessVer);
}
char* p = NULL;
char *p = NULL;
streamTaskGetStatus(pTask, &p);
if (pTask->info.fillHistory) {
@ -194,7 +195,7 @@ int32_t sndProcessStreamMsg(SSnode *pSnode, SRpcMsg *pMsg) {
int32_t sndProcessWriteMsg(SSnode *pSnode, SRpcMsg *pMsg, SRpcMsg *pRsp) {
switch (pMsg->msgType) {
case TDMT_STREAM_TASK_DEPLOY: {
void *pReq = POINTER_SHIFT(pMsg->pCont, sizeof(SMsgHead));
void * pReq = POINTER_SHIFT(pMsg->pCont, sizeof(SMsgHead));
int32_t len = pMsg->contLen - sizeof(SMsgHead);
return tqStreamTaskProcessDeployReq(pSnode->pMeta, -1, pReq, len, true, true);
}

View File

@ -61,7 +61,7 @@ struct SRSmaQTaskInfoItem {
int32_t len;
int8_t type;
int64_t suid;
void *qTaskInfo;
void * qTaskInfo;
};
static void tdRSmaQTaskInfoFree(qTaskInfo_t *taskHandle, int32_t vgId, int32_t level) {
@ -185,7 +185,7 @@ int32_t tdUpdateTbUidList(SSma *pSma, STbUidStore *pStore, bool isAdd) {
void *pIter = NULL;
while ((pIter = taosHashIterate(pStore->uidHash, pIter))) {
tb_uid_t *pTbSuid = (tb_uid_t *)taosHashGetKey(pIter, NULL);
SArray *pTbUids = *(SArray **)pIter;
SArray * pTbUids = *(SArray **)pIter;
if (tdUpdateTbUidListImpl(pSma, pTbSuid, pTbUids, isAdd) != TSDB_CODE_SUCCESS) {
taosHashCancelIterate(pStore->uidHash, pIter);
@ -213,7 +213,7 @@ int32_t tdFetchTbUidList(SSma *pSma, STbUidStore **ppStore, tb_uid_t suid, tb_ui
}
SRSmaStat *pStat = (SRSmaStat *)SMA_ENV_STAT(pEnv);
SHashObj *infoHash = NULL;
SHashObj * infoHash = NULL;
if (!pStat || !(infoHash = RSMA_INFO_HASH(pStat))) {
terrno = TSDB_CODE_RSMA_INVALID_STAT;
return TSDB_CODE_FAILED;
@ -264,11 +264,11 @@ static int32_t tdSetRSmaInfoItemParams(SSma *pSma, SRSmaParam *param, SRSmaStat
int8_t idx) {
if ((param->qmsgLen > 0) && param->qmsg[idx]) {
SRSmaInfoItem *pItem = &(pRSmaInfo->items[idx]);
SRetention *pRetention = SMA_RETENTION(pSma);
STsdbCfg *pTsdbCfg = SMA_TSDB_CFG(pSma);
SVnode *pVnode = pSma->pVnode;
SRetention * pRetention = SMA_RETENTION(pSma);
STsdbCfg * pTsdbCfg = SMA_TSDB_CFG(pSma);
SVnode * pVnode = pSma->pVnode;
char taskInfDir[TSDB_FILENAME_LEN] = {0};
void *pStreamState = NULL;
void * pStreamState = NULL;
// set the backend of stream state
tdRSmaQTaskInfoGetFullPath(pVnode, pRSmaInfo->suid, idx + 1, pVnode->pTfs, taskInfDir);
@ -297,6 +297,8 @@ static int32_t tdSetRSmaInfoItemParams(SSma *pSma, SRSmaParam *param, SRSmaStat
sprintf(pStreamTask->exec.qmsg, "%s", RSMA_EXEC_TASK_FLAG);
pStreamTask->chkInfo.checkpointId = streamMetaGetLatestCheckpointId(pStreamTask->pMeta);
tdRSmaTaskInit(pStreamTask->pMeta, pItem, &pStreamTask->id);
pStreamTask->status.pSM = streamCreateStateMachine(pStreamTask);
pStreamState = streamStateOpen(taskInfDir, pStreamTask, true, -1, -1);
if (!pStreamState) {
terrno = TSDB_CODE_RSMA_STREAM_STATE_OPEN;
@ -372,7 +374,7 @@ int32_t tdRSmaProcessCreateImpl(SSma *pSma, SRSmaParam *param, int64_t suid, con
}
#endif
SSmaEnv *pEnv = SMA_RSMA_ENV(pSma);
SSmaEnv * pEnv = SMA_RSMA_ENV(pSma);
SRSmaStat *pStat = (SRSmaStat *)SMA_ENV_STAT(pEnv);
SRSmaInfo *pRSmaInfo = NULL;
@ -651,9 +653,7 @@ static int32_t tdRSmaProcessDelReq(SSma *pSma, int64_t suid, int8_t level, SBatc
((SMsgHead *)pBuf)->vgId = TD_VID(pSma->pVnode);
SRpcMsg delMsg = {.msgType = TDMT_VND_BATCH_DEL,
.pCont = pBuf,
.contLen = len + sizeof(SMsgHead)};
SRpcMsg delMsg = {.msgType = TDMT_VND_BATCH_DEL, .pCont = pBuf, .contLen = len + sizeof(SMsgHead)};
code = tmsgPutToQueue(&pSma->pVnode->msgCb, WRITE_QUEUE, &delMsg);
TSDB_CHECK_CODE(code, lino, _exit);
}
@ -673,8 +673,8 @@ static int32_t tdRSmaExecAndSubmitResult(SSma *pSma, qTaskInfo_t taskInfo, SRSma
int32_t code = 0;
int32_t lino = 0;
SSDataBlock *output = NULL;
SArray *pResList = pItem->pResList;
STSchema *pTSchema = pInfo->pTSchema;
SArray * pResList = pItem->pResList;
STSchema * pTSchema = pInfo->pTSchema;
int64_t suid = pInfo->suid;
while (1) {
@ -733,7 +733,7 @@ static int32_t tdRSmaExecAndSubmitResult(SSma *pSma, qTaskInfo_t taskInfo, SRSma
}
}
STsdb *sinkTsdb = (pItem->level == TSDB_RETENTION_L1 ? pSma->pRSmaTsdb[0] : pSma->pRSmaTsdb[1]);
STsdb * sinkTsdb = (pItem->level == TSDB_RETENTION_L1 ? pSma->pRSmaTsdb[0] : pSma->pRSmaTsdb[1]);
SSubmitReq2 *pReq = NULL;
if (buildSubmitReqFromDataBlock(&pReq, output, pTSchema, output->info.id.groupId, SMA_VID(pSma), suid) < 0) {
@ -795,7 +795,7 @@ _exit:
static int32_t tdExecuteRSmaImplAsync(SSma *pSma, int64_t version, const void *pMsg, int32_t len, int32_t inputType,
SRSmaInfo *pInfo, tb_uid_t suid) {
int32_t size = RSMA_EXEC_MSG_HLEN + len; // header + payload
void *qItem = taosAllocateQitem(size, DEF_QITEM, 0);
void * qItem = taosAllocateQitem(size, DEF_QITEM, 0);
if (!qItem) {
return TSDB_CODE_FAILED;
@ -870,10 +870,10 @@ static int32_t tdRsmaPrintSubmitReq(SSma *pSma, SSubmitReq *pReq) {
* @param level
* @return int32_t
*/
static int32_t tdExecuteRSmaImpl(SSma *pSma, const void *pMsg, int32_t msgSize, int64_t version, int32_t inputType, SRSmaInfo *pInfo,
ERsmaExecType type, int8_t level) {
static int32_t tdExecuteRSmaImpl(SSma *pSma, const void *pMsg, int32_t msgSize, int64_t version, int32_t inputType,
SRSmaInfo *pInfo, ERsmaExecType type, int8_t level) {
int32_t idx = level - 1;
void *qTaskInfo = RSMA_INFO_QTASK(pInfo, idx);
void * qTaskInfo = RSMA_INFO_QTASK(pInfo, idx);
SRSmaInfoItem *pItem = RSMA_INFO_ITEM(pInfo, idx);
if (!qTaskInfo) {
@ -887,8 +887,9 @@ static int32_t tdExecuteRSmaImpl(SSma *pSma, const void *pMsg, int32_t msgSize,
return TSDB_CODE_FAILED;
}
smaDebug("vgId:%d, execute rsma %" PRIi8 " task for qTaskInfo:%p, suid:%" PRIu64 ", nMsg:%d, submitReqVer:%" PRIi64 ", inputType:%d", SMA_VID(pSma), level,
RSMA_INFO_QTASK(pInfo, idx), pInfo->suid, msgSize, version, inputType);
smaDebug("vgId:%d, execute rsma %" PRIi8 " task for qTaskInfo:%p, suid:%" PRIu64 ", nMsg:%d, submitReqVer:%" PRIi64
", inputType:%d",
SMA_VID(pSma), level, RSMA_INFO_QTASK(pInfo, idx), pInfo->suid, msgSize, version, inputType);
if ((terrno = qSetSMAInput(qTaskInfo, pMsg, msgSize, inputType)) < 0) {
smaError("vgId:%d, rsma %" PRIi8 " qSetStreamInput failed since %s", SMA_VID(pSma), level, tstrerror(terrno));
@ -912,7 +913,7 @@ static int32_t tdExecuteRSmaImpl(SSma *pSma, const void *pMsg, int32_t msgSize,
static SRSmaInfo *tdAcquireRSmaInfoBySuid(SSma *pSma, int64_t suid) {
int32_t code = 0;
int32_t lino = 0;
SSmaEnv *pEnv = SMA_RSMA_ENV(pSma);
SSmaEnv * pEnv = SMA_RSMA_ENV(pSma);
SRSmaStat *pStat = NULL;
SRSmaInfo *pRSmaInfo = NULL;
@ -1067,8 +1068,8 @@ _err:
static int32_t tdRSmaRestoreQTaskInfoInit(SSma *pSma, int64_t *nTables) {
int32_t code = 0;
int32_t lino = 0;
SVnode *pVnode = pSma->pVnode;
SArray *suidList = NULL;
SVnode * pVnode = pSma->pVnode;
SArray * suidList = NULL;
STbUidStore uidStore = {0};
SMetaReader mr = {0};
tb_uid_t suid = 0;
@ -1196,7 +1197,7 @@ int32_t tdRSmaPersistExecImpl(SRSmaStat *pRSmaStat, SHashObj *pInfoHash) {
int32_t code = 0;
int32_t lino = 0;
int32_t nTaskInfo = 0;
SSma *pSma = pRSmaStat->pSma;
SSma * pSma = pRSmaStat->pSma;
SVnode *pVnode = pSma->pVnode;
if (taosHashGetSize(pInfoHash) <= 0) {
@ -1229,7 +1230,7 @@ int32_t tdRSmaPersistExecImpl(SRSmaStat *pRSmaStat, SHashObj *pInfoHash) {
do {
int32_t nStreamFlushed = 0;
int32_t nSleep = 0;
void *infoHash = NULL;
void * infoHash = NULL;
while (true) {
while ((infoHash = taosHashIterate(pInfoHash, infoHash))) {
SRSmaInfo *pRSmaInfo = *(SRSmaInfo **)infoHash;
@ -1271,7 +1272,7 @@ _checkpoint:
SStreamMeta *pMeta = NULL;
int64_t checkpointId = taosGetTimestampNs();
bool checkpointBuilt = false;
void *infoHash = NULL;
void * infoHash = NULL;
while ((infoHash = taosHashIterate(pInfoHash, infoHash))) {
SRSmaInfo *pRSmaInfo = *(SRSmaInfo **)infoHash;
if (RSMA_INFO_IS_DEL(pRSmaInfo)) {
@ -1282,11 +1283,12 @@ _checkpoint:
SRSmaInfoItem *pItem = RSMA_INFO_ITEM(pRSmaInfo, i);
if (pItem && pItem->pStreamTask) {
SStreamTask *pTask = pItem->pStreamTask;
atomic_store_32(&pTask->pMeta->chkptNotReadyTasks, 1);
pTask->checkpointingId = checkpointId;
pTask->chkInfo.checkpointId = pTask->checkpointingId;
// atomic_store_32(&pTask->pMeta->chkptNotReadyTasks, 1);
pTask->chkInfo.checkpointingId = checkpointId;
pTask->chkInfo.checkpointId = checkpointId; // 1pTask->checkpointingId;
pTask->chkInfo.checkpointVer = pItem->submitReqVer;
pTask->info.triggerParam = pItem->fetchResultVer;
pTask->info.taskLevel = TASK_LEVEL_SMA;
if (!checkpointBuilt) {
// the stream states share one checkpoint
@ -1342,10 +1344,10 @@ _exit:
* @param tmrId
*/
static void tdRSmaFetchTrigger(void *param, void *tmrId) {
SRSmaRef *pRSmaRef = NULL;
SSma *pSma = NULL;
SRSmaStat *pStat = NULL;
SRSmaInfo *pRSmaInfo = NULL;
SRSmaRef * pRSmaRef = NULL;
SSma * pSma = NULL;
SRSmaStat * pStat = NULL;
SRSmaInfo * pRSmaInfo = NULL;
SRSmaInfoItem *pItem = NULL;
if (!(pRSmaRef = taosHashGet(smaMgmt.refHash, &param, POINTER_BYTES))) {
@ -1513,7 +1515,7 @@ _err:
}
static int32_t tdRSmaBatchExec(SSma *pSma, SRSmaInfo *pInfo, STaosQall *qall, SArray *pSubmitArr, ERsmaExecType type) {
void *msg = NULL;
void * msg = NULL;
int8_t resume = 0;
int32_t nSubmit = 0;
int32_t nDelete = 0;
@ -1628,11 +1630,11 @@ _err:
int32_t tdRSmaProcessExecImpl(SSma *pSma, ERsmaExecType type) {
int32_t code = 0;
int32_t lino = 0;
SVnode *pVnode = pSma->pVnode;
SSmaEnv *pEnv = SMA_RSMA_ENV(pSma);
SVnode * pVnode = pSma->pVnode;
SSmaEnv * pEnv = SMA_RSMA_ENV(pSma);
SRSmaStat *pRSmaStat = (SRSmaStat *)SMA_ENV_STAT(pEnv);
SHashObj *infoHash = NULL;
SArray *pSubmitArr = NULL;
SHashObj * infoHash = NULL;
SArray * pSubmitArr = NULL;
bool isFetchAll = false;
if (!pRSmaStat || !(infoHash = RSMA_INFO_HASH(pRSmaStat))) {
@ -1731,4 +1733,4 @@ _exit:
smaError("vgId:%d, %s failed at line %d since %s", TD_VID(pVnode), __func__, lino, tstrerror(code));
}
return code;
}
}

View File

@ -750,21 +750,27 @@ int32_t tqExpandTask(STQ* pTq, SStreamTask* pTask, int64_t nextProcessVer) {
if (pTask->info.taskLevel == TASK_LEVEL__SOURCE) {
SStreamTask* pStateTask = pTask;
SStreamTask task = {0};
STaskId taskId = {.streamId = 0, .taskId = 0};
if (pTask->info.fillHistory) {
task.id.streamId = pTask->streamTaskId.streamId;
task.id.taskId = pTask->streamTaskId.taskId;
task.pMeta = pTask->pMeta;
pStateTask = &task;
taskId.streamId = pTask->id.streamId;
taskId.taskId = pTask->id.taskId;
pTask->id.streamId = pTask->streamTaskId.streamId;
pTask->id.taskId = pTask->streamTaskId.taskId;
}
pTask->pState = streamStateOpen(pTq->pStreamMeta->path, pStateTask, false, -1, -1);
pTask->pState = streamStateOpen(pTq->pStreamMeta->path, pTask, false, -1, -1);
if (pTask->pState == NULL) {
tqError("s-task:%s (vgId:%d) failed to open state for task", pTask->id.idStr, vgId);
return -1;
} else {
tqDebug("s-task:%s state:%p", pTask->id.idStr, pTask->pState);
}
if (pTask->info.fillHistory) {
pTask->id.streamId = taskId.streamId;
pTask->id.taskId = taskId.taskId;
}
SReadHandle handle = {
.checkpointId = pTask->chkInfo.checkpointId,
@ -785,15 +791,17 @@ int32_t tqExpandTask(STQ* pTq, SStreamTask* pTask, int64_t nextProcessVer) {
qSetTaskId(pTask->exec.pExecutor, pTask->id.taskId, pTask->id.streamId);
} else if (pTask->info.taskLevel == TASK_LEVEL__AGG) {
SStreamTask* pSateTask = pTask;
SStreamTask task = {0};
// SStreamTask task = {0};
STaskId taskId = {.streamId = 0, .taskId = 0};
if (pTask->info.fillHistory) {
task.id.streamId = pTask->streamTaskId.streamId;
task.id.taskId = pTask->streamTaskId.taskId;
task.pMeta = pTask->pMeta;
pSateTask = &task;
taskId.streamId = pTask->id.streamId;
taskId.taskId = pTask->id.taskId;
pTask->id.streamId = pTask->streamTaskId.streamId;
pTask->id.taskId = pTask->streamTaskId.taskId;
}
pTask->pState = streamStateOpen(pTq->pStreamMeta->path, pSateTask, false, -1, -1);
pTask->pState = streamStateOpen(pTq->pStreamMeta->path, pTask, false, -1, -1);
if (pTask->pState == NULL) {
tqError("s-task:%s (vgId:%d) failed to open state for task", pTask->id.idStr, vgId);
return -1;
@ -801,6 +809,11 @@ int32_t tqExpandTask(STQ* pTq, SStreamTask* pTask, int64_t nextProcessVer) {
tqDebug("s-task:%s state:%p", pTask->id.idStr, pTask->pState);
}
if (pTask->info.fillHistory) {
pTask->id.streamId = taskId.streamId;
pTask->id.taskId = taskId.taskId;
}
int32_t numOfVgroups = (int32_t)taosArrayGetSize(pTask->upstreamInfo.pList);
SReadHandle handle = {
.checkpointId = pTask->chkInfo.checkpointId,
@ -1280,14 +1293,13 @@ int32_t tqProcessTaskCheckPointSourceReq(STQ* pTq, SRpcMsg* pMsg, SRpcMsg* pRsp)
return TSDB_CODE_SUCCESS;
}
// Downstream not ready: currently the stream tasks are not all ready. Ignore this checkpoint req.
if (pTask->status.downstreamReady != 1) {
pTask->chkInfo.failedId = req.checkpointId; // record the latest failed checkpoint id
pTask->checkpointingId = req.checkpointId;
pTask->chkInfo.checkpointingId = req.checkpointId;
qError("s-task:%s not ready for checkpoint, since downstream not ready, ignore this checkpoint:%" PRId64
", set it failure",
pTask->id.idStr, req.checkpointId);
tqError("s-task:%s not ready for checkpoint, since downstream not ready, ignore this checkpoint:%" PRId64
", set it failure",
pTask->id.idStr, req.checkpointId);
streamMetaReleaseTask(pMeta, pTask);
SRpcMsg rsp = {0};
@ -1316,10 +1328,10 @@ int32_t tqProcessTaskCheckPointSourceReq(STQ* pTq, SRpcMsg* pMsg, SRpcMsg* pRsp)
// check if the checkpoint msg already sent or not.
if (status == TASK_STATUS__CK) {
ASSERT(pTask->checkpointingId == req.checkpointId);
ASSERT(pTask->chkInfo.checkpointingId == req.checkpointId);
tqWarn("s-task:%s recv checkpoint-source msg again checkpointId:%" PRId64
" already received, ignore this msg and continue process checkpoint",
pTask->id.idStr, pTask->checkpointingId);
pTask->id.idStr, pTask->chkInfo.checkpointingId);
taosThreadMutexUnlock(&pTask->lock);
streamMetaReleaseTask(pMeta, pTask);
@ -1335,10 +1347,6 @@ int32_t tqProcessTaskCheckPointSourceReq(STQ* pTq, SRpcMsg* pMsg, SRpcMsg* pRsp)
// set the initial value for generating check point
// set the mgmt epset info according to the checkpoint-source msg from mnode, todo update mgmt epset if needed
if (pMeta->chkptNotReadyTasks == 0) {
pMeta->chkptNotReadyTasks = pMeta->numOfStreamTasks;
}
total = pMeta->numOfStreamTasks;
streamMetaWUnLock(pMeta);
@ -1390,7 +1398,7 @@ int32_t tqProcessTaskResetReq(STQ* pTq, SRpcMsg* pMsg) {
}
int32_t tqProcessTaskDropHTask(STQ* pTq, SRpcMsg* pMsg) {
SVDropHTaskReq* pReq = (SVDropHTaskReq*) pMsg->pCont;
SVDropHTaskReq* pReq = (SVDropHTaskReq*)pMsg->pCont;
SStreamMeta* pMeta = pTq->pStreamMeta;
SStreamTask* pTask = streamMetaAcquireTask(pMeta, pReq->streamId, pReq->taskId);

View File

@ -104,8 +104,8 @@ int32_t streamStateSnapRead(SStreamStateReader* pReader, uint8_t** ppData) {
pHdr->type = SNAP_DATA_STREAM_STATE_BACKEND;
pHdr->size = len;
memcpy(pHdr->data, rowData, len);
tqDebug("vgId:%d, vnode stream-state snapshot read data success", TD_VID(pReader->pTq->pVnode));
taosMemoryFree(rowData);
tqDebug("vgId:%d, vnode stream-state snapshot read data success", TD_VID(pReader->pTq->pVnode));
return code;
_err:
@ -139,7 +139,7 @@ int32_t streamStateSnapWriterOpen(STQ* pTq, int64_t sver, int64_t ever, SStreamS
pWriter->sver = sver;
pWriter->ever = ever;
sprintf(tdir, "%s%s%s%s%s", pTq->path, TD_DIRSEP, VNODE_TQ_STREAM, TD_DIRSEP, "received");
sprintf(tdir, "%s%s%s", pTq->path, TD_DIRSEP, VNODE_TQ_STREAM);
taosMkDir(tdir);
SStreamSnapWriter* pSnapWriter = NULL;
@ -167,25 +167,19 @@ int32_t streamStateSnapWriterClose(SStreamStateWriter* pWriter, int8_t rollback)
return code;
}
/**
 * Rebuild the vnode's stream state after snapshot data has been written.
 *
 * While holding the stream-meta write lock this reopens the stream meta and,
 * only if the reopen returned 0, re-initializes its backend and reloads the
 * tasks. The writer is released before returning on every path.
 *
 * NOTE(review): a second definition of streamStateRebuildFromSnap() appears
 * further down in this file without the lock/reopen/backend-init steps; only
 * one of the two can be kept - confirm which one is current.
 *
 * @param pWriter  snapshot writer; freed by this function
 * @param chkpId   not used by this implementation
 * @return 0 on success, otherwise the code returned by streamMetaReopen() or
 *         streamStateLoadTasks()
 */
int32_t streamStateRebuildFromSnap(SStreamStateWriter* pWriter, int64_t chkpId) {
  tqDebug("vgId:%d, vnode %s start to rebuild stream-state", TD_VID(pWriter->pTq->pVnode), STREAM_STATE_TRANSFER);

  streamMetaWLock(pWriter->pTq->pStreamMeta);
  int32_t code = streamMetaReopen(pWriter->pTq->pStreamMeta);
  if (code == 0) {
    streamMetaInitBackend(pWriter->pTq->pStreamMeta);
    code = streamStateLoadTasks(pWriter);
  }
  streamMetaWUnLock(pWriter->pTq->pStreamMeta);

  // report the real outcome instead of logging success unconditionally
  tqDebug("vgId:%d, vnode %s %s to rebuild stream-state", TD_VID(pWriter->pTq->pVnode), STREAM_STATE_TRANSFER,
          (code == 0) ? "succ" : "failed");
  taosMemoryFree(pWriter);
  return code;
}
/**
 * Load all stream tasks of the stream meta owned by the writer's tq handle.
 *
 * @param pWriter  snapshot writer whose pTq->pStreamMeta is loaded
 * @return the value returned by streamMetaLoadAllTasks(), unchanged
 */
int32_t streamStateLoadTasks(SStreamStateWriter* pWriter) {
  int32_t code = streamMetaLoadAllTasks(pWriter->pTq->pStreamMeta);
  return code;
}
/**
 * Feed one snapshot data block to the underlying stream snapshot writer.
 *
 * The block starts with an SSnapDataHdr; only the bytes after that header are
 * forwarded to streamSnapWrite().
 *
 * @param pWriter  snapshot writer holding the backend writer (pWriterImpl)
 * @param pData    buffer beginning with an SSnapDataHdr
 * @param nData    total size of pData in bytes, header included
 * @return the result of streamSnapWrite(), or -1 when the buffer is missing or
 *         too short to contain the header
 */
int32_t streamStateSnapWrite(SStreamStateWriter* pWriter, uint8_t* pData, uint32_t nData) {
  tqDebug("vgId:%d, vnode %s snapshot write data", TD_VID(pWriter->pTq->pVnode), STREAM_STATE_TRANSFER);

  // nData is unsigned: without this guard a short buffer would make the
  // subtraction below wrap around to a huge length
  if (pData == NULL || nData < sizeof(SSnapDataHdr)) {
    return -1;
  }

  return streamSnapWrite(pWriter->pWriterImpl, pData + sizeof(SSnapDataHdr), nData - sizeof(SSnapDataHdr));
}
/**
 * Rebuild the vnode's stream state after snapshot data has been written by
 * reloading the stream tasks, then release the writer.
 *
 * @param pWriter  snapshot writer; freed by this function on every path
 * @param chkpId   not used by this implementation
 * @return the code returned by streamStateLoadTasks()
 */
int32_t streamStateRebuildFromSnap(SStreamStateWriter* pWriter, int64_t chkpId) {
  tqDebug("vgId:%d, vnode %s start to rebuild stream-state", TD_VID(pWriter->pTq->pVnode), STREAM_STATE_TRANSFER);

  int32_t code = streamStateLoadTasks(pWriter);

  // report the real outcome instead of logging success unconditionally
  tqDebug("vgId:%d, vnode %s %s to rebuild stream-state", TD_VID(pWriter->pTq->pVnode), STREAM_STATE_TRANSFER,
          (code == 0) ? "succ" : "failed");
  taosMemoryFree(pWriter);
  return code;
}
/**
 * Reload all stream tasks of the stream meta owned by the writer's tq handle.
 *
 * @param pWriter  snapshot writer whose pTq->pStreamMeta is reloaded
 * @return the value returned by streamMetaReloadAllTasks(), unchanged
 */
int32_t streamStateLoadTasks(SStreamStateWriter* pWriter) {
  int32_t code = streamMetaReloadAllTasks(pWriter->pTq->pStreamMeta);
  return code;
}

View File

@ -238,7 +238,6 @@ int32_t streamTaskSnapWrite(SStreamTaskWriter* pWriter, uint8_t* pData, uint32_t
goto _err;
}
tDecoderClear(&decoder);
// tdbTbInsert(TTB *pTb, const void *pKey, int keyLen, const void *pVal, int valLen, TXN *pTxn)
int64_t key[2] = {taskId.streamId, taskId.taskId};
taosWLockLatch(&pTq->pStreamMeta->lock);

View File

@ -276,15 +276,15 @@ static int32_t getAllIntervalWindow(SSHashObj* pHashMap, SSHashObj* resWins) {
void* pIte = NULL;
int32_t iter = 0;
while ((pIte = tSimpleHashIterate(pHashMap, pIte, &iter)) != NULL) {
SWinKey* pKey = tSimpleHashGetKey(pIte, NULL);
uint64_t groupId = pKey->groupId;
TSKEY ts = pKey->ts;
SWinKey* pKey = tSimpleHashGetKey(pIte, NULL);
uint64_t groupId = pKey->groupId;
TSKEY ts = pKey->ts;
SRowBuffPos* pPos = *(SRowBuffPos**)pIte;
if (!pPos->beUpdated) {
continue;
}
pPos->beUpdated = false;
int32_t code = saveWinResultInfo(ts, groupId, pPos, resWins);
int32_t code = saveWinResultInfo(ts, groupId, pPos, resWins);
if (code != TSDB_CODE_SUCCESS) {
return code;
}
@ -1091,10 +1091,10 @@ void doStreamIntervalDecodeOpState(void* buf, int32_t len, SOperatorInfo* pOpera
int32_t mapSize = 0;
buf = taosDecodeFixedI32(buf, &mapSize);
for (int32_t i = 0; i < mapSize; i++) {
SWinKey key = {0};
SWinKey key = {0};
buf = decodeSWinKey(buf, &key);
SRowBuffPos* pPos = NULL;
int32_t resSize = pInfo->aggSup.resultRowSize;
int32_t resSize = pInfo->aggSup.resultRowSize;
pInfo->stateStore.streamStateAddIfNotExist(pInfo->pState, &key, (void**)&pPos, &resSize);
tSimpleHashPut(pInfo->aggSup.pResultRowHashTable, &key, sizeof(SWinKey), &pPos, POINTER_BYTES);
}
@ -1165,7 +1165,7 @@ static SSDataBlock* buildIntervalResult(SOperatorInfo* pOperator) {
return NULL;
}
int32_t copyUpdateResult(SSHashObj** ppWinUpdated, SArray* pUpdated, __compar_fn_t compar) {
int32_t copyUpdateResult(SSHashObj** ppWinUpdated, SArray* pUpdated, __compar_fn_t compar) {
void* pIte = NULL;
int32_t iter = 0;
while ((pIte = tSimpleHashIterate(*ppWinUpdated, pIte, &iter)) != NULL) {
@ -1402,10 +1402,12 @@ void streamIntervalReloadState(SOperatorInfo* pOperator) {
void* pBuf = NULL;
int32_t code = pInfo->stateStore.streamStateGetInfo(pInfo->pState, STREAM_INTERVAL_OP_STATE_NAME,
strlen(STREAM_INTERVAL_OP_STATE_NAME), &pBuf, &size);
TSKEY ts = *(TSKEY*)pBuf;
taosMemoryFree(pBuf);
pInfo->twAggSup.maxTs = TMAX(pInfo->twAggSup.maxTs, ts);
pInfo->stateStore.streamStateReloadInfo(pInfo->pState, ts);
if (code == 0) {
TSKEY ts = *(TSKEY*)pBuf;
taosMemoryFree(pBuf);
pInfo->twAggSup.maxTs = TMAX(pInfo->twAggSup.maxTs, ts);
pInfo->stateStore.streamStateReloadInfo(pInfo->pState, ts);
}
}
SOperatorInfo* downstream = pOperator->pDownstream[0];
if (downstream->fpSet.reloadStreamStateFn) {
@ -1723,8 +1725,8 @@ void setSessionOutputBuf(SStreamAggSupporter* pAggSup, TSKEY startTs, TSKEY endT
pCurWin->sessionWin.win.skey = startTs;
pCurWin->sessionWin.win.ekey = endTs;
int32_t size = pAggSup->resultRowSize;
int32_t code = pAggSup->stateStore.streamStateSessionAddIfNotExist(pAggSup->pState, &pCurWin->sessionWin,
pAggSup->gap, (void**)&pCurWin->pStatePos, &size);
int32_t code = pAggSup->stateStore.streamStateSessionAddIfNotExist(pAggSup->pState, &pCurWin->sessionWin,
pAggSup->gap, (void**)&pCurWin->pStatePos, &size);
if (code == TSDB_CODE_SUCCESS && !inWinRange(&pAggSup->winRange, &pCurWin->sessionWin.win)) {
code = TSDB_CODE_FAILED;
clearOutputBuf(pAggSup->pState, pCurWin->pStatePos, &pAggSup->pSessionAPI->stateStore);
@ -1822,9 +1824,9 @@ void removeSessionResults(SStreamAggSupporter* pAggSup, SSHashObj* pHashMap, SAr
}
}
int32_t updateSessionWindowInfo(SStreamAggSupporter* pAggSup, SResultWindowInfo* pWinInfo, TSKEY* pStartTs, TSKEY* pEndTs, uint64_t groupId,
int32_t rows, int32_t start, int64_t gap, SSHashObj* pResultRows, SSHashObj* pStUpdated,
SSHashObj* pStDeleted) {
int32_t updateSessionWindowInfo(SStreamAggSupporter* pAggSup, SResultWindowInfo* pWinInfo, TSKEY* pStartTs,
TSKEY* pEndTs, uint64_t groupId, int32_t rows, int32_t start, int64_t gap,
SSHashObj* pResultRows, SSHashObj* pStUpdated, SSHashObj* pStDeleted) {
for (int32_t i = start; i < rows; ++i) {
if (!isInWindow(pWinInfo, pStartTs[i], gap) && (!pEndTs || !isInWindow(pWinInfo, pEndTs[i], gap))) {
return i - start;
@ -1856,8 +1858,8 @@ static int32_t initSessionOutputBuf(SResultWindowInfo* pWinInfo, SResultRow** pR
}
int32_t doOneWindowAggImpl(SColumnInfoData* pTimeWindowData, SResultWindowInfo* pCurWin, SResultRow** pResult,
int32_t startIndex, int32_t winRows, int32_t rows, int32_t numOutput,
SOperatorInfo* pOperator, int64_t winDelta) {
int32_t startIndex, int32_t winRows, int32_t rows, int32_t numOutput,
SOperatorInfo* pOperator, int64_t winDelta) {
SExprSupp* pSup = &pOperator->exprSupp;
SExecTaskInfo* pTaskInfo = pOperator->pTaskInfo;
int32_t code = initSessionOutputBuf(pCurWin, pResult, pSup->pCtx, numOutput, pSup->rowEntryInfoOffset);
@ -1981,9 +1983,10 @@ static void compactSessionSemiWindow(SOperatorInfo* pOperator, SResultWindowInfo
}
/**
 * Store the result buffer of one session window in the stream state store.
 *
 * Logs the window's start/end keys and the needFree flag of its state
 * position, then hands the window key and position to
 * stateStore.streamStateSessionPut().
 *
 * @param pAggSup   aggregate supporter providing the state store, the state
 *                  handle and the result row size
 * @param pWinInfo  window to persist; pStatePos is dereferenced, so it must be set
 * @return the value returned by streamStateSessionPut()
 */
int32_t saveSessionOutputBuf(SStreamAggSupporter* pAggSup, SResultWindowInfo* pWinInfo) {
  qDebug("===stream===try save session result skey:%" PRId64 ", ekey:%" PRId64 ".pos%d", pWinInfo->sessionWin.win.skey,
         pWinInfo->sessionWin.win.ekey, pWinInfo->pStatePos->needFree);
  return pAggSup->stateStore.streamStateSessionPut(pAggSup->pState, &pWinInfo->sessionWin, pWinInfo->pStatePos,
                                                   pAggSup->resultRowSize);
}
static void doStreamSessionAggImpl(SOperatorInfo* pOperator, SSDataBlock* pSDataBlock, SSHashObj* pStUpdated,
@ -2045,7 +2048,8 @@ static void doStreamSessionAggImpl(SOperatorInfo* pOperator, SSDataBlock* pSData
if (pInfo->twAggSup.calTrigger == STREAM_TRIGGER_AT_ONCE && pStUpdated) {
code = saveResult(winInfo, pStUpdated);
if (code != TSDB_CODE_SUCCESS) {
qError("%s do stream session aggregate impl, set result error, code %s", GET_TASKID(pTaskInfo), tstrerror(code));
qError("%s do stream session aggregate impl, set result error, code %s", GET_TASKID(pTaskInfo),
tstrerror(code));
T_LONG_JMP(pTaskInfo->env, TSDB_CODE_OUT_OF_MEMORY);
}
}
@ -2084,8 +2088,8 @@ void doDeleteTimeWindows(SStreamAggSupporter* pAggSup, SSDataBlock* pBlock, SArr
inline int32_t sessionKeyCompareAsc(const void* pKey1, const void* pKey2) {
SResultWindowInfo* pWinInfo1 = (SResultWindowInfo*)pKey1;
SResultWindowInfo* pWinInfo2 = (SResultWindowInfo*)pKey2;
SSessionKey* pWin1 = &pWinInfo1->sessionWin;
SSessionKey* pWin2 = &pWinInfo2->sessionWin;
SSessionKey* pWin1 = &pWinInfo1->sessionWin;
SSessionKey* pWin2 = &pWinInfo2->sessionWin;
if (pWin1->groupId > pWin2->groupId) {
return 1;
@ -2290,9 +2294,9 @@ int32_t buildSessionResultDataBlock(SOperatorInfo* pOperator, void* pState, SSDa
for (int32_t i = pGroupResInfo->index; i < numOfRows; i += 1) {
SResultWindowInfo* pWinInfo = taosArrayGet(pGroupResInfo->pRows, i);
SRowBuffPos* pPos = pWinInfo->pStatePos;
SResultRow* pRow = NULL;
SSessionKey* pKey = (SSessionKey*) pPos->pKey;
SRowBuffPos* pPos = pWinInfo->pStatePos;
SResultRow* pRow = NULL;
SSessionKey* pKey = (SSessionKey*)pPos->pKey;
if (pBlock->info.id.groupId == 0) {
pBlock->info.id.groupId = pKey->groupId;
@ -2312,7 +2316,7 @@ int32_t buildSessionResultDataBlock(SOperatorInfo* pOperator, void* pState, SSDa
}
}
int32_t code = pAPI->stateStore.streamStateGetByPos(pState, pPos, (void**)&pRow);
int32_t code = pAPI->stateStore.streamStateGetByPos(pState, pPos, (void**)&pRow);
if (pBlock->info.rows + pRow->numOfRows > pBlock->info.capacity) {
ASSERT(pBlock->info.rows > 0);
break;
@ -2325,7 +2329,7 @@ int32_t buildSessionResultDataBlock(SOperatorInfo* pOperator, void* pState, SSDa
pGroupResInfo->index += 1;
continue;
}
doUpdateNumOfRows(pCtx, pRow, numOfExprs, rowEntryOffset);
// no results, continue to check the next one
if (pRow->numOfRows == 0) {
@ -2409,7 +2413,7 @@ void getMaxTsWins(const SArray* pAllWins, SArray* pMaxWins) {
return;
}
SResultWindowInfo* pWinInfo = taosArrayGet(pAllWins, size - 1);
SSessionKey* pSeKey = pWinInfo->pStatePos->pKey;
SSessionKey* pSeKey = pWinInfo->pStatePos->pKey;
taosArrayPush(pMaxWins, pSeKey);
if (pSeKey->groupId == 0) {
return;
@ -2716,7 +2720,8 @@ void resetWinRange(STimeWindow* winRange) {
void getSessionWindowInfoByKey(SStreamAggSupporter* pAggSup, SSessionKey* pKey, SResultWindowInfo* pWinInfo) {
int32_t rowSize = pAggSup->resultRowSize;
int32_t code = pAggSup->stateStore.streamStateSessionGet(pAggSup->pState, pKey, (void**)&pWinInfo->pStatePos, &rowSize);
int32_t code =
pAggSup->stateStore.streamStateSessionGet(pAggSup->pState, pKey, (void**)&pWinInfo->pStatePos, &rowSize);
if (code == TSDB_CODE_SUCCESS) {
pWinInfo->sessionWin = *pKey;
pWinInfo->isOutput = true;
@ -2730,16 +2735,16 @@ void getSessionWindowInfoByKey(SStreamAggSupporter* pAggSup, SSessionKey* pKey,
void streamSessionSemiReloadState(SOperatorInfo* pOperator) {
SStreamSessionAggOperatorInfo* pInfo = pOperator->info;
SStreamAggSupporter* pAggSup = &pInfo->streamAggSup;
SStreamAggSupporter* pAggSup = &pInfo->streamAggSup;
resetWinRange(&pAggSup->winRange);
SResultWindowInfo winInfo = {0};
int32_t size = 0;
void* pBuf = NULL;
int32_t code = pAggSup->stateStore.streamStateGetInfo(pAggSup->pState, STREAM_SESSION_OP_STATE_NAME,
int32_t size = 0;
void* pBuf = NULL;
int32_t code = pAggSup->stateStore.streamStateGetInfo(pAggSup->pState, STREAM_SESSION_OP_STATE_NAME,
strlen(STREAM_SESSION_OP_STATE_NAME), &pBuf, &size);
int32_t num = (size - sizeof(TSKEY)) / sizeof(SSessionKey);
SSessionKey* pSeKeyBuf = (SSessionKey*) pBuf;
int32_t num = (size - sizeof(TSKEY)) / sizeof(SSessionKey);
SSessionKey* pSeKeyBuf = (SSessionKey*)pBuf;
ASSERT(size == num * sizeof(SSessionKey) + sizeof(TSKEY));
for (int32_t i = 0; i < num; i++) {
SResultWindowInfo winInfo = {0};
@ -2763,12 +2768,12 @@ void streamSessionReloadState(SOperatorInfo* pOperator) {
SStreamAggSupporter* pAggSup = &pInfo->streamAggSup;
resetWinRange(&pAggSup->winRange);
int32_t size = 0;
void* pBuf = NULL;
int32_t code = pAggSup->stateStore.streamStateGetInfo(pAggSup->pState, STREAM_SESSION_OP_STATE_NAME,
strlen(STREAM_SESSION_OP_STATE_NAME), &pBuf, &size);
int32_t num = (size - sizeof(TSKEY)) / sizeof(SSessionKey);
SSessionKey* pSeKeyBuf = (SSessionKey*)pBuf;
int32_t size = 0;
void* pBuf = NULL;
int32_t code = pAggSup->stateStore.streamStateGetInfo(pAggSup->pState, STREAM_SESSION_OP_STATE_NAME,
strlen(STREAM_SESSION_OP_STATE_NAME), &pBuf, &size);
int32_t num = (size - sizeof(TSKEY)) / sizeof(SSessionKey);
SSessionKey* pSeKeyBuf = (SSessionKey*)pBuf;
ASSERT(size == num * sizeof(SSessionKey) + sizeof(TSKEY));
TSKEY ts = *(TSKEY*)((char*)pBuf + size - sizeof(TSKEY));
@ -2887,7 +2892,7 @@ SOperatorInfo* createStreamSessionAggOperatorInfo(SOperatorInfo* downstream, SPh
pInfo->recvGetAll = false;
pOperator->operatorType = QUERY_NODE_PHYSICAL_PLAN_STREAM_SESSION;
// for stream
// for stream
void* buff = NULL;
int32_t len = 0;
int32_t res =
@ -2924,7 +2929,8 @@ static void clearStreamSessionOperator(SStreamSessionAggOperatorInfo* pInfo) {
pInfo->streamAggSup.stateStore.streamStateSessionClear(pInfo->streamAggSup.pState);
}
void deleteSessionWinState(SStreamAggSupporter* pAggSup, SSDataBlock* pBlock, SSHashObj* pMapUpdate, SSHashObj* pMapDelete) {
void deleteSessionWinState(SStreamAggSupporter* pAggSup, SSDataBlock* pBlock, SSHashObj* pMapUpdate,
SSHashObj* pMapDelete) {
SArray* pWins = taosArrayInit(16, sizeof(SSessionKey));
doDeleteTimeWindows(pAggSup, pBlock, pWins);
removeSessionResults(pAggSup, pMapUpdate, pWins);
@ -3023,7 +3029,7 @@ static SSDataBlock* doStreamSessionSemiAgg(SOperatorInfo* pOperator) {
copyUpdateResult(&pInfo->pStUpdated, pInfo->pUpdated, sessionKeyCompareAsc);
removeSessionDeleteResults(pInfo->pStDeleted, pInfo->pUpdated);
if(pInfo->isHistoryOp) {
if (pInfo->isHistoryOp) {
getMaxTsWins(pInfo->pUpdated, pInfo->historyWins);
}
@ -3057,8 +3063,9 @@ SOperatorInfo* createStreamFinalSessionAggOperatorInfo(SOperatorInfo* downstream
pOperator->operatorType = pPhyNode->type;
if (pPhyNode->type != QUERY_NODE_PHYSICAL_PLAN_STREAM_FINAL_SESSION) {
pOperator->fpSet = createOperatorFpSet(optrDummyOpenFn, doStreamSessionSemiAgg, NULL,
destroyStreamSessionAggOperatorInfo, optrDefaultBufFn, NULL, optrDefaultGetNextExtFn, NULL);
pOperator->fpSet =
createOperatorFpSet(optrDummyOpenFn, doStreamSessionSemiAgg, NULL, destroyStreamSessionAggOperatorInfo,
optrDefaultBufFn, NULL, optrDefaultGetNextExtFn, NULL);
setOperatorStreamStateFn(pOperator, streamSessionReleaseState, streamSessionSemiReloadState);
}
setOperatorInfo(pOperator, getStreamOpName(pOperator->operatorType), pPhyNode->type, false, OP_NOT_OPENED, pInfo,
@ -3174,7 +3181,7 @@ void getStateWindowInfoByKey(SStreamAggSupporter* pAggSup, SSessionKey* pKey, SS
pAggSup->stateStore.streamStateSessionSeekKeyNext(pAggSup->pState, &pNextWin->winInfo.sessionWin);
int32_t nextSize = pAggSup->resultRowSize;
int32_t code = pAggSup->stateStore.streamStateSessionGetKVByCur(pCur, &pNextWin->winInfo.sessionWin,
(void**)&pNextWin->winInfo.pStatePos, &nextSize);
(void**)&pNextWin->winInfo.pStatePos, &nextSize);
if (code != TSDB_CODE_SUCCESS) {
SET_SESSION_WIN_INVALID(pNextWin->winInfo);
} else {
@ -3187,8 +3194,8 @@ void getStateWindowInfoByKey(SStreamAggSupporter* pAggSup, SSessionKey* pKey, SS
pNextWin->winInfo.isOutput = true;
}
pAggSup->stateStore.streamStateFreeCur(pCur);
qDebug("===stream===get state next win buff. skey:%" PRId64 ", endkey:%" PRId64, pNextWin->winInfo.sessionWin.win.skey,
pNextWin->winInfo.sessionWin.win.ekey);
qDebug("===stream===get state next win buff. skey:%" PRId64 ", endkey:%" PRId64,
pNextWin->winInfo.sessionWin.win.skey, pNextWin->winInfo.sessionWin.win.ekey);
}
void setStateOutputBuf(SStreamAggSupporter* pAggSup, TSKEY ts, uint64_t groupId, char* pKeyData,
@ -3257,13 +3264,13 @@ void setStateOutputBuf(SStreamAggSupporter* pAggSup, TSKEY ts, uint64_t groupId,
pNextWin->winInfo.isOutput = true;
}
pAggSup->stateStore.streamStateFreeCur(pCur);
qDebug("===stream===set state next win buff. skey:%" PRId64 ", endkey:%" PRId64, pNextWin->winInfo.sessionWin.win.skey,
pNextWin->winInfo.sessionWin.win.ekey);
qDebug("===stream===set state next win buff. skey:%" PRId64 ", endkey:%" PRId64,
pNextWin->winInfo.sessionWin.win.skey, pNextWin->winInfo.sessionWin.win.ekey);
}
int32_t updateStateWindowInfo(SStreamAggSupporter* pAggSup, SStateWindowInfo* pWinInfo, SStateWindowInfo* pNextWin, TSKEY* pTs, uint64_t groupId,
SColumnInfoData* pKeyCol, int32_t rows, int32_t start, bool* allEqual,
SSHashObj* pResultRows, SSHashObj* pSeUpdated, SSHashObj* pSeDeleted) {
int32_t updateStateWindowInfo(SStreamAggSupporter* pAggSup, SStateWindowInfo* pWinInfo, SStateWindowInfo* pNextWin,
TSKEY* pTs, uint64_t groupId, SColumnInfoData* pKeyCol, int32_t rows, int32_t start,
bool* allEqual, SSHashObj* pResultRows, SSHashObj* pSeUpdated, SSHashObj* pSeDeleted) {
*allEqual = true;
for (int32_t i = start; i < rows; ++i) {
char* pKeyData = colDataGetData(pKeyCol, i);
@ -3338,7 +3345,7 @@ static void doStreamStateAggImpl(SOperatorInfo* pOperator, SSDataBlock* pSDataBl
SStateWindowInfo nextWin = {0};
setStateOutputBuf(pAggSup, tsCols[i], groupId, pKeyData, &curWin, &nextWin);
releaseOutputBuf(pAggSup->pState, nextWin.winInfo.pStatePos, &pAPI->stateStore);
setSessionWinOutputInfo(pSeUpdated, &curWin.winInfo);
winRows = updateStateWindowInfo(pAggSup, &curWin, &nextWin, tsCols, groupId, pKeyColInfo, rows, i, &allEqual,
pAggSup->pResultRows, pSeUpdated, pStDeleted);
@ -3475,6 +3482,7 @@ void doStreamStateSaveCheckpoint(SOperatorInfo* pOperator) {
len = doStreamStateEncodeOpState(&pBuf, len, pOperator, true);
pInfo->streamAggSup.stateStore.streamStateSaveInfo(pInfo->streamAggSup.pState, STREAM_STATE_OP_CHECKPOINT_NAME,
strlen(STREAM_STATE_OP_CHECKPOINT_NAME), buf, len);
taosMemoryFree(buf);
}
static SSDataBlock* buildStateResult(SOperatorInfo* pOperator) {
@ -3614,10 +3622,11 @@ void streamStateReleaseState(SOperatorInfo* pOperator) {
static void compactStateWindow(SOperatorInfo* pOperator, SResultWindowInfo* pCurWin, SResultWindowInfo* pNextWin,
SSHashObj* pStUpdated, SSHashObj* pStDeleted) {
SExprSupp* pSup = &pOperator->exprSupp;
SExecTaskInfo* pTaskInfo = pOperator->pTaskInfo;
SExprSupp* pSup = &pOperator->exprSupp;
SExecTaskInfo* pTaskInfo = pOperator->pTaskInfo;
SStreamStateAggOperatorInfo* pInfo = pOperator->info;
compactTimeWindow(pSup, &pInfo->streamAggSup, &pInfo->twAggSup, pTaskInfo, pCurWin, pNextWin, pStUpdated, pStDeleted, false);
compactTimeWindow(pSup, &pInfo->streamAggSup, &pInfo->twAggSup, pTaskInfo, pCurWin, pNextWin, pStUpdated, pStDeleted,
false);
}
void streamStateReloadState(SOperatorInfo* pOperator) {
@ -3629,7 +3638,7 @@ void streamStateReloadState(SOperatorInfo* pOperator) {
int32_t size = 0;
void* pBuf = NULL;
int32_t code = pAggSup->stateStore.streamStateGetInfo(pAggSup->pState, STREAM_STATE_OP_STATE_NAME,
strlen(STREAM_STATE_OP_STATE_NAME), &pBuf, &size);
strlen(STREAM_STATE_OP_STATE_NAME), &pBuf, &size);
int32_t num = (size - sizeof(TSKEY)) / sizeof(SSessionKey);
qDebug("===stream=== reload state. get result count:%d", num);
SSessionKey* pSeKeyBuf = (SSessionKey*)pBuf;
@ -4010,8 +4019,9 @@ SOperatorInfo* createStreamIntervalOperatorInfo(SOperatorInfo* downstream, SPhys
setOperatorInfo(pOperator, "StreamIntervalOperator", QUERY_NODE_PHYSICAL_PLAN_STREAM_INTERVAL, true, OP_NOT_OPENED,
pInfo, pTaskInfo);
pOperator->fpSet = createOperatorFpSet(optrDummyOpenFn, doStreamIntervalAgg, NULL,
destroyStreamFinalIntervalOperatorInfo, optrDefaultBufFn, NULL, optrDefaultGetNextExtFn, NULL);
pOperator->fpSet =
createOperatorFpSet(optrDummyOpenFn, doStreamIntervalAgg, NULL, destroyStreamFinalIntervalOperatorInfo,
optrDefaultBufFn, NULL, optrDefaultGetNextExtFn, NULL);
setOperatorStreamStateFn(pOperator, streamIntervalReleaseState, streamIntervalReloadState);
pInfo->stateStore = pTaskInfo->storageAPI.stateStore;

View File

@ -17,6 +17,7 @@
#define _STREAM_BACKEDN_ROCKSDB_H_
#include "rocksdb/c.h"
//#include "streamInt.h"
#include "streamState.h"
#include "tcoding.h"
#include "tcommon.h"
@ -42,15 +43,110 @@ typedef struct {
TdThreadMutex cfMutex;
SHashObj* cfInst;
int64_t defaultCfInit;
} SBackendWrapper;
// Per-column-family parameter holder; STaskDbWrapper keeps an array of these in `pCfParams`.
typedef struct {
void* tableOpt;  // opaque table-options handle; presumably a rocksdb block-based table option — TODO confirm
} RocksdbCfParam;
// Wrapper around one rocksdb instance together with its options, column families and checkpoint bookkeeping.
// NOTE(review): field roles below are read off the declared types/names only; confirm against taskDbOpen().
typedef struct {
// rocksdb instance and the option/env/cache objects associated with it
rocksdb_t* db;
rocksdb_writeoptions_t* writeOpt;
rocksdb_readoptions_t* readOpt;
rocksdb_options_t* dbOpt;
rocksdb_env_t* env;
rocksdb_cache_t* cache;
// per-column-family arrays (handles, comparators, options, parameters)
rocksdb_column_family_handle_t** pCf;
rocksdb_comparator_t** pCompares;
rocksdb_options_t** pCfOpts;
RocksdbCfParam* pCfParams;
rocksdb_compactionfilterfactory_t* filterFactory;
TdThreadMutex mutex;
char* idstr;
char* path;
int64_t refId;
void* pTask;
int64_t streamId;
int64_t taskId;
// checkpoint bookkeeping
int64_t chkpId;
SArray* chkpSaved;
SArray* chkpInUse;
int32_t chkpCap;
TdThreadRwlock chkpDirLock;
int64_t dataWritten;
} STaskDbWrapper;
// Checkpoint tracking state for one database directory.
// NOTE(review): presumably used to compute which SST files were added/removed between two checkpoints
// (see pAdd/pDel and the two-slot pSstTbl) — confirm against the implementation.
typedef struct SDbChkp {
int8_t init;
char* pCurrent;
char* pManifest;
SArray* pSST;
int64_t preCkptId;
int64_t curChkpId;
char* path;
char* buf;
int32_t len;
// ping-pong buf
SHashObj* pSstTbl[2];
int8_t idx;  // presumably selects the active slot of pSstTbl — TODO confirm
SArray* pAdd;
SArray* pDel;
int8_t update;
TdThreadRwlock rwLock;
} SDbChkp;
// Backend checkpoint manager, created by bkdMgtCreate() and released by bkdMgtDestroy().
// NOTE(review): every field except pDbChkpTbl duplicates a field of SDbChkp; check whether the duplicated
// fields are still needed here or whether per-task state lives only in pDbChkpTbl.
typedef struct {
int8_t init;
char* pCurrent;
char* pManifest;
SArray* pSST;
int64_t preCkptId;
int64_t curChkpId;
char* path;
char* buf;
int32_t len;
// ping-pong buf
SHashObj* pSstTbl[2];
int8_t idx;
SArray* pAdd;
SArray* pDel;
int8_t update;
SHashObj* pDbChkpTbl;  // presumably maps a task id to its SDbChkp — TODO confirm
TdThreadRwlock rwLock;
} SBkdMgt;
bool streamBackendDataIsExist(const char* path, int64_t chkpId, int32_t vgId);
void* streamBackendInit(const char* path, int64_t chkpId, int32_t vgId);
void streamBackendCleanup(void* arg);
void streamBackendHandleCleanup(void* arg);
int32_t streamBackendLoadCheckpointInfo(void* pMeta);
int32_t streamBackendDoCheckpoint(void* pMeta, uint64_t checkpointId);
int32_t streamBackendDoCheckpoint(void* pMeta, int64_t checkpointId);
SListNode* streamBackendAddCompare(void* backend, void* arg);
void streamBackendDelCompare(void* backend, void* arg);
int32_t streamStateCvtDataFormat(char* path, char* key, void* cfInst);
STaskDbWrapper* taskDbOpen(char* path, char* key, int64_t chkpId);
void taskDbDestroy(void* pBackend, bool flush);
void taskDbDestroy2(void* pBackend);
int32_t taskDbDoCheckpoint(void* arg, int64_t chkpId);
void taskDbUpdateChkpId(void* pTaskDb, int64_t chkpId);
void* taskDbAddRef(void* pTaskDb);
void taskDbRemoveRef(void* pTaskDb);
int streamStateOpenBackend(void* backend, SStreamState* pState);
void streamStateCloseBackend(SStreamState* pState, bool remove);
@ -122,7 +218,7 @@ int32_t streamDefaultGet_rocksdb(SStreamState* pState, const void* key, void** p
int32_t streamDefaultDel_rocksdb(SStreamState* pState, const void* key);
int32_t streamDefaultIterGet_rocksdb(SStreamState* pState, const void* start, const void* end, SArray* result);
void* streamDefaultIterCreate_rocksdb(SStreamState* pState);
bool streamDefaultIterValid_rocksdb(void* iter);
bool streamDefaultIterValid_rocksdb(void* iter);
void streamDefaultIterSeek_rocksdb(void* iter, const char* key);
void streamDefaultIterNext_rocksdb(void* iter);
char* streamDefaultIterKey_rocksdb(void* iter, int32_t* len);
@ -146,5 +242,20 @@ int32_t streamBackendTriggerChkp(void* pMeta, char* dst);
int32_t streamBackendAddInUseChkp(void* arg, int64_t chkpId);
int32_t streamBackendDelInUseChkp(void* arg, int64_t chkpId);
int32_t taskDbBuildSnap(void* arg, SArray* pSnap);
// int32_t streamDefaultIter_rocksdb(SStreamState* pState, const void* start, const void* end, SArray* result);
// STaskDbWrapper* taskDbOpen(char* path, char* key, int64_t chkpId);
// void taskDbDestroy(void* pDb, bool flush);
int32_t taskDbDoCheckpoint(void* arg, int64_t chkpId);
SBkdMgt* bkdMgtCreate(char* path);
int32_t bkdMgtAddChkp(SBkdMgt* bm, char* task, char* path);
int32_t bkdMgtGetDelta(SBkdMgt* bm, char* taskId, int64_t chkpId, SArray* list, char* name);
int32_t bkdMgtDumpTo(SBkdMgt* bm, char* taskId, char* dname);
void bkdMgtDestroy(SBkdMgt* bm);
int32_t taskDbGenChkpUploadData(void* arg, void* bkdMgt, int64_t chkpId, int8_t type, char** path, SArray* list);
#endif

View File

@ -68,13 +68,19 @@ typedef struct SStreamContinueExecInfo {
SRpcMsg msg;
} SStreamContinueExecInfo;
// Identifies one task checkpoint for snapshotting: the stream/task/checkpoint ids plus a database path prefix.
typedef struct {
int64_t streamId;
int64_t taskId;
int64_t chkpId;
char* dbPrefixPath;  // ownership of this string is not visible here — TODO confirm who frees it
} SStreamTaskSnap;
struct STokenBucket {
int32_t numCapacity; // total capacity, available token per second
int32_t numOfToken; // total available tokens
int32_t numRate; // number of token per second
double quotaCapacity; // available capacity for maximum input size, KiloBytes per Second
double quotaRemain; // not consumed bytes per second
double quotaRate; // number of token per second
int32_t numCapacity; // total capacity, available token per second
int32_t numOfToken; // total available tokens
int32_t numRate; // number of token per second
double quotaCapacity; // available capacity for maximum input size, KiloBytes per Second
double quotaRemain; // not consumed bytes per second
double quotaRate; // number of token per second
int64_t tokenFillTimestamp; // fill timestamp
int64_t quotaFillTimestamp; // fill timestamp
};
@ -89,6 +95,7 @@ struct SStreamQueue {
extern SStreamGlobalEnv streamEnv;
extern int32_t streamBackendId;
extern int32_t streamBackendCfWrapperId;
extern int32_t taskDbWrapperId;
void streamRetryDispatchData(SStreamTask* pTask, int64_t waitDuration);
int32_t streamDispatchStreamBlock(SStreamTask* pTask);
@ -106,6 +113,8 @@ int32_t streamBroadcastToChildren(SStreamTask* pTask, const SSDataBlock* pBlock)
int32_t tEncodeStreamRetrieveReq(SEncoder* pEncoder, const SStreamRetrieveReq* pReq);
int32_t streamSaveTaskCheckpointInfo(SStreamTask* p, int64_t checkpointId);
int32_t streamTaskBuildCheckpoint(SStreamTask* pTask);
int32_t streamSaveAllTaskStatus(SStreamMeta* pMeta, int64_t checkpointId);
int32_t streamSendCheckMsg(SStreamTask* pTask, const SStreamTaskCheckReq* pReq, int32_t nodeId, SEpSet* pEpSet);
@ -117,7 +126,8 @@ int32_t streamTaskInitTokenBucket(STokenBucket* pBucket, int32_t numCap, int32_t
STaskId streamTaskExtractKey(const SStreamTask* pTask);
void streamTaskInitForLaunchHTask(SHistoryTaskInfo* pInfo);
void streamTaskSetRetryInfoForLaunch(SHistoryTaskInfo* pInfo);
int32_t streamTaskBuildScanhistoryRspMsg(SStreamTask* pTask, SStreamScanHistoryFinishReq* pReq, void** pBuffer, int32_t* pLen);
int32_t streamTaskBuildScanhistoryRspMsg(SStreamTask* pTask, SStreamScanHistoryFinishReq* pReq, void** pBuffer,
int32_t* pLen);
int32_t streamTaskFillHistoryFinished(SStreamTask* pTask);
void streamClearChkptReadyMsg(SStreamTask* pTask);
@ -132,6 +142,17 @@ int32_t streamAddEndScanHistoryMsg(SStreamTask* pTask, SRpcHandleInfo* pRpcInfo,
int32_t streamNotifyUpstreamContinue(SStreamTask* pTask);
int32_t streamTransferStateToStreamTask(SStreamTask* pTask);
// <<<<<<< HEAD
// void streamClearChkptReadyMsg(SStreamTask* pTask);
// int32_t streamTaskInitTokenBucket(STokenBucket* pBucket, int32_t numCap, int32_t numRate, float quotaRate, const
// char*); STaskId streamTaskExtractKey(const SStreamTask* pTask); void streamTaskInitForLaunchHTask(SHistoryTaskInfo*
// pInfo); void streamTaskSetRetryInfoForLaunch(SHistoryTaskInfo* pInfo);
// void streamMetaResetStartInfo(STaskStartInfo* pMeta);
// =======
// >>>>>>> 3.0
SStreamQueue* streamQueueOpen(int64_t cap);
void streamQueueClose(SStreamQueue* pQueue, int32_t taskId);
void streamQueueProcessSuccess(SStreamQueue* queue);
@ -151,10 +172,14 @@ int uploadCheckpoint(char* id, char* path);
int downloadCheckpoint(char* id, char* path);
int deleteCheckpoint(char* id);
int deleteCheckpointFile(char* id, char* name);
int downloadCheckpointByName(char* id, char* fname, char* dstName);
int32_t streamTaskOnNormalTaskReady(SStreamTask* pTask);
int32_t streamTaskOnScanhistoryTaskReady(SStreamTask* pTask);
typedef int32_t (*__stream_async_exec_fn_t)(void* param);
int32_t streamMetaAsyncExec(SStreamMeta* pMeta, __stream_async_exec_fn_t fn, void* param, int32_t* code);
#ifdef __cplusplus
}
#endif

View File

@ -32,8 +32,8 @@ typedef int32_t (*__state_trans_fn)(SStreamTask*);
typedef int32_t (*__state_trans_succ_fn)(SStreamTask*);
typedef struct SAttachedEventInfo {
ETaskStatus status; // required status that this event can be handled
EStreamTaskEvent event; // the delayed handled event
ETaskStatus status; // required status that this event can be handled
EStreamTaskEvent event; // the delayed handled event
} SAttachedEventInfo;
typedef struct STaskStateTrans {
@ -64,8 +64,8 @@ typedef struct SStreamEventInfo {
const char* name;
} SStreamEventInfo;
SStreamTaskSM* streamCreateStateMachine(SStreamTask* pTask);
void* streamDestroyStateMachine(SStreamTaskSM* pSM);
// SStreamTaskSM* streamCreateStateMachine(SStreamTask* pTask);
// void* streamDestroyStateMachine(SStreamTaskSM* pSM);
#ifdef __cplusplus
}
#endif

File diff suppressed because it is too large Load Diff

View File

@ -15,8 +15,16 @@
#include "cos.h"
#include "rsync.h"
#include "streamBackendRocksdb.h"
#include "streamInt.h"
// Argument bundle passed to the asynchronous checkpoint-upload job (doUploadChkp).
// The job releases `taskId` and the struct itself before it returns.
typedef struct {
UPLOAD_TYPE type;  // upload backend selected via getUploadType() when the job is queued
char* taskId;  // heap copy of the task id string (taosStrdup), freed by the job
int64_t chkpId;  // id of the checkpoint to upload
SStreamTask* pTask;  // task whose backend generates the upload data; not freed by the job
} SAsyncUploadArg;
int32_t tEncodeStreamCheckpointSourceReq(SEncoder* pEncoder, const SStreamCheckpointSourceReq* pReq) {
if (tStartEncode(pEncoder) < 0) return -1;
if (tEncodeI64(pEncoder, pReq->streamId) < 0) return -1;
@ -95,12 +103,12 @@ int32_t tDecodeStreamCheckpointReadyMsg(SDecoder* pDecoder, SStreamCheckpointRea
static int32_t streamAlignCheckpoint(SStreamTask* pTask) {
int32_t num = taosArrayGetSize(pTask->upstreamInfo.pList);
int64_t old = atomic_val_compare_exchange_32(&pTask->checkpointAlignCnt, 0, num);
int64_t old = atomic_val_compare_exchange_32(&pTask->chkInfo.downstreamAlignNum, 0, num);
if (old == 0) {
stDebug("s-task:%s set initial align upstream num:%d", pTask->id.idStr, num);
}
return atomic_sub_fetch_32(&pTask->checkpointAlignCnt, 1);
return atomic_sub_fetch_32(&pTask->chkInfo.downstreamAlignNum, 1);
}
static int32_t appendCheckpointIntoInputQ(SStreamTask* pTask, int32_t checkpointType) {
@ -118,7 +126,7 @@ static int32_t appendCheckpointIntoInputQ(SStreamTask* pTask, int32_t checkpoint
}
pBlock->info.type = STREAM_CHECKPOINT;
pBlock->info.version = pTask->checkpointingId;
pBlock->info.version = pTask->chkInfo.checkpointingId;
pBlock->info.rows = 1;
pBlock->info.childId = pTask->info.selfChildId;
@ -141,13 +149,12 @@ int32_t streamProcessCheckpointSourceReq(SStreamTask* pTask, SStreamCheckpointSo
// 1. set task status to be prepared for check point, no data are allowed to put into inputQ.
streamTaskHandleEvent(pTask->status.pSM, TASK_EVENT_GEN_CHECKPOINT);
pTask->checkpointingId = pReq->checkpointId;
pTask->checkpointNotReadyTasks = streamTaskGetNumOfDownstream(pTask);
pTask->chkInfo.checkpointingId = pReq->checkpointId;
pTask->chkInfo.checkpointNotReadyTasks = streamTaskGetNumOfDownstream(pTask);
pTask->chkInfo.startTs = taosGetTimestampMs();
pTask->execInfo.checkpoint += 1;
// 2. Put the checkpoint block into inputQ, to make sure all blocks with less version have been handled by this task
// already.
int32_t code = appendCheckpointIntoInputQ(pTask, STREAM_INPUT__CHECKPOINT_TRIGGER);
return code;
}
@ -171,13 +178,12 @@ static int32_t continueDispatchCheckpointBlock(SStreamDataBlock* pBlock, SStream
int32_t streamProcessCheckpointBlock(SStreamTask* pTask, SStreamDataBlock* pBlock) {
SSDataBlock* pDataBlock = taosArrayGet(pBlock->blocks, 0);
int64_t checkpointId = pDataBlock->info.version;
const char* id = pTask->id.idStr;
int32_t code = TSDB_CODE_SUCCESS;
const char* id = pTask->id.idStr;
int32_t code = TSDB_CODE_SUCCESS;
// set task status
if (streamTaskGetStatus(pTask, NULL) != TASK_STATUS__CK) {
pTask->checkpointingId = checkpointId;
pTask->chkInfo.checkpointingId = checkpointId;
code = streamTaskHandleEvent(pTask->status.pSM, TASK_EVENT_GEN_CHECKPOINT);
if (code != TSDB_CODE_SUCCESS) {
stError("s-task:%s handle checkpoint-trigger block failed, code:%s", id, tstrerror(code));
@ -185,26 +191,15 @@ int32_t streamProcessCheckpointBlock(SStreamTask* pTask, SStreamDataBlock* pBloc
}
}
{ // todo: remove this when the pipeline checkpoint generating is used.
SStreamMeta* pMeta = pTask->pMeta;
streamMetaWLock(pMeta);
if (pMeta->chkptNotReadyTasks == 0) {
pMeta->chkptNotReadyTasks = pMeta->numOfStreamTasks;
}
streamMetaWUnLock(pMeta);
}
// todo fix race condition: set the status and append checkpoint block
int32_t taskLevel = pTask->info.taskLevel;
if (taskLevel == TASK_LEVEL__SOURCE) {
if (pTask->outputInfo.type == TASK_OUTPUT__FIXED_DISPATCH ||
pTask->outputInfo.type == TASK_OUTPUT__SHUFFLE_DISPATCH) {
int8_t type = pTask->outputInfo.type;
if (type == TASK_OUTPUT__FIXED_DISPATCH || type == TASK_OUTPUT__SHUFFLE_DISPATCH) {
stDebug("s-task:%s set childIdx:%d, and add checkpoint-trigger block into outputQ", id, pTask->info.selfChildId);
continueDispatchCheckpointBlock(pBlock, pTask);
} else { // only one task exists, no need to dispatch downstream info
atomic_add_fetch_32(&pTask->checkpointNotReadyTasks, 1);
atomic_add_fetch_32(&pTask->chkInfo.checkpointNotReadyTasks, 1);
streamProcessCheckpointReadyMsg(pTask);
streamFreeQitem((SStreamQueueItem*)pBlock);
}
@ -233,15 +228,13 @@ int32_t streamProcessCheckpointBlock(SStreamTask* pTask, SStreamDataBlock* pBloc
id, num);
streamFreeQitem((SStreamQueueItem*)pBlock);
streamTaskBuildCheckpoint(pTask);
} else {
stDebug(
"s-task:%s process checkpoint block, all %d upstreams sent checkpoint msgs, dispatch checkpoint msg "
"downstream",
id, num);
} else { // source & agg tasks need to forward the checkpoint msg downwards
stDebug("s-task:%s process checkpoint block, all %d upstreams sent checkpoint msgs, continue forwards msg", id,
num);
// set the needed checked downstream tasks, only when all downstream tasks do checkpoint complete, this task
// can start local checkpoint procedure
pTask->checkpointNotReadyTasks = streamTaskGetNumOfDownstream(pTask);
pTask->chkInfo.checkpointNotReadyTasks = streamTaskGetNumOfDownstream(pTask);
// Put the checkpoint block into inputQ, to make sure all blocks with less version have been handled by this task
// already. And then, dispatch check point msg to all downstream tasks
@ -260,7 +253,7 @@ int32_t streamProcessCheckpointReadyMsg(SStreamTask* pTask) {
ASSERT(pTask->info.taskLevel == TASK_LEVEL__SOURCE || pTask->info.taskLevel == TASK_LEVEL__AGG);
// only when all downstream tasks are send checkpoint rsp, we can start the checkpoint procedure for the agg task
int32_t notReady = atomic_sub_fetch_32(&pTask->checkpointNotReadyTasks, 1);
int32_t notReady = atomic_sub_fetch_32(&pTask->chkInfo.checkpointNotReadyTasks, 1);
ASSERT(notReady >= 0);
if (notReady == 0) {
@ -276,11 +269,11 @@ int32_t streamProcessCheckpointReadyMsg(SStreamTask* pTask) {
}
void streamTaskClearCheckInfo(SStreamTask* pTask, bool clearChkpReadyMsg) {
pTask->checkpointingId = 0; // clear the checkpoint id
pTask->chkInfo.checkpointingId = 0; // clear the checkpoint id
pTask->chkInfo.failedId = 0;
pTask->chkInfo.startTs = 0; // clear the recorded start time
pTask->checkpointNotReadyTasks = 0;
pTask->checkpointAlignCnt = 0;
pTask->chkInfo.checkpointNotReadyTasks = 0;
// pTask->chkInfo.checkpointAlignCnt = 0;
pTask->chkInfo.dispatchCheckpointTrigger = false;
streamTaskOpenAllUpstreamInput(pTask); // open inputQ for all upstream tasks
if (clearChkpReadyMsg) {
@ -288,100 +281,217 @@ void streamTaskClearCheckInfo(SStreamTask* pTask, bool clearChkpReadyMsg) {
}
}
int32_t streamSaveAllTaskStatus(SStreamMeta* pMeta, int64_t checkpointId) {
int32_t vgId = pMeta->vgId;
int32_t code = 0;
int32_t streamSaveTaskCheckpointInfo(SStreamTask* p, int64_t checkpointId) {
SStreamMeta* pMeta = p->pMeta;
int32_t vgId = pMeta->vgId;
const char* id = p->id.idStr;
int32_t code = 0;
streamMetaWLock(pMeta);
if (p->info.fillHistory == 1) {
return code;
}
for (int32_t i = 0; i < taosArrayGetSize(pMeta->pTaskList); ++i) {
STaskId* pId = taosArrayGet(pMeta->pTaskList, i);
SStreamTask** ppTask = taosHashGet(pMeta->pTasksMap, pId, sizeof(*pId));
if (ppTask == NULL) {
continue;
}
if (p->info.taskLevel > TASK_LEVEL__SINK) {
return code;
}
SStreamTask* p = *ppTask;
if (p->info.fillHistory == 1) {
continue;
}
taosThreadMutexLock(&p->lock);
ASSERT(p->chkInfo.checkpointId <= p->checkpointingId && p->checkpointingId == checkpointId &&
p->chkInfo.checkpointVer <= p->chkInfo.processedVer);
ASSERT(p->chkInfo.checkpointId <= p->chkInfo.checkpointingId && p->chkInfo.checkpointingId == checkpointId &&
p->chkInfo.checkpointVer <= p->chkInfo.processedVer);
p->chkInfo.checkpointId = p->chkInfo.checkpointingId;
p->chkInfo.checkpointVer = p->chkInfo.processedVer;
p->chkInfo.checkpointId = p->checkpointingId;
p->chkInfo.checkpointVer = p->chkInfo.processedVer;
streamTaskClearCheckInfo(p, false);
char* str = NULL;
streamTaskGetStatus(p, &str);
streamTaskClearCheckInfo(p, false);
code = streamTaskHandleEvent(p->status.pSM, TASK_EVENT_CHECKPOINT_DONE);
taosThreadMutexUnlock(&p->lock);
char* str = NULL;
streamTaskGetStatus(p, &str);
if (code != TSDB_CODE_SUCCESS) {
stDebug("s-task:%s vgId:%d handle event:checkpoint-done failed", id, vgId);
return -1;
}
code = streamTaskHandleEvent(p->status.pSM, TASK_EVENT_CHECKPOINT_DONE);
stDebug("vgId:%d s-task:%s level:%d open upstream inputQ, save status after checkpoint, checkpointId:%" PRId64
", Ver(saved):%" PRId64 " currentVer:%" PRId64 ", status: normal, prev:%s",
vgId, id, p->info.taskLevel, checkpointId, p->chkInfo.checkpointVer, p->chkInfo.nextProcessVer, str);
// save the task if not sink task
if (p->info.taskLevel != TASK_LEVEL__SINK) {
streamMetaWLock(pMeta);
code = streamMetaSaveTask(pMeta, p);
if (code != TSDB_CODE_SUCCESS) {
stDebug("s-task:%s vgId:%d save task status failed, since handle event failed", p->id.idStr, vgId);
streamMetaWUnLock(pMeta);
return -1;
} else { // save the task
streamMetaSaveTask(pMeta, p);
stError("s-task:%s vgId:%d failed to save task info after do checkpoint, checkpointId:%" PRId64 ", since %s", id,
vgId, checkpointId, terrstr());
return code;
}
stDebug(
"vgId:%d s-task:%s level:%d open upstream inputQ, commit task status after checkpoint completed, "
"checkpointId:%" PRId64 ", Ver(saved):%" PRId64 " currentVer:%" PRId64 ", status to be normal, prev:%s",
pMeta->vgId, p->id.idStr, p->info.taskLevel, checkpointId, p->chkInfo.checkpointVer, p->chkInfo.nextProcessVer,
str);
}
code = streamMetaCommit(pMeta);
if (code != TSDB_CODE_SUCCESS) {
stError("s-task:%s vgId:%d failed to commit stream meta after do checkpoint, checkpointId:%" PRId64 ", since %s",
id, vgId, checkpointId, terrstr());
}
code = streamMetaCommit(pMeta);
if (code < 0) {
stError("vgId:%d failed to commit stream meta after do checkpoint, checkpointId:%" PRId64 ", since %s", pMeta->vgId,
checkpointId, terrstr());
} else {
stInfo("vgId:%d commit stream meta after do checkpoint, checkpointId:%" PRId64 " DONE", pMeta->vgId, checkpointId);
streamMetaWUnLock(pMeta);
}
streamMetaWUnLock(pMeta);
return code;
}
int32_t streamTaskBuildCheckpoint(SStreamTask* pTask) {
int32_t code = 0;
// Mark the checkpoint that is currently being generated as failed: its id is stored in chkInfo.failedId and is
// also copied into chkInfo.checkpointId.
void streamTaskSetFailedId(SStreamTask* pTask) {
  const int64_t inFlightId = pTask->chkInfo.checkpointingId;

  pTask->chkInfo.failedId = inFlightId;
  pTask->chkInfo.checkpointId = inFlightId;
}
// check for all tasks, and do generate the vnode-wide checkpoint data.
SStreamMeta* pMeta = pTask->pMeta;
int32_t remain = atomic_sub_fetch_32(&pMeta->chkptNotReadyTasks, 1);
ASSERT(remain >= 0);
double el = (taosGetTimestampMs() - pTask->chkInfo.startTs) / 1000.0;
if (remain == 0) { // all tasks are ready
stDebug("s-task:%s all downstreams are ready, ready for do checkpoint", pTask->id.idStr);
streamBackendDoCheckpoint(pMeta, pTask->checkpointingId);
streamSaveAllTaskStatus(pMeta, pTask->checkpointingId);
stInfo(
"vgId:%d vnode wide checkpoint completed, save all tasks status, last:%s, level:%d elapsed time:%.2f Sec "
"checkpointId:%" PRId64,
pMeta->vgId, pTask->id.idStr, pTask->info.taskLevel, el, pTask->checkpointingId);
int32_t getChkpMeta(char* id, char* path, SArray* list) {
char* file = taosMemoryCalloc(1, strlen(path) + 32);
sprintf(file, "%s%s%s", path, TD_DIRSEP, "META_TMP");
int32_t code = downloadCheckpointByName(id, "META", file);
if (code != 0) {
stDebug("chkp failed to download meta file:%s", file);
taosMemoryFree(file);
return code;
}
TdFilePtr pFile = taosOpenFile(file, TD_FILE_READ);
char buf[128] = {0};
if (taosReadFile(pFile, buf, sizeof(buf)) <= 0) {
stError("chkp failed to read meta file:%s", file);
code = -1;
} else {
stInfo(
"vgId:%d vnode wide tasks not reach checkpoint ready status, ready s-task:%s, level:%d elapsed time:%.2f Sec "
"not ready:%d/%d",
pMeta->vgId, pTask->id.idStr, pTask->info.taskLevel, el, remain, pMeta->numOfStreamTasks);
int32_t len = strlen(buf);
for (int i = 0; i < len; i++) {
if (buf[i] == '\n') {
char* item = taosMemoryCalloc(1, i + 1);
memcpy(item, buf, i);
taosArrayPush(list, &item);
item = taosMemoryCalloc(1, len - i);
memcpy(item, buf + i + 1, len - i - 1);
taosArrayPush(list, &item);
}
}
}
taosCloseFile(&pFile);
taosRemoveFile(file);
taosMemoryFree(file);
return code;
}
int32_t doUploadChkp(void* param) {
SAsyncUploadArg* arg = param;
char* path = NULL;
int32_t code = 0;
SArray* toDelFiles = taosArrayInit(4, sizeof(void*));
if ((code = taskDbGenChkpUploadData(arg->pTask->pBackend, arg->pTask->pMeta->bkdChkptMgt, arg->chkpId,
(int8_t)(arg->type), &path, toDelFiles)) != 0) {
stError("s-task:%s failed to gen upload checkpoint:%" PRId64 "", arg->pTask->id.idStr, arg->chkpId);
}
if (arg->type == UPLOAD_S3) {
if (code == 0 && (code = getChkpMeta(arg->taskId, path, toDelFiles)) != 0) {
stError("s-task:%s failed to get checkpoint:%" PRId64 " meta", arg->pTask->id.idStr, arg->chkpId);
}
}
if (code == 0 && (code = uploadCheckpoint(arg->taskId, path)) != 0) {
stError("s-task:%s failed to upload checkpoint:%" PRId64, arg->pTask->id.idStr, arg->chkpId);
}
if (code == 0) {
for (int i = 0; i < taosArrayGetSize(toDelFiles); i++) {
char* p = taosArrayGetP(toDelFiles, i);
code = deleteCheckpointFile(arg->taskId, p);
stDebug("s-task:%s try to del file: %s", arg->pTask->id.idStr, p);
if (code != 0) {
break;
}
}
}
taosArrayDestroyP(toDelFiles, taosMemoryFree);
taosRemoveDir(path);
taosMemoryFree(path);
taosMemoryFree(arg->taskId);
taosMemoryFree(arg);
return code;
}
/*
 * Queue an asynchronous upload of checkpoint `chkpId` for `pTask`.
 *
 * Returns 0 immediately when uploading is disabled, -1 when the job argument cannot be allocated, otherwise
 * the result of streamMetaAsyncExec(). The argument bundle (including a private copy of `taskId`) is released
 * by doUploadChkp once the job has run.
 */
int32_t streamTaskUploadChkp(SStreamTask* pTask, int64_t chkpId, char* taskId) {
  // async upload
  UPLOAD_TYPE type = getUploadType();
  if (type == UPLOAD_DISABLE) {
    return 0;
  }

  SAsyncUploadArg* arg = taosMemoryCalloc(1, sizeof(SAsyncUploadArg));
  if (arg == NULL) {
    stError("s-task:%s failed to alloc upload arg for checkpoint:%" PRId64, pTask->id.idStr, chkpId);
    return -1;
  }

  arg->taskId = taosStrdup(taskId);
  if (arg->taskId == NULL) {
    stError("s-task:%s failed to copy task id for checkpoint:%" PRId64, pTask->id.idStr, chkpId);
    taosMemoryFree(arg);
    return -1;
  }

  arg->type = type;
  arg->chkpId = chkpId;
  arg->pTask = pTask;

  // NOTE(review): if streamMetaAsyncExec fails without running the job, `arg` may leak — confirm its contract
  return streamMetaAsyncExec(pTask->pMeta, doUploadChkp, arg, NULL);
}
int32_t streamTaskBuildCheckpoint(SStreamTask* pTask) {
int32_t code = TSDB_CODE_SUCCESS;
int64_t startTs = pTask->chkInfo.startTs;
int64_t ckId = pTask->chkInfo.checkpointingId;
// sink task do not need to save the status, and generated the checkpoint
if (pTask->info.taskLevel != TASK_LEVEL__SINK) {
stDebug("s-task:%s level:%d start gen checkpoint", pTask->id.idStr, pTask->info.taskLevel);
code = streamBackendDoCheckpoint(pTask->pBackend, ckId);
if (code != TSDB_CODE_SUCCESS) {
stError("s-task:%s gen checkpoint:%" PRId64 " failed, code:%s", pTask->id.idStr, ckId, tstrerror(terrno));
}
}
// send check point response to upstream task
if (pTask->info.taskLevel == TASK_LEVEL__SOURCE) {
code = streamTaskSendCheckpointSourceRsp(pTask);
} else {
code = streamTaskSendCheckpointReadyMsg(pTask);
if (code == TSDB_CODE_SUCCESS) {
if (pTask->info.taskLevel == TASK_LEVEL__SOURCE) {
code = streamTaskSendCheckpointSourceRsp(pTask);
} else {
code = streamTaskSendCheckpointReadyMsg(pTask);
}
if (code != TSDB_CODE_SUCCESS) {
// todo: let's retry send rsp to upstream/mnode
stError("s-task:%s failed to send checkpoint rsp to upstream, checkpointId:%" PRId64 ", code:%s", pTask->id.idStr,
ckId, tstrerror(code));
}
}
if (code != TSDB_CODE_SUCCESS) {
// todo: let's retry send rsp to upstream/mnode
stError("s-task:%s failed to send checkpoint rsp to upstream, checkpointId:%" PRId64 ", code:%s", pTask->id.idStr,
pTask->checkpointingId, tstrerror(code));
// clear the checkpoint info, and commit the newest checkpoint info if all works are done successfully
if (code == TSDB_CODE_SUCCESS) {
code = streamSaveTaskCheckpointInfo(pTask, ckId);
if (code != TSDB_CODE_SUCCESS) {
stError("s-task:%s commit taskInfo failed, checkpoint:%" PRId64 " failed, code:%s", pTask->id.idStr, ckId,
tstrerror(terrno));
} else {
code = streamTaskUploadChkp(pTask, ckId, (char*)pTask->id.idStr);
if (code != 0) {
stError("s-task:%s failed to upload checkpoint:%" PRId64 " failed", pTask->id.idStr, ckId);
}
}
}
if (code != TSDB_CODE_SUCCESS) { // clear the checkpoint info if failed
taosThreadMutexLock(&pTask->lock);
streamTaskClearCheckInfo(pTask, false);
code = streamTaskHandleEvent(pTask->status.pSM, TASK_EVENT_CHECKPOINT_DONE);
taosThreadMutexUnlock(&pTask->lock);
streamTaskSetFailedId(pTask);
stDebug("s-task:%s clear checkpoint flag since gen checkpoint failed, checkpointId:%" PRId64, pTask->id.idStr,
ckId);
}
double el = (taosGetTimestampMs() - startTs) / 1000.0;
stInfo("s-task:%s vgId:%d level:%d, checkpointId:%" PRId64 " ver:%" PRId64 " elapsed time:%.2f Sec, %s ",
pTask->id.idStr, pTask->pMeta->vgId, pTask->info.taskLevel, ckId, pTask->chkInfo.checkpointVer, el,
(code == TSDB_CODE_SUCCESS) ? "succ" : "failed");
return code;
}
@ -390,6 +500,7 @@ static int uploadCheckpointToS3(char* id, char* path) {
if (pDir == NULL) return -1;
TdDirEntryPtr de = NULL;
s3Init();
while ((de = taosReadDir(pDir)) != NULL) {
char* name = taosGetDirEntryName(de);
if (strcmp(name, ".") == 0 || strcmp(name, "..") == 0 || taosDirEntryIsDir(de)) continue;
@ -409,12 +520,24 @@ static int uploadCheckpointToS3(char* id, char* path) {
return -1;
}
stDebug("[s3] upload checkpoint:%s", filename);
// break;
}
taosCloseDir(&pDir);
return 0;
}
/*
 * Download the S3 object "<id>/<fname>" into the local file `dstName`.
 * Returns 0 on success, -1 if the object name cannot be allocated or the download fails.
 */
static int downloadCheckpointByNameS3(char* id, char* fname, char* dstName) {
  // size the buffer from the strings that are actually written: id + '/' + fname + NUL
  size_t len = strlen(id) + strlen(fname) + 2;
  char*  objName = taosMemoryCalloc(1, len);
  if (objName == NULL) {
    return -1;
  }

  snprintf(objName, len, "%s/%s", id, fname);

  int code = (s3GetObjectToFile(objName, dstName) != 0) ? -1 : 0;

  taosMemoryFree(objName);
  return code;
}
UPLOAD_TYPE getUploadType() {
if (strlen(tsSnodeAddress) != 0) {
return UPLOAD_RSYNC;
@ -438,6 +561,20 @@ int uploadCheckpoint(char* id, char* path) {
return 0;
}
// fileName: CURRENT
/*
 * Download a single named checkpoint file `fname` of task `id` into the local file `dstName`.
 *
 * Returns -1 on invalid parameters. When an snode address is configured nothing is downloaded here and 0 is
 * returned; when S3 streaming is enabled the result of the S3 download is returned; otherwise 0.
 */
int downloadCheckpointByName(char* id, char* fname, char* dstName) {
  // dstName is handed to the S3 download unchanged, so reject NULL up front together with the other arguments
  if (id == NULL || fname == NULL || dstName == NULL || strlen(id) == 0 || strlen(fname) == 0 ||
      strlen(fname) >= PATH_MAX) {
    stError("downloadCheckpointByName parameters invalid");
    return -1;
  }

  if (strlen(tsSnodeAddress) != 0) {
    return 0;
  } else if (tsS3StreamEnabled) {
    return downloadCheckpointByNameS3(id, fname, dstName);
  }
  return 0;
}
int downloadCheckpoint(char* id, char* path) {
if (id == NULL || path == NULL || strlen(id) == 0 || strlen(path) == 0 || strlen(path) >= PATH_MAX) {
stError("downloadCheckpoint parameters invalid");

View File

@ -1167,10 +1167,10 @@ int32_t streamProcessDispatchRsp(SStreamTask* pTask, SStreamDispatchRsp* pRsp, i
pTask->info.taskLevel == TASK_LEVEL__SOURCE) {
stError("s-task:%s failed to dispatch checkpoint-trigger msg, checkpointId:%" PRId64
", set the current checkpoint failed, and send rsp to mnode",
id, pTask->checkpointingId);
id, pTask->chkInfo.checkpointingId);
{ // send checkpoint failure msg to mnode directly
pTask->chkInfo.failedId = pTask->checkpointingId; // record the latest failed checkpoint id
pTask->checkpointingId = pTask->checkpointingId;
pTask->chkInfo.failedId = pTask->chkInfo.checkpointingId; // record the latest failed checkpoint id
pTask->chkInfo.checkpointingId = pTask->chkInfo.checkpointingId;
streamTaskSendCheckpointSourceRsp(pTask);
}
} else {

View File

@ -18,6 +18,7 @@
#include "streamInt.h"
#include "tmisce.h"
#include "tref.h"
#include "tsched.h"
#include "tstream.h"
#include "ttimer.h"
#include "wal.h"
@ -27,6 +28,7 @@ static TdThreadOnce streamMetaModuleInit = PTHREAD_ONCE_INIT;
int32_t streamBackendId = 0;
int32_t streamBackendCfWrapperId = 0;
int32_t streamMetaId = 0;
int32_t taskDbWrapperId = 0;
static void metaHbToMnode(void* param, void* tmrId);
static void streamMetaClear(SStreamMeta* pMeta);
@ -55,6 +57,7 @@ int32_t metaRefMgtAdd(int64_t vgId, int64_t* rid);
static void streamMetaEnvInit() {
streamBackendId = taosOpenRef(64, streamBackendCleanup);
streamBackendCfWrapperId = taosOpenRef(64, streamBackendHandleCleanup);
taskDbWrapperId = taosOpenRef(64, taskDbDestroy2);
streamMetaId = taosOpenRef(64, streamMetaCloseImpl);
@ -62,6 +65,7 @@ static void streamMetaEnvInit() {
}
void streamMetaInit() { taosThreadOnce(&streamMetaModuleInit, streamMetaEnvInit); }
void streamMetaCleanup() {
taosCloseRef(streamBackendId);
taosCloseRef(streamBackendCfWrapperId);
@ -106,6 +110,174 @@ int32_t metaRefMgtAdd(int64_t vgId, int64_t* rid) {
return 0;
}
// Per-task checkpoint/backend state record.
// NOTE(review): not referenced in the visible part of this file; field meanings are inferred from names only.
typedef struct {
int64_t chkpId;
char* path;
char* taskId;
SArray* pChkpSave;
SArray* pChkpInUse;
int8_t chkpCap;
void* backend;
} StreamMetaTaskState;
/*
 * Open the tdb environment under pMeta->path and the two tables stored in it ("task.db" and "checkpoint.db").
 * Returns 0 when all three opens succeed, -1 as soon as one of them fails.
 */
int32_t streamMetaOpenTdb(SStreamMeta* pMeta) {
  int32_t rc = tdbOpen(pMeta->path, 16 * 1024, 1, &pMeta->db, 0);
  if (rc < 0) {
    return -1;
  }

  rc = tdbTbOpen("task.db", STREAM_TASK_KEY_LEN, -1, NULL, pMeta->db, &pMeta->pTaskDb, 0);
  if (rc < 0) {
    return -1;
  }

  rc = tdbTbOpen("checkpoint.db", sizeof(int32_t), -1, NULL, pMeta->db, &pMeta->pCheckpointDb, 0);
  return (rc < 0) ? -1 : 0;
}
//
// impl later
//
// Result of checking the persisted stream-task format against the running code
// (produced by streamMetaCheckBackendCompatible, consumed by streamMetaMayCvtDbFormat).
enum STREAM_STATE_VER {
  STREAM_STATA_NO_COMPATIBLE = 0,  // stored data cannot be used; opening the meta fails
  STREAM_STATA_COMPATIBLE = 1,     // stored data can be used as is
  STREAM_STATA_NEED_CONVERT = 2,   // stored data must go through streamMetaCvtDbFormat first
};
/*
 * Classify the format of the persisted stream tasks.
 *
 * Walks the task table and looks at the msgVer of the first record whose checkpoint
 * info can be decoded:
 *   - msgVer <= SSTREAM_TASK_INCOMPATIBLE_VER   -> STREAM_STATA_NO_COMPATIBLE
 *   - msgVer == SSTREAM_TASK_NEED_CONVERT_VER   -> STREAM_STATA_NEED_CONVERT
 *   - msgVer == SSTREAM_TASK_VER                -> STREAM_STATA_COMPATIBLE
 * Records that fail to decode are skipped. When the cursor cannot be opened, the table
 * is empty, or no record decodes, the result is STREAM_STATA_COMPATIBLE.
 *
 * Returns one of the STREAM_STATE_VER values.
 */
int32_t streamMetaCheckBackendCompatible(SStreamMeta* pMeta) {
  int8_t ret = STREAM_STATA_COMPATIBLE;
  TBC*   pCur = NULL;

  if (tdbTbcOpen(pMeta->pTaskDb, &pCur, NULL) < 0) {
    // no task info, no stream
    return ret;
  }

  void*   pKey = NULL;
  int32_t kLen = 0;
  void*   pVal = NULL;
  int32_t vLen = 0;

  tdbTbcMoveToFirst(pCur);
  while (tdbTbcNext(pCur, &pKey, &kLen, &pVal, &vLen) == 0) {
    if (pVal == NULL || vLen == 0) {
      break;
    }

    SDecoder        decoder;
    SCheckpointInfo info;
    tDecoderInit(&decoder, (uint8_t*)pVal, vLen);
    if (tDecodeStreamTaskChkInfo(&decoder, &info) < 0) {
      // release the decoder before moving on; the original skipped this on the error path
      tDecoderClear(&decoder);
      continue;
    }

    if (info.msgVer <= SSTREAM_TASK_INCOMPATIBLE_VER) {
      ret = STREAM_STATA_NO_COMPATIBLE;
    } else if (info.msgVer == SSTREAM_TASK_NEED_CONVERT_VER) {
      ret = STREAM_STATA_NEED_CONVERT;
    } else if (info.msgVer == SSTREAM_TASK_VER) {
      ret = STREAM_STATA_COMPATIBLE;
    }
    tDecoderClear(&decoder);

    // only the first decodable record is inspected
    break;
  }

  // the key/value buffers handed out by the cursor are released once, after the walk
  tdbFree(pKey);
  tdbFree(pVal);
  tdbTbcClose(pCur);
  return ret;
}
/*
 * Convert the legacy stream state of this meta to the current on-disk format.
 *
 * If no backend data exists for the latest checkpoint, nothing is done and 0 is returned.
 * Otherwise the legacy backend is opened, every entry of its cfInst hash is passed to
 * streamStateCvtDataFormat(), the backend is cleaned up, and on success the
 * "<pMeta->path>/state" directory is removed.
 *
 * Returns 0 on success, the non-zero code of the failing conversion, or -1 when the
 * legacy backend cannot be opened or the path buffer cannot be allocated.
 */
int32_t streamMetaCvtDbFormat(SStreamMeta* pMeta) {
  int32_t code = 0;
  int64_t chkpId = streamMetaGetLatestCheckpointId(pMeta);

  bool exist = streamBackendDataIsExist(pMeta->path, chkpId, pMeta->vgId);
  if (exist == false) {
    return code;
  }

  SBackendWrapper* pBackend = streamBackendInit(pMeta->path, chkpId, pMeta->vgId);
  if (pBackend == NULL) {
    // previously dereferenced unconditionally below
    qError("failed to open stream backend for data conversion");
    return -1;
  }

  void* pIter = taosHashIterate(pBackend->cfInst, NULL);
  while (pIter) {
    void* key = taosHashGetKey(pIter, NULL);
    code = streamStateCvtDataFormat(pMeta->path, key, *(void**)pIter);
    if (code != 0) {
      qError("failed to cvt data");
      // NOTE(review): the iteration is abandoned midway here; confirm whether the hash
      // iterator must be cancelled explicitly before the backend is cleaned up.
      goto _EXIT;
    }

    pIter = taosHashIterate(pBackend->cfInst, pIter);
  }

_EXIT:
  streamBackendCleanup((void*)pBackend);

  if (code == 0) {
    // the legacy state directory is only dropped once every entry was converted
    size_t cap = strlen(pMeta->path) + 32;
    char*  state = taosMemoryCalloc(1, cap);
    if (state == NULL) {
      qError("failed to alloc path buffer to remove legacy stream state");
      return -1;
    }

    snprintf(state, cap, "%s%s%s", pMeta->path, TD_DIRSEP, "state");
    taosRemoveDir(state);
    taosMemoryFree(state);
  }

  return code;
}
/*
 * Decide, from streamMetaCheckBackendCompatible(), whether the persisted stream state
 * can be used directly, must be converted first, or is unusable.
 *
 * Returns 0 when the data is compatible, the result of streamMetaCvtDbFormat() when a
 * conversion is required, and -1 when the data is incompatible.
 */
int32_t streamMetaMayCvtDbFormat(SStreamMeta* pMeta) {
  int8_t verdict = streamMetaCheckBackendCompatible(pMeta);

  switch (verdict) {
    case STREAM_STATA_NEED_CONVERT:
      qInfo("stream state need covert backend format");
      return streamMetaCvtDbFormat(pMeta);

    case STREAM_STATA_NO_COMPATIBLE:
      qError(
          "stream read incompatible data, rm %s/vnode/vnode*/tq/stream if taosd cannot start, and rebuild stream "
          "manually",
          tsDataDir);
      return -1;

    case STREAM_STATA_COMPATIBLE:
    default:
      // compatible, or a value outside the enum: nothing to do
      return 0;
  }
}
/*
 * Attach a task-db backend to the task passed in `arg` (an SStreamTask*).
 *
 * Under pMeta->backendMutex, the backend registered for `key` in pMeta->pTaskDbUnique is
 * reused (taking one more reference through taskDbAddRef) when present; otherwise a new
 * one is opened with taskDbOpen() at the task's checkpoint id, registered in the
 * taskDbWrapperId ref pool and stored in the hash under `key`.
 *
 * Returns 0 on success, -1 when a new backend cannot be opened.
 */
int32_t streamTaskSetDb(SStreamMeta* pMeta, void* arg, char* key) {
  SStreamTask*    pTask = arg;
  int64_t         checkpointId = pTask->chkInfo.checkpointId;
  STaskDbWrapper* pDb = NULL;

  taosThreadMutexLock(&pMeta->backendMutex);

  void** ppFound = taosHashGet(pMeta->pTaskDbUnique, key, strlen(key));
  if (ppFound == NULL || *ppFound == NULL) {
    // first task for this key: open the backend and publish it
    pDb = taskDbOpen(pMeta->path, key, checkpointId);
    if (pDb == NULL) {
      taosThreadMutexUnlock(&pMeta->backendMutex);
      return -1;
    }

    int64_t refId = taosAddRef(taskDbWrapperId, pDb);
    pTask->backendRefId = refId;
    pTask->pBackend = pDb;
    pDb->refId = refId;
    pDb->pTask = pTask;

    taosHashPut(pMeta->pTaskDbUnique, key, strlen(key), &pDb, sizeof(void*));
  } else {
    // backend already opened for this key: share it
    taskDbAddRef(*ppFound);
    pDb = *ppFound;
    pTask->backendRefId = pDb->refId;
    pTask->pBackend = pDb;
  }

  taosThreadMutexUnlock(&pMeta->backendMutex);

  stDebug("s-task:0x%x set backend %p", pTask->id.taskId, pDb);
  return 0;
}
SStreamMeta* streamMetaOpen(const char* path, void* ahandle, FTaskExpand expandFunc, int32_t vgId, int64_t stage) {
int32_t code = -1;
SStreamMeta* pMeta = taosMemoryCalloc(1, sizeof(SStreamMeta));
@ -121,15 +293,11 @@ SStreamMeta* streamMetaOpen(const char* path, void* ahandle, FTaskExpand expandF
sprintf(tpath, "%s%s%s", path, TD_DIRSEP, "stream");
pMeta->path = tpath;
if (tdbOpen(pMeta->path, 16 * 1024, 1, &pMeta->db, 0) < 0) {
if (streamMetaOpenTdb(pMeta) < 0) {
goto _err;
}
if (tdbTbOpen("task.db", STREAM_TASK_KEY_LEN, -1, NULL, pMeta->db, &pMeta->pTaskDb, 0) < 0) {
goto _err;
}
if (tdbTbOpen("checkpoint.db", sizeof(int32_t), -1, NULL, pMeta->db, &pMeta->pCheckpointDb, 0) < 0) {
if (streamMetaMayCvtDbFormat(pMeta) < 0) {
goto _err;
}
if (streamMetaBegin(pMeta) < 0) {
@ -176,47 +344,41 @@ SStreamMeta* streamMetaOpen(const char* path, void* ahandle, FTaskExpand expandF
pMeta->stage = stage;
pMeta->role = (vgId == SNODE_HANDLE) ? NODE_ROLE_LEADER : NODE_ROLE_UNINIT;
// send heartbeat every 5sec.
pMeta->rid = taosAddRef(streamMetaId, pMeta);
int64_t* pRid = taosMemoryMalloc(sizeof(int64_t));
*pRid = pMeta->rid;
pMeta->pTaskDbUnique = taosHashInit(64, taosGetDefaultHashFunction(TSDB_DATA_TYPE_BINARY), false, HASH_ENTRY_LOCK);
metaRefMgtAdd(pMeta->vgId, pRid);
pMeta->pHbInfo->hbTmr = taosTmrStart(metaHbToMnode, META_HB_CHECK_INTERVAL, pRid, streamEnv.timer);
pMeta->pHbInfo->tickCounter = 0;
pMeta->pHbInfo->stopFlag = 0;
pMeta->pTaskBackendUnique =
taosHashInit(64, taosGetDefaultHashFunction(TSDB_DATA_TYPE_BINARY), false, HASH_ENTRY_LOCK);
pMeta->chkpSaved = taosArrayInit(4, sizeof(int64_t));
pMeta->chkpInUse = taosArrayInit(4, sizeof(int64_t));
pMeta->chkpCap = 2;
taosInitRWLatch(&pMeta->chkpDirLock);
pMeta->chkpId = streamMetaGetLatestCheckpointId(pMeta);
pMeta->streamBackend = streamBackendInit(pMeta->path, pMeta->chkpId, pMeta->vgId);
while (pMeta->streamBackend == NULL) {
taosMsleep(100);
pMeta->streamBackend = streamBackendInit(pMeta->path, pMeta->chkpId, vgId);
if (pMeta->streamBackend == NULL) {
stInfo("vgId:%d failed to init stream backend, retry in 100ms", pMeta->vgId);
}
}
pMeta->streamBackendRid = taosAddRef(streamBackendId, pMeta->streamBackend);
code = streamBackendLoadCheckpointInfo(pMeta);
taosInitRWLatch(&pMeta->lock);
taosThreadMutexInit(&pMeta->backendMutex, NULL);
// pMeta->chkpId = streamGetLatestCheckpointId(pMeta);
// pMeta->streamBackend = streamBackendInit(pMeta->path, pMeta->chkpId);
// while (pMeta->streamBackend == NULL) {
// qError("vgId:%d failed to init stream backend", pMeta->vgId);
// taosMsleep(2 * 1000);
// qInfo("vgId:%d retry to init stream backend", pMeta->vgId);
// pMeta->streamBackend = streamBackendInit(pMeta->path, pMeta->chkpId);
// if (pMeta->streamBackend == NULL) {
// }
// }
// pMeta->streamBackendRid = taosAddRef(streamBackendId, pMeta->streamBackend);
pMeta->numOfPausedTasks = 0;
pMeta->numOfStreamTasks = 0;
stInfo("vgId:%d open stream meta successfully, latest checkpoint:%" PRId64 ", stage:%" PRId64, vgId, pMeta->chkpId,
stage);
pMeta->rid = taosAddRef(streamMetaId, pMeta);
int64_t* pRid = taosMemoryMalloc(sizeof(int64_t));
memcpy(pRid, &pMeta->rid, sizeof(pMeta->rid));
metaRefMgtAdd(pMeta->vgId, pRid);
pMeta->pHbInfo->hbTmr = taosTmrStart(metaHbToMnode, META_HB_CHECK_INTERVAL, pRid, streamEnv.timer);
pMeta->pHbInfo->tickCounter = 0;
pMeta->pHbInfo->stopFlag = 0;
pMeta->qHandle = taosInitScheduler(32, 1, "stream-chkp", NULL);
pMeta->bkdChkptMgt = bkdMgtCreate(tpath);
return pMeta;
_err:
_err:
taosMemoryFree(pMeta->path);
if (pMeta->pTasksMap) taosHashCleanup(pMeta->pTasksMap);
if (pMeta->pTaskList) taosArrayDestroy(pMeta->pTaskList);
@ -311,14 +473,13 @@ void streamMetaClear(SStreamMeta* pMeta) {
taosRemoveRef(streamBackendId, pMeta->streamBackendRid);
taosHashClear(pMeta->pTasksMap);
taosHashClear(pMeta->pTaskBackendUnique);
taosHashClear(pMeta->pTaskDbUnique);
taosArrayClear(pMeta->pTaskList);
taosArrayClear(pMeta->chkpSaved);
taosArrayClear(pMeta->chkpInUse);
pMeta->numOfStreamTasks = 0;
pMeta->numOfPausedTasks = 0;
pMeta->chkptNotReadyTasks = 0;
// the willrestart/starting flag can NOT be cleared
taosHashClear(pMeta->startInfo.pReadyTaskSet);
@ -360,7 +521,9 @@ void streamMetaCloseImpl(void* arg) {
taosArrayDestroy(pMeta->chkpInUse);
taosHashCleanup(pMeta->pTasksMap);
taosHashCleanup(pMeta->pTaskBackendUnique);
taosHashCleanup(pMeta->pTaskDbUnique);
taosHashCleanup(pMeta->pUpdateTaskSet);
// taosHashCleanup(pMeta->pTaskBackendUnique);
taosHashCleanup(pMeta->updateInfo.pTasks);
taosHashCleanup(pMeta->startInfo.pReadyTaskSet);
taosHashCleanup(pMeta->startInfo.pFailedTaskSet);
@ -369,6 +532,11 @@ void streamMetaCloseImpl(void* arg) {
taosMemoryFree(pMeta->path);
taosThreadMutexDestroy(&pMeta->backendMutex);
taosCleanUpScheduler(pMeta->qHandle);
taosMemoryFree(pMeta->qHandle);
bkdMgtDestroy(pMeta->bkdChkptMgt);
pMeta->role = NODE_ROLE_UNINIT;
taosMemoryFree(pMeta);
stDebug("end to close stream meta");
@ -661,6 +829,11 @@ static void doClear(void* pKey, void* pVal, TBC* pCur, SArray* pRecycleList) {
taosArrayDestroy(pRecycleList);
}
// NULL-tolerant wrapper around streamMetaLoadAllTasks(): a NULL meta is a no-op that
// reports success (0); otherwise the loader's result is returned unchanged.
int32_t streamMetaReloadAllTasks(SStreamMeta* pMeta) {
  return (pMeta != NULL) ? streamMetaLoadAllTasks(pMeta) : 0;
}
int32_t streamMetaLoadAllTasks(SStreamMeta* pMeta) {
TBC* pCur = NULL;
int32_t vgId = pMeta->vgId;
@ -728,8 +901,6 @@ int32_t streamMetaLoadAllTasks(SStreamMeta* pMeta) {
} else {
// todo this should replace the existed object put by replay creating stream task msg from mnode
stError("s-task:0x%x already added into table meta by replaying WAL, need check", pTask->id.taskId);
tdbFree(pKey);
tdbFree(pVal);
taosMemoryFree(pTask);
continue;
}
@ -983,9 +1154,9 @@ void metaHbToMnode(void* param, void* tmrId) {
entry.sinkDataSize = SIZE_IN_MiB((*pTask)->execInfo.sink.dataSize);
}
if ((*pTask)->checkpointingId != 0) {
entry.checkpointFailed = ((*pTask)->chkInfo.failedId >= (*pTask)->checkpointingId);
entry.activeCheckpointId = (*pTask)->checkpointingId;
if ((*pTask)->chkInfo.checkpointingId != 0) {
entry.checkpointFailed = ((*pTask)->chkInfo.failedId >= (*pTask)->chkInfo.checkpointingId);
entry.activeCheckpointId = (*pTask)->chkInfo.checkpointingId;
}
if ((*pTask)->exec.pWalReader != NULL) {
@ -1028,7 +1199,9 @@ void metaHbToMnode(void* param, void* tmrId) {
}
tEncoderClear(&encoder);
SRpcMsg msg = {.info.noResp = 1,};
SRpcMsg msg = {
.info.noResp = 1,
};
initRpcMsg(&msg, TDMT_MND_STREAM_HEARTBEAT, buf, tlen);
pMeta->pHbInfo->hbCount += 1;
@ -1040,7 +1213,7 @@ void metaHbToMnode(void* param, void* tmrId) {
stDebug("vgId:%d no tasks and no mnd epset, not send stream hb to mnode", pMeta->vgId);
}
_end:
_end:
clearHbMsg(&hbMsg, pIdList);
taosTmrReset(metaHbToMnode, META_HB_CHECK_INTERVAL, param, streamEnv.timer, &pMeta->pHbInfo->hbTmr);
taosReleaseRef(streamMetaId, rid);
@ -1070,8 +1243,8 @@ bool streamMetaTaskInTimer(SStreamMeta* pMeta) {
void streamMetaNotifyClose(SStreamMeta* pMeta) {
int32_t vgId = pMeta->vgId;
stDebug("vgId:%d notify all stream tasks that the vnode is closing. isLeader:%d startHb:%" PRId64 ", totalHb:%d", vgId,
(pMeta->role == NODE_ROLE_LEADER), pMeta->pHbInfo->hbStart, pMeta->pHbInfo->hbCount);
stDebug("vgId:%d notify all stream tasks that the vnode is closing. isLeader:%d startHb:%" PRId64 ", totalHb:%d",
vgId, (pMeta->role == NODE_ROLE_LEADER), pMeta->pHbInfo->hbStart, pMeta->pHbInfo->hbCount);
streamMetaWLock(pMeta);
@ -1142,4 +1315,19 @@ void streamMetaWUnLock(SStreamMeta* pMeta) {
stTrace("vgId:%d meta-wunlock", pMeta->vgId);
taosWUnLockLatch(&pMeta->lock);
}
static void execHelper(struct SSchedMsg* pSchedMsg) {
__async_exec_fn_t execFn = (__async_exec_fn_t)pSchedMsg->ahandle;
int32_t code = execFn(pSchedMsg->thandle);
if (code != 0 && pSchedMsg->msg != NULL) {
*(int32_t*)pSchedMsg->msg = code;
}
}
/*
 * Queue `fn(param)` on the meta's scheduler (pMeta->qHandle).
 *
 * `code` may be NULL; when it is not, execHelper() writes the callback's non-zero result
 * into it, so it must remain valid until the callback has run.
 * Returns the result of taosScheduleTask().
 */
int32_t streamMetaAsyncExec(SStreamMeta* pMeta, __stream_async_exec_fn_t fn, void* param, int32_t* code) {
  SSchedMsg task = {
      .fp = execHelper,
      .ahandle = fn,
      .thandle = param,
      .msg = code,
  };

  return taosScheduleTask(pMeta->qHandle, &task);
}

View File

@ -32,6 +32,7 @@ typedef struct SBackendFileItem {
char* name;
int8_t type;
int64_t size;
int8_t ref;
} SBackendFileItem;
typedef struct SBackendFile {
char* pCurrent;
@ -40,7 +41,28 @@ typedef struct SBackendFile {
SArray* pSst;
char* pCheckpointMeta;
char* path;
} SBanckendFile;
// Snapshot file set of one task backend: the files found in a single checkpoint directory
// plus the cursor state used while those files are streamed out (streamSnapRead) or
// written back (streamSnapWriteImpl).
typedef struct SBackendSnapFiles2 {
char* pCurrent;  // name of the directory entry starting with ROCKSDB_CURRENT (strdup'ed)
char* pMainfest;  // name of the directory entry starting with ROCKSDB_MAINFEST (strdup'ed)
char* pOptions;  // name of the directory entry starting with ROCKSDB_OPTIONS (strdup'ed)
SArray* pSst;  // array of char*: names of the entries ending with ROCKSDB_SST
char* pCheckpointMeta;  // name of the entry starting with ROCKSDB_CHECKPOINT_META; may stay NULL
char* path;  // directory holding the files above
int64_t checkpointId;  // NOTE(review): not written in the visible code — confirm usage
int64_t seraial;  // running total of bytes read by streamSnapRead
int64_t offset;  // read/write position inside the currently open file
TdFilePtr fd;  // currently open file, NULL when none is open
int8_t filetype;  // NOTE(review): not used in the visible code — confirm usage
SArray* pFileList;  // array of SBackendFileItem, in transfer order
int32_t currFileIdx;  // index into pFileList of the file being transferred
SStreamTaskSnap snapInfo;  // copy of the task snapshot descriptor this set was built from
int8_t inited;  // NOTE(review): not used in the visible code — confirm usage
} SBackendSnapFile2;
struct SStreamSnapHandle {
void* handle;
SBanckendFile* pBackendFile;
@ -51,16 +73,23 @@ struct SStreamSnapHandle {
int8_t filetype;
SArray* pFileList;
int32_t currFileIdx;
int8_t delFlag; // 0 : not del, 1: del
char* metaPath;
SArray* pDbSnapSet;
int32_t currIdx;
int8_t delFlag; // 0 : not del, 1: del
};
struct SStreamSnapBlockHdr {
int8_t type;
int8_t flag;
int64_t index;
char name[128];
int64_t totalSize;
int64_t size;
uint8_t data[];
// int64_t streamId;
// int64_t taskId;
SStreamTaskSnap snapInfo;
char name[128];
int64_t totalSize;
int64_t size;
uint8_t data[];
};
struct SStreamSnapReader {
void* pMeta;
@ -82,7 +111,7 @@ const char* ROCKSDB_CURRENT = "CURRENT";
const char* ROCKSDB_CHECKPOINT_META = "CHECKPOINT";
static int64_t kBlockSize = 64 * 1024;
int32_t streamSnapHandleInit(SStreamSnapHandle* handle, char* path, int64_t chkpId, void* pMeta);
int32_t streamSnapHandleInit(SStreamSnapHandle* handle, char* path, void* pMeta);
void streamSnapHandleDestroy(SStreamSnapHandle* handle);
// static void streamBuildFname(char* path, char* file, char* fullname)
@ -106,195 +135,205 @@ int32_t streamGetFileSize(char* path, char* name, int64_t* sz) {
// Open the file `name` that lives under directory `path`, using the taosOpenFile() flags
// in `opt`. Returns whatever taosOpenFile() returns; callers in this file treat NULL as
// failure.
TdFilePtr streamOpenFile(char* path, char* name, int32_t opt) {
char fullname[256] = {0};
// NOTE(review): STREAM_ROCKSDB_BUILD_FULLNAME is defined outside this view; presumably it
// joins path and name into fullname — confirm it cannot write past the 256-byte buffer.
STREAM_ROCKSDB_BUILD_FULLNAME(path, name, fullname);
return taosOpenFile(fullname, opt);
}
int32_t streamSnapHandleInit(SStreamSnapHandle* pHandle, char* path, int64_t chkpId, void* pMeta) {
// impl later
int len = strlen(path);
char* tdir = taosMemoryCalloc(1, len + 256);
memcpy(tdir, path, len);
// Fill `pSnap` with the snapshot descriptors produced by taskDbBuildSnap(arg, pSnap) and
// return its result. `path` is accepted for the caller's convenience but is not used.
int32_t streamTaskDbGetSnapInfo(void* arg, char* path, SArray* pSnap) {
  (void)path;
  return taskDbBuildSnap(arg, pSnap);
}
int32_t code = 0;
void snapFileDebugInfo(SBackendSnapFile2* pSnapFile) {
if (qDebugFlag & DEBUG_DEBUG) {
char* buf = taosMemoryCalloc(1, 512);
sprintf(buf + strlen(buf), "[");
int8_t validChkp = 0;
if (chkpId != 0) {
sprintf(tdir, "%s%s%s%s%s%scheckpoint%" PRId64 "", path, TD_DIRSEP, "stream", TD_DIRSEP, "checkpoints", TD_DIRSEP,
chkpId);
if (taosIsDir(tdir)) {
validChkp = 1;
stInfo("%s start to read snap %s", STREAM_STATE_TRANSFER, tdir);
streamBackendAddInUseChkp(pMeta, chkpId);
} else {
stWarn("%s failed to read from %s, reason: dir not exist,retry to default state dir", STREAM_STATE_TRANSFER,
tdir);
if (pSnapFile->pCurrent) sprintf(buf, "current: %s,", pSnapFile->pCurrent);
if (pSnapFile->pMainfest) sprintf(buf + strlen(buf), "MANIFEST: %s,", pSnapFile->pMainfest);
if (pSnapFile->pOptions) sprintf(buf + strlen(buf), "options: %s,", pSnapFile->pOptions);
if (pSnapFile->pSst) {
for (int i = 0; i < taosArrayGetSize(pSnapFile->pSst); i++) {
char* name = taosArrayGetP(pSnapFile->pSst, i);
sprintf(buf + strlen(buf), "%s,", name);
}
}
sprintf(buf + strlen(buf) - 1, "]");
qInfo("%s %" PRId64 "-%" PRId64 " get file list: %s", STREAM_STATE_TRANSFER, pSnapFile->snapInfo.streamId,
pSnapFile->snapInfo.taskId, buf);
taosMemoryFree(buf);
}
}
// no checkpoint specified or not exists invalid checkpoint, do checkpoint at default path and translate it
if (validChkp == 0) {
sprintf(tdir, "%s%s%s%s%s", path, TD_DIRSEP, "stream", TD_DIRSEP, "state");
char* chkpdir = taosMemoryCalloc(1, len + 256);
sprintf(chkpdir, "%s%s%s", tdir, TD_DIRSEP, "tmp");
taosMemoryFree(tdir);
int32_t snapFileGenMeta(SBackendSnapFile2* pSnapFile) {
SBackendFileItem item = {0};
item.ref = 1;
// current
item.name = pSnapFile->pCurrent;
item.type = ROCKSDB_CURRENT_TYPE;
streamGetFileSize(pSnapFile->path, item.name, &item.size);
taosArrayPush(pSnapFile->pFileList, &item);
tdir = chkpdir;
stInfo("%s start to trigger checkpoint on %s", STREAM_STATE_TRANSFER, tdir);
// mainfest
item.name = pSnapFile->pMainfest;
item.type = ROCKSDB_MAINFEST_TYPE;
streamGetFileSize(pSnapFile->path, item.name, &item.size);
taosArrayPush(pSnapFile->pFileList, &item);
code = streamBackendTriggerChkp(pMeta, tdir);
if (code != 0) {
stError("%s failed to trigger chekckpoint at %s", STREAM_STATE_TRANSFER, tdir);
taosMemoryFree(tdir);
return code;
}
pHandle->delFlag = 1;
chkpId = 0;
// options
item.name = pSnapFile->pOptions;
item.type = ROCKSDB_OPTIONS_TYPE;
streamGetFileSize(pSnapFile->path, item.name, &item.size);
taosArrayPush(pSnapFile->pFileList, &item);
// sst
for (int i = 0; i < taosArrayGetSize(pSnapFile->pSst); i++) {
char* sst = taosArrayGetP(pSnapFile->pSst, i);
item.name = sst;
item.type = ROCKSDB_SST_TYPE;
streamGetFileSize(pSnapFile->path, item.name, &item.size);
taosArrayPush(pSnapFile->pFileList, &item);
}
stInfo("%s start to read dir: %s", STREAM_STATE_TRANSFER, tdir);
TdDirPtr pDir = taosOpenDir(tdir);
// meta
item.name = pSnapFile->pCheckpointMeta;
item.type = ROCKSDB_CHECKPOINT_META_TYPE;
if (streamGetFileSize(pSnapFile->path, item.name, &item.size) == 0) {
taosArrayPush(pSnapFile->pFileList, &item);
}
return 0;
}
int32_t snapFileReadMeta(SBackendSnapFile2* pSnapFile) {
TdDirPtr pDir = taosOpenDir(pSnapFile->path);
if (NULL == pDir) {
stError("%s failed to open %s", STREAM_STATE_TRANSFER, tdir);
goto _err;
qError("%s failed to open %s", STREAM_STATE_TRANSFER, pSnapFile->path);
return -1;
}
SBanckendFile* pFile = taosMemoryCalloc(1, sizeof(SBanckendFile));
pHandle->pBackendFile = pFile;
pHandle->checkpointId = chkpId;
pHandle->seraial = 0;
pFile->path = tdir;
pFile->pSst = taosArrayInit(16, sizeof(void*));
TdDirEntryPtr pDirEntry;
while ((pDirEntry = taosReadDir(pDir)) != NULL) {
char* name = taosGetDirEntryName(pDirEntry);
if (strlen(name) >= strlen(ROCKSDB_CURRENT) && 0 == strncmp(name, ROCKSDB_CURRENT, strlen(ROCKSDB_CURRENT))) {
pFile->pCurrent = taosStrdup(name);
pSnapFile->pCurrent = taosStrdup(name);
continue;
}
if (strlen(name) >= strlen(ROCKSDB_MAINFEST) && 0 == strncmp(name, ROCKSDB_MAINFEST, strlen(ROCKSDB_MAINFEST))) {
pFile->pMainfest = taosStrdup(name);
pSnapFile->pMainfest = taosStrdup(name);
continue;
}
if (strlen(name) >= strlen(ROCKSDB_OPTIONS) && 0 == strncmp(name, ROCKSDB_OPTIONS, strlen(ROCKSDB_OPTIONS))) {
pFile->pOptions = taosStrdup(name);
pSnapFile->pOptions = taosStrdup(name);
continue;
}
if (strlen(name) >= strlen(ROCKSDB_CHECKPOINT_META) &&
0 == strncmp(name, ROCKSDB_CHECKPOINT_META, strlen(ROCKSDB_CHECKPOINT_META))) {
pFile->pCheckpointMeta = taosStrdup(name);
pSnapFile->pCheckpointMeta = taosStrdup(name);
continue;
}
if (strlen(name) >= strlen(ROCKSDB_SST) &&
0 == strncmp(name + strlen(name) - strlen(ROCKSDB_SST), ROCKSDB_SST, strlen(ROCKSDB_SST))) {
char* sst = taosStrdup(name);
taosArrayPush(pFile->pSst, &sst);
taosArrayPush(pSnapFile->pSst, &sst);
}
}
if (qDebugFlag & DEBUG_TRACE) {
char* buf = taosMemoryCalloc(1, 128 + taosArrayGetSize(pFile->pSst) * 64);
sprintf(buf, "[current: %s,", pFile->pCurrent);
sprintf(buf + strlen(buf), "MANIFEST: %s,", pFile->pMainfest);
sprintf(buf + strlen(buf), "options: %s,", pFile->pOptions);
for (int i = 0; i < taosArrayGetSize(pFile->pSst); i++) {
char* name = taosArrayGetP(pFile->pSst, i);
sprintf(buf + strlen(buf), "%s,", name);
}
sprintf(buf + strlen(buf) - 1, "]");
stInfo("%s get file list: %s", STREAM_STATE_TRANSFER, buf);
taosMemoryFree(buf);
}
taosCloseDir(&pDir);
if (pFile->pCurrent == NULL) {
stError("%s failed to open %s, reason: no valid file", STREAM_STATE_TRANSFER, tdir);
code = -1;
tdir = NULL;
goto _err;
}
SArray* list = taosArrayInit(64, sizeof(SBackendFileItem));
SBackendFileItem item;
// current
item.name = pFile->pCurrent;
item.type = ROCKSDB_CURRENT_TYPE;
streamGetFileSize(pFile->path, item.name, &item.size);
taosArrayPush(list, &item);
// mainfest
item.name = pFile->pMainfest;
item.type = ROCKSDB_MAINFEST_TYPE;
streamGetFileSize(pFile->path, item.name, &item.size);
taosArrayPush(list, &item);
// options
item.name = pFile->pOptions;
item.type = ROCKSDB_OPTIONS_TYPE;
streamGetFileSize(pFile->path, item.name, &item.size);
taosArrayPush(list, &item);
// sst
for (int i = 0; i < taosArrayGetSize(pFile->pSst); i++) {
char* sst = taosArrayGetP(pFile->pSst, i);
item.name = sst;
item.type = ROCKSDB_SST_TYPE;
streamGetFileSize(pFile->path, item.name, &item.size);
taosArrayPush(list, &item);
}
// meta
item.name = pFile->pCheckpointMeta;
item.type = ROCKSDB_CHECKPOINT_META_TYPE;
if (streamGetFileSize(pFile->path, item.name, &item.size) == 0) {
taosArrayPush(list, &item);
}
pHandle->pBackendFile = pFile;
pHandle->currFileIdx = 0;
pHandle->pFileList = list;
pHandle->seraial = 0;
pHandle->offset = 0;
pHandle->handle = pMeta;
return 0;
}
/*
 * Build the snapshot file set for one task snapshot.
 *
 * The checkpoint directory "<pSnap->dbPrefixPath>/checkpoints/checkpoint<chkpId>" must
 * exist. Its entries are collected with snapFileReadMeta() and turned into the ordered
 * transfer list with snapFileGenMeta().
 *
 * metaPath   not used by this function
 * pSnap      snapshot descriptor; copied into pSnapFile->snapInfo
 * pSnapFile  output; on success it owns the directory path string
 *
 * Returns 0 on success; -1 (or the helper's non-zero code) on failure. On failure the
 * path buffer is released and pSnapFile->path is reset to NULL so that a later
 * snapFileDestroy() on the same object cannot free it a second time; arrays that were
 * already created stay attached to pSnapFile for the caller to destroy.
 */
int32_t streamBackendSnapInitFile(char* metaPath, SStreamTaskSnap* pSnap, SBackendSnapFile2* pSnapFile) {
  int32_t code = -1;

  size_t cap = strlen(pSnap->dbPrefixPath) + 256;
  char*  path = taosMemoryCalloc(1, cap);
  if (path == NULL) {
    return -1;
  }

  snprintf(path, cap, "%s%s%s%s%s%" PRId64 "", pSnap->dbPrefixPath, TD_DIRSEP, "checkpoints", TD_DIRSEP, "checkpoint",
           pSnap->chkpId);
  if (!taosIsDir(path)) {
    goto _ERROR;
  }

  pSnapFile->pSst = taosArrayInit(16, sizeof(void*));
  pSnapFile->pFileList = taosArrayInit(64, sizeof(SBackendFileItem));
  pSnapFile->path = path;
  pSnapFile->snapInfo = *pSnap;

  if ((code = snapFileReadMeta(pSnapFile)) != 0) {
    goto _ERROR;
  }
  if ((code = snapFileGenMeta(pSnapFile)) != 0) {
    goto _ERROR;
  }

  snapFileDebugInfo(pSnapFile);
  return 0;

_ERROR:
  // do not leave a dangling pointer to the buffer released below
  if (pSnapFile->path == path) {
    pSnapFile->path = NULL;
  }
  taosMemoryFree(path);
  return code;
}
/*
 * Release everything owned by one snapshot file set: the name strings, the directory
 * path, every sst name, the names of file-list items that own their string (ref == 0),
 * both arrays, and the open file. The SBackendSnapFile2 object itself is not freed.
 */
void snapFileDestroy(SBackendSnapFile2* pSnap) {
  taosMemoryFree(pSnap->pCheckpointMeta);
  taosMemoryFree(pSnap->pCurrent);
  taosMemoryFree(pSnap->pMainfest);
  taosMemoryFree(pSnap->pOptions);
  taosMemoryFree(pSnap->path);

  int32_t idx = 0;
  while (idx < taosArrayGetSize(pSnap->pSst)) {
    char* sstName = taosArrayGetP(pSnap->pSst, idx);
    taosMemoryFree(sstName);
    idx += 1;
  }

  // unite read/write snap file: items with ref != 0 borrow a string that was freed above
  idx = 0;
  while (idx < taosArrayGetSize(pSnap->pFileList)) {
    SBackendFileItem* pEntry = taosArrayGet(pSnap->pFileList, idx);
    if (pEntry->ref == 0) {
      taosMemoryFree(pEntry->name);
    }
    idx += 1;
  }

  taosArrayDestroy(pSnap->pFileList);
  taosArrayDestroy(pSnap->pSst);
  taosCloseFile(&pSnap->fd);
}
/*
 * Prepare a snapshot handle for reading.
 *
 * Collects one SStreamTaskSnap per task db through streamTaskDbGetSnapInfo(), builds a
 * SBackendSnapFile2 for each of them with streamBackendSnapInitFile(), and stores the
 * resulting set in pHandle->pDbSnapSet with the read cursor (currIdx) at 0.
 *
 * Returns 0 on success and -1 on failure; pHandle is left untouched on failure.
 */
int32_t streamSnapHandleInit(SStreamSnapHandle* pHandle, char* path, void* pMeta) {
  SArray* pSnapSet = taosArrayInit(4, sizeof(SStreamTaskSnap));
  if (pSnapSet == NULL) {
    return -1;
  }

  int32_t code = streamTaskDbGetSnapInfo(pMeta, path, pSnapSet);
  if (code != 0) {
    // the array itself was leaked here before
    // NOTE(review): entries possibly left in the array are not freed, since it is not visible
    // whether the producer releases their dbPrefixPath itself on failure — confirm.
    taosArrayDestroy(pSnapSet);
    return -1;
  }

  SArray* pDbSnapSet = taosArrayInit(8, sizeof(SBackendSnapFile2));
  if (pDbSnapSet == NULL) {
    code = -1;
    goto _cleanup;
  }

  for (int i = 0; i < taosArrayGetSize(pSnapSet); i++) {
    SStreamTaskSnap* pSnap = taosArrayGet(pSnapSet, i);

    SBackendSnapFile2 snapFile = {0};
    code = streamBackendSnapInitFile(path, pSnap, &snapFile);
    ASSERT(code == 0);
    taosArrayPush(pDbSnapSet, &snapFile);
  }

  pHandle->pDbSnapSet = pDbSnapSet;
  pHandle->currIdx = 0;
  code = 0;

_cleanup:
  // the descriptors were only needed to build the file sets; each file set keeps its own copy
  for (int i = 0; i < taosArrayGetSize(pSnapSet); i++) {
    SStreamTaskSnap* pSnap = taosArrayGet(pSnapSet, i);
    taosMemoryFree(pSnap->dbPrefixPath);
  }
  taosArrayDestroy(pSnapSet);

  return code;
}
void streamSnapHandleDestroy(SStreamSnapHandle* handle) {
SBanckendFile* pFile = handle->pBackendFile;
if (handle->checkpointId == 0) {
// del tmp dir
if (pFile && taosIsDir(pFile->path)) {
if (handle->delFlag) taosRemoveDir(pFile->path);
if (handle->pDbSnapSet) {
for (int i = 0; i < taosArrayGetSize(handle->pDbSnapSet); i++) {
SBackendSnapFile2* pSnapFile = taosArrayGet(handle->pDbSnapSet, i);
snapFileDebugInfo(pSnapFile);
snapFileDestroy(pSnapFile);
}
} else {
streamBackendDelInUseChkp(handle->handle, handle->checkpointId);
taosArrayDestroy(handle->pDbSnapSet);
}
if (pFile) {
taosMemoryFree(pFile->pCheckpointMeta);
taosMemoryFree(pFile->pCurrent);
taosMemoryFree(pFile->pMainfest);
taosMemoryFree(pFile->pOptions);
taosMemoryFree(pFile->path);
for (int i = 0; i < taosArrayGetSize(pFile->pSst); i++) {
char* sst = taosArrayGetP(pFile->pSst, i);
taosMemoryFree(sst);
}
taosArrayDestroy(pFile->pSst);
taosMemoryFree(pFile);
}
taosArrayDestroy(handle->pFileList);
taosCloseFile(&handle->fd);
taosMemoryFree(handle->metaPath);
return;
}
@ -305,7 +344,7 @@ int32_t streamSnapReaderOpen(void* pMeta, int64_t sver, int64_t chkpId, char* pa
return TSDB_CODE_OUT_OF_MEMORY;
}
if (streamSnapHandleInit(&pReader->handle, (char*)path, chkpId, pMeta) < 0) {
if (streamSnapHandleInit(&pReader->handle, (char*)path, pMeta) < 0) {
taosMemoryFree(pReader);
return -1;
}
@ -321,34 +360,50 @@ int32_t streamSnapReaderClose(SStreamSnapReader* pReader) {
taosMemoryFree(pReader);
return 0;
}
int32_t streamSnapRead(SStreamSnapReader* pReader, uint8_t** ppData, int64_t* size) {
// impl later
int32_t code = 0;
SStreamSnapHandle* pHandle = &pReader->handle;
SBanckendFile* pFile = pHandle->pBackendFile;
int32_t idx = pHandle->currIdx;
SBackendFileItem* item = taosArrayGet(pHandle->pFileList, pHandle->currFileIdx);
SBackendSnapFile2* pSnapFile = taosArrayGet(pHandle->pDbSnapSet, idx);
if (pSnapFile == NULL) {
return 0;
}
SBackendFileItem* item = NULL;
_NEXT:
if (pSnapFile->fd == NULL) {
if (pSnapFile->currFileIdx >= taosArrayGetSize(pSnapFile->pFileList)) {
if (pHandle->currIdx + 1 < taosArrayGetSize(pHandle->pDbSnapSet)) {
pHandle->currIdx += 1;
pSnapFile = taosArrayGet(pHandle->pDbSnapSet, pHandle->currIdx);
goto _NEXT;
} else {
*ppData = NULL;
*size = 0;
return 0;
}
if (pHandle->fd == NULL) {
if (pHandle->currFileIdx >= taosArrayGetSize(pHandle->pFileList)) {
// finish
*ppData = NULL;
*size = 0;
return 0;
} else {
pHandle->fd = streamOpenFile(pFile->path, item->name, TD_FILE_READ);
item = taosArrayGet(pSnapFile->pFileList, pSnapFile->currFileIdx);
pSnapFile->fd = streamOpenFile(pSnapFile->path, item->name, TD_FILE_READ);
stDebug("%s open file %s, current offset:%" PRId64 ", size:% " PRId64 ", file no.%d", STREAM_STATE_TRANSFER,
item->name, (int64_t)pHandle->offset, item->size, pHandle->currFileIdx);
item->name, (int64_t)pSnapFile->offset, item->size, pSnapFile->currFileIdx);
}
}
item = taosArrayGet(pSnapFile->pFileList, pSnapFile->currFileIdx);
qDebug("%s start to read file %s, current offset:%" PRId64 ", size:%" PRId64
", file no.%d, total set:%d, current set idx: %d",
STREAM_STATE_TRANSFER, item->name, (int64_t)pSnapFile->offset, item->size, pSnapFile->currFileIdx,
(int)taosArrayGetSize(pHandle->pDbSnapSet), pHandle->currIdx);
stDebug("%s start to read file %s, current offset:%" PRId64 ", size:%" PRId64 ", file no.%d", STREAM_STATE_TRANSFER,
item->name, (int64_t)pHandle->offset, item->size, pHandle->currFileIdx);
uint8_t* buf = taosMemoryCalloc(1, sizeof(SStreamSnapBlockHdr) + kBlockSize);
if (buf == NULL) {
return TSDB_CODE_OUT_OF_MEMORY;
}
int64_t nread = taosPReadFile(pHandle->fd, buf + sizeof(SStreamSnapBlockHdr), kBlockSize, pHandle->offset);
int64_t nread = taosPReadFile(pSnapFile->fd, buf + sizeof(SStreamSnapBlockHdr), kBlockSize, pSnapFile->offset);
if (nread == -1) {
taosMemoryFree(buf);
code = TAOS_SYSTEM_ERROR(terrno);
@ -358,44 +413,51 @@ int32_t streamSnapRead(SStreamSnapReader* pReader, uint8_t** ppData, int64_t* si
} else if (nread > 0 && nread <= kBlockSize) {
// left bytes less than kBlockSize
stDebug("%s read file %s, current offset:%" PRId64 ",size:% " PRId64 ", file no.%d", STREAM_STATE_TRANSFER,
item->name, (int64_t)pHandle->offset, item->size, pHandle->currFileIdx);
pHandle->offset += nread;
if (pHandle->offset >= item->size || nread < kBlockSize) {
taosCloseFile(&pHandle->fd);
pHandle->offset = 0;
pHandle->currFileIdx += 1;
item->name, (int64_t)pSnapFile->offset, item->size, pSnapFile->currFileIdx);
pSnapFile->offset += nread;
if (pSnapFile->offset >= item->size || nread < kBlockSize) {
taosCloseFile(&pSnapFile->fd);
pSnapFile->offset = 0;
pSnapFile->currFileIdx += 1;
}
} else {
stDebug("%s no data read, close file no.%d, move to next file, open and read", STREAM_STATE_TRANSFER,
pHandle->currFileIdx);
taosCloseFile(&pHandle->fd);
pHandle->offset = 0;
pHandle->currFileIdx += 1;
pSnapFile->currFileIdx);
taosCloseFile(&pSnapFile->fd);
pSnapFile->offset = 0;
pSnapFile->currFileIdx += 1;
if (pHandle->currFileIdx >= taosArrayGetSize(pHandle->pFileList)) {
if (pSnapFile->currFileIdx >= taosArrayGetSize(pSnapFile->pFileList)) {
// finish
*ppData = NULL;
*size = 0;
taosMemoryFree(buf);
return 0;
if (pHandle->currIdx + 1 < taosArrayGetSize(pHandle->pDbSnapSet)) {
// skip to next snap set
pHandle->currIdx += 1;
pSnapFile = taosArrayGet(pHandle->pDbSnapSet, pHandle->currIdx);
goto _NEXT;
} else {
*ppData = NULL;
*size = 0;
return 0;
}
}
item = taosArrayGet(pHandle->pFileList, pHandle->currFileIdx);
pHandle->fd = streamOpenFile(pFile->path, item->name, TD_FILE_READ);
item = taosArrayGet(pSnapFile->pFileList, pSnapFile->currFileIdx);
pSnapFile->fd = streamOpenFile(pSnapFile->path, item->name, TD_FILE_READ);
nread = taosPReadFile(pHandle->fd, buf + sizeof(SStreamSnapBlockHdr), kBlockSize, pHandle->offset);
pHandle->offset += nread;
nread = taosPReadFile(pSnapFile->fd, buf + sizeof(SStreamSnapBlockHdr), kBlockSize, pSnapFile->offset);
pSnapFile->offset += nread;
stDebug("%s open file and read file %s, current offset:%" PRId64 ", size:% " PRId64 ", file no.%d",
STREAM_STATE_TRANSFER, item->name, (int64_t)pHandle->offset, item->size, pHandle->currFileIdx);
STREAM_STATE_TRANSFER, item->name, (int64_t)pSnapFile->offset, item->size, pSnapFile->currFileIdx);
}
SStreamSnapBlockHdr* pHdr = (SStreamSnapBlockHdr*)buf;
pHdr->size = nread;
pHdr->type = item->type;
pHdr->totalSize = item->size;
pHdr->snapInfo = pSnapFile->snapInfo;
memcpy(pHdr->name, item->name, strlen(item->name));
pHandle->seraial += nread;
pSnapFile->seraial += nread;
*ppData = buf;
*size = sizeof(SStreamSnapBlockHdr) + nread;
@ -408,101 +470,133 @@ int32_t streamSnapWriterOpen(void* pMeta, int64_t sver, int64_t ever, char* path
if (pWriter == NULL) {
return TSDB_CODE_OUT_OF_MEMORY;
}
SStreamSnapHandle* pHandle = &pWriter->handle;
pHandle->currIdx = 0;
pHandle->metaPath = taosStrdup(path);
pHandle->pDbSnapSet = taosArrayInit(8, sizeof(SBackendSnapFile2));
SBanckendFile* pFile = taosMemoryCalloc(1, sizeof(SBanckendFile));
pFile->path = taosStrdup(path);
SArray* list = taosArrayInit(64, sizeof(SBackendFileItem));
SBackendFileItem item;
item.name = taosStrdup((char*)ROCKSDB_CURRENT);
item.type = ROCKSDB_CURRENT_TYPE;
taosArrayPush(list, &item);
pHandle->pBackendFile = pFile;
pHandle->pFileList = list;
pHandle->currFileIdx = 0;
pHandle->offset = 0;
pHandle->delFlag = 0;
SBackendSnapFile2 snapFile = {0};
taosArrayPush(pHandle->pDbSnapSet, &snapFile);
*ppWriter = pWriter;
return 0;
}
// Two snapshot descriptors are considered equal when stream id, task id and checkpoint id
// all match. Returns 1 for equal, 0 otherwise.
int32_t snapInfoEqual(SStreamTaskSnap* a, SStreamTaskSnap* b) {
  int sameStream = (a->streamId == b->streamId);
  int sameTask = (a->taskId == b->taskId);
  int sameChkp = (a->chkpId == b->chkpId);

  return (sameStream && sameTask && sameChkp) ? 1 : 0;
}
/*
 * Write one received snapshot block into the file set `pSnapFile`.
 *
 * pData points to a SStreamSnapBlockHdr followed by its payload. When the block's file
 * name matches the file currently being written, the payload is written at the current
 * offset. Otherwise the current file is closed, a new file-list entry (owning a copy of
 * the name) is appended, that file is opened and the payload is written at offset 0.
 *
 * nData is not used; the payload length is taken from the header.
 *
 * Returns 0 on success, a TAOS_SYSTEM_ERROR code when opening or writing a file fails,
 * and -1 when there is no current file-list entry or the name copy cannot be allocated.
 */
int32_t streamSnapWriteImpl(SStreamSnapWriter* pWriter, uint8_t* pData, uint32_t nData, SBackendSnapFile2* pSnapFile) {
  int32_t              code = 0;
  SStreamSnapBlockHdr* pHdr = (SStreamSnapBlockHdr*)pData;
  SStreamSnapHandle*   pHandle = &pWriter->handle;

  SBackendFileItem* pItem = taosArrayGet(pSnapFile->pFileList, pSnapFile->currFileIdx);
  if (pItem == NULL) {
    stError("%s failed to write snap, file name:%s, reason:%s", STREAM_STATE_TRANSFER, pHdr->name,
            "no current file item");
    return -1;
  }

  if (pSnapFile->fd == NULL) {
    pSnapFile->fd = streamOpenFile(pSnapFile->path, pItem->name, TD_FILE_CREATE | TD_FILE_WRITE | TD_FILE_APPEND);
    if (pSnapFile->fd == NULL) {
      code = TAOS_SYSTEM_ERROR(terrno);
      stError("%s failed to open file name:%s%s%s, reason:%s", STREAM_STATE_TRANSFER, pHandle->metaPath, TD_DIRSEP,
              pHdr->name, tstrerror(code));
      // previously execution continued and wrote through the NULL handle
      return code;
    }
  }

  if (strcmp(pHdr->name, pItem->name) != 0) {
    // the block belongs to the next file: finish the current one and start a new entry
    taosCloseFile(&pSnapFile->fd);
    pSnapFile->offset = 0;
    pSnapFile->currFileIdx += 1;

    SBackendFileItem item = {0};
    item.name = taosStrdup(pHdr->name);
    item.type = pHdr->type;
    if (item.name == NULL) {
      return -1;
    }
    taosArrayPush(pSnapFile->pFileList, &item);

    pItem = taosArrayGet(pSnapFile->pFileList, pSnapFile->currFileIdx);
    if (pItem == NULL) {
      // the push above did not produce an entry at the new index
      return -1;
    }

    pSnapFile->fd = streamOpenFile(pSnapFile->path, pItem->name, TD_FILE_CREATE | TD_FILE_WRITE | TD_FILE_APPEND);
    if (pSnapFile->fd == NULL) {
      code = TAOS_SYSTEM_ERROR(terrno);
      stError("%s failed to open file name:%s%s%s, reason:%s", STREAM_STATE_TRANSFER, pSnapFile->path, TD_DIRSEP,
              pHdr->name, tstrerror(code));
      return code;
    }
  }

  // same write and same check for both the "continue file" and the "new file" case
  int64_t bytes = taosPWriteFile(pSnapFile->fd, pHdr->data, pHdr->size, pSnapFile->offset);
  if (bytes != pHdr->size) {
    code = TAOS_SYSTEM_ERROR(terrno);
    stError("%s failed to write snap, file name:%s, reason:%s", STREAM_STATE_TRANSFER, pHdr->name, tstrerror(code));
    return code;
  }

  qInfo("succ to write data %s", pItem->name);
  pSnapFile->offset += bytes;
  return 0;
}
int32_t streamSnapWrite(SStreamSnapWriter* pWriter, uint8_t* pData, uint32_t nData) {
int32_t code = 0;
SStreamSnapBlockHdr* pHdr = (SStreamSnapBlockHdr*)pData;
SStreamSnapHandle* pHandle = &pWriter->handle;
SBanckendFile* pFile = pHandle->pBackendFile;
SBackendFileItem* pItem = taosArrayGet(pHandle->pFileList, pHandle->currFileIdx);
SStreamTaskSnap snapInfo = pHdr->snapInfo;
if (pHandle->fd == NULL) {
pHandle->fd = streamOpenFile(pFile->path, pItem->name, TD_FILE_CREATE | TD_FILE_WRITE | TD_FILE_APPEND);
if (pHandle->fd == NULL) {
code = TAOS_SYSTEM_ERROR(terrno);
stError("%s failed to open file name:%s%s%s, reason:%s", STREAM_STATE_TRANSFER, pFile->path, TD_DIRSEP,
pHdr->name, tstrerror(code));
}
}
SBackendSnapFile2* pDbSnapFile = taosArrayGet(pHandle->pDbSnapSet, pHandle->currIdx);
if (pDbSnapFile->inited == 0) {
char idstr[64] = {0};
sprintf(idstr, "0x%" PRIx64 "-0x%x", snapInfo.streamId, (int32_t)(snapInfo.taskId));
if (strlen(pHdr->name) == strlen(pItem->name) && strcmp(pHdr->name, pItem->name) == 0) {
int64_t bytes = taosPWriteFile(pHandle->fd, pHdr->data, pHdr->size, pHandle->offset);
if (bytes != pHdr->size) {
code = TAOS_SYSTEM_ERROR(terrno);
stError("%s failed to write snap, file name:%s, reason:%s", STREAM_STATE_TRANSFER, pHdr->name, tstrerror(code));
return code;
char* path = taosMemoryCalloc(1, strlen(pHandle->metaPath) + 256);
sprintf(path, "%s%s%s%s%s%s%s%" PRId64 "", pHandle->metaPath, TD_DIRSEP, idstr, TD_DIRSEP, "checkpoints", TD_DIRSEP,
"checkpoint", snapInfo.chkpId);
if (!taosIsDir(path)) {
code = taosMulMkDir(path);
qInfo("%s mkdir %s", STREAM_STATE_TRANSFER, path);
ASSERT(code == 0);
}
pHandle->offset += bytes;
pDbSnapFile->path = path;
pDbSnapFile->snapInfo = snapInfo;
pDbSnapFile->pFileList = taosArrayInit(64, sizeof(SBackendFileItem));
pDbSnapFile->currFileIdx = 0;
pDbSnapFile->offset = 0;
SBackendFileItem item = {0};
item.name = taosStrdup((char*)ROCKSDB_CURRENT);
item.type = ROCKSDB_CURRENT_TYPE;
taosArrayPush(pDbSnapFile->pFileList, &item);
pDbSnapFile->inited = 1;
return streamSnapWriteImpl(pWriter, pData, nData, pDbSnapFile);
} else {
taosCloseFile(&pHandle->fd);
pHandle->offset = 0;
pHandle->currFileIdx += 1;
if (snapInfoEqual(&snapInfo, &pDbSnapFile->snapInfo)) {
return streamSnapWriteImpl(pWriter, pData, nData, pDbSnapFile);
} else {
SBackendSnapFile2 snapFile = {0};
taosArrayPush(pHandle->pDbSnapSet, &snapFile);
pHandle->currIdx += 1;
SBackendFileItem item;
item.name = taosStrdup(pHdr->name);
item.type = pHdr->type;
taosArrayPush(pHandle->pFileList, &item);
SBackendFileItem* pItem = taosArrayGet(pHandle->pFileList, pHandle->currFileIdx);
pHandle->fd = streamOpenFile(pFile->path, pItem->name, TD_FILE_CREATE | TD_FILE_WRITE | TD_FILE_APPEND);
if (pHandle->fd == NULL) {
code = TAOS_SYSTEM_ERROR(terrno);
stError("%s failed to open file name:%s%s%s, reason:%s", STREAM_STATE_TRANSFER, pFile->path, TD_DIRSEP,
pHdr->name, tstrerror(code));
return streamSnapWrite(pWriter, pData, nData);
}
taosPWriteFile(pHandle->fd, pHdr->data, pHdr->size, pHandle->offset);
pHandle->offset += pHdr->size;
}
// impl later
return 0;
return code;
}
int32_t streamSnapWriterClose(SStreamSnapWriter* pWriter, int8_t rollback) {
SStreamSnapHandle* handle = &pWriter->handle;
if (qDebugFlag & DEBUG_TRACE) {
char* buf = (char*)taosMemoryMalloc(128 + taosArrayGetSize(handle->pFileList) * 64);
int n = sprintf(buf, "[");
for (int i = 0; i < taosArrayGetSize(handle->pFileList); i++) {
SBackendFileItem* item = taosArrayGet(handle->pFileList, i);
if (i != taosArrayGetSize(handle->pFileList) - 1) {
n += sprintf(buf + n, "%s %" PRId64 ",", item->name, item->size);
} else {
n += sprintf(buf + n, "%s %" PRId64 "]", item->name, item->size);
}
}
stDebug("%s snap get file list, %s", STREAM_STATE_TRANSFER, buf);
taosMemoryFree(buf);
}
for (int i = 0; i < taosArrayGetSize(handle->pFileList); i++) {
SBackendFileItem* item = taosArrayGet(handle->pFileList, i);
taosMemoryFree(item->name);
}
streamSnapHandleDestroy(handle);
if (pWriter == NULL) return 0;
streamSnapHandleDestroy(&pWriter->handle);
taosMemoryFree(pWriter);
return 0;

View File

@ -106,51 +106,21 @@ SStreamState* streamStateOpen(char* path, void* pTask, bool specPath, int32_t sz
}
SStreamTask* pStreamTask = pTask;
char statePath[1024];
if (!specPath) {
sprintf(statePath, "%s%s%d", path, TD_DIRSEP, pStreamTask->id.taskId);
} else {
memset(statePath, 0, 1024);
tstrncpy(statePath, path, 1024);
}
pState->taskId = pStreamTask->id.taskId;
pState->streamId = pStreamTask->id.streamId;
sprintf(pState->pTdbState->idstr, "0x%" PRIx64 "-%d", pState->streamId, pState->taskId);
sprintf(pState->pTdbState->idstr, "0x%" PRIx64 "-0x%x", pState->streamId, pState->taskId);
streamTaskSetDb(pStreamTask->pMeta, pTask, pState->pTdbState->idstr);
#ifdef USE_ROCKSDB
SStreamMeta* pMeta = pStreamTask->pMeta;
pState->streamBackendRid = pMeta->streamBackendRid;
// streamMetaWLock(pMeta);
taosThreadMutexLock(&pMeta->backendMutex);
void* uniqueId =
taosHashGet(pMeta->pTaskBackendUnique, pState->pTdbState->idstr, strlen(pState->pTdbState->idstr) + 1);
if (uniqueId == NULL) {
int code = streamStateOpenBackend(pMeta->streamBackend, pState);
if (code == -1) {
taosThreadMutexUnlock(&pMeta->backendMutex);
taosMemoryFree(pState);
return NULL;
}
taosHashPut(pMeta->pTaskBackendUnique, pState->pTdbState->idstr, strlen(pState->pTdbState->idstr) + 1,
&pState->pTdbState->backendCfWrapperId, sizeof(pState->pTdbState->backendCfWrapperId));
} else {
int64_t id = *(int64_t*)uniqueId;
pState->pTdbState->backendCfWrapperId = id;
pState->pTdbState->pBackendCfWrapper = taosAcquireRef(streamBackendCfWrapperId, id);
// already exist stream task for
stInfo("already exist stream-state for %s", pState->pTdbState->idstr);
// taosAcquireRef(streamBackendId, pState->streamBackendRid);
}
taosThreadMutexUnlock(&pMeta->backendMutex);
pState->pTdbState->pOwner = pTask;
pState->pFileState = NULL;
_hash_fn_t hashFn = taosGetDefaultHashFunction(TSDB_DATA_TYPE_BIGINT);
pState->parNameMap = tSimpleHashInit(1024, hashFn);
stInfo("succ to open state %p on backend %p 0x%" PRIx64 "-%d", pState, pMeta->streamBackend, pState->streamId,
pState->taskId);
pState->taskId);
return pState;
#else
@ -237,6 +207,12 @@ _err:
#endif
}
/*
 * Drop the backend reference held through the task that owns this state and
 * free that task object.
 *
 * The owner is read from pState->pTdbState->pOwner; its pBackend is passed to
 * taskDbRemoveRef() and the owner itself is then released with
 * taosMemoryFree().
 *
 * NOTE(review): the owner task is freed here, so callers presumably must not
 * use the task afterwards - confirm against call sites.
 *
 * @param pState  stream state whose owner task is released (dereferenced
 *                unconditionally, must not be NULL)
 * @return always 0
 */
int32_t streamStateDelTaskDb(SStreamState* pState) {
  SStreamTask* pOwner = pState->pTdbState->pOwner;

  taskDbRemoveRef(pOwner->pBackend);
  taosMemoryFree(pOwner);

  return 0;
}
void streamStateClose(SStreamState* pState, bool remove) {
SStreamTask* pTask = pState->pTdbState->pOwner;
#ifdef USE_ROCKSDB
@ -692,8 +668,7 @@ void streamStateResetCur(SStreamStateCur* pCur) {
}
void streamStateFreeCur(SStreamStateCur* pCur) {
if (!pCur || pCur->buffIndex >= 0) {
taosMemoryFree(pCur);
if (!pCur) {
return;
}
qDebug("streamStateFreeCur");
@ -722,7 +697,7 @@ int32_t streamStateSessionPut(SStreamState* pState, const SSessionKey* key, void
streamStateReleaseBuf(pState, pos, true);
putFreeBuff(pState->pFileState, pos);
stDebug("===stream===save skey:%" PRId64 ", ekey:%" PRId64 ", groupId:%" PRIu64 ".code:%d", key->win.skey,
key->win.ekey, key->groupId, code);
key->win.ekey, key->groupId, code);
} else {
code = putSessionWinResultBuff(pState->pFileState, value);
}
@ -768,7 +743,7 @@ int32_t streamStateSessionGet(SStreamState* pState, SSessionKey* key, void** pVa
int32_t streamStateSessionDel(SStreamState* pState, const SSessionKey* key) {
#ifdef USE_ROCKSDB
stDebug("===stream===delete skey:%" PRId64 ", ekey:%" PRId64 ", groupId:%" PRIu64, key->win.skey, key->win.ekey,
qDebug("===stream===delete skey:%" PRId64 ", ekey:%" PRId64 ", groupId:%" PRIu64, key->win.skey, key->win.ekey,
key->groupId);
return deleteRowBuff(pState->pFileState, key, sizeof(SSessionKey));
#else
@ -1122,7 +1097,7 @@ int32_t streamStateGetParName(SStreamState* pState, int64_t groupId, void** pVal
void streamStateDestroy(SStreamState* pState, bool remove) {
#ifdef USE_ROCKSDB
streamFileStateDestroy(pState->pFileState);
streamStateDestroy_rocksdb(pState, remove);
// streamStateDestroy_rocksdb(pState, remove);
tSimpleHashCleanup(pState->parNameMap);
// do nothing
#endif
@ -1232,4 +1207,4 @@ char* streamStateIntervalDump(SStreamState* pState) {
streamStateFreeCur(pCur);
return dumpBuf;
}
#endif
#endif

View File

@ -250,9 +250,8 @@ int32_t tDecodeStreamTaskChkInfo(SDecoder* pDecoder, SCheckpointInfo* pChkpInfo)
SEpSet epSet;
if (tStartDecode(pDecoder) < 0) return -1;
if (tDecodeI64(pDecoder, &ver) < 0) return -1;
if (ver != SSTREAM_TASK_VER) return -1;
if (tDecodeI64(pDecoder, &pChkpInfo->msgVer) < 0) return -1;
// if (ver != SSTREAM_TASK_VER) return -1;
if (tDecodeI64(pDecoder, &skip64) < 0) return -1;
if (tDecodeI32(pDecoder, &skip32) < 0) return -1;
@ -379,6 +378,8 @@ void tFreeStreamTask(SStreamTask* pTask) {
if (pTask->pState) {
stDebug("s-task:0x%x start to free task state", taskId);
streamStateClose(pTask->pState, status == TASK_STATUS__DROPPING);
taskDbRemoveRef(pTask->pBackend);
}
if (pTask->id.idStr != NULL) {
@ -467,6 +468,14 @@ int32_t streamTaskInit(SStreamTask* pTask, SStreamMeta* pMeta, SMsgCb* pMsgCb, i
}
taosThreadMutexInit(&pTask->lock, &attr);
// if (pTask->info.fillHistory == 1) {
// //
// } else {
// }
// if (streamTaskSetDb(pMeta, pTask) != 0) {
// return -1;
// }
streamTaskOpenAllUpstreamInput(pTask);
pTask->outputInfo.pDownstreamUpdateList = taosArrayInit(4, sizeof(SDownstreamTaskEpset));

View File

@ -159,7 +159,7 @@ static void uvStartSendResp(SSvrMsg* msg);
static void uvNotifyLinkBrokenToApp(SSvrConn* conn);
static FORCE_INLINE void destroySmsg(SSvrMsg* smsg);
static FORCE_INLINE void destroySmsg(SSvrMsg* smsg);
static FORCE_INLINE SSvrConn* createConn(void* hThrd);
static FORCE_INLINE void destroyConn(SSvrConn* conn, bool clear /*clear handle or not*/);
static FORCE_INLINE void destroyConnRegArg(SSvrConn* conn);
@ -1499,6 +1499,7 @@ int transSendResponse(const STransMsg* msg) {
}
SExHandle* exh = msg->info.handle;
if (exh == NULL) {
rpcFreeCont(msg->pCont);
return 0;
}
int64_t refId = msg->info.refId;

View File

@ -22,7 +22,7 @@
,,y,system-test,./pytest.sh python3 ./test.py -f 8-stream/pause_resume_test.py
#,,n,system-test,python3 ./test.py -f 8-stream/vnode_restart.py -N 4
#,,n,system-test,python3 ./test.py -f 8-stream/snode_restart.py -N 4
,,n,system-test,python3 ./test.py -f 8-stream/snode_restart_with_checkpoint.py -N 4
#,,n,system-test,python3 ./test.py -f 8-stream/snode_restart_with_checkpoint.py -N 4
,,y,system-test,./pytest.sh python3 ./test.py -f 2-query/tbname_vgroup.py
,,y,system-test,./pytest.sh python3 ./test.py -f 2-query/stbJoin.py

View File

@ -31,7 +31,7 @@ class TDTestCase:
tdSql.query("use test")
tdSql.query("create snode on dnode 4")
tdSql.query("create stream if not exists s1 trigger at_once ignore expired 0 ignore update 0 fill_history 1 into st1 as select _wstart,sum(voltage),groupid from meters partition by groupid interval(2s)")
tdLog.debug("========create stream useing snode and insert data ok========")
tdLog.debug("========create stream using snode and insert data ok========")
time.sleep(60)
tdDnodes = cluster.dnodes