Merge branch '3.0' into feature/TD-11381-3.0
This commit is contained in:
commit
1805642aed
|
@ -1,18 +1,16 @@
|
|||
[submodule "src/connector/go"]
|
||||
path = src/connector/go
|
||||
url = git@github.com:taosdata/driver-go.git
|
||||
[submodule "src/connector/grafanaplugin"]
|
||||
path = src/connector/grafanaplugin
|
||||
url = git@github.com:taosdata/grafanaplugin.git
|
||||
[submodule "src/connector/hivemq-tdengine-extension"]
|
||||
path = src/connector/hivemq-tdengine-extension
|
||||
url = git@github.com:taosdata/hivemq-tdengine-extension.git
|
||||
[submodule "tests/examples/rust"]
|
||||
path = tests/examples/rust
|
||||
url = https://github.com/songtianyi/tdengine-rust-bindings.git
|
||||
[submodule "deps/jemalloc"]
|
||||
path = deps/jemalloc
|
||||
url = https://github.com/jemalloc/jemalloc
|
||||
[submodule "deps/TSZ"]
|
||||
path = deps/TSZ
|
||||
url = https://github.com/taosdata/TSZ.git
|
||||
[submodule "tests"]
|
||||
path = tests
|
||||
url = https://github.com/taosdata/tests
|
||||
branch = 3.0
|
||||
|
|
|
@ -74,6 +74,38 @@ def pre_test(){
|
|||
git pull >/dev/null
|
||||
git fetch origin +refs/pull/${CHANGE_ID}/merge
|
||||
git checkout -qf FETCH_HEAD
|
||||
git submodule update --init --recursive --remote
|
||||
'''
|
||||
script {
|
||||
if (env.CHANGE_TARGET == 'master') {
|
||||
sh '''
|
||||
cd ${WKCT}
|
||||
git checkout master
|
||||
'''
|
||||
}
|
||||
else if(env.CHANGE_TARGET == '2.0'){
|
||||
sh '''
|
||||
cd ${WKCT}
|
||||
git checkout 2.0
|
||||
'''
|
||||
}
|
||||
else if(env.CHANGE_TARGET == '3.0'){
|
||||
sh '''
|
||||
cd ${WKCT}
|
||||
git checkout 3.0
|
||||
'''
|
||||
}
|
||||
else{
|
||||
sh '''
|
||||
cd ${WKCT}
|
||||
git checkout develop
|
||||
'''
|
||||
}
|
||||
}
|
||||
sh'''
|
||||
cd ${WKCT}
|
||||
git pull >/dev/null
|
||||
cd ${WKC}
|
||||
export TZ=Asia/Harbin
|
||||
date
|
||||
rm -rf debug
|
||||
|
@ -81,7 +113,6 @@ def pre_test(){
|
|||
cd debug
|
||||
cmake .. > /dev/null
|
||||
make -j4> /dev/null
|
||||
|
||||
'''
|
||||
return 1
|
||||
}
|
||||
|
@ -92,6 +123,7 @@ pipeline {
|
|||
environment{
|
||||
WK = '/var/lib/jenkins/workspace/TDinternal'
|
||||
WKC= '/var/lib/jenkins/workspace/TDengine'
|
||||
WKCT= '/var/lib/jenkins/workspace/TDengine/tests'
|
||||
}
|
||||
stages {
|
||||
stage('pre_build'){
|
|
@ -111,8 +111,8 @@ typedef enum _mgmt_table {
|
|||
TSDB_MGMT_TABLE_MAX,
|
||||
} EShowType;
|
||||
|
||||
#define TSDB_ALTER_TABLE_ADD_TAG_COLUMN 1
|
||||
#define TSDB_ALTER_TABLE_DROP_TAG_COLUMN 2
|
||||
#define TSDB_ALTER_TABLE_ADD_TAG 1
|
||||
#define TSDB_ALTER_TABLE_DROP_TAG 2
|
||||
#define TSDB_ALTER_TABLE_UPDATE_TAG_NAME 3
|
||||
#define TSDB_ALTER_TABLE_UPDATE_TAG_VAL 4
|
||||
|
||||
|
@ -163,6 +163,12 @@ typedef struct {
|
|||
int32_t vgVersion;
|
||||
} SBuildUseDBInput;
|
||||
|
||||
typedef struct SField {
|
||||
char name[TSDB_COL_NAME_LEN];
|
||||
uint8_t type;
|
||||
int32_t bytes;
|
||||
} SField;
|
||||
|
||||
#pragma pack(push, 1)
|
||||
|
||||
// null-terminated string instead of char array to avoid too many memory consumption in case of more than 1M tableMeta
|
||||
|
@ -252,23 +258,34 @@ typedef struct SSchema {
|
|||
typedef struct {
|
||||
char name[TSDB_TABLE_FNAME_LEN];
|
||||
int8_t igExists;
|
||||
int32_t numOfTags;
|
||||
int32_t numOfColumns;
|
||||
SSchema pSchemas[];
|
||||
int32_t numOfTags;
|
||||
SArray* pColumns;
|
||||
SArray* pTags;
|
||||
char comment[TSDB_STB_COMMENT_LEN];
|
||||
} SMCreateStbReq;
|
||||
|
||||
int32_t tSerializeSMCreateStbReq(void** buf, SMCreateStbReq* pReq);
|
||||
void* tDeserializeSMCreateStbReq(void* buf, SMCreateStbReq* pReq);
|
||||
|
||||
typedef struct {
|
||||
char name[TSDB_TABLE_FNAME_LEN];
|
||||
int8_t igNotExists;
|
||||
} SMDropStbReq;
|
||||
|
||||
int32_t tSerializeSMDropStbReq(void** buf, SMDropStbReq* pReq);
|
||||
void* tDeserializeSMDropStbReq(void* buf, SMDropStbReq* pReq);
|
||||
|
||||
typedef struct {
|
||||
char name[TSDB_TABLE_FNAME_LEN];
|
||||
int8_t alterType;
|
||||
int32_t numOfSchemas;
|
||||
SSchema pSchemas[];
|
||||
int32_t numOfFields;
|
||||
SArray* pFields;
|
||||
} SMAltertbReq;
|
||||
|
||||
int32_t tSerializeSMAlterStbReq(void** buf, SMAltertbReq* pReq);
|
||||
void* tDeserializeSMAlterStbReq(void* buf, SMAltertbReq* pReq);
|
||||
|
||||
typedef struct {
|
||||
int32_t pid;
|
||||
char app[TSDB_APP_NAME_LEN];
|
||||
|
@ -597,7 +614,6 @@ typedef struct {
|
|||
typedef struct {
|
||||
int32_t vgId;
|
||||
int8_t role;
|
||||
int8_t align[3];
|
||||
int64_t totalStorage;
|
||||
int64_t compStorage;
|
||||
int64_t pointsWritten;
|
||||
|
@ -605,27 +621,22 @@ typedef struct {
|
|||
} SVnodeLoad;
|
||||
|
||||
typedef struct {
|
||||
int32_t num;
|
||||
SVnodeLoad data[];
|
||||
} SVnodeLoads;
|
||||
|
||||
typedef struct {
|
||||
int32_t sver;
|
||||
int32_t mver; // msg version
|
||||
int32_t sver; // software version
|
||||
int64_t dver; // dnode table version in sdb
|
||||
int32_t dnodeId;
|
||||
int64_t clusterId;
|
||||
int64_t dver;
|
||||
int64_t rebootTime;
|
||||
int64_t updateTime;
|
||||
int32_t numOfCores;
|
||||
int32_t numOfSupportVnodes;
|
||||
char dnodeEp[TSDB_EP_LEN];
|
||||
SClusterCfg clusterCfg;
|
||||
SVnodeLoads vnodeLoads;
|
||||
SArray* pVloads; // array of SVnodeLoad
|
||||
} SStatusReq;
|
||||
|
||||
typedef struct {
|
||||
int32_t reserved;
|
||||
} STransReq;
|
||||
int32_t tSerializeSStatusReq(void** buf, SStatusReq* pReq);
|
||||
void* tDeserializeSStatusReq(void* buf, SStatusReq* pReq);
|
||||
|
||||
typedef struct {
|
||||
int32_t dnodeId;
|
||||
|
@ -635,21 +646,23 @@ typedef struct {
|
|||
typedef struct {
|
||||
int32_t id;
|
||||
int8_t isMnode;
|
||||
int8_t align;
|
||||
SEp ep;
|
||||
} SDnodeEp;
|
||||
|
||||
typedef struct {
|
||||
int32_t num;
|
||||
SDnodeEp eps[];
|
||||
} SDnodeEps;
|
||||
|
||||
typedef struct {
|
||||
int32_t mver;
|
||||
int64_t dver;
|
||||
SDnodeCfg dnodeCfg;
|
||||
SDnodeEps dnodeEps;
|
||||
SArray* pDnodeEps; // Array of SDnodeEp
|
||||
} SStatusRsp;
|
||||
|
||||
int32_t tSerializeSStatusRsp(void** buf, SStatusRsp* pRsp);
|
||||
void* tDeserializeSStatusRsp(void* buf, SStatusRsp* pRsp);
|
||||
|
||||
typedef struct {
|
||||
int32_t mver;
|
||||
} STransReq;
|
||||
|
||||
typedef struct {
|
||||
int32_t id;
|
||||
uint16_t port; // node sync Port
|
||||
|
@ -726,6 +739,7 @@ typedef struct {
|
|||
char tbName[TSDB_TABLE_NAME_LEN];
|
||||
char stbName[TSDB_TABLE_NAME_LEN];
|
||||
char dbFName[TSDB_DB_FNAME_LEN];
|
||||
uint64_t dbId;
|
||||
int32_t numOfTags;
|
||||
int32_t numOfColumns;
|
||||
int8_t precision;
|
||||
|
@ -1110,10 +1124,14 @@ static FORCE_INLINE void* tDeserializeSMVSubscribeReq(void* buf, SMVSubscribeReq
|
|||
return buf;
|
||||
}
|
||||
|
||||
typedef struct SMqTmrMsg {
|
||||
typedef struct {
|
||||
int32_t reserved;
|
||||
} SMqTmrMsg;
|
||||
|
||||
typedef struct {
|
||||
int64_t consumerId;
|
||||
} SMqDoRebalanceMsg;
|
||||
|
||||
typedef struct {
|
||||
int64_t status;
|
||||
} SMVSubscribeRsp;
|
||||
|
@ -1185,8 +1203,6 @@ typedef struct {
|
|||
|
||||
int32_t tSerializeSVCreateTbReq(void** buf, SVCreateTbReq* pReq);
|
||||
void* tDeserializeSVCreateTbReq(void* buf, SVCreateTbReq* pReq);
|
||||
int32_t tSerializeSVCreateTbRsp(void** buf, SVCreateTbRsp* pRsp);
|
||||
void* tDeserializeSVCreateTbRsp(void* buf, SVCreateTbRsp* pRsp);
|
||||
|
||||
typedef struct {
|
||||
uint64_t ver; // use a general definition
|
||||
|
@ -1198,8 +1214,6 @@ typedef struct {
|
|||
|
||||
int32_t tSerializeSVCreateTbBatchReq(void** buf, SVCreateTbBatchReq* pReq);
|
||||
void* tDeserializeSVCreateTbBatchReq(void* buf, SVCreateTbBatchReq* pReq);
|
||||
int32_t tSerializeSVCreateTbBatchReqp(void** buf, SVCreateTbBatchReq* pRsp);
|
||||
void* tDeserializeSVCreateTbBatchReq(void* buf, SVCreateTbBatchReq* pRsp);
|
||||
|
||||
typedef struct {
|
||||
uint64_t ver;
|
||||
|
@ -1209,13 +1223,10 @@ typedef struct {
|
|||
} SVDropTbReq;
|
||||
|
||||
typedef struct {
|
||||
uint64_t ver;
|
||||
} SVDropTbRsp;
|
||||
|
||||
int32_t tSerializeSVDropTbReq(void** buf, SVDropTbReq* pReq);
|
||||
void* tDeserializeSVDropTbReq(void* buf, SVDropTbReq* pReq);
|
||||
int32_t tSerializeSVDropTbRsp(void** buf, SVDropTbRsp* pRsp);
|
||||
void* tDeserializeSVDropTbRsp(void* buf, SVDropTbRsp* pRsp);
|
||||
|
||||
typedef struct {
|
||||
SMsgHead head;
|
||||
|
@ -1691,13 +1702,13 @@ static FORCE_INLINE void* tDecodeSSchemaWrapper(void* buf, SSchemaWrapper* pSW)
|
|||
return buf;
|
||||
}
|
||||
|
||||
typedef struct SMqTbData {
|
||||
typedef struct {
|
||||
int64_t uid;
|
||||
int32_t numOfRows;
|
||||
char* colData;
|
||||
} SMqTbData;
|
||||
|
||||
typedef struct SMqTopicBlk {
|
||||
typedef struct {
|
||||
char topicName[TSDB_TOPIC_FNAME_LEN];
|
||||
int64_t committedOffset;
|
||||
int64_t reqOffset;
|
||||
|
@ -1708,7 +1719,7 @@ typedef struct SMqTopicBlk {
|
|||
SMqTbData* tbData;
|
||||
} SMqTopicData;
|
||||
|
||||
typedef struct SMqConsumeRsp {
|
||||
typedef struct {
|
||||
int64_t consumerId;
|
||||
SSchemaWrapper* schemas;
|
||||
int64_t committedOffset;
|
||||
|
@ -1720,7 +1731,7 @@ typedef struct SMqConsumeRsp {
|
|||
} SMqConsumeRsp;
|
||||
|
||||
// one req for one vg+topic
|
||||
typedef struct SMqConsumeReq {
|
||||
typedef struct {
|
||||
SMsgHead head;
|
||||
//0: commit only, current offset
|
||||
//1: consume only, poll next offset
|
||||
|
@ -1736,17 +1747,17 @@ typedef struct SMqConsumeReq {
|
|||
char topic[TSDB_TOPIC_FNAME_LEN];
|
||||
} SMqConsumeReq;
|
||||
|
||||
typedef struct SMqSubVgEp {
|
||||
typedef struct {
|
||||
int32_t vgId;
|
||||
SEpSet epSet;
|
||||
} SMqSubVgEp;
|
||||
|
||||
typedef struct SMqSubTopicEp {
|
||||
typedef struct {
|
||||
char topic[TSDB_TOPIC_FNAME_LEN];
|
||||
SArray* vgs; // SArray<SMqSubVgEp>
|
||||
} SMqSubTopicEp;
|
||||
|
||||
typedef struct SMqCMGetSubEpRsp {
|
||||
typedef struct {
|
||||
int64_t consumerId;
|
||||
int64_t epoch;
|
||||
char cgroup[TSDB_CONSUMER_GROUP_LEN];
|
||||
|
|
|
@ -141,7 +141,8 @@ enum {
|
|||
TD_DEF_MSG_TYPE(TDMT_MND_DROP_TOPIC, "mnode-drop-topic", NULL, NULL)
|
||||
TD_DEF_MSG_TYPE(TDMT_MND_SUBSCRIBE, "mnode-subscribe", SCMSubscribeReq, SCMSubscribeRsp)
|
||||
TD_DEF_MSG_TYPE(TDMT_MND_GET_SUB_EP, "mnode-get-sub-ep", SMqCMGetSubEpReq, SMqCMGetSubEpRsp)
|
||||
TD_DEF_MSG_TYPE(TDMT_MND_MQ_TIMER, "mnode-timer", SMqTmrMsg, SMqTmrMsg)
|
||||
TD_DEF_MSG_TYPE(TDMT_MND_MQ_TIMER, "mnode-mq-timer", SMqTmrMsg, SMqTmrMsg)
|
||||
TD_DEF_MSG_TYPE(TDMT_MND_MQ_DO_REBALANCE, "mnode-mq-do-rebalance", SMqDoRebalanceMsg, SMqDoRebalanceMsg)
|
||||
|
||||
// Requests handled by VNODE
|
||||
TD_NEW_MSG_SEG(TDMT_VND_MSG)
|
||||
|
|
|
@ -27,6 +27,7 @@ typedef struct SMnodeMsg SMnodeMsg;
|
|||
typedef int32_t (*SendReqToDnodeFp)(SDnode *pDnode, struct SEpSet *epSet, struct SRpcMsg *rpcMsg);
|
||||
typedef int32_t (*SendReqToMnodeFp)(SDnode *pDnode, struct SRpcMsg *rpcMsg);
|
||||
typedef int32_t (*PutReqToMWriteQFp)(SDnode *pDnode, struct SRpcMsg *rpcMsg);
|
||||
typedef int32_t (*PutReqToMReadQFp)(SDnode *pDnode, struct SRpcMsg *rpcMsg);
|
||||
typedef void (*SendRedirectRspFp)(SDnode *pDnode, struct SRpcMsg *rpcMsg);
|
||||
|
||||
typedef struct SMnodeLoad {
|
||||
|
@ -64,6 +65,7 @@ typedef struct {
|
|||
SMnodeCfg cfg;
|
||||
SDnode *pDnode;
|
||||
PutReqToMWriteQFp putReqToMWriteQFp;
|
||||
PutReqToMReadQFp putReqToMReadQFp;
|
||||
SendReqToDnodeFp sendReqToDnodeFp;
|
||||
SendReqToMnodeFp sendReqToMnodeFp;
|
||||
SendRedirectRspFp sendRedirectRspFp;
|
||||
|
|
|
@ -32,6 +32,15 @@ extern "C" {
|
|||
|
||||
struct SCatalog;
|
||||
|
||||
enum {
|
||||
CTG_DBG_DB_NUM = 1,
|
||||
CTG_DBG_META_NUM,
|
||||
CTG_DBG_STB_NUM,
|
||||
CTG_DBG_DB_RENT_NUM,
|
||||
CTG_DBG_STB_RENT_NUM,
|
||||
};
|
||||
|
||||
|
||||
typedef struct SCatalogReq {
|
||||
SArray *pTableName; // element is SNAME
|
||||
SArray *pUdf; // udf name
|
||||
|
@ -99,7 +108,7 @@ int32_t catalogGetDBVgroupVersion(struct SCatalog* pCatalog, const char* dbName,
|
|||
*/
|
||||
int32_t catalogGetDBVgroup(struct SCatalog* pCatalog, void *pTransporter, const SEpSet* pMgmtEps, const char* pDBName, bool forceUpdate, SArray** pVgroupList);
|
||||
|
||||
int32_t catalogUpdateDBVgroup(struct SCatalog* pCatalog, const char* dbName, SDBVgroupInfo* dbInfo);
|
||||
int32_t catalogUpdateDBVgroup(struct SCatalog* pCatalog, const char* dbName, uint64_t dbId, SDBVgroupInfo* dbInfo);
|
||||
|
||||
int32_t catalogRemoveDB(struct SCatalog* pCatalog, const char* dbName, uint64_t dbId);
|
||||
|
||||
|
@ -127,6 +136,8 @@ int32_t catalogGetTableMeta(struct SCatalog* pCatalog, void * pTransporter, cons
|
|||
*/
|
||||
int32_t catalogGetSTableMeta(struct SCatalog* pCatalog, void * pTransporter, const SEpSet* pMgmtEps, const SName* pTableName, STableMeta** pTableMeta);
|
||||
|
||||
int32_t catalogUpdateSTableMeta(struct SCatalog* pCatalog, STableMetaRsp *rspMsg);
|
||||
|
||||
|
||||
/**
|
||||
* Force renew a table's local cached meta data.
|
||||
|
|
|
@ -37,12 +37,6 @@ typedef struct SQueryNode {
|
|||
|
||||
#define queryNodeType(nodeptr) (((const SQueryNode*)(nodeptr))->type)
|
||||
|
||||
typedef struct SField {
|
||||
char name[TSDB_COL_NAME_LEN];
|
||||
uint8_t type;
|
||||
int32_t bytes;
|
||||
} SField;
|
||||
|
||||
typedef struct SFieldInfo {
|
||||
int16_t numOfOutput; // number of column in result
|
||||
SField *final;
|
||||
|
|
|
@ -81,7 +81,6 @@ typedef struct STableMeta {
|
|||
} STableMeta;
|
||||
|
||||
typedef struct SDBVgroupInfo {
|
||||
uint64_t dbId;
|
||||
int32_t vgVersion;
|
||||
int8_t hashMethod;
|
||||
SHashObj *vgHash; //key:vgId, value:SVgroupInfo
|
||||
|
@ -89,6 +88,7 @@ typedef struct SDBVgroupInfo {
|
|||
|
||||
typedef struct SUseDbOutput {
|
||||
char db[TSDB_DB_FNAME_LEN];
|
||||
uint64_t dbId;
|
||||
SDBVgroupInfo *dbVgroup;
|
||||
} SUseDbOutput;
|
||||
|
||||
|
@ -102,6 +102,7 @@ enum {
|
|||
|
||||
typedef struct STableMetaOutput {
|
||||
int32_t metaType;
|
||||
uint64_t dbId;
|
||||
char dbFName[TSDB_DB_FNAME_LEN];
|
||||
char ctbName[TSDB_TABLE_NAME_LEN];
|
||||
char tbName[TSDB_TABLE_NAME_LEN];
|
||||
|
@ -159,6 +160,8 @@ void initQueryModuleMsgHandle();
|
|||
const SSchema* tGetTbnameColumnSchema();
|
||||
bool tIsValidSchema(struct SSchema* pSchema, int32_t numOfCols, int32_t numOfTags);
|
||||
|
||||
int32_t queryCreateTableMetaFromMsg(STableMetaRsp* msg, bool isSuperTable, STableMeta **pMeta);
|
||||
|
||||
extern int32_t (*queryBuildMsg[TDMT_MAX])(void* input, char **msg, int32_t msgSize, int32_t *msgLen);
|
||||
extern int32_t (*queryProcessMsgRsp[TDMT_MAX])(void* output, char *msg, int32_t msgSize);
|
||||
|
||||
|
|
|
@ -55,7 +55,7 @@ typedef enum ENodeType {
|
|||
QUERY_NODE_FILL,
|
||||
|
||||
// only for parser
|
||||
QUERY_NODE_TARGET_EXPR,
|
||||
QUERY_NODE_RAW_EXPR,
|
||||
|
||||
QUERY_NODE_SET_OPERATOR,
|
||||
QUERY_NODE_SELECT_STMT,
|
||||
|
@ -81,6 +81,13 @@ typedef struct SNodeList {
|
|||
SListCell* pTail;
|
||||
} SNodeList;
|
||||
|
||||
typedef struct SRawExprNode {
|
||||
ENodeType nodeType;
|
||||
char* p;
|
||||
uint32_t n;
|
||||
SNode* pNode;
|
||||
} SRawExprNode;
|
||||
|
||||
typedef struct SDataType {
|
||||
uint8_t type;
|
||||
uint8_t precision;
|
||||
|
@ -114,6 +121,14 @@ typedef struct SColumnNode {
|
|||
typedef struct SValueNode {
|
||||
SExprNode node; // QUERY_NODE_VALUE
|
||||
char* literal;
|
||||
bool isDuration;
|
||||
union {
|
||||
bool b;
|
||||
int64_t i;
|
||||
uint64_t u;
|
||||
double d;
|
||||
char* p;
|
||||
} datum;
|
||||
} SValueNode;
|
||||
|
||||
typedef enum EOperatorType {
|
||||
|
@ -174,7 +189,7 @@ typedef struct SNodeListNode {
|
|||
} SNodeListNode;
|
||||
|
||||
typedef struct SFunctionNode {
|
||||
SExprNode type; // QUERY_NODE_FUNCTION
|
||||
SExprNode node; // QUERY_NODE_FUNCTION
|
||||
char functionName[TSDB_FUNC_NAME_LEN];
|
||||
int32_t funcId;
|
||||
SNodeList* pParameterList;
|
||||
|
|
|
@ -218,18 +218,19 @@ int32_t* taosGetErrno();
|
|||
// mnode-stable
|
||||
#define TSDB_CODE_MND_STB_ALREADY_EXIST TAOS_DEF_ERROR_CODE(0, 0x03A0)
|
||||
#define TSDB_CODE_MND_STB_NOT_EXIST TAOS_DEF_ERROR_CODE(0, 0x03A1)
|
||||
#define TSDB_CODE_MND_TOO_MANY_STBS TAOS_DEF_ERROR_CODE(0, 0x03A2)
|
||||
#define TSDB_CODE_MND_INVALID_STB TAOS_DEF_ERROR_CODE(0, 0x03A3)
|
||||
#define TSDB_CODE_MND_INVALID_STB_OPTION TAOS_DEF_ERROR_CODE(0, 0x03A4)
|
||||
#define TSDB_CODE_MND_STB_OPTION_UNCHNAGED TAOS_DEF_ERROR_CODE(0, 0x03A5)
|
||||
#define TSDB_CODE_MND_TOO_MANY_TAGS TAOS_DEF_ERROR_CODE(0, 0x03A6)
|
||||
#define TSDB_CODE_MND_TAG_ALREAY_EXIST TAOS_DEF_ERROR_CODE(0, 0x03A7)
|
||||
#define TSDB_CODE_MND_TAG_NOT_EXIST TAOS_DEF_ERROR_CODE(0, 0x03A8)
|
||||
#define TSDB_CODE_MND_TOO_MANY_COLUMNS TAOS_DEF_ERROR_CODE(0, 0x03A9)
|
||||
#define TSDB_CODE_MND_COLUMN_ALREAY_EXIST TAOS_DEF_ERROR_CODE(0, 0x03AA)
|
||||
#define TSDB_CODE_MND_COLUMN_NOT_EXIST TAOS_DEF_ERROR_CODE(0, 0x03AB)
|
||||
#define TSDB_CODE_MND_INVALID_ROW_BYTES TAOS_DEF_ERROR_CODE(0, 0x03AC)
|
||||
#define TSDB_CODE_MND_NAME_CONFLICT_WITH_TOPIC TAOS_DEF_ERROR_CODE(0, 0x03AD)
|
||||
#define TSDB_CODE_MND_NAME_CONFLICT_WITH_TOPIC TAOS_DEF_ERROR_CODE(0, 0x03A2)
|
||||
#define TSDB_CODE_MND_TOO_MANY_STBS TAOS_DEF_ERROR_CODE(0, 0x03A3)
|
||||
#define TSDB_CODE_MND_INVALID_STB TAOS_DEF_ERROR_CODE(0, 0x03A4)
|
||||
#define TSDB_CODE_MND_INVALID_STB_OPTION TAOS_DEF_ERROR_CODE(0, 0x03A5)
|
||||
#define TSDB_CODE_MND_INVALID_STB_ALTER_OPTION TAOS_DEF_ERROR_CODE(0, 0x03A6)
|
||||
#define TSDB_CODE_MND_STB_OPTION_UNCHNAGED TAOS_DEF_ERROR_CODE(0, 0x03A7)
|
||||
#define TSDB_CODE_MND_INVALID_ROW_BYTES TAOS_DEF_ERROR_CODE(0, 0x03A8)
|
||||
#define TSDB_CODE_MND_TOO_MANY_TAGS TAOS_DEF_ERROR_CODE(0, 0x03A9)
|
||||
#define TSDB_CODE_MND_TAG_ALREADY_EXIST TAOS_DEF_ERROR_CODE(0, 0x03AA)
|
||||
#define TSDB_CODE_MND_TAG_NOT_EXIST TAOS_DEF_ERROR_CODE(0, 0x03AB)
|
||||
#define TSDB_CODE_MND_TOO_MANY_COLUMNS TAOS_DEF_ERROR_CODE(0, 0x03AC)
|
||||
#define TSDB_CODE_MND_COLUMN_ALREADY_EXIST TAOS_DEF_ERROR_CODE(0, 0x03AD)
|
||||
#define TSDB_CODE_MND_COLUMN_NOT_EXIST TAOS_DEF_ERROR_CODE(0, 0x03AE)
|
||||
|
||||
// mnode-func
|
||||
#define TSDB_CODE_MND_FUNC_ALREADY_EXIST TAOS_DEF_ERROR_CODE(0, 0x03C0)
|
||||
|
@ -435,16 +436,21 @@ int32_t* taosGetErrno();
|
|||
#define TSDB_CODE_CTG_NOT_READY TAOS_DEF_ERROR_CODE(0, 0x2402) //catalog is not ready
|
||||
#define TSDB_CODE_CTG_MEM_ERROR TAOS_DEF_ERROR_CODE(0, 0x2403) //catalog memory error
|
||||
#define TSDB_CODE_CTG_SYS_ERROR TAOS_DEF_ERROR_CODE(0, 0x2404) //catalog system error
|
||||
#define TSDB_CODE_CTG_DB_DROPPED TAOS_DEF_ERROR_CODE(0, 0x2405) //Database is dropped
|
||||
#define TSDB_CODE_CTG_OUT_OF_SERVICE TAOS_DEF_ERROR_CODE(0, 0x2406) //catalog is out of service
|
||||
|
||||
//scheduler
|
||||
#define TSDB_CODE_SCH_STATUS_ERROR TAOS_DEF_ERROR_CODE(0, 0x2501) //scheduler status error
|
||||
#define TSDB_CODE_SCH_INTERNAL_ERROR TAOS_DEF_ERROR_CODE(0, 0x2502) //scheduler internal error
|
||||
|
||||
//parser
|
||||
#define TSDB_CODE_PARSER_INVALID_COLUMN TAOS_DEF_ERROR_CODE(0, 0x2601) //invalid column name
|
||||
#define TSDB_CODE_PARSER_TABLE_NOT_EXIST TAOS_DEF_ERROR_CODE(0, 0x2602) //table not exist
|
||||
#define TSDB_CODE_PARSER_AMBIGUOUS_COLUMN TAOS_DEF_ERROR_CODE(0, 0x2603) //ambiguous column
|
||||
#define TSDB_CODE_PARSER_WRONG_VALUE_TYPE TAOS_DEF_ERROR_CODE(0, 0x2604) //wrong value type
|
||||
#define TSDB_CODE_PAR_INVALID_COLUMN TAOS_DEF_ERROR_CODE(0, 0x2601) //invalid column name
|
||||
#define TSDB_CODE_PAR_TABLE_NOT_EXIST TAOS_DEF_ERROR_CODE(0, 0x2602) //table not exist
|
||||
#define TSDB_CODE_PAR_AMBIGUOUS_COLUMN TAOS_DEF_ERROR_CODE(0, 0x2603) //ambiguous column
|
||||
#define TSDB_CODE_PAR_WRONG_VALUE_TYPE TAOS_DEF_ERROR_CODE(0, 0x2604) //wrong value type
|
||||
#define TSDB_CODE_PAR_FUNTION_PARA_NUM TAOS_DEF_ERROR_CODE(0, 0x2605) //invalid number of arguments
|
||||
#define TSDB_CODE_PAR_FUNTION_PARA_TYPE TAOS_DEF_ERROR_CODE(0, 0x2606) //inconsistent datatypes
|
||||
#define TSDB_CODE_PAR_ILLEGAL_USE_AGG_FUNCTION TAOS_DEF_ERROR_CODE(0, 0x2607) //there mustn't be aggregation
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
|
|
|
@ -187,7 +187,7 @@ do { \
|
|||
#define TSDB_MAX_ALLOWED_SQL_LEN (1*1024*1024u) // sql length should be less than 1mb
|
||||
|
||||
#define TSDB_APP_NAME_LEN TSDB_UNI_LEN
|
||||
|
||||
#define TSDB_STB_COMMENT_LEN 1024
|
||||
/**
|
||||
* In some scenarios uint16_t (0~65535) is used to store the row len.
|
||||
* - Firstly, we use 65531(65535 - 4), as the SDataRow/SKVRow contains 4 bits header.
|
||||
|
|
|
@ -44,7 +44,6 @@ static int32_t hbProcessDBInfoRsp(void *value, int32_t valueLen, struct SCatalog
|
|||
code = catalogRemoveDB(pCatalog, rsp->db, rsp->uid);
|
||||
} else {
|
||||
SDBVgroupInfo vgInfo = {0};
|
||||
vgInfo.dbId = rsp->uid;
|
||||
vgInfo.vgVersion = rsp->vgVersion;
|
||||
vgInfo.hashMethod = rsp->hashMethod;
|
||||
vgInfo.vgHash = taosHashInit(rsp->vgNum, taosGetDefaultHashFunction(TSDB_DATA_TYPE_INT), true, HASH_ENTRY_LOCK);
|
||||
|
@ -69,7 +68,7 @@ static int32_t hbProcessDBInfoRsp(void *value, int32_t valueLen, struct SCatalog
|
|||
}
|
||||
}
|
||||
|
||||
code = catalogUpdateDBVgroup(pCatalog, rsp->db, &vgInfo);
|
||||
code = catalogUpdateDBVgroup(pCatalog, rsp->db, rsp->uid, &vgInfo);
|
||||
if (code) {
|
||||
taosHashCleanup(vgInfo.vgHash);
|
||||
}
|
||||
|
@ -101,50 +100,33 @@ static int32_t hbProcessStbInfoRsp(void *value, int32_t valueLen, struct SCatalo
|
|||
|
||||
tscDebug("hb remove stb, db:%s, stb:%s", rsp->dbFName, rsp->stbName);
|
||||
|
||||
code = catalogRemoveSTableMeta(pCatalog, rsp->dbFName, rsp->stbName, rsp->suid);
|
||||
catalogRemoveSTableMeta(pCatalog, rsp->dbFName, rsp->stbName, rsp->suid);
|
||||
} else {
|
||||
tscDebug("hb update stb, db:%s, stb:%s", rsp->dbFName, rsp->stbName);
|
||||
|
||||
rsp->numOfTags = ntohl(rsp->numOfTags);
|
||||
rsp->sversion = ntohl(rsp->sversion);
|
||||
rsp->tversion = ntohl(rsp->tversion);
|
||||
rsp->tuid = be64toh(rsp->tuid);
|
||||
rsp->vgId = ntohl(rsp->vgId);
|
||||
|
||||
SSchema* pSchema = rsp->pSchema;
|
||||
|
||||
schemaNum = rsp->numOfColumns + rsp->numOfTags;
|
||||
/*
|
||||
rsp->vgNum = ntohl(rsp->vgNum);
|
||||
rsp->uid = be64toh(rsp->uid);
|
||||
|
||||
SDBVgroupInfo vgInfo = {0};
|
||||
vgInfo.dbId = rsp->uid;
|
||||
vgInfo.vgVersion = rsp->vgVersion;
|
||||
vgInfo.hashMethod = rsp->hashMethod;
|
||||
vgInfo.vgHash = taosHashInit(rsp->vgNum, taosGetDefaultHashFunction(TSDB_DATA_TYPE_INT), true, HASH_ENTRY_LOCK);
|
||||
if (NULL == vgInfo.vgHash) {
|
||||
tscError("hash init[%d] failed", rsp->vgNum);
|
||||
return TSDB_CODE_TSC_OUT_OF_MEMORY;
|
||||
for (int i = 0; i < schemaNum; ++i) {
|
||||
pSchema->bytes = ntohl(pSchema->bytes);
|
||||
pSchema->colId = ntohl(pSchema->colId);
|
||||
|
||||
pSchema++;
|
||||
}
|
||||
|
||||
for (int32_t i = 0; i < rsp->vgNum; ++i) {
|
||||
rsp->vgroupInfo[i].vgId = ntohl(rsp->vgroupInfo[i].vgId);
|
||||
rsp->vgroupInfo[i].hashBegin = ntohl(rsp->vgroupInfo[i].hashBegin);
|
||||
rsp->vgroupInfo[i].hashEnd = ntohl(rsp->vgroupInfo[i].hashEnd);
|
||||
|
||||
for (int32_t n = 0; n < rsp->vgroupInfo[i].epset.numOfEps; ++n) {
|
||||
rsp->vgroupInfo[i].epset.eps[n].port = ntohs(rsp->vgroupInfo[i].epset.eps[n].port);
|
||||
if (rsp->pSchema[0].colId != PRIMARYKEY_TIMESTAMP_COL_ID) {
|
||||
tscError("invalid colId[%d] for the first column in table meta rsp msg", rsp->pSchema[0].colId);
|
||||
return TSDB_CODE_TSC_INVALID_VALUE;
|
||||
}
|
||||
|
||||
if (0 != taosHashPut(vgInfo.vgHash, &rsp->vgroupInfo[i].vgId, sizeof(rsp->vgroupInfo[i].vgId), &rsp->vgroupInfo[i], sizeof(rsp->vgroupInfo[i]))) {
|
||||
tscError("hash push failed, errno:%d", errno);
|
||||
taosHashCleanup(vgInfo.vgHash);
|
||||
return TSDB_CODE_TSC_OUT_OF_MEMORY;
|
||||
}
|
||||
}
|
||||
|
||||
code = catalogUpdateDBVgroup(pCatalog, rsp->db, &vgInfo);
|
||||
if (code) {
|
||||
taosHashCleanup(vgInfo.vgHash);
|
||||
}
|
||||
*/
|
||||
}
|
||||
|
||||
if (code) {
|
||||
return code;
|
||||
catalogUpdateSTableMeta(pCatalog, rsp);
|
||||
}
|
||||
|
||||
msgLen += sizeof(STableMetaRsp) + schemaNum * sizeof(SSchema);
|
||||
|
|
|
@ -209,6 +209,9 @@ tmq_resp_err_t tmq_subscribe(tmq_t* tmq, tmq_list_t* topic_list) {
|
|||
|
||||
SName name = {0};
|
||||
char* dbName = getDbOfConnection(tmq->pTscObj);
|
||||
if (dbName == NULL) {
|
||||
return TMQ_RESP_ERR__FAIL;
|
||||
}
|
||||
tNameSetDbName(&name, tmq->pTscObj->acctId, dbName, strlen(dbName));
|
||||
tNameFromString(&name, topicName, T_NAME_TABLE);
|
||||
|
||||
|
|
|
@ -565,7 +565,6 @@ TEST(testCase, insert_test) {
|
|||
#endif
|
||||
|
||||
|
||||
#if 1
|
||||
TEST(testCase, projection_query_tables) {
|
||||
TAOS* pConn = taos_connect("localhost", "root", "taosdata", NULL, 0);
|
||||
ASSERT_NE(pConn, nullptr);
|
||||
|
@ -591,7 +590,7 @@ TEST(testCase, projection_query_tables) {
|
|||
}
|
||||
taos_free_result(pRes);
|
||||
|
||||
for(int32_t i = 0; i < 100000; ++i) {
|
||||
for(int32_t i = 0; i < 10000000; ++i) {
|
||||
char sql[512] = {0};
|
||||
sprintf(sql, "insert into tu values(now+%da, %d)", i, i);
|
||||
TAOS_RES* p = taos_query(pConn, sql);
|
||||
|
@ -622,6 +621,7 @@ TEST(testCase, projection_query_tables) {
|
|||
taos_free_result(pRes);
|
||||
taos_close(pConn);
|
||||
}
|
||||
#if 0
|
||||
|
||||
TEST(testCase, projection_query_stables) {
|
||||
TAOS* pConn = taos_connect("localhost", "root", "taosdata", NULL, 0);
|
||||
|
|
|
@ -46,7 +46,6 @@ TEST(testCase, create_topic_ctb_Test) {
|
|||
if (taos_errno(pRes) != 0) {
|
||||
printf("error in use db, reason:%s\n", taos_errstr(pRes));
|
||||
}
|
||||
//taos_free_result(pRes);
|
||||
|
||||
TAOS_FIELD* pFields = taos_fetch_fields(pRes);
|
||||
ASSERT_TRUE(pFields == nullptr);
|
||||
|
|
|
@ -27,7 +27,7 @@
|
|||
#undef TD_MSG_SEG_CODE_
|
||||
#include "tmsgdef.h"
|
||||
|
||||
int tInitSubmitMsgIter(SSubmitMsg *pMsg, SSubmitMsgIter *pIter) {
|
||||
int32_t tInitSubmitMsgIter(SSubmitMsg *pMsg, SSubmitMsgIter *pIter) {
|
||||
if (pMsg == NULL) {
|
||||
terrno = TSDB_CODE_TDB_SUBMIT_MSG_MSSED_UP;
|
||||
return -1;
|
||||
|
@ -44,7 +44,7 @@ int tInitSubmitMsgIter(SSubmitMsg *pMsg, SSubmitMsgIter *pIter) {
|
|||
return 0;
|
||||
}
|
||||
|
||||
int tGetSubmitMsgNext(SSubmitMsgIter *pIter, SSubmitBlk **pPBlock) {
|
||||
int32_t tGetSubmitMsgNext(SSubmitMsgIter *pIter, SSubmitBlk **pPBlock) {
|
||||
if (pIter->len == 0) {
|
||||
pIter->len += sizeof(SSubmitMsg);
|
||||
} else {
|
||||
|
@ -63,7 +63,7 @@ int tGetSubmitMsgNext(SSubmitMsgIter *pIter, SSubmitBlk **pPBlock) {
|
|||
return 0;
|
||||
}
|
||||
|
||||
int tInitSubmitBlkIter(SSubmitBlk *pBlock, SSubmitBlkIter *pIter) {
|
||||
int32_t tInitSubmitBlkIter(SSubmitBlk *pBlock, SSubmitBlkIter *pIter) {
|
||||
if (pBlock->dataLen <= 0) return -1;
|
||||
pIter->totalLen = pBlock->dataLen;
|
||||
pIter->len = 0;
|
||||
|
@ -85,14 +85,14 @@ STSRow *tGetSubmitBlkNext(SSubmitBlkIter *pIter) {
|
|||
}
|
||||
}
|
||||
|
||||
int tSerializeSClientHbReq(void **buf, const SClientHbReq *pReq) {
|
||||
int tlen = 0;
|
||||
int32_t tSerializeSClientHbReq(void **buf, const SClientHbReq *pReq) {
|
||||
int32_t tlen = 0;
|
||||
tlen += taosEncodeSClientHbKey(buf, &pReq->connKey);
|
||||
|
||||
int32_t kvNum = taosHashGetSize(pReq->info);
|
||||
tlen += taosEncodeFixedI32(buf, kvNum);
|
||||
SKv *kv;
|
||||
void* pIter = taosHashIterate(pReq->info, NULL);
|
||||
void *pIter = taosHashIterate(pReq->info, NULL);
|
||||
while (pIter != NULL) {
|
||||
kv = pIter;
|
||||
tlen += taosEncodeSKv(buf, kv);
|
||||
|
@ -111,7 +111,7 @@ void *tDeserializeSClientHbReq(void *buf, SClientHbReq *pReq) {
|
|||
if (pReq->info == NULL) {
|
||||
pReq->info = taosHashInit(kvNum, taosGetDefaultHashFunction(TSDB_DATA_TYPE_BINARY), true, HASH_NO_LOCK);
|
||||
}
|
||||
for(int i = 0; i < kvNum; i++) {
|
||||
for (int32_t i = 0; i < kvNum; i++) {
|
||||
SKv kv;
|
||||
buf = taosDecodeSKv(buf, &kv);
|
||||
taosHashPut(pReq->info, &kv.key, sizeof(kv.key), &kv, sizeof(kv));
|
||||
|
@ -120,25 +120,26 @@ void *tDeserializeSClientHbReq(void *buf, SClientHbReq *pReq) {
|
|||
return buf;
|
||||
}
|
||||
|
||||
int tSerializeSClientHbRsp(void** buf, const SClientHbRsp* pRsp) {
|
||||
int tlen = 0;
|
||||
int32_t tSerializeSClientHbRsp(void **buf, const SClientHbRsp *pRsp) {
|
||||
int32_t tlen = 0;
|
||||
int32_t kvNum = taosArrayGetSize(pRsp->info);
|
||||
tlen += taosEncodeSClientHbKey(buf, &pRsp->connKey);
|
||||
tlen += taosEncodeFixedI32(buf, pRsp->status);
|
||||
tlen += taosEncodeFixedI32(buf, kvNum);
|
||||
for (int i = 0; i < kvNum; i++) {
|
||||
for (int32_t i = 0; i < kvNum; i++) {
|
||||
SKv *kv = (SKv *)taosArrayGet(pRsp->info, i);
|
||||
tlen += taosEncodeSKv(buf, kv);
|
||||
}
|
||||
return tlen;
|
||||
}
|
||||
void* tDeserializeSClientHbRsp(void* buf, SClientHbRsp* pRsp) {
|
||||
|
||||
void *tDeserializeSClientHbRsp(void *buf, SClientHbRsp *pRsp) {
|
||||
int32_t kvNum = 0;
|
||||
buf = taosDecodeSClientHbKey(buf, &pRsp->connKey);
|
||||
buf = taosDecodeFixedI32(buf, &pRsp->status);
|
||||
buf = taosDecodeFixedI32(buf, &kvNum);
|
||||
pRsp->info = taosArrayInit(kvNum, sizeof(SKv));
|
||||
for (int i = 0; i < kvNum; i++) {
|
||||
for (int32_t i = 0; i < kvNum; i++) {
|
||||
SKv kv = {0};
|
||||
buf = taosDecodeSKv(buf, &kv);
|
||||
taosArrayPush(pRsp->info, &kv);
|
||||
|
@ -147,19 +148,19 @@ void* tDeserializeSClientHbRsp(void* buf, SClientHbRsp* pRsp) {
|
|||
return buf;
|
||||
}
|
||||
|
||||
int tSerializeSClientHbBatchReq(void** buf, const SClientHbBatchReq* pBatchReq) {
|
||||
int tlen = 0;
|
||||
int32_t tSerializeSClientHbBatchReq(void **buf, const SClientHbBatchReq *pBatchReq) {
|
||||
int32_t tlen = 0;
|
||||
tlen += taosEncodeFixedI64(buf, pBatchReq->reqId);
|
||||
int32_t reqNum = taosArrayGetSize(pBatchReq->reqs);
|
||||
tlen += taosEncodeFixedI32(buf, reqNum);
|
||||
for (int i = 0; i < reqNum; i++) {
|
||||
SClientHbReq* pReq = taosArrayGet(pBatchReq->reqs, i);
|
||||
for (int32_t i = 0; i < reqNum; i++) {
|
||||
SClientHbReq *pReq = taosArrayGet(pBatchReq->reqs, i);
|
||||
tlen += tSerializeSClientHbReq(buf, pReq);
|
||||
}
|
||||
return tlen;
|
||||
}
|
||||
|
||||
void* tDeserializeSClientHbBatchReq(void* buf, SClientHbBatchReq* pBatchReq) {
|
||||
void *tDeserializeSClientHbBatchReq(void *buf, SClientHbBatchReq *pBatchReq) {
|
||||
buf = taosDecodeFixedI64(buf, &pBatchReq->reqId);
|
||||
if (pBatchReq->reqs == NULL) {
|
||||
pBatchReq->reqs = taosArrayInit(0, sizeof(SClientHbReq));
|
||||
|
@ -167,7 +168,7 @@ void* tDeserializeSClientHbBatchReq(void* buf, SClientHbBatchReq* pBatchReq) {
|
|||
|
||||
int32_t reqNum;
|
||||
buf = taosDecodeFixedI32(buf, &reqNum);
|
||||
for (int i = 0; i < reqNum; i++) {
|
||||
for (int32_t i = 0; i < reqNum; i++) {
|
||||
SClientHbReq req = {0};
|
||||
buf = tDeserializeSClientHbReq(buf, &req);
|
||||
taosArrayPush(pBatchReq->reqs, &req);
|
||||
|
@ -175,22 +176,22 @@ void* tDeserializeSClientHbBatchReq(void* buf, SClientHbBatchReq* pBatchReq) {
|
|||
return buf;
|
||||
}
|
||||
|
||||
int tSerializeSClientHbBatchRsp(void** buf, const SClientHbBatchRsp* pBatchRsp) {
|
||||
int tlen = 0;
|
||||
int32_t tSerializeSClientHbBatchRsp(void **buf, const SClientHbBatchRsp *pBatchRsp) {
|
||||
int32_t tlen = 0;
|
||||
int32_t sz = taosArrayGetSize(pBatchRsp->rsps);
|
||||
tlen += taosEncodeFixedI32(buf, sz);
|
||||
for (int i = 0; i < sz; i++) {
|
||||
SClientHbRsp* pRsp = taosArrayGet(pBatchRsp->rsps, i);
|
||||
for (int32_t i = 0; i < sz; i++) {
|
||||
SClientHbRsp *pRsp = taosArrayGet(pBatchRsp->rsps, i);
|
||||
tlen += tSerializeSClientHbRsp(buf, pRsp);
|
||||
}
|
||||
return tlen;
|
||||
}
|
||||
|
||||
void* tDeserializeSClientHbBatchRsp(void* buf, SClientHbBatchRsp* pBatchRsp) {
|
||||
void *tDeserializeSClientHbBatchRsp(void *buf, SClientHbBatchRsp *pBatchRsp) {
|
||||
int32_t sz;
|
||||
buf = taosDecodeFixedI32(buf, &sz);
|
||||
pBatchRsp->rsps = taosArrayInit(sz, sizeof(SClientHbRsp));
|
||||
for (int i = 0; i < sz; i++) {
|
||||
for (int32_t i = 0; i < sz; i++) {
|
||||
SClientHbRsp rsp = {0};
|
||||
buf = tDeserializeSClientHbRsp(buf, &rsp);
|
||||
taosArrayPush(pBatchRsp->rsps, &rsp);
|
||||
|
@ -198,8 +199,8 @@ void* tDeserializeSClientHbBatchRsp(void* buf, SClientHbBatchRsp* pBatchRsp) {
|
|||
return buf;
|
||||
}
|
||||
|
||||
int tSerializeSVCreateTbReq(void **buf, SVCreateTbReq *pReq) {
|
||||
int tlen = 0;
|
||||
int32_t tSerializeSVCreateTbReq(void **buf, SVCreateTbReq *pReq) {
|
||||
int32_t tlen = 0;
|
||||
|
||||
tlen += taosEncodeFixedU64(buf, pReq->ver);
|
||||
tlen += taosEncodeString(buf, pReq->name);
|
||||
|
@ -293,8 +294,8 @@ void *tDeserializeSVCreateTbReq(void *buf, SVCreateTbReq *pReq) {
|
|||
return buf;
|
||||
}
|
||||
|
||||
int tSerializeSVCreateTbBatchReq(void **buf, SVCreateTbBatchReq *pReq) {
|
||||
int tlen = 0;
|
||||
int32_t tSerializeSVCreateTbBatchReq(void **buf, SVCreateTbBatchReq *pReq) {
|
||||
int32_t tlen = 0;
|
||||
|
||||
tlen += taosEncodeFixedU64(buf, pReq->ver);
|
||||
tlen += taosEncodeFixedU32(buf, taosArrayGetSize(pReq->pArray));
|
||||
|
@ -322,7 +323,7 @@ void *tDeserializeSVCreateTbBatchReq(void *buf, SVCreateTbBatchReq *pReq) {
|
|||
}
|
||||
|
||||
int32_t tSerializeSVDropTbReq(void **buf, SVDropTbReq *pReq) {
|
||||
int tlen = 0;
|
||||
int32_t tlen = 0;
|
||||
tlen += taosEncodeFixedU64(buf, pReq->ver);
|
||||
tlen += taosEncodeString(buf, pReq->name);
|
||||
tlen += taosEncodeFixedU8(buf, pReq->type);
|
||||
|
@ -335,3 +336,268 @@ void *tDeserializeSVDropTbReq(void *buf, SVDropTbReq *pReq) {
|
|||
buf = taosDecodeFixedU8(buf, &pReq->type);
|
||||
return buf;
|
||||
}
|
||||
|
||||
// Encode a create-super-table request through *buf.
//
// Fields are written in this order: name, igExists, numOfColumns, numOfTags,
// one (type, bytes, name) triple per column, one triple per tag, and finally
// the comment string.
//
// Returns the sum of the lengths reported by the individual taosEncode* calls.
int32_t tSerializeSMCreateStbReq(void **buf, SMCreateStbReq *pReq) {
  int32_t encoded = 0;

  // request header
  encoded += taosEncodeString(buf, pReq->name);
  encoded += taosEncodeFixedI8(buf, pReq->igExists);
  encoded += taosEncodeFixedI32(buf, pReq->numOfColumns);
  encoded += taosEncodeFixedI32(buf, pReq->numOfTags);

  // column schema: numOfColumns entries taken from pColumns
  int32_t colIdx = 0;
  while (colIdx < pReq->numOfColumns) {
    SField *pCol = taosArrayGet(pReq->pColumns, colIdx);
    encoded += taosEncodeFixedI8(buf, pCol->type);
    encoded += taosEncodeFixedI32(buf, pCol->bytes);
    encoded += taosEncodeString(buf, pCol->name);
    colIdx++;
  }

  // tag schema: numOfTags entries taken from pTags
  int32_t tagIdx = 0;
  while (tagIdx < pReq->numOfTags) {
    SField *pTag = taosArrayGet(pReq->pTags, tagIdx);
    encoded += taosEncodeFixedI8(buf, pTag->type);
    encoded += taosEncodeFixedI32(buf, pTag->bytes);
    encoded += taosEncodeString(buf, pTag->name);
    tagIdx++;
  }

  encoded += taosEncodeString(buf, pReq->comment);
  return encoded;
}
|
||||
|
||||
// Decode a create-super-table request from buf into *pReq.
//
// Reads the fields in the order tSerializeSMCreateStbReq writes them and
// fills pReq->pColumns / pReq->pTags with freshly created SField arrays.
//
// Returns the position after the last decoded field, or NULL with terrno set
// to TSDB_CODE_OUT_OF_MEMORY when an array cannot be created or grown. On
// the NULL path any array already stored in pReq is left in place.
void *tDeserializeSMCreateStbReq(void *buf, SMCreateStbReq *pReq) {
  // request header
  buf = taosDecodeStringTo(buf, pReq->name);
  buf = taosDecodeFixedI8(buf, &pReq->igExists);
  buf = taosDecodeFixedI32(buf, &pReq->numOfColumns);
  buf = taosDecodeFixedI32(buf, &pReq->numOfTags);

  // Both arrays are created before either result is inspected.
  pReq->pColumns = taosArrayInit(pReq->numOfColumns, sizeof(SField));
  pReq->pTags = taosArrayInit(pReq->numOfTags, sizeof(SField));
  if (!(pReq->pColumns != NULL && pReq->pTags != NULL)) {
    terrno = TSDB_CODE_OUT_OF_MEMORY;
    return NULL;
  }

  // column schema
  int32_t colIdx = 0;
  while (colIdx < pReq->numOfColumns) {
    SField column = {0};
    buf = taosDecodeFixedI8(buf, &column.type);
    buf = taosDecodeFixedI32(buf, &column.bytes);
    buf = taosDecodeStringTo(buf, column.name);
    if (taosArrayPush(pReq->pColumns, &column) == NULL) {
      terrno = TSDB_CODE_OUT_OF_MEMORY;
      return NULL;
    }
    colIdx++;
  }

  // tag schema
  int32_t tagIdx = 0;
  while (tagIdx < pReq->numOfTags) {
    SField tag = {0};
    buf = taosDecodeFixedI8(buf, &tag.type);
    buf = taosDecodeFixedI32(buf, &tag.bytes);
    buf = taosDecodeStringTo(buf, tag.name);
    if (taosArrayPush(pReq->pTags, &tag) == NULL) {
      terrno = TSDB_CODE_OUT_OF_MEMORY;
      return NULL;
    }
    tagIdx++;
  }

  buf = taosDecodeStringTo(buf, pReq->comment);
  return buf;
}
|
||||
|
||||
// Encode a drop-super-table request through *buf: the table name followed by
// the igNotExists flag. Returns the sum of the two reported lengths.
int32_t tSerializeSMDropStbReq(void **buf, SMDropStbReq *pReq) {
  // The two calls stay as separate statements so the name is always written
  // before the flag.
  int32_t encoded = taosEncodeString(buf, pReq->name);
  encoded += taosEncodeFixedI8(buf, pReq->igNotExists);
  return encoded;
}
|
||||
|
||||
// Decode a drop-super-table request from buf: the table name, then the
// igNotExists flag. Returns the position after the flag.
void *tDeserializeSMDropStbReq(void *buf, SMDropStbReq *pReq) {
  void *afterName = taosDecodeStringTo(buf, pReq->name);
  return taosDecodeFixedI8(afterName, &pReq->igNotExists);
}
|
||||
|
||||
// Encode an alter-super-table request through *buf.
//
// Fields are written in this order: name, alterType, numOfFields, then one
// (type, bytes, name) triple per entry of pFields.
//
// Returns the sum of the lengths reported by the individual taosEncode* calls.
int32_t tSerializeSMAlterStbReq(void **buf, SMAltertbReq *pReq) {
  int32_t encoded = 0;

  encoded += taosEncodeString(buf, pReq->name);
  encoded += taosEncodeFixedI8(buf, pReq->alterType);
  encoded += taosEncodeFixedI32(buf, pReq->numOfFields);

  int32_t fieldIdx = 0;
  while (fieldIdx < pReq->numOfFields) {
    SField *pEntry = taosArrayGet(pReq->pFields, fieldIdx);
    // The field type goes out as an unsigned byte here.
    encoded += taosEncodeFixedU8(buf, pEntry->type);
    encoded += taosEncodeFixedI32(buf, pEntry->bytes);
    encoded += taosEncodeString(buf, pEntry->name);
    fieldIdx++;
  }

  return encoded;
}
|
||||
|
||||
// Decode an alter-super-table request from buf into *pReq.
//
// Reads name, alterType and numOfFields, then numOfFields (type, bytes, name)
// triples into a freshly created pReq->pFields array.
//
// Returns the position after the last decoded field, or NULL with terrno set
// to TSDB_CODE_OUT_OF_MEMORY when the array cannot be created or grown.
void *tDeserializeSMAlterStbReq(void *buf, SMAltertbReq *pReq) {
  buf = taosDecodeStringTo(buf, pReq->name);
  buf = taosDecodeFixedI8(buf, &pReq->alterType);
  buf = taosDecodeFixedI32(buf, &pReq->numOfFields);

  pReq->pFields = taosArrayInit(pReq->numOfFields, sizeof(SField));
  if (!pReq->pFields) {
    terrno = TSDB_CODE_OUT_OF_MEMORY;
    return NULL;
  }

  int32_t fieldIdx = 0;
  while (fieldIdx < pReq->numOfFields) {
    SField entry = {0};
    buf = taosDecodeFixedU8(buf, &entry.type);
    buf = taosDecodeFixedI32(buf, &entry.bytes);
    buf = taosDecodeStringTo(buf, entry.name);
    if (!taosArrayPush(pReq->pFields, &entry)) {
      terrno = TSDB_CODE_OUT_OF_MEMORY;
      return NULL;
    }
    fieldIdx++;
  }

  return buf;
}
|
||||
|
||||
// Encode a dnode status request through *buf.
//
// Three sections are written back to back:
//   1. status fields (mver .. dnodeEp),
//   2. the cluster configuration (statusInterval, checkTime, timezone,
//      locale, charset),
//   3. an int32 count followed by one record per entry of pVloads.
//
// Returns the sum of the lengths reported by the individual taosEncode* calls.
int32_t tSerializeSStatusReq(void **buf, SStatusReq *pReq) {
  int32_t encoded = 0;

  // section 1: status
  encoded += taosEncodeFixedI32(buf, pReq->mver);
  encoded += taosEncodeFixedI32(buf, pReq->sver);
  encoded += taosEncodeFixedI64(buf, pReq->dver);
  encoded += taosEncodeFixedI32(buf, pReq->dnodeId);
  encoded += taosEncodeFixedI64(buf, pReq->clusterId);
  encoded += taosEncodeFixedI64(buf, pReq->rebootTime);
  encoded += taosEncodeFixedI64(buf, pReq->updateTime);
  encoded += taosEncodeFixedI32(buf, pReq->numOfCores);
  encoded += taosEncodeFixedI32(buf, pReq->numOfSupportVnodes);
  encoded += taosEncodeString(buf, pReq->dnodeEp);

  // section 2: cluster cfg
  encoded += taosEncodeFixedI32(buf, pReq->clusterCfg.statusInterval);
  encoded += taosEncodeFixedI64(buf, pReq->clusterCfg.checkTime);
  encoded += taosEncodeString(buf, pReq->clusterCfg.timezone);
  encoded += taosEncodeString(buf, pReq->clusterCfg.locale);
  encoded += taosEncodeString(buf, pReq->clusterCfg.charset);

  // section 3: vnode loads, prefixed with their count
  int32_t numOfVloads = (int32_t)taosArrayGetSize(pReq->pVloads);
  encoded += taosEncodeFixedI32(buf, numOfVloads);

  int32_t loadIdx = 0;
  while (loadIdx < numOfVloads) {
    SVnodeLoad *pEntry = taosArrayGet(pReq->pVloads, loadIdx);
    encoded += taosEncodeFixedI32(buf, pEntry->vgId);
    encoded += taosEncodeFixedI8(buf, pEntry->role);
    encoded += taosEncodeFixedI64(buf, pEntry->totalStorage);
    encoded += taosEncodeFixedI64(buf, pEntry->compStorage);
    encoded += taosEncodeFixedI64(buf, pEntry->pointsWritten);
    encoded += taosEncodeFixedI64(buf, pEntry->tablesNum);
    loadIdx++;
  }

  return encoded;
}
|
||||
|
||||
// Decode a dnode status request from buf into *pReq.
//
// Reads the status fields, the cluster configuration and then an int32 count
// followed by that many vnode-load records, which are collected in a freshly
// created pReq->pVloads array.
//
// Returns the position after the last decoded field, or NULL with terrno set
// to TSDB_CODE_OUT_OF_MEMORY when the array cannot be created or grown.
void *tDeserializeSStatusReq(void *buf, SStatusReq *pReq) {
  // status
  buf = taosDecodeFixedI32(buf, &pReq->mver);
  buf = taosDecodeFixedI32(buf, &pReq->sver);
  buf = taosDecodeFixedI64(buf, &pReq->dver);
  buf = taosDecodeFixedI32(buf, &pReq->dnodeId);
  buf = taosDecodeFixedI64(buf, &pReq->clusterId);
  buf = taosDecodeFixedI64(buf, &pReq->rebootTime);
  buf = taosDecodeFixedI64(buf, &pReq->updateTime);
  buf = taosDecodeFixedI32(buf, &pReq->numOfCores);
  buf = taosDecodeFixedI32(buf, &pReq->numOfSupportVnodes);
  buf = taosDecodeStringTo(buf, pReq->dnodeEp);

  // cluster cfg
  buf = taosDecodeFixedI32(buf, &pReq->clusterCfg.statusInterval);
  buf = taosDecodeFixedI64(buf, &pReq->clusterCfg.checkTime);
  buf = taosDecodeStringTo(buf, pReq->clusterCfg.timezone);
  buf = taosDecodeStringTo(buf, pReq->clusterCfg.locale);
  buf = taosDecodeStringTo(buf, pReq->clusterCfg.charset);

  // vnode loads: count first, then the records
  int32_t numOfVloads = 0;
  buf = taosDecodeFixedI32(buf, &numOfVloads);

  pReq->pVloads = taosArrayInit(numOfVloads, sizeof(SVnodeLoad));
  if (!pReq->pVloads) {
    terrno = TSDB_CODE_OUT_OF_MEMORY;
    return NULL;
  }

  int32_t loadIdx = 0;
  while (loadIdx < numOfVloads) {
    SVnodeLoad entry = {0};
    buf = taosDecodeFixedI32(buf, &entry.vgId);
    buf = taosDecodeFixedI8(buf, &entry.role);
    buf = taosDecodeFixedI64(buf, &entry.totalStorage);
    buf = taosDecodeFixedI64(buf, &entry.compStorage);
    buf = taosDecodeFixedI64(buf, &entry.pointsWritten);
    buf = taosDecodeFixedI64(buf, &entry.tablesNum);
    if (!taosArrayPush(pReq->pVloads, &entry)) {
      terrno = TSDB_CODE_OUT_OF_MEMORY;
      return NULL;
    }
    loadIdx++;
  }

  return buf;
}
|
||||
|
||||
// Encode a status response through *buf.
//
// Written in order: mver, dver, the dnode configuration (dnodeId, clusterId),
// then an int32 count followed by one (id, isMnode, fqdn, port) record per
// entry of pDnodeEps.
//
// Returns the sum of the lengths reported by the individual taosEncode* calls.
int32_t tSerializeSStatusRsp(void **buf, SStatusRsp *pRsp) {
  int32_t encoded = 0;

  // status
  encoded += taosEncodeFixedI32(buf, pRsp->mver);
  encoded += taosEncodeFixedI64(buf, pRsp->dver);

  // dnode cfg
  encoded += taosEncodeFixedI32(buf, pRsp->dnodeCfg.dnodeId);
  encoded += taosEncodeFixedI64(buf, pRsp->dnodeCfg.clusterId);

  // dnode eps, prefixed with their count
  int32_t numOfEps = (int32_t)taosArrayGetSize(pRsp->pDnodeEps);
  encoded += taosEncodeFixedI32(buf, numOfEps);

  int32_t epIdx = 0;
  while (epIdx < numOfEps) {
    SDnodeEp *pEntry = taosArrayGet(pRsp->pDnodeEps, epIdx);
    encoded += taosEncodeFixedI32(buf, pEntry->id);
    encoded += taosEncodeFixedI8(buf, pEntry->isMnode);
    encoded += taosEncodeString(buf, pEntry->ep.fqdn);
    encoded += taosEncodeFixedU16(buf, pEntry->ep.port);
    epIdx++;
  }

  return encoded;
}
|
||||
|
||||
// Decode a status response from buf into *pRsp.
//
// Reads mver, dver, the dnode configuration (dnodeId, clusterId) and then an
// int32 count followed by that many (id, isMnode, fqdn, port) records, which
// are collected in a freshly created pRsp->pDnodeEps array.
//
// Returns the position after the last decoded field, or NULL with terrno set
// to TSDB_CODE_OUT_OF_MEMORY when the array cannot be created or grown.
void *tDeserializeSStatusRsp(void *buf, SStatusRsp *pRsp) {
  // status
  buf = taosDecodeFixedI32(buf, &pRsp->mver);
  buf = taosDecodeFixedI64(buf, &pRsp->dver);

  // dnode cfg
  buf = taosDecodeFixedI32(buf, &pRsp->dnodeCfg.dnodeId);
  buf = taosDecodeFixedI64(buf, &pRsp->dnodeCfg.clusterId);

  // dnode eps: count first, then the records
  int32_t numOfEps = 0;
  buf = taosDecodeFixedI32(buf, &numOfEps);

  pRsp->pDnodeEps = taosArrayInit(numOfEps, sizeof(SDnodeEp));
  if (!pRsp->pDnodeEps) {
    terrno = TSDB_CODE_OUT_OF_MEMORY;
    return NULL;
  }

  int32_t epIdx = 0;
  while (epIdx < numOfEps) {
    SDnodeEp entry = {0};
    buf = taosDecodeFixedI32(buf, &entry.id);
    buf = taosDecodeFixedI8(buf, &entry.isMnode);
    buf = taosDecodeStringTo(buf, entry.ep.fqdn);
    buf = taosDecodeFixedU16(buf, &entry.ep.port);
    if (!taosArrayPush(pRsp->pDnodeEps, &entry)) {
      terrno = TSDB_CODE_OUT_OF_MEMORY;
      return NULL;
    }
    epIdx++;
  }

  return buf;
}
|
||||
|
|
|
@ -55,7 +55,7 @@ typedef struct {
|
|||
SEpSet mnodeEpSet;
|
||||
char *file;
|
||||
SHashObj *dnodeHash;
|
||||
SDnodeEps *dnodeEps;
|
||||
SArray *pDnodeEps;
|
||||
pthread_t *threadId;
|
||||
SRWLatch latch;
|
||||
SDnodeWorker mgmtWorker;
|
||||
|
|
|
@ -23,7 +23,7 @@ extern "C" {
|
|||
|
||||
int32_t dndInitVnodes(SDnode *pDnode);
|
||||
void dndCleanupVnodes(SDnode *pDnode);
|
||||
void dndGetVnodeLoads(SDnode *pDnode, SVnodeLoads *pVloads);
|
||||
void dndGetVnodeLoads(SDnode *pDnode, SArray *pLoads);
|
||||
void dndProcessVnodeWriteMsg(SDnode *pDnode, SRpcMsg *pMsg, SEpSet *pEpSet);
|
||||
void dndProcessVnodeSyncMsg(SDnode *pDnode, SRpcMsg *pMsg, SEpSet *pEpSet);
|
||||
void dndProcessVnodeQueryMsg(SDnode *pDnode, SRpcMsg *pMsg, SEpSet *pEpSet);
|
||||
|
|
|
@ -113,35 +113,31 @@ static void dndUpdateMnodeEpSet(SDnode *pDnode, SEpSet *pEpSet) {
|
|||
static void dndPrintDnodes(SDnode *pDnode) {
|
||||
SDnodeMgmt *pMgmt = &pDnode->dmgmt;
|
||||
|
||||
dDebug("print dnode ep list, num:%d", pMgmt->dnodeEps->num);
|
||||
for (int32_t i = 0; i < pMgmt->dnodeEps->num; i++) {
|
||||
SDnodeEp *pEp = &pMgmt->dnodeEps->eps[i];
|
||||
int32_t numOfEps = (int32_t)taosArrayGetSize(pMgmt->pDnodeEps);
|
||||
dDebug("print dnode ep list, num:%d", numOfEps);
|
||||
for (int32_t i = 0; i < numOfEps; i++) {
|
||||
SDnodeEp *pEp = taosArrayGet(pMgmt->pDnodeEps, i);
|
||||
dDebug("dnode:%d, fqdn:%s port:%u isMnode:%d", pEp->id, pEp->ep.fqdn, pEp->ep.port, pEp->isMnode);
|
||||
}
|
||||
}
|
||||
|
||||
static void dndResetDnodes(SDnode *pDnode, SDnodeEps *pDnodeEps) {
|
||||
static void dndResetDnodes(SDnode *pDnode, SArray *pDnodeEps) {
|
||||
SDnodeMgmt *pMgmt = &pDnode->dmgmt;
|
||||
|
||||
int32_t size = sizeof(SDnodeEps) + pDnodeEps->num * sizeof(SDnodeEp);
|
||||
if (pDnodeEps->num > pMgmt->dnodeEps->num) {
|
||||
SDnodeEps *tmp = calloc(1, size);
|
||||
if (tmp == NULL) return;
|
||||
|
||||
tfree(pMgmt->dnodeEps);
|
||||
pMgmt->dnodeEps = tmp;
|
||||
}
|
||||
|
||||
if (pMgmt->dnodeEps != pDnodeEps) {
|
||||
memcpy(pMgmt->dnodeEps, pDnodeEps, size);
|
||||
if (pMgmt->pDnodeEps != pDnodeEps) {
|
||||
SArray *tmp = pMgmt->pDnodeEps;
|
||||
pMgmt->pDnodeEps = taosArrayDup(pDnodeEps);
|
||||
taosArrayDestroy(tmp);
|
||||
}
|
||||
|
||||
pMgmt->mnodeEpSet.inUse = 0;
|
||||
pMgmt->mnodeEpSet.numOfEps = 0;
|
||||
|
||||
int32_t mIndex = 0;
|
||||
for (int32_t i = 0; i < pMgmt->dnodeEps->num; i++) {
|
||||
SDnodeEp *pDnodeEp = &pMgmt->dnodeEps->eps[i];
|
||||
int32_t numOfEps = (int32_t)taosArrayGetSize(pDnodeEps);
|
||||
|
||||
for (int32_t i = 0; i < numOfEps; i++) {
|
||||
SDnodeEp *pDnodeEp = taosArrayGet(pDnodeEps, i);
|
||||
if (!pDnodeEp->isMnode) continue;
|
||||
if (mIndex >= TSDB_MAX_REPLICA) continue;
|
||||
pMgmt->mnodeEpSet.numOfEps++;
|
||||
|
@ -150,8 +146,8 @@ static void dndResetDnodes(SDnode *pDnode, SDnodeEps *pDnodeEps) {
|
|||
mIndex++;
|
||||
}
|
||||
|
||||
for (int32_t i = 0; i < pMgmt->dnodeEps->num; ++i) {
|
||||
SDnodeEp *pDnodeEp = &pMgmt->dnodeEps->eps[i];
|
||||
for (int32_t i = 0; i < numOfEps; i++) {
|
||||
SDnodeEp *pDnodeEp = taosArrayGet(pDnodeEps, i);
|
||||
taosHashPut(pMgmt->dnodeHash, &pDnodeEp->id, sizeof(int32_t), pDnodeEp, sizeof(SDnodeEp));
|
||||
}
|
||||
|
||||
|
@ -178,6 +174,12 @@ static bool dndIsEpChanged(SDnode *pDnode, int32_t dnodeId, char *pEp) {
|
|||
static int32_t dndReadDnodes(SDnode *pDnode) {
|
||||
SDnodeMgmt *pMgmt = &pDnode->dmgmt;
|
||||
|
||||
pMgmt->pDnodeEps = taosArrayInit(1, sizeof(SDnodeEp));
|
||||
if (pMgmt->pDnodeEps == NULL) {
|
||||
dError("failed to calloc dnodeEp array since %s", strerror(errno));
|
||||
goto PRASE_DNODE_OVER;
|
||||
}
|
||||
|
||||
int32_t code = TSDB_CODE_DND_DNODE_READ_FILE_ERROR;
|
||||
int32_t len = 0;
|
||||
int32_t maxLen = 256 * 1024;
|
||||
|
@ -238,18 +240,11 @@ static int32_t dndReadDnodes(SDnode *pDnode) {
|
|||
goto PRASE_DNODE_OVER;
|
||||
}
|
||||
|
||||
pMgmt->dnodeEps = calloc(1, numOfDnodes * sizeof(SDnodeEp) + sizeof(SDnodeEps));
|
||||
if (pMgmt->dnodeEps == NULL) {
|
||||
dError("failed to calloc dnodeEpList since %s", strerror(errno));
|
||||
goto PRASE_DNODE_OVER;
|
||||
}
|
||||
pMgmt->dnodeEps->num = numOfDnodes;
|
||||
|
||||
for (int32_t i = 0; i < numOfDnodes; ++i) {
|
||||
cJSON *node = cJSON_GetArrayItem(dnodes, i);
|
||||
if (node == NULL) break;
|
||||
|
||||
SDnodeEp *pDnodeEp = &pMgmt->dnodeEps->eps[i];
|
||||
SDnodeEp dnodeEp = {0};
|
||||
|
||||
cJSON *did = cJSON_GetObjectItem(node, "id");
|
||||
if (!did || did->type != cJSON_Number) {
|
||||
|
@ -257,14 +252,14 @@ static int32_t dndReadDnodes(SDnode *pDnode) {
|
|||
goto PRASE_DNODE_OVER;
|
||||
}
|
||||
|
||||
pDnodeEp->id = dnodeId->valueint;
|
||||
dnodeEp.id = dnodeId->valueint;
|
||||
|
||||
cJSON *dnodeFqdn = cJSON_GetObjectItem(node, "fqdn");
|
||||
if (!dnodeFqdn || dnodeFqdn->type != cJSON_String || dnodeFqdn->valuestring == NULL) {
|
||||
dError("failed to read %s since dnodeFqdn not found", pMgmt->file);
|
||||
goto PRASE_DNODE_OVER;
|
||||
}
|
||||
tstrncpy(pDnodeEp->ep.fqdn, dnodeFqdn->valuestring, TSDB_FQDN_LEN);
|
||||
tstrncpy(dnodeEp.ep.fqdn, dnodeFqdn->valuestring, TSDB_FQDN_LEN);
|
||||
|
||||
cJSON *dnodePort = cJSON_GetObjectItem(node, "port");
|
||||
if (!dnodePort || dnodePort->type != cJSON_Number) {
|
||||
|
@ -272,14 +267,16 @@ static int32_t dndReadDnodes(SDnode *pDnode) {
|
|||
goto PRASE_DNODE_OVER;
|
||||
}
|
||||
|
||||
pDnodeEp->ep.port = dnodePort->valueint;
|
||||
dnodeEp.ep.port = dnodePort->valueint;
|
||||
|
||||
cJSON *isMnode = cJSON_GetObjectItem(node, "isMnode");
|
||||
if (!isMnode || isMnode->type != cJSON_Number) {
|
||||
dError("failed to read %s since isMnode not found", pMgmt->file);
|
||||
goto PRASE_DNODE_OVER;
|
||||
}
|
||||
pDnodeEp->isMnode = isMnode->valueint;
|
||||
dnodeEp.isMnode = isMnode->valueint;
|
||||
|
||||
taosArrayPush(pMgmt->pDnodeEps, &dnodeEp);
|
||||
}
|
||||
|
||||
code = 0;
|
||||
|
@ -296,15 +293,14 @@ PRASE_DNODE_OVER:
|
|||
return -1;
|
||||
}
|
||||
|
||||
if (pMgmt->dnodeEps == NULL) {
|
||||
pMgmt->dnodeEps = calloc(1, sizeof(SDnodeEps) + sizeof(SDnodeEp));
|
||||
pMgmt->dnodeEps->num = 1;
|
||||
pMgmt->dnodeEps->eps[0].isMnode = 1;
|
||||
|
||||
taosGetFqdnPortFromEp(pDnode->cfg.firstEp, &(pMgmt->dnodeEps->eps[0].ep));
|
||||
if (taosArrayGetSize(pMgmt->pDnodeEps) == 0) {
|
||||
SDnodeEp dnodeEp = {0};
|
||||
dnodeEp.isMnode = 1;
|
||||
taosGetFqdnPortFromEp(pDnode->cfg.firstEp, &dnodeEp.ep);
|
||||
taosArrayPush(pMgmt->pDnodeEps, &dnodeEp);
|
||||
}
|
||||
|
||||
dndResetDnodes(pDnode, pMgmt->dnodeEps);
|
||||
dndResetDnodes(pDnode, pMgmt->pDnodeEps);
|
||||
|
||||
terrno = 0;
|
||||
return 0;
|
||||
|
@ -329,13 +325,15 @@ static int32_t dndWriteDnodes(SDnode *pDnode) {
|
|||
len += snprintf(content + len, maxLen - len, " \"clusterId\": \"%" PRId64 "\",\n", pMgmt->clusterId);
|
||||
len += snprintf(content + len, maxLen - len, " \"dropped\": %d,\n", pMgmt->dropped);
|
||||
len += snprintf(content + len, maxLen - len, " \"dnodes\": [{\n");
|
||||
for (int32_t i = 0; i < pMgmt->dnodeEps->num; ++i) {
|
||||
SDnodeEp *pDnodeEp = &pMgmt->dnodeEps->eps[i];
|
||||
|
||||
int32_t numOfEps = (int32_t)taosArrayGetSize(pMgmt->pDnodeEps);
|
||||
for (int32_t i = 0; i < numOfEps; ++i) {
|
||||
SDnodeEp *pDnodeEp = taosArrayGet(pMgmt->pDnodeEps, i);
|
||||
len += snprintf(content + len, maxLen - len, " \"id\": %d,\n", pDnodeEp->id);
|
||||
len += snprintf(content + len, maxLen - len, " \"fqdn\": \"%s\",\n", pDnodeEp->ep.fqdn);
|
||||
len += snprintf(content + len, maxLen - len, " \"port\": %u,\n", pDnodeEp->ep.port);
|
||||
len += snprintf(content + len, maxLen - len, " \"isMnode\": %d\n", pDnodeEp->isMnode);
|
||||
if (i < pMgmt->dnodeEps->num - 1) {
|
||||
if (i < numOfEps - 1) {
|
||||
len += snprintf(content + len, maxLen - len, " },{\n");
|
||||
} else {
|
||||
len += snprintf(content + len, maxLen - len, " }]\n");
|
||||
|
@ -355,40 +353,39 @@ static int32_t dndWriteDnodes(SDnode *pDnode) {
|
|||
}
|
||||
|
||||
void dndSendStatusReq(SDnode *pDnode) {
|
||||
int32_t contLen = sizeof(SStatusReq) + TSDB_MAX_VNODES * sizeof(SVnodeLoad);
|
||||
|
||||
SStatusReq *pStatus = rpcMallocCont(contLen);
|
||||
if (pStatus == NULL) {
|
||||
dError("failed to malloc status message");
|
||||
return;
|
||||
}
|
||||
SStatusReq req = {0};
|
||||
|
||||
SDnodeMgmt *pMgmt = &pDnode->dmgmt;
|
||||
taosRLockLatch(&pMgmt->latch);
|
||||
pStatus->sver = htonl(pDnode->env.sver);
|
||||
pStatus->dver = htobe64(pMgmt->dver);
|
||||
pStatus->dnodeId = htonl(pMgmt->dnodeId);
|
||||
pStatus->clusterId = htobe64(pMgmt->clusterId);
|
||||
pStatus->rebootTime = htobe64(pMgmt->rebootTime);
|
||||
pStatus->updateTime = htobe64(pMgmt->updateTime);
|
||||
pStatus->numOfCores = htonl(pDnode->env.numOfCores);
|
||||
pStatus->numOfSupportVnodes = htonl(pDnode->cfg.numOfSupportVnodes);
|
||||
tstrncpy(pStatus->dnodeEp, pDnode->cfg.localEp, TSDB_EP_LEN);
|
||||
req.sver = pDnode->env.sver;
|
||||
req.dver = pMgmt->dver;
|
||||
req.dnodeId = pMgmt->dnodeId;
|
||||
req.clusterId = pMgmt->clusterId;
|
||||
req.rebootTime = pMgmt->rebootTime;
|
||||
req.updateTime = pMgmt->updateTime;
|
||||
req.numOfCores = pDnode->env.numOfCores;
|
||||
req.numOfSupportVnodes = pDnode->cfg.numOfSupportVnodes;
|
||||
memcpy(req.dnodeEp, pDnode->cfg.localEp, TSDB_EP_LEN);
|
||||
|
||||
pStatus->clusterCfg.statusInterval = htonl(pDnode->cfg.statusInterval);
|
||||
pStatus->clusterCfg.checkTime = 0;
|
||||
req.clusterCfg.statusInterval = pDnode->cfg.statusInterval;
|
||||
req.clusterCfg.checkTime = 0;
|
||||
char timestr[32] = "1970-01-01 00:00:00.00";
|
||||
(void)taosParseTime(timestr, &pStatus->clusterCfg.checkTime, (int32_t)strlen(timestr), TSDB_TIME_PRECISION_MILLI, 0);
|
||||
pStatus->clusterCfg.checkTime = htonl(pStatus->clusterCfg.checkTime);
|
||||
tstrncpy(pStatus->clusterCfg.timezone, pDnode->env.timezone, TSDB_TIMEZONE_LEN);
|
||||
tstrncpy(pStatus->clusterCfg.locale, pDnode->env.locale, TSDB_LOCALE_LEN);
|
||||
tstrncpy(pStatus->clusterCfg.charset, pDnode->env.charset, TSDB_LOCALE_LEN);
|
||||
(void)taosParseTime(timestr, &req.clusterCfg.checkTime, (int32_t)strlen(timestr), TSDB_TIME_PRECISION_MILLI, 0);
|
||||
memcpy(req.clusterCfg.timezone, pDnode->env.timezone, TSDB_TIMEZONE_LEN);
|
||||
memcpy(req.clusterCfg.locale, pDnode->env.locale, TSDB_LOCALE_LEN);
|
||||
memcpy(req.clusterCfg.charset, pDnode->env.charset, TSDB_LOCALE_LEN);
|
||||
taosRUnLockLatch(&pMgmt->latch);
|
||||
|
||||
dndGetVnodeLoads(pDnode, &pStatus->vnodeLoads);
|
||||
contLen = sizeof(SStatusReq) + pStatus->vnodeLoads.num * sizeof(SVnodeLoad);
|
||||
req.pVloads = taosArrayInit(TSDB_MAX_VNODES, sizeof(SVnodeLoad));
|
||||
dndGetVnodeLoads(pDnode, req.pVloads);
|
||||
|
||||
SRpcMsg rpcMsg = {.pCont = pStatus, .contLen = contLen, .msgType = TDMT_MND_STATUS, .ahandle = (void *)9527};
|
||||
int32_t contLen = tSerializeSStatusReq(NULL, &req);
|
||||
void *pHead = rpcMallocCont(contLen);
|
||||
void *pBuf = pHead;
|
||||
tSerializeSStatusReq(&pBuf, &req);
|
||||
taosArrayDestroy(req.pVloads);
|
||||
|
||||
SRpcMsg rpcMsg = {.pCont = pHead, .contLen = contLen, .msgType = TDMT_MND_STATUS, .ahandle = (void *)9527};
|
||||
pMgmt->statusSent = 1;
|
||||
|
||||
dTrace("pDnode:%p, send status req to mnode", pDnode);
|
||||
|
@ -407,18 +404,20 @@ static void dndUpdateDnodeCfg(SDnode *pDnode, SDnodeCfg *pCfg) {
|
|||
}
|
||||
}
|
||||
|
||||
static void dndUpdateDnodeEps(SDnode *pDnode, SDnodeEps *pDnodeEps) {
|
||||
if (pDnodeEps == NULL || pDnodeEps->num <= 0) return;
|
||||
static void dndUpdateDnodeEps(SDnode *pDnode, SArray *pDnodeEps) {
|
||||
int32_t numOfEps = taosArrayGetSize(pDnodeEps);
|
||||
if (numOfEps <= 0) return;
|
||||
|
||||
SDnodeMgmt *pMgmt = &pDnode->dmgmt;
|
||||
taosWLockLatch(&pMgmt->latch);
|
||||
|
||||
if (pDnodeEps->num != pMgmt->dnodeEps->num) {
|
||||
int32_t numOfEpsOld = (int32_t)taosArrayGetSize(pMgmt->pDnodeEps);
|
||||
if (numOfEps != numOfEpsOld) {
|
||||
dndResetDnodes(pDnode, pDnodeEps);
|
||||
dndWriteDnodes(pDnode);
|
||||
} else {
|
||||
int32_t size = pDnodeEps->num * sizeof(SDnodeEp) + sizeof(SDnodeEps);
|
||||
if (memcmp(pMgmt->dnodeEps, pDnodeEps, size) != 0) {
|
||||
int32_t size = numOfEps * sizeof(SDnodeEp);
|
||||
if (memcmp(pMgmt->pDnodeEps->pData, pDnodeEps->pData, size) != 0) {
|
||||
dndResetDnodes(pDnode, pDnodeEps);
|
||||
dndWriteDnodes(pDnode);
|
||||
}
|
||||
|
@ -431,33 +430,21 @@ static void dndProcessStatusRsp(SDnode *pDnode, SRpcMsg *pRsp) {
|
|||
SDnodeMgmt *pMgmt = &pDnode->dmgmt;
|
||||
|
||||
if (pRsp->code != TSDB_CODE_SUCCESS) {
|
||||
pMgmt->statusSent = 0;
|
||||
if (pRsp->code == TSDB_CODE_MND_DNODE_NOT_EXIST && !pMgmt->dropped && pMgmt->dnodeId > 0) {
|
||||
dInfo("dnode:%d, set to dropped since not exist in mnode", pMgmt->dnodeId);
|
||||
pMgmt->dropped = 1;
|
||||
dndWriteDnodes(pDnode);
|
||||
}
|
||||
return;
|
||||
} else {
|
||||
SStatusRsp statusRsp = {0};
|
||||
if (pRsp->pCont != NULL && pRsp->contLen != 0 && tDeserializeSStatusRsp(pRsp->pCont, &statusRsp) != NULL) {
|
||||
pMgmt->dver = statusRsp.dver;
|
||||
dndUpdateDnodeCfg(pDnode, &statusRsp.dnodeCfg);
|
||||
dndUpdateDnodeEps(pDnode, statusRsp.pDnodeEps);
|
||||
}
|
||||
taosArrayDestroy(statusRsp.pDnodeEps);
|
||||
}
|
||||
|
||||
if (pRsp->pCont != NULL && pRsp->contLen != 0) {
|
||||
SStatusRsp *pStatus = pRsp->pCont;
|
||||
pMgmt->dver = htobe64(pStatus->dver);
|
||||
|
||||
SDnodeCfg *pCfg = &pStatus->dnodeCfg;
|
||||
pCfg->dnodeId = htonl(pCfg->dnodeId);
|
||||
pCfg->clusterId = htobe64(pCfg->clusterId);
|
||||
dndUpdateDnodeCfg(pDnode, pCfg);
|
||||
|
||||
SDnodeEps *pDnodeEps = &pStatus->dnodeEps;
|
||||
pDnodeEps->num = htonl(pDnodeEps->num);
|
||||
for (int32_t i = 0; i < pDnodeEps->num; ++i) {
|
||||
pDnodeEps->eps[i].id = htonl(pDnodeEps->eps[i].id);
|
||||
pDnodeEps->eps[i].ep.port = htons(pDnodeEps->eps[i].ep.port);
|
||||
}
|
||||
|
||||
dndUpdateDnodeEps(pDnode, pDnodeEps);
|
||||
}
|
||||
pMgmt->statusSent = 0;
|
||||
}
|
||||
|
||||
|
@ -572,9 +559,9 @@ void dndCleanupMgmt(SDnode *pDnode) {
|
|||
SDnodeMgmt *pMgmt = &pDnode->dmgmt;
|
||||
taosWLockLatch(&pMgmt->latch);
|
||||
|
||||
if (pMgmt->dnodeEps != NULL) {
|
||||
free(pMgmt->dnodeEps);
|
||||
pMgmt->dnodeEps = NULL;
|
||||
if (pMgmt->pDnodeEps != NULL) {
|
||||
taosArrayDestroy(pMgmt->pDnodeEps);
|
||||
pMgmt->pDnodeEps = NULL;
|
||||
}
|
||||
|
||||
if (pMgmt->dnodeHash != NULL) {
|
||||
|
|
|
@ -256,6 +256,12 @@ static bool dndNeedDeployMnode(SDnode *pDnode) {
|
|||
|
||||
// Hand pRpcMsg to the mnode write worker of pDnode.
// Always returns 0; no result of the hand-off is inspected here.
static int32_t dndPutMsgToMWriteQ(SDnode *pDnode, SRpcMsg *pRpcMsg) {
  dndWriteMnodeMsgToWorker(pDnode, &pDnode->mmgmt.writeWorker, pRpcMsg);

  return 0;
}
|
||||
|
||||
// Hand pRpcMsg to the mnode read worker of pDnode.
// Always returns 0; no result of the hand-off is inspected here.
static int32_t dndPutMsgToMReadQ(SDnode *pDnode, SRpcMsg *pRpcMsg) {
  dndWriteMnodeMsgToWorker(pDnode, &pDnode->mmgmt.readWorker, pRpcMsg);

  return 0;
}
|
||||
|
||||
static void dndInitMnodeOption(SDnode *pDnode, SMnodeOpt *pOption) {
|
||||
|
@ -264,6 +270,7 @@ static void dndInitMnodeOption(SDnode *pDnode, SMnodeOpt *pOption) {
|
|||
pOption->sendReqToMnodeFp = dndSendReqToMnode;
|
||||
pOption->sendRedirectRspFp = dndSendRedirectRsp;
|
||||
pOption->putReqToMWriteQFp = dndPutMsgToMWriteQ;
|
||||
pOption->putReqToMReadQFp = dndPutMsgToMReadQ;
|
||||
pOption->dnodeId = dndGetDnodeId(pDnode);
|
||||
pOption->clusterId = dndGetClusterId(pDnode);
|
||||
pOption->cfg.sver = pDnode->env.sver;
|
||||
|
|
|
@ -1008,27 +1008,21 @@ void dndCleanupVnodes(SDnode *pDnode) {
|
|||
dInfo("dnode-vnodes is cleaned up");
|
||||
}
|
||||
|
||||
void dndGetVnodeLoads(SDnode *pDnode, SVnodeLoads *pLoads) {
|
||||
void dndGetVnodeLoads(SDnode *pDnode, SArray *pLoads) {
|
||||
SVnodesMgmt *pMgmt = &pDnode->vmgmt;
|
||||
|
||||
taosRLockLatch(&pMgmt->latch);
|
||||
pLoads->num = taosHashGetSize(pMgmt->hash);
|
||||
|
||||
int32_t v = 0;
|
||||
void * pIter = taosHashIterate(pMgmt->hash, NULL);
|
||||
void *pIter = taosHashIterate(pMgmt->hash, NULL);
|
||||
while (pIter) {
|
||||
SVnodeObj **ppVnode = pIter;
|
||||
if (ppVnode == NULL || *ppVnode == NULL) continue;
|
||||
|
||||
SVnodeObj * pVnode = *ppVnode;
|
||||
SVnodeLoad *pLoad = &pLoads->data[v++];
|
||||
|
||||
vnodeGetLoad(pVnode->pImpl, pLoad);
|
||||
pLoad->vgId = htonl(pLoad->vgId);
|
||||
pLoad->totalStorage = htobe64(pLoad->totalStorage);
|
||||
pLoad->compStorage = htobe64(pLoad->compStorage);
|
||||
pLoad->pointsWritten = htobe64(pLoad->pointsWritten);
|
||||
pLoad->tablesNum = htobe64(pLoad->tablesNum);
|
||||
SVnodeObj *pVnode = *ppVnode;
|
||||
SVnodeLoad vload = {0};
|
||||
vnodeGetLoad(pVnode->pImpl, &vload);
|
||||
taosArrayPush(pLoads, &vload);
|
||||
|
||||
pIter = taosHashIterate(pMgmt->hash, pIter);
|
||||
}
|
||||
|
|
|
@ -108,7 +108,7 @@ TEST_F(DndTestVnode, 01_Create_Vnode) {
|
|||
}
|
||||
}
|
||||
|
||||
TEST_F(DndTestVnode, 02_ALTER_Vnode) {
|
||||
TEST_F(DndTestVnode, 02_Alter_Vnode) {
|
||||
for (int i = 0; i < 3; ++i) {
|
||||
int32_t contLen = sizeof(SAlterVnodeReq);
|
||||
|
||||
|
@ -219,7 +219,7 @@ TEST_F(DndTestVnode, 03_Create_Stb) {
|
|||
}
|
||||
}
|
||||
|
||||
TEST_F(DndTestVnode, 04_ALTER_Stb) {
|
||||
TEST_F(DndTestVnode, 04_Alter_Stb) {
|
||||
for (int i = 0; i < 1; ++i) {
|
||||
SVCreateTbReq req = {0};
|
||||
req.ver = 0;
|
||||
|
@ -310,7 +310,7 @@ TEST_F(DndTestVnode, 05_DROP_Stb) {
|
|||
}
|
||||
}
|
||||
|
||||
TEST_F(DndTestVnode, 06_DROP_Vnode) {
|
||||
TEST_F(DndTestVnode, 06_Drop_Vnode) {
|
||||
for (int i = 0; i < 3; ++i) {
|
||||
int32_t contLen = sizeof(SDropVnodeReq);
|
||||
|
||||
|
|
|
@ -28,6 +28,8 @@ void mndCleanupConsumer(SMnode *pMnode);
|
|||
SMqConsumerObj *mndAcquireConsumer(SMnode *pMnode, int64_t consumerId);
|
||||
void mndReleaseConsumer(SMnode *pMnode, SMqConsumerObj *pConsumer);
|
||||
|
||||
SMqConsumerObj* mndCreateConsumer(int64_t consumerId, const char* cgroup);
|
||||
|
||||
SSdbRaw *mndConsumerActionEncode(SMqConsumerObj *pConsumer);
|
||||
SSdbRow *mndConsumerActionDecode(SSdbRaw *pRaw);
|
||||
|
||||
|
|
|
@ -304,9 +304,10 @@ typedef struct {
|
|||
int32_t nextColId;
|
||||
int32_t numOfColumns;
|
||||
int32_t numOfTags;
|
||||
SSchema* pTags;
|
||||
SSchema* pColumns;
|
||||
SSchema* pTags;
|
||||
SRWLatch lock;
|
||||
char comment[TSDB_STB_COMMENT_LEN];
|
||||
} SStbObj;
|
||||
|
||||
typedef struct {
|
||||
|
@ -343,42 +344,22 @@ typedef struct {
|
|||
char payload[];
|
||||
} SShowObj;
|
||||
|
||||
#if 0
|
||||
typedef struct SConsumerObj {
|
||||
uint64_t uid;
|
||||
int64_t createTime;
|
||||
int64_t updateTime;
|
||||
//uint64_t dbUid;
|
||||
int32_t version;
|
||||
SRWLatch lock;
|
||||
SArray* topics;
|
||||
} SConsumerObj;
|
||||
|
||||
typedef struct SMqTopicConsumer {
|
||||
int64_t consumerId;
|
||||
SList* topicList;
|
||||
} SMqTopicConsumer;
|
||||
#endif
|
||||
|
||||
typedef struct SMqConsumerEp {
|
||||
typedef struct {
|
||||
int32_t vgId; // -1 for unassigned
|
||||
int32_t status;
|
||||
SEpSet epSet;
|
||||
int64_t oldConsumerId;
|
||||
int64_t consumerId; // -1 for unassigned
|
||||
int64_t lastConsumerHbTs;
|
||||
int64_t lastVgHbTs;
|
||||
char* qmsg;
|
||||
} SMqConsumerEp;
|
||||
|
||||
static FORCE_INLINE int32_t tEncodeSMqConsumerEp(void** buf, SMqConsumerEp* pConsumerEp) {
|
||||
static FORCE_INLINE int32_t tEncodeSMqConsumerEp(void** buf, const SMqConsumerEp* pConsumerEp) {
|
||||
int32_t tlen = 0;
|
||||
tlen += taosEncodeFixedI32(buf, pConsumerEp->vgId);
|
||||
tlen += taosEncodeFixedI32(buf, pConsumerEp->status);
|
||||
tlen += taosEncodeSEpSet(buf, &pConsumerEp->epSet);
|
||||
tlen += taosEncodeFixedI64(buf, pConsumerEp->oldConsumerId);
|
||||
tlen += taosEncodeFixedI64(buf, pConsumerEp->consumerId);
|
||||
tlen += taosEncodeFixedI64(buf, pConsumerEp->lastConsumerHbTs);
|
||||
tlen += taosEncodeFixedI64(buf, pConsumerEp->lastVgHbTs);
|
||||
//tlen += tEncodeSSubQueryMsg(buf, &pConsumerEp->qExec);
|
||||
tlen += taosEncodeString(buf, pConsumerEp->qmsg);
|
||||
return tlen;
|
||||
}
|
||||
|
@ -387,10 +368,8 @@ static FORCE_INLINE void* tDecodeSMqConsumerEp(void** buf, SMqConsumerEp* pConsu
|
|||
buf = taosDecodeFixedI32(buf, &pConsumerEp->vgId);
|
||||
buf = taosDecodeFixedI32(buf, &pConsumerEp->status);
|
||||
buf = taosDecodeSEpSet(buf, &pConsumerEp->epSet);
|
||||
buf = taosDecodeFixedI64(buf, &pConsumerEp->oldConsumerId);
|
||||
buf = taosDecodeFixedI64(buf, &pConsumerEp->consumerId);
|
||||
buf = taosDecodeFixedI64(buf, &pConsumerEp->lastConsumerHbTs);
|
||||
buf = taosDecodeFixedI64(buf, &pConsumerEp->lastVgHbTs);
|
||||
//buf = tDecodeSSubQueryMsg(buf, &pConsumerEp->qExec);
|
||||
buf = taosDecodeString(buf, &pConsumerEp->qmsg);
|
||||
return buf;
|
||||
}
|
||||
|
@ -401,97 +380,89 @@ static FORCE_INLINE void tDeleteSMqConsumerEp(SMqConsumerEp* pConsumerEp) {
|
|||
}
|
||||
}
|
||||
|
||||
// unit for rebalance
|
||||
typedef struct SMqSubscribeObj {
|
||||
typedef struct {
|
||||
int64_t consumerId;
|
||||
SArray* vgInfo; // SArray<SMqConsumerEp>
|
||||
} SMqSubConsumer;
|
||||
|
||||
static FORCE_INLINE int32_t tEncodeSMqSubConsumer(void** buf, const SMqSubConsumer* pConsumer) {
|
||||
int32_t tlen = 0;
|
||||
tlen += taosEncodeFixedI64(buf, pConsumer->consumerId);
|
||||
int32_t sz = taosArrayGetSize(pConsumer->vgInfo);
|
||||
for (int32_t i = 0; i < sz; i++) {
|
||||
SMqConsumerEp* pCEp = taosArrayGet(pConsumer->vgInfo, i);
|
||||
tlen += tEncodeSMqConsumerEp(buf, pCEp);
|
||||
}
|
||||
return tlen;
|
||||
}
|
||||
|
||||
static FORCE_INLINE void* tDecodeSMqSubConsumer(void** buf, SMqSubConsumer* pConsumer) {
|
||||
int32_t sz;
|
||||
buf = taosDecodeFixedI64(buf, &pConsumer->consumerId);
|
||||
buf = taosDecodeFixedI32(buf, &sz);
|
||||
pConsumer->vgInfo = taosArrayInit(sz, sizeof(SMqConsumerEp));
|
||||
for (int32_t i = 0; i < sz; i++) {
|
||||
SMqConsumerEp consumerEp;
|
||||
buf = tDecodeSMqConsumerEp(buf, &consumerEp);
|
||||
taosArrayPush(pConsumer->vgInfo, &consumerEp);
|
||||
}
|
||||
return buf;
|
||||
}
|
||||
|
||||
static FORCE_INLINE void tDeleteSMqSubConsumer(SMqSubConsumer* pSubConsumer) {
|
||||
if (pSubConsumer->vgInfo) {
|
||||
taosArrayDestroyEx(pSubConsumer->vgInfo, (void (*)(void*))tDeleteSMqConsumerEp);
|
||||
pSubConsumer->vgInfo = NULL;
|
||||
}
|
||||
}
|
||||
|
||||
typedef struct {
|
||||
char key[TSDB_SUBSCRIBE_KEY_LEN];
|
||||
int32_t epoch;
|
||||
// TODO: replace with priority queue
|
||||
int32_t nextConsumerIdx;
|
||||
SArray* availConsumer; // SArray<int64_t> (consumerId)
|
||||
SArray* assigned; // SArray<SMqConsumerEp>
|
||||
SArray* idleConsumer; // SArray<SMqConsumerEp>
|
||||
SArray* lostConsumer; // SArray<SMqConsumerEp>
|
||||
int32_t status;
|
||||
int32_t vgNum;
|
||||
SArray* consumers; // SArray<SMqSubConsumer>
|
||||
SArray* unassignedVg; // SArray<SMqConsumerEp>
|
||||
} SMqSubscribeObj;
|
||||
|
||||
static FORCE_INLINE SMqSubscribeObj* tNewSubscribeObj() {
|
||||
SMqSubscribeObj* pSub = malloc(sizeof(SMqSubscribeObj));
|
||||
SMqSubscribeObj* pSub = calloc(1, sizeof(SMqSubscribeObj));
|
||||
if (pSub == NULL) {
|
||||
return NULL;
|
||||
}
|
||||
pSub->key[0] = 0;
|
||||
pSub->epoch = 0;
|
||||
|
||||
pSub->availConsumer = taosArrayInit(0, sizeof(int64_t));
|
||||
if (pSub->availConsumer == NULL) {
|
||||
free(pSub);
|
||||
return NULL;
|
||||
}
|
||||
pSub->assigned = taosArrayInit(0, sizeof(SMqConsumerEp));
|
||||
if (pSub->assigned == NULL) {
|
||||
taosArrayDestroy(pSub->availConsumer);
|
||||
free(pSub);
|
||||
return NULL;
|
||||
}
|
||||
pSub->lostConsumer = taosArrayInit(0, sizeof(SMqConsumerEp));
|
||||
if (pSub->lostConsumer == NULL) {
|
||||
taosArrayDestroy(pSub->availConsumer);
|
||||
taosArrayDestroy(pSub->assigned);
|
||||
free(pSub);
|
||||
return NULL;
|
||||
}
|
||||
pSub->idleConsumer = taosArrayInit(0, sizeof(SMqConsumerEp));
|
||||
if (pSub->idleConsumer == NULL) {
|
||||
taosArrayDestroy(pSub->availConsumer);
|
||||
taosArrayDestroy(pSub->assigned);
|
||||
taosArrayDestroy(pSub->lostConsumer);
|
||||
free(pSub);
|
||||
return NULL;
|
||||
pSub->consumers = taosArrayInit(0, sizeof(SMqSubConsumer));
|
||||
if (pSub->consumers == NULL) {
|
||||
goto _err;
|
||||
}
|
||||
pSub->unassignedVg = taosArrayInit(0, sizeof(SMqConsumerEp));
|
||||
if (pSub->unassignedVg == NULL) {
|
||||
taosArrayDestroy(pSub->availConsumer);
|
||||
taosArrayDestroy(pSub->assigned);
|
||||
taosArrayDestroy(pSub->lostConsumer);
|
||||
taosArrayDestroy(pSub->idleConsumer);
|
||||
free(pSub);
|
||||
return NULL;
|
||||
goto _err;
|
||||
}
|
||||
|
||||
pSub->key[0] = 0;
|
||||
pSub->vgNum = 0;
|
||||
pSub->status = 0;
|
||||
|
||||
return pSub;
|
||||
|
||||
_err:
|
||||
tfree(pSub->unassignedVg);
|
||||
tfree(pSub->consumers);
|
||||
tfree(pSub);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
static FORCE_INLINE int32_t tEncodeSubscribeObj(void** buf, const SMqSubscribeObj* pSub) {
|
||||
int32_t tlen = 0;
|
||||
tlen += taosEncodeString(buf, pSub->key);
|
||||
tlen += taosEncodeFixedI32(buf, pSub->epoch);
|
||||
tlen += taosEncodeFixedI32(buf, pSub->vgNum);
|
||||
tlen += taosEncodeFixedI32(buf, pSub->status);
|
||||
int32_t sz;
|
||||
|
||||
sz = taosArrayGetSize(pSub->availConsumer);
|
||||
sz = taosArrayGetSize(pSub->consumers);
|
||||
tlen += taosEncodeFixedI32(buf, sz);
|
||||
for (int32_t i = 0; i < sz; i++) {
|
||||
int64_t* pConsumerId = taosArrayGet(pSub->availConsumer, i);
|
||||
tlen += taosEncodeFixedI64(buf, *pConsumerId);
|
||||
}
|
||||
|
||||
sz = taosArrayGetSize(pSub->assigned);
|
||||
tlen += taosEncodeFixedI32(buf, sz);
|
||||
for (int32_t i = 0; i < sz; i++) {
|
||||
SMqConsumerEp* pCEp = taosArrayGet(pSub->assigned, i);
|
||||
tlen += tEncodeSMqConsumerEp(buf, pCEp);
|
||||
}
|
||||
|
||||
sz = taosArrayGetSize(pSub->lostConsumer);
|
||||
tlen += taosEncodeFixedI32(buf, sz);
|
||||
for (int32_t i = 0; i < sz; i++) {
|
||||
SMqConsumerEp* pCEp = taosArrayGet(pSub->lostConsumer, i);
|
||||
tlen += tEncodeSMqConsumerEp(buf, pCEp);
|
||||
}
|
||||
|
||||
sz = taosArrayGetSize(pSub->idleConsumer);
|
||||
tlen += taosEncodeFixedI32(buf, sz);
|
||||
for (int32_t i = 0; i < sz; i++) {
|
||||
SMqConsumerEp* pCEp = taosArrayGet(pSub->idleConsumer, i);
|
||||
tlen += tEncodeSMqConsumerEp(buf, pCEp);
|
||||
SMqSubConsumer* pSubConsumer = taosArrayGet(pSub->consumers, i);
|
||||
tlen += tEncodeSMqSubConsumer(buf, pSubConsumer);
|
||||
}
|
||||
|
||||
sz = taosArrayGetSize(pSub->unassignedVg);
|
||||
|
@ -506,68 +477,25 @@ static FORCE_INLINE int32_t tEncodeSubscribeObj(void** buf, const SMqSubscribeOb
|
|||
|
||||
static FORCE_INLINE void* tDecodeSubscribeObj(void* buf, SMqSubscribeObj* pSub) {
|
||||
buf = taosDecodeStringTo(buf, pSub->key);
|
||||
buf = taosDecodeFixedI32(buf, &pSub->epoch);
|
||||
buf = taosDecodeFixedI32(buf, &pSub->vgNum);
|
||||
buf = taosDecodeFixedI32(buf, &pSub->status);
|
||||
|
||||
int32_t sz;
|
||||
|
||||
buf = taosDecodeFixedI32(buf, &sz);
|
||||
pSub->availConsumer = taosArrayInit(sz, sizeof(int64_t));
|
||||
if (pSub->availConsumer == NULL) {
|
||||
pSub->consumers = taosArrayInit(sz, sizeof(SMqSubConsumer));
|
||||
if (pSub->consumers == NULL) {
|
||||
return NULL;
|
||||
}
|
||||
for (int32_t i = 0; i < sz; i++) {
|
||||
int64_t consumerId;
|
||||
buf = taosDecodeFixedI64(buf, &consumerId);
|
||||
taosArrayPush(pSub->availConsumer, &consumerId);
|
||||
SMqSubConsumer subConsumer = {0};
|
||||
buf = tDecodeSMqSubConsumer(buf, &subConsumer);
|
||||
taosArrayPush(pSub->consumers, &subConsumer);
|
||||
}
|
||||
|
||||
buf = taosDecodeFixedI32(buf, &sz);
|
||||
pSub->assigned = taosArrayInit(sz, sizeof(SMqConsumerEp));
|
||||
if (pSub->assigned == NULL) {
|
||||
taosArrayDestroy(pSub->availConsumer);
|
||||
return NULL;
|
||||
}
|
||||
for (int32_t i = 0; i < sz; i++) {
|
||||
SMqConsumerEp cEp = {0};
|
||||
buf = tDecodeSMqConsumerEp(buf, &cEp);
|
||||
taosArrayPush(pSub->assigned, &cEp);
|
||||
}
|
||||
|
||||
buf = taosDecodeFixedI32(buf, &sz);
|
||||
pSub->lostConsumer = taosArrayInit(sz, sizeof(SMqConsumerEp));
|
||||
if (pSub->lostConsumer == NULL) {
|
||||
taosArrayDestroy(pSub->availConsumer);
|
||||
taosArrayDestroy(pSub->assigned);
|
||||
return NULL;
|
||||
}
|
||||
for (int32_t i = 0; i < sz; i++) {
|
||||
SMqConsumerEp cEp = {0};
|
||||
buf = tDecodeSMqConsumerEp(buf, &cEp);
|
||||
taosArrayPush(pSub->lostConsumer, &cEp);
|
||||
}
|
||||
|
||||
buf = taosDecodeFixedI32(buf, &sz);
|
||||
pSub->idleConsumer = taosArrayInit(sz, sizeof(SMqConsumerEp));
|
||||
if (pSub->idleConsumer == NULL) {
|
||||
taosArrayDestroy(pSub->availConsumer);
|
||||
taosArrayDestroy(pSub->assigned);
|
||||
taosArrayDestroy(pSub->lostConsumer);
|
||||
return NULL;
|
||||
}
|
||||
for (int32_t i = 0; i < sz; i++) {
|
||||
SMqConsumerEp cEp = {0};
|
||||
buf = tDecodeSMqConsumerEp(buf, &cEp);
|
||||
taosArrayPush(pSub->idleConsumer, &cEp);
|
||||
}
|
||||
|
||||
|
||||
buf = taosDecodeFixedI32(buf, &sz);
|
||||
pSub->unassignedVg = taosArrayInit(sz, sizeof(SMqConsumerEp));
|
||||
if (pSub->unassignedVg == NULL) {
|
||||
taosArrayDestroy(pSub->availConsumer);
|
||||
taosArrayDestroy(pSub->assigned);
|
||||
taosArrayDestroy(pSub->lostConsumer);
|
||||
taosArrayDestroy(pSub->idleConsumer);
|
||||
return NULL;
|
||||
}
|
||||
for (int32_t i = 0; i < sz; i++) {
|
||||
|
@ -575,176 +503,24 @@ static FORCE_INLINE void* tDecodeSubscribeObj(void* buf, SMqSubscribeObj* pSub)
|
|||
buf = tDecodeSMqConsumerEp(buf, &cEp);
|
||||
taosArrayPush(pSub->unassignedVg, &cEp);
|
||||
}
|
||||
|
||||
return buf;
|
||||
}
|
||||
|
||||
static FORCE_INLINE void tDeleteSMqSubscribeObj(SMqSubscribeObj* pSub) {
|
||||
if (pSub->availConsumer) {
|
||||
taosArrayDestroy(pSub->availConsumer);
|
||||
pSub->availConsumer = NULL;
|
||||
}
|
||||
if (pSub->assigned) {
|
||||
//taosArrayDestroyEx(pSub->assigned, (void (*)(void*))tDeleteSMqConsumerEp);
|
||||
taosArrayDestroy(pSub->assigned);
|
||||
pSub->assigned = NULL;
|
||||
if (pSub->consumers) {
|
||||
taosArrayDestroyEx(pSub->consumers, (void (*)(void*))tDeleteSMqSubConsumer);
|
||||
//taosArrayDestroy(pSub->consumers);
|
||||
pSub->consumers = NULL;
|
||||
}
|
||||
|
||||
if (pSub->unassignedVg) {
|
||||
//taosArrayDestroyEx(pSub->unassignedVg, (void (*)(void*))tDeleteSMqConsumerEp);
|
||||
taosArrayDestroy(pSub->unassignedVg);
|
||||
taosArrayDestroyEx(pSub->unassignedVg, (void (*)(void*))tDeleteSMqConsumerEp);
|
||||
//taosArrayDestroy(pSub->unassignedVg);
|
||||
pSub->unassignedVg = NULL;
|
||||
}
|
||||
if (pSub->idleConsumer) {
|
||||
//taosArrayDestroyEx(pSub->idleConsumer, (void (*)(void*))tDeleteSMqConsumerEp);
|
||||
taosArrayDestroy(pSub->idleConsumer);
|
||||
pSub->idleConsumer = NULL;
|
||||
}
|
||||
if (pSub->lostConsumer) {
|
||||
//taosArrayDestroyEx(pSub->lostConsumer, (void (*)(void*))tDeleteSMqConsumerEp);
|
||||
taosArrayDestroy(pSub->lostConsumer);
|
||||
pSub->lostConsumer = NULL;
|
||||
}
|
||||
}
|
||||
|
||||
typedef struct SMqCGroup {
|
||||
char name[TSDB_CONSUMER_GROUP_LEN];
|
||||
int32_t status; // 0 - uninitialized, 1 - wait rebalance, 2- normal
|
||||
SList* consumerIds; // SList<int64_t>
|
||||
SList* idleVGroups; // SList<int32_t>
|
||||
} SMqCGroup;
|
||||
|
||||
typedef struct SMqTopicObj {
|
||||
char name[TSDB_TOPIC_FNAME_LEN];
|
||||
char db[TSDB_DB_FNAME_LEN];
|
||||
int64_t createTime;
|
||||
int64_t updateTime;
|
||||
uint64_t uid;
|
||||
int64_t dbUid;
|
||||
int32_t version;
|
||||
SRWLatch lock;
|
||||
int32_t sqlLen;
|
||||
char* sql;
|
||||
char* logicalPlan;
|
||||
char* physicalPlan;
|
||||
// SHashObj *cgroups; // SHashObj<SMqCGroup>
|
||||
// SHashObj *consumers; // SHashObj<SMqConsumerObj>
|
||||
} SMqTopicObj;
|
||||
|
||||
// TODO: add cache and change name to id
|
||||
typedef struct SMqConsumerTopic {
|
||||
char name[TSDB_TOPIC_FNAME_LEN];
|
||||
int32_t epoch;
|
||||
// vg assigned to the consumer on the topic
|
||||
SArray* pVgInfo; // SArray<int32_t>
|
||||
} SMqConsumerTopic;
|
||||
|
||||
static FORCE_INLINE SMqConsumerTopic* tNewConsumerTopic(int64_t consumerId, SMqTopicObj* pTopic,
|
||||
SMqSubscribeObj* pSub, int64_t* oldConsumerId) {
|
||||
SMqConsumerTopic* pCTopic = malloc(sizeof(SMqConsumerTopic));
|
||||
if (pCTopic == NULL) {
|
||||
terrno = TSDB_CODE_OUT_OF_MEMORY;
|
||||
return NULL;
|
||||
}
|
||||
strcpy(pCTopic->name, pTopic->name);
|
||||
pCTopic->epoch = 0;
|
||||
pCTopic->pVgInfo = taosArrayInit(0, sizeof(int32_t));
|
||||
|
||||
int32_t unassignedVgSz = taosArrayGetSize(pSub->unassignedVg);
|
||||
if (unassignedVgSz > 0) {
|
||||
SMqConsumerEp* pCEp = taosArrayPop(pSub->unassignedVg);
|
||||
*oldConsumerId = pCEp->consumerId;
|
||||
pCEp->consumerId = consumerId;
|
||||
taosArrayPush(pCTopic->pVgInfo, &pCEp->vgId);
|
||||
taosArrayPush(pSub->assigned, pCEp);
|
||||
}
|
||||
return pCTopic;
|
||||
}
|
||||
|
||||
static FORCE_INLINE int32_t tEncodeSMqConsumerTopic(void** buf, SMqConsumerTopic* pConsumerTopic) {
|
||||
int32_t tlen = 0;
|
||||
tlen += taosEncodeString(buf, pConsumerTopic->name);
|
||||
tlen += taosEncodeFixedI32(buf, pConsumerTopic->epoch);
|
||||
int32_t sz = 0;
|
||||
if (pConsumerTopic->pVgInfo != NULL) {
|
||||
sz = taosArrayGetSize(pConsumerTopic->pVgInfo);
|
||||
}
|
||||
tlen += taosEncodeFixedI32(buf, sz);
|
||||
for (int32_t i = 0; i < sz; i++) {
|
||||
int32_t* pVgInfo = taosArrayGet(pConsumerTopic->pVgInfo, i);
|
||||
tlen += taosEncodeFixedI32(buf, *pVgInfo);
|
||||
}
|
||||
return tlen;
|
||||
}
|
||||
|
||||
static FORCE_INLINE void* tDecodeSMqConsumerTopic(void* buf, SMqConsumerTopic* pConsumerTopic) {
|
||||
buf = taosDecodeStringTo(buf, pConsumerTopic->name);
|
||||
buf = taosDecodeFixedI32(buf, &pConsumerTopic->epoch);
|
||||
int32_t sz;
|
||||
buf = taosDecodeFixedI32(buf, &sz);
|
||||
pConsumerTopic->pVgInfo = taosArrayInit(sz, sizeof(SMqConsumerTopic));
|
||||
for (int32_t i = 0; i < sz; i++) {
|
||||
int32_t vgInfo;
|
||||
buf = taosDecodeFixedI32(buf, &vgInfo);
|
||||
taosArrayPush(pConsumerTopic->pVgInfo, &vgInfo);
|
||||
}
|
||||
return buf;
|
||||
}
|
||||
|
||||
typedef struct SMqConsumerObj {
|
||||
int64_t consumerId;
|
||||
int64_t connId;
|
||||
SRWLatch lock;
|
||||
char cgroup[TSDB_CONSUMER_GROUP_LEN];
|
||||
SArray* topics; // SArray<SMqConsumerTopic>
|
||||
int64_t epoch;
|
||||
// stat
|
||||
int64_t pollCnt;
|
||||
} SMqConsumerObj;
|
||||
|
||||
static FORCE_INLINE int32_t tEncodeSMqConsumerObj(void** buf, const SMqConsumerObj* pConsumer) {
|
||||
int32_t tlen = 0;
|
||||
tlen += taosEncodeFixedI64(buf, pConsumer->consumerId);
|
||||
tlen += taosEncodeFixedI64(buf, pConsumer->connId);
|
||||
tlen += taosEncodeFixedI64(buf, pConsumer->epoch);
|
||||
tlen += taosEncodeFixedI64(buf, pConsumer->pollCnt);
|
||||
tlen += taosEncodeString(buf, pConsumer->cgroup);
|
||||
int32_t sz = taosArrayGetSize(pConsumer->topics);
|
||||
tlen += taosEncodeFixedI32(buf, sz);
|
||||
for (int32_t i = 0; i < sz; i++) {
|
||||
SMqConsumerTopic* pConsumerTopic = taosArrayGet(pConsumer->topics, i);
|
||||
tlen += tEncodeSMqConsumerTopic(buf, pConsumerTopic);
|
||||
}
|
||||
return tlen;
|
||||
}
|
||||
|
||||
static FORCE_INLINE void* tDecodeSMqConsumerObj(void* buf, SMqConsumerObj* pConsumer) {
|
||||
buf = taosDecodeFixedI64(buf, &pConsumer->consumerId);
|
||||
buf = taosDecodeFixedI64(buf, &pConsumer->connId);
|
||||
buf = taosDecodeFixedI64(buf, &pConsumer->epoch);
|
||||
buf = taosDecodeFixedI64(buf, &pConsumer->pollCnt);
|
||||
buf = taosDecodeStringTo(buf, pConsumer->cgroup);
|
||||
int32_t sz;
|
||||
buf = taosDecodeFixedI32(buf, &sz);
|
||||
pConsumer->topics = taosArrayInit(sz, sizeof(SMqConsumerObj));
|
||||
for (int32_t i = 0; i < sz; i++) {
|
||||
SMqConsumerTopic cTopic;
|
||||
buf = tDecodeSMqConsumerTopic(buf, &cTopic);
|
||||
taosArrayPush(pConsumer->topics, &cTopic);
|
||||
}
|
||||
return buf;
|
||||
}
|
||||
|
||||
typedef struct SMqSubConsumerObj {
|
||||
int64_t consumerUid; // if -1, unassigned
|
||||
SList* vgId; // SList<int32_t>
|
||||
} SMqSubConsumerObj;
|
||||
|
||||
typedef struct SMqSubCGroupObj {
|
||||
char name[TSDB_CONSUMER_GROUP_LEN];
|
||||
SList* consumers; // SList<SMqConsumerObj>
|
||||
} SMqSubCGroupObj;
|
||||
|
||||
typedef struct SMqSubTopicObj {
|
||||
typedef struct {
|
||||
char name[TSDB_TOPIC_FNAME_LEN];
|
||||
char db[TSDB_DB_FNAME_LEN];
|
||||
int64_t createTime;
|
||||
|
@ -757,41 +533,57 @@ typedef struct SMqSubTopicObj {
|
|||
char* sql;
|
||||
char* logicalPlan;
|
||||
char* physicalPlan;
|
||||
SList* cgroups; // SList<SMqSubCGroupObj>
|
||||
} SMqSubTopicObj;
|
||||
} SMqTopicObj;
|
||||
|
||||
typedef struct SMqConsumerSubObj {
|
||||
int64_t topicUid;
|
||||
SList* vgIds; // SList<int64_t>
|
||||
} SMqConsumerSubObj;
|
||||
|
||||
typedef struct SMqConsumerHbObj {
|
||||
typedef struct {
|
||||
int64_t consumerId;
|
||||
SList* consumerSubs; // SList<SMqConsumerSubObj>
|
||||
} SMqConsumerHbObj;
|
||||
|
||||
typedef struct SMqVGroupSubObj {
|
||||
int64_t topicUid;
|
||||
SList* consumerIds; // SList<int64_t>
|
||||
} SMqVGroupSubObj;
|
||||
|
||||
typedef struct SMqVGroupHbObj {
|
||||
int64_t vgId;
|
||||
SList* vgSubs; // SList<SMqVGroupSubObj>
|
||||
} SMqVGroupHbObj;
|
||||
|
||||
#if 0
|
||||
typedef struct SCGroupObj {
|
||||
char name[TSDB_TOPIC_NAME_LEN];
|
||||
int64_t createTime;
|
||||
int64_t updateTime;
|
||||
uint64_t uid;
|
||||
//uint64_t dbUid;
|
||||
int32_t version;
|
||||
int64_t connId;
|
||||
SRWLatch lock;
|
||||
SList* consumerIds;
|
||||
} SCGroupObj;
|
||||
#endif
|
||||
char cgroup[TSDB_CONSUMER_GROUP_LEN];
|
||||
SArray* topics; // SArray<char*>
|
||||
int64_t epoch;
|
||||
// stat
|
||||
int64_t pollCnt;
|
||||
// status
|
||||
int32_t status;
|
||||
// heartbeat from the consumer reset hbStatus to 0
|
||||
// each checkConsumerAlive msg add hbStatus by 1
|
||||
// if checkConsumerAlive > CONSUMER_REBALANCE_CNT, mask to lost
|
||||
int32_t hbStatus;
|
||||
} SMqConsumerObj;
|
||||
|
||||
static FORCE_INLINE int32_t tEncodeSMqConsumerObj(void** buf, const SMqConsumerObj* pConsumer) {
|
||||
int32_t tlen = 0;
|
||||
tlen += taosEncodeFixedI64(buf, pConsumer->consumerId);
|
||||
tlen += taosEncodeFixedI64(buf, pConsumer->connId);
|
||||
tlen += taosEncodeFixedI64(buf, pConsumer->epoch);
|
||||
tlen += taosEncodeFixedI64(buf, pConsumer->pollCnt);
|
||||
tlen += taosEncodeString(buf, pConsumer->cgroup);
|
||||
int32_t sz = taosArrayGetSize(pConsumer->topics);
|
||||
tlen += taosEncodeFixedI32(buf, sz);
|
||||
for (int32_t i = 0; i < sz; i++) {
|
||||
char* topic = taosArrayGetP(pConsumer->topics, i);
|
||||
tlen += taosEncodeString(buf, topic);
|
||||
}
|
||||
return tlen;
|
||||
}
|
||||
|
||||
static FORCE_INLINE void* tDecodeSMqConsumerObj(void* buf, SMqConsumerObj* pConsumer) {
|
||||
int32_t sz;
|
||||
buf = taosDecodeFixedI64(buf, &pConsumer->consumerId);
|
||||
buf = taosDecodeFixedI64(buf, &pConsumer->connId);
|
||||
buf = taosDecodeFixedI64(buf, &pConsumer->epoch);
|
||||
buf = taosDecodeFixedI64(buf, &pConsumer->pollCnt);
|
||||
buf = taosDecodeStringTo(buf, pConsumer->cgroup);
|
||||
buf = taosDecodeFixedI32(buf, &sz);
|
||||
pConsumer->topics = taosArrayInit(sz, sizeof(SMqConsumerObj));
|
||||
for (int32_t i = 0; i < sz; i++) {
|
||||
char* topic;
|
||||
buf = taosDecodeString(buf, &topic);
|
||||
taosArrayPush(pConsumer->topics, &topic);
|
||||
}
|
||||
return buf;
|
||||
}
|
||||
|
||||
typedef struct SMnodeMsg {
|
||||
char user[TSDB_USER_LEN];
|
||||
|
|
|
@ -96,6 +96,7 @@ typedef struct SMnode {
|
|||
SendReqToMnodeFp sendReqToMnodeFp;
|
||||
SendRedirectRspFp sendRedirectRspFp;
|
||||
PutReqToMWriteQFp putReqToMWriteQFp;
|
||||
PutReqToMReadQFp putReqToMReadQFp;
|
||||
} SMnode;
|
||||
|
||||
int32_t mndSendReqToDnode(SMnode *pMnode, SEpSet *pEpSet, SRpcMsg *rpcMsg);
|
||||
|
|
|
@ -28,9 +28,6 @@ void mndCleanupSubscribe(SMnode *pMnode);
|
|||
SMqSubscribeObj *mndAcquireSubscribe(SMnode *pMnode, char *CGroup, char *topicName);
|
||||
void mndReleaseSubscribe(SMnode *pMnode, SMqSubscribeObj *pSub);
|
||||
|
||||
SSdbRaw *mndSubscribeActionEncode(SMqSubscribeObj *pSub);
|
||||
SSdbRow *mndSubscribeActionDecode(SSdbRaw *pRaw);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
|
|
@ -53,6 +53,19 @@ int32_t mndInitConsumer(SMnode *pMnode) {
|
|||
|
||||
void mndCleanupConsumer(SMnode *pMnode) {}
|
||||
|
||||
SMqConsumerObj* mndCreateConsumer(int64_t consumerId, const char* cgroup) {
|
||||
SMqConsumerObj* pConsumer = malloc(sizeof(SMqConsumerObj));
|
||||
if (pConsumer == NULL) {
|
||||
terrno = TSDB_CODE_OUT_OF_MEMORY;
|
||||
return NULL;
|
||||
}
|
||||
pConsumer->epoch = 1;
|
||||
pConsumer->consumerId = consumerId;
|
||||
strcpy(pConsumer->cgroup, cgroup);
|
||||
taosInitRWLatch(&pConsumer->lock);
|
||||
return pConsumer;
|
||||
}
|
||||
|
||||
SSdbRaw *mndConsumerActionEncode(SMqConsumerObj *pConsumer) {
|
||||
terrno = TSDB_CODE_OUT_OF_MEMORY;
|
||||
void* buf = NULL;
|
||||
|
@ -164,148 +177,3 @@ void mndReleaseConsumer(SMnode *pMnode, SMqConsumerObj *pConsumer) {
|
|||
SSdb *pSdb = pMnode->pSdb;
|
||||
sdbRelease(pSdb, pConsumer);
|
||||
}
|
||||
|
||||
#if 0
|
||||
static int32_t mndProcessConsumerMetaMsg(SMnodeMsg *pMsg) {
|
||||
SMnode *pMnode = pMsg->pMnode;
|
||||
STableInfoReq *pInfo = pMsg->rpcMsg.pCont;
|
||||
|
||||
mDebug("consumer:%s, start to retrieve meta", pInfo->tableFname);
|
||||
|
||||
SDbObj *pDb = mndAcquireDbByConsumer(pMnode, pInfo->tableFname);
|
||||
if (pDb == NULL) {
|
||||
terrno = TSDB_CODE_MND_DB_NOT_SELECTED;
|
||||
mError("consumer:%s, failed to retrieve meta since %s", pInfo->tableFname, terrstr());
|
||||
return -1;
|
||||
}
|
||||
|
||||
SConsumerObj *pConsumer = mndAcquireConsumer(pMnode, pInfo->tableFname);
|
||||
if (pConsumer == NULL) {
|
||||
mndReleaseDb(pMnode, pDb);
|
||||
terrno = TSDB_CODE_MND_INVALID_CONSUMER;
|
||||
mError("consumer:%s, failed to get meta since %s", pInfo->tableFname, terrstr());
|
||||
return -1;
|
||||
}
|
||||
|
||||
taosRLockLatch(&pConsumer->lock);
|
||||
int32_t totalCols = pConsumer->numOfColumns + pConsumer->numOfTags;
|
||||
int32_t contLen = sizeof(STableMetaRsp) + totalCols * sizeof(SSchema);
|
||||
|
||||
STableMetaRsp *pMeta = rpcMallocCont(contLen);
|
||||
if (pMeta == NULL) {
|
||||
taosRUnLockLatch(&pConsumer->lock);
|
||||
mndReleaseDb(pMnode, pDb);
|
||||
mndReleaseConsumer(pMnode, pConsumer);
|
||||
terrno = TSDB_CODE_OUT_OF_MEMORY;
|
||||
mError("consumer:%s, failed to get meta since %s", pInfo->tableFname, terrstr());
|
||||
return -1;
|
||||
}
|
||||
|
||||
memcpy(pMeta->consumerFname, pConsumer->name, TSDB_TABLE_FNAME_LEN);
|
||||
pMeta->numOfTags = htonl(pConsumer->numOfTags);
|
||||
pMeta->numOfColumns = htonl(pConsumer->numOfColumns);
|
||||
pMeta->precision = pDb->cfg.precision;
|
||||
pMeta->tableType = TSDB_SUPER_TABLE;
|
||||
pMeta->update = pDb->cfg.update;
|
||||
pMeta->sversion = htonl(pConsumer->version);
|
||||
pMeta->tuid = htonl(pConsumer->uid);
|
||||
|
||||
for (int32_t i = 0; i < totalCols; ++i) {
|
||||
SSchema *pSchema = &pMeta->pSchema[i];
|
||||
SSchema *pSrcSchema = &pConsumer->pSchema[i];
|
||||
memcpy(pSchema->name, pSrcSchema->name, TSDB_COL_NAME_LEN);
|
||||
pSchema->type = pSrcSchema->type;
|
||||
pSchema->colId = htonl(pSrcSchema->colId);
|
||||
pSchema->bytes = htonl(pSrcSchema->bytes);
|
||||
}
|
||||
taosRUnLockLatch(&pConsumer->lock);
|
||||
mndReleaseDb(pMnode, pDb);
|
||||
mndReleaseConsumer(pMnode, pConsumer);
|
||||
|
||||
pMsg->pCont = pMeta;
|
||||
pMsg->contLen = contLen;
|
||||
|
||||
mDebug("consumer:%s, meta is retrieved, cols:%d tags:%d", pInfo->tableFname, pConsumer->numOfColumns, pConsumer->numOfTags);
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int32_t mndGetNumOfConsumers(SMnode *pMnode, char *dbName, int32_t *pNumOfConsumers) {
|
||||
SSdb *pSdb = pMnode->pSdb;
|
||||
|
||||
SDbObj *pDb = mndAcquireDb(pMnode, dbName);
|
||||
if (pDb == NULL) {
|
||||
terrno = TSDB_CODE_MND_DB_NOT_SELECTED;
|
||||
return -1;
|
||||
}
|
||||
|
||||
int32_t numOfConsumers = 0;
|
||||
void *pIter = NULL;
|
||||
while (1) {
|
||||
SMqConsumerObj *pConsumer = NULL;
|
||||
pIter = sdbFetch(pSdb, SDB_CONSUMER, pIter, (void **)&pConsumer);
|
||||
if (pIter == NULL) break;
|
||||
|
||||
numOfConsumers++;
|
||||
|
||||
sdbRelease(pSdb, pConsumer);
|
||||
}
|
||||
|
||||
*pNumOfConsumers = numOfConsumers;
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int32_t mndGetConsumerMeta(SMnodeMsg *pMsg, SShowObj *pShow, STableMetaRsp *pMeta) {
|
||||
SMnode *pMnode = pMsg->pMnode;
|
||||
SSdb *pSdb = pMnode->pSdb;
|
||||
|
||||
if (mndGetNumOfConsumers(pMnode, pShow->db, &pShow->numOfRows) != 0) {
|
||||
return -1;
|
||||
}
|
||||
|
||||
int32_t cols = 0;
|
||||
SSchema *pSchema = pMeta->pSchema;
|
||||
|
||||
pShow->bytes[cols] = TSDB_TABLE_NAME_LEN + VARSTR_HEADER_SIZE;
|
||||
pSchema[cols].type = TSDB_DATA_TYPE_BINARY;
|
||||
strcpy(pSchema[cols].name, "name");
|
||||
pSchema[cols].bytes = htonl(pShow->bytes[cols]);
|
||||
cols++;
|
||||
|
||||
pShow->bytes[cols] = 8;
|
||||
pSchema[cols].type = TSDB_DATA_TYPE_TIMESTAMP;
|
||||
strcpy(pSchema[cols].name, "create_time");
|
||||
pSchema[cols].bytes = htonl(pShow->bytes[cols]);
|
||||
cols++;
|
||||
|
||||
pShow->bytes[cols] = 4;
|
||||
pSchema[cols].type = TSDB_DATA_TYPE_INT;
|
||||
strcpy(pSchema[cols].name, "columns");
|
||||
pSchema[cols].bytes = htonl(pShow->bytes[cols]);
|
||||
cols++;
|
||||
|
||||
pShow->bytes[cols] = 4;
|
||||
pSchema[cols].type = TSDB_DATA_TYPE_INT;
|
||||
strcpy(pSchema[cols].name, "tags");
|
||||
pSchema[cols].bytes = htonl(pShow->bytes[cols]);
|
||||
cols++;
|
||||
|
||||
pMeta->numOfColumns = htonl(cols);
|
||||
pShow->numOfColumns = cols;
|
||||
|
||||
pShow->offset[0] = 0;
|
||||
for (int32_t i = 1; i < cols; ++i) {
|
||||
pShow->offset[i] = pShow->offset[i - 1] + pShow->bytes[i - 1];
|
||||
}
|
||||
|
||||
pShow->numOfRows = sdbGetSize(pSdb, SDB_CONSUMER);
|
||||
pShow->rowSize = pShow->offset[cols - 1] + pShow->bytes[cols - 1];
|
||||
strcpy(pMeta->tbFname, mndShowStr(pShow->type));
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void mndCancelGetNextConsumer(SMnode *pMnode, void *pIter) {
|
||||
SSdb *pSdb = pMnode->pSdb;
|
||||
sdbCancelFetch(pSdb, pIter);
|
||||
}
|
||||
#endif
|
||||
|
|
|
@ -244,7 +244,7 @@ bool mndIsDnodeOnline(SMnode *pMnode, SDnodeObj *pDnode, int64_t curMs) {
|
|||
return true;
|
||||
}
|
||||
|
||||
static void mndGetDnodeData(SMnode *pMnode, SDnodeEps *pEps, int32_t maxEps) {
|
||||
static void mndGetDnodeData(SMnode *pMnode, SArray *pDnodeEps) {
|
||||
SSdb *pSdb = pMnode->pSdb;
|
||||
|
||||
int32_t numOfEps = 0;
|
||||
|
@ -253,25 +253,20 @@ static void mndGetDnodeData(SMnode *pMnode, SDnodeEps *pEps, int32_t maxEps) {
|
|||
SDnodeObj *pDnode = NULL;
|
||||
pIter = sdbFetch(pSdb, SDB_DNODE, pIter, (void **)&pDnode);
|
||||
if (pIter == NULL) break;
|
||||
if (numOfEps >= maxEps) {
|
||||
sdbCancelFetch(pSdb, pIter);
|
||||
sdbRelease(pSdb, pDnode);
|
||||
break;
|
||||
}
|
||||
|
||||
SDnodeEp *pEp = &pEps->eps[numOfEps];
|
||||
pEp->id = htonl(pDnode->id);
|
||||
pEp->ep.port = htons(pDnode->port);
|
||||
memcpy(pEp->ep.fqdn, pDnode->fqdn, TSDB_FQDN_LEN);
|
||||
pEp->isMnode = 0;
|
||||
SDnodeEp dnodeEp = {0};
|
||||
dnodeEp.id = pDnode->id;
|
||||
dnodeEp.isMnode = 0;
|
||||
dnodeEp.ep.port = pDnode->port;
|
||||
memcpy(dnodeEp.ep.fqdn, pDnode->fqdn, TSDB_FQDN_LEN);
|
||||
|
||||
if (mndIsMnode(pMnode, pDnode->id)) {
|
||||
pEp->isMnode = 1;
|
||||
}
|
||||
numOfEps++;
|
||||
sdbRelease(pSdb, pDnode);
|
||||
dnodeEp.isMnode = 1;
|
||||
}
|
||||
|
||||
pEps->num = htonl(numOfEps);
|
||||
sdbRelease(pSdb, pDnode);
|
||||
taosArrayPush(pDnodeEps, &dnodeEp);
|
||||
}
|
||||
}
|
||||
|
||||
static int32_t mndCheckClusterCfgPara(SMnode *pMnode, const SClusterCfg *pCfg) {
|
||||
|
@ -299,50 +294,29 @@ static int32_t mndCheckClusterCfgPara(SMnode *pMnode, const SClusterCfg *pCfg) {
|
|||
return 0;
|
||||
}
|
||||
|
||||
static void mndParseStatusMsg(SStatusReq *pStatus) {
|
||||
pStatus->sver = htonl(pStatus->sver);
|
||||
pStatus->dver = htobe64(pStatus->dver);
|
||||
pStatus->dnodeId = htonl(pStatus->dnodeId);
|
||||
pStatus->clusterId = htobe64(pStatus->clusterId);
|
||||
pStatus->rebootTime = htobe64(pStatus->rebootTime);
|
||||
pStatus->updateTime = htobe64(pStatus->updateTime);
|
||||
pStatus->numOfCores = htonl(pStatus->numOfCores);
|
||||
pStatus->numOfSupportVnodes = htonl(pStatus->numOfSupportVnodes);
|
||||
pStatus->clusterCfg.statusInterval = htonl(pStatus->clusterCfg.statusInterval);
|
||||
pStatus->clusterCfg.checkTime = htobe64(pStatus->clusterCfg.checkTime);
|
||||
for (int32_t v = 0; v < pStatus->vnodeLoads.num; ++v) {
|
||||
SVnodeLoad *pVload = &pStatus->vnodeLoads.data[v];
|
||||
pVload->vgId = htonl(pVload->vgId);
|
||||
pVload->totalStorage = htobe64(pVload->totalStorage);
|
||||
pVload->compStorage = htobe64(pVload->compStorage);
|
||||
pVload->pointsWritten = htobe64(pVload->pointsWritten);
|
||||
pVload->tablesNum = htobe64(pVload->tablesNum);
|
||||
}
|
||||
}
|
||||
|
||||
static int32_t mndProcessStatusReq(SMnodeMsg *pReq) {
|
||||
SMnode *pMnode = pReq->pMnode;
|
||||
SStatusReq *pStatus = pReq->rpcMsg.pCont;
|
||||
SStatusReq statusReq = {0};
|
||||
SDnodeObj *pDnode = NULL;
|
||||
int32_t code = -1;
|
||||
|
||||
mndParseStatusMsg(pStatus);
|
||||
if (tDeserializeSStatusReq(pReq->rpcMsg.pCont, &statusReq) == NULL) goto PROCESS_STATUS_MSG_OVER;
|
||||
|
||||
if (pStatus->dnodeId == 0) {
|
||||
pDnode = mndAcquireDnodeByEp(pMnode, pStatus->dnodeEp);
|
||||
if (statusReq.dnodeId == 0) {
|
||||
pDnode = mndAcquireDnodeByEp(pMnode, statusReq.dnodeEp);
|
||||
if (pDnode == NULL) {
|
||||
mDebug("dnode:%s, not created yet", pStatus->dnodeEp);
|
||||
mDebug("dnode:%s, not created yet", statusReq.dnodeEp);
|
||||
terrno = TSDB_CODE_MND_DNODE_NOT_EXIST;
|
||||
goto PROCESS_STATUS_MSG_OVER;
|
||||
}
|
||||
} else {
|
||||
pDnode = mndAcquireDnode(pMnode, pStatus->dnodeId);
|
||||
pDnode = mndAcquireDnode(pMnode, statusReq.dnodeId);
|
||||
if (pDnode == NULL) {
|
||||
pDnode = mndAcquireDnodeByEp(pMnode, pStatus->dnodeEp);
|
||||
pDnode = mndAcquireDnodeByEp(pMnode, statusReq.dnodeEp);
|
||||
if (pDnode != NULL) {
|
||||
pDnode->offlineReason = DND_REASON_DNODE_ID_NOT_MATCH;
|
||||
}
|
||||
mError("dnode:%d, %s not exist", pStatus->dnodeId, pStatus->dnodeEp);
|
||||
mError("dnode:%d, %s not exist", statusReq.dnodeId, statusReq.dnodeEp);
|
||||
terrno = TSDB_CODE_MND_DNODE_NOT_EXIST;
|
||||
goto PROCESS_STATUS_MSG_OVER;
|
||||
}
|
||||
|
@ -350,28 +324,28 @@ static int32_t mndProcessStatusReq(SMnodeMsg *pReq) {
|
|||
|
||||
int64_t curMs = taosGetTimestampMs();
|
||||
bool online = mndIsDnodeOnline(pMnode, pDnode, curMs);
|
||||
bool dnodeChanged = (pStatus->dver != sdbGetTableVer(pMnode->pSdb, SDB_DNODE));
|
||||
bool reboot = (pDnode->rebootTime != pStatus->rebootTime);
|
||||
bool dnodeChanged = (statusReq.dver != sdbGetTableVer(pMnode->pSdb, SDB_DNODE));
|
||||
bool reboot = (pDnode->rebootTime != statusReq.rebootTime);
|
||||
bool needCheck = !online || dnodeChanged || reboot;
|
||||
|
||||
if (needCheck) {
|
||||
if (pStatus->sver != pMnode->cfg.sver) {
|
||||
if (statusReq.sver != pMnode->cfg.sver) {
|
||||
if (pDnode != NULL) {
|
||||
pDnode->offlineReason = DND_REASON_VERSION_NOT_MATCH;
|
||||
}
|
||||
mError("dnode:%d, status msg version:%d not match cluster:%d", pStatus->dnodeId, pStatus->sver, pMnode->cfg.sver);
|
||||
mError("dnode:%d, status msg version:%d not match cluster:%d", statusReq.dnodeId, statusReq.sver, pMnode->cfg.sver);
|
||||
terrno = TSDB_CODE_MND_INVALID_MSG_VERSION;
|
||||
goto PROCESS_STATUS_MSG_OVER;
|
||||
}
|
||||
|
||||
if (pStatus->dnodeId == 0) {
|
||||
if (statusReq.dnodeId == 0) {
|
||||
mDebug("dnode:%d, %s first access, set clusterId %" PRId64, pDnode->id, pDnode->ep, pMnode->clusterId);
|
||||
} else {
|
||||
if (pStatus->clusterId != pMnode->clusterId) {
|
||||
if (statusReq.clusterId != pMnode->clusterId) {
|
||||
if (pDnode != NULL) {
|
||||
pDnode->offlineReason = DND_REASON_CLUSTER_ID_NOT_MATCH;
|
||||
}
|
||||
mError("dnode:%d, clusterId %" PRId64 " not match exist %" PRId64, pDnode->id, pStatus->clusterId,
|
||||
mError("dnode:%d, clusterId %" PRId64 " not match exist %" PRId64, pDnode->id, statusReq.clusterId,
|
||||
pMnode->clusterId);
|
||||
terrno = TSDB_CODE_MND_INVALID_CLUSTER_ID;
|
||||
goto PROCESS_STATUS_MSG_OVER;
|
||||
|
@ -382,7 +356,7 @@ static int32_t mndProcessStatusReq(SMnodeMsg *pReq) {
|
|||
}
|
||||
|
||||
// Verify whether the cluster parameters are consistent when status change from offline to ready
|
||||
int32_t ret = mndCheckClusterCfgPara(pMnode, &pStatus->clusterCfg);
|
||||
int32_t ret = mndCheckClusterCfgPara(pMnode, &statusReq.clusterCfg);
|
||||
if (0 != ret) {
|
||||
pDnode->offlineReason = ret;
|
||||
mError("dnode:%d, cluster cfg inconsistent since:%s", pDnode->id, offlineReason[ret]);
|
||||
|
@ -396,25 +370,30 @@ static int32_t mndProcessStatusReq(SMnodeMsg *pReq) {
|
|||
mDebug("dnode:%d, send dnode eps", pDnode->id);
|
||||
}
|
||||
|
||||
pDnode->rebootTime = pStatus->rebootTime;
|
||||
pDnode->numOfCores = pStatus->numOfCores;
|
||||
pDnode->numOfSupportVnodes = pStatus->numOfSupportVnodes;
|
||||
pDnode->rebootTime = statusReq.rebootTime;
|
||||
pDnode->numOfCores = statusReq.numOfCores;
|
||||
pDnode->numOfSupportVnodes = statusReq.numOfSupportVnodes;
|
||||
|
||||
int32_t numOfEps = mndGetDnodeSize(pMnode);
|
||||
int32_t contLen = sizeof(SStatusRsp) + numOfEps * sizeof(SDnodeEp);
|
||||
SStatusRsp *pRsp = rpcMallocCont(contLen);
|
||||
if (pRsp == NULL) {
|
||||
SStatusRsp statusRsp = {0};
|
||||
statusRsp.dver = sdbGetTableVer(pMnode->pSdb, SDB_DNODE);
|
||||
statusRsp.dnodeCfg.dnodeId = pDnode->id;
|
||||
statusRsp.dnodeCfg.clusterId = pMnode->clusterId;
|
||||
statusRsp.pDnodeEps = taosArrayInit(mndGetDnodeSize(pMnode), sizeof(SDnodeEp));
|
||||
if (statusRsp.pDnodeEps == NULL) {
|
||||
terrno = TSDB_CODE_OUT_OF_MEMORY;
|
||||
goto PROCESS_STATUS_MSG_OVER;
|
||||
}
|
||||
|
||||
pRsp->dver = htobe64(sdbGetTableVer(pMnode->pSdb, SDB_DNODE));
|
||||
pRsp->dnodeCfg.dnodeId = htonl(pDnode->id);
|
||||
pRsp->dnodeCfg.clusterId = htobe64(pMnode->clusterId);
|
||||
mndGetDnodeData(pMnode, &pRsp->dnodeEps, numOfEps);
|
||||
mndGetDnodeData(pMnode, statusRsp.pDnodeEps);
|
||||
|
||||
int32_t contLen = tSerializeSStatusRsp(NULL, &statusRsp);
|
||||
void *pHead = rpcMallocCont(contLen);
|
||||
void *pBuf = pHead;
|
||||
tSerializeSStatusRsp(&pBuf, &statusRsp);
|
||||
taosArrayDestroy(statusRsp.pDnodeEps);
|
||||
|
||||
pReq->contLen = contLen;
|
||||
pReq->pCont = pRsp;
|
||||
pReq->pCont = pHead;
|
||||
}
|
||||
|
||||
pDnode->lastAccessTime = curMs;
|
||||
|
@ -422,6 +401,7 @@ static int32_t mndProcessStatusReq(SMnodeMsg *pReq) {
|
|||
|
||||
PROCESS_STATUS_MSG_OVER:
|
||||
mndReleaseDnode(pMnode, pDnode);
|
||||
taosArrayDestroy(statusReq.pVloads);
|
||||
return code;
|
||||
}
|
||||
|
||||
|
|
|
@ -104,6 +104,7 @@ static SSdbRaw *mndStbActionEncode(SStbObj *pStb) {
|
|||
SDB_SET_BINARY(pRaw, dataPos, pSchema->name, TSDB_COL_NAME_LEN, STB_ENCODE_OVER)
|
||||
}
|
||||
|
||||
SDB_SET_BINARY(pRaw, dataPos, pStb->comment, TSDB_STB_COMMENT_LEN, STB_ENCODE_OVER)
|
||||
SDB_SET_RESERVE(pRaw, dataPos, TSDB_STB_RESERVE_SIZE, STB_ENCODE_OVER)
|
||||
SDB_SET_DATALEN(pRaw, dataPos, STB_ENCODE_OVER)
|
||||
|
||||
|
@ -171,6 +172,7 @@ static SSdbRow *mndStbActionDecode(SSdbRaw *pRaw) {
|
|||
SDB_GET_BINARY(pRaw, dataPos, pSchema->name, TSDB_COL_NAME_LEN, STB_DECODE_OVER)
|
||||
}
|
||||
|
||||
SDB_GET_BINARY(pRaw, dataPos, pStb->comment, TSDB_STB_COMMENT_LEN, STB_DECODE_OVER)
|
||||
SDB_GET_RESERVE(pRaw, dataPos, TSDB_STB_RESERVE_SIZE, STB_DECODE_OVER)
|
||||
|
||||
terrno = 0;
|
||||
|
@ -234,6 +236,7 @@ static int32_t mndStbActionUpdate(SSdb *pSdb, SStbObj *pOld, SStbObj *pNew) {
|
|||
pOld->numOfTags = pNew->numOfTags;
|
||||
memcpy(pOld->pColumns, pNew->pColumns, pOld->numOfColumns * sizeof(SSchema));
|
||||
memcpy(pOld->pTags, pNew->pTags, pOld->numOfTags * sizeof(SSchema));
|
||||
memcpy(pOld->comment, pNew->comment, TSDB_STB_COMMENT_LEN);
|
||||
taosWUnLockLatch(&pOld->lock);
|
||||
return 0;
|
||||
}
|
||||
|
@ -262,7 +265,7 @@ static SDbObj *mndAcquireDbByStb(SMnode *pMnode, const char *stbName) {
|
|||
return mndAcquireDb(pMnode, db);
|
||||
}
|
||||
|
||||
static void *mndBuildCreateStbReq(SMnode *pMnode, SVgObj *pVgroup, SStbObj *pStb, int32_t *pContLen) {
|
||||
static void *mndBuildVCreateStbReq(SMnode *pMnode, SVgObj *pVgroup, SStbObj *pStb, int32_t *pContLen) {
|
||||
SName name = {0};
|
||||
tNameFromString(&name, pStb->name, T_NAME_ACCT | T_NAME_DB | T_NAME_TABLE);
|
||||
|
||||
|
@ -295,7 +298,7 @@ static void *mndBuildCreateStbReq(SMnode *pMnode, SVgObj *pVgroup, SStbObj *pStb
|
|||
return pHead;
|
||||
}
|
||||
|
||||
static void *mndBuildDropStbReq(SMnode *pMnode, SVgObj *pVgroup, SStbObj *pStb, int32_t *pContLen) {
|
||||
static void *mndBuildVDropStbReq(SMnode *pMnode, SVgObj *pVgroup, SStbObj *pStb, int32_t *pContLen) {
|
||||
SName name = {0};
|
||||
tNameFromString(&name, pStb->name, T_NAME_ACCT | T_NAME_DB | T_NAME_TABLE);
|
||||
|
||||
|
@ -323,14 +326,6 @@ static void *mndBuildDropStbReq(SMnode *pMnode, SVgObj *pVgroup, SStbObj *pStb,
|
|||
}
|
||||
|
||||
static int32_t mndCheckCreateStbReq(SMCreateStbReq *pCreate) {
|
||||
pCreate->numOfColumns = htonl(pCreate->numOfColumns);
|
||||
pCreate->numOfTags = htonl(pCreate->numOfTags);
|
||||
int32_t totalCols = pCreate->numOfColumns + pCreate->numOfTags;
|
||||
for (int32_t i = 0; i < totalCols; ++i) {
|
||||
SSchema *pSchema = &pCreate->pSchemas[i];
|
||||
pSchema->bytes = htonl(pSchema->bytes);
|
||||
}
|
||||
|
||||
if (pCreate->igExists < 0 || pCreate->igExists > 1) {
|
||||
terrno = TSDB_CODE_MND_INVALID_STB_OPTION;
|
||||
return -1;
|
||||
|
@ -346,18 +341,39 @@ static int32_t mndCheckCreateStbReq(SMCreateStbReq *pCreate) {
|
|||
return -1;
|
||||
}
|
||||
|
||||
int32_t maxColId = (TSDB_MAX_COLUMNS + TSDB_MAX_TAGS);
|
||||
for (int32_t i = 0; i < totalCols; ++i) {
|
||||
SSchema *pSchema = &pCreate->pSchemas[i];
|
||||
if (pSchema->type < 0) {
|
||||
SField *pField = taosArrayGet(pCreate->pColumns, 0) ;
|
||||
if (pField->type != TSDB_DATA_TYPE_TIMESTAMP) {
|
||||
terrno = TSDB_CODE_MND_INVALID_STB_OPTION;
|
||||
return -1;
|
||||
}
|
||||
if (pSchema->bytes <= 0) {
|
||||
|
||||
for (int32_t i = 0; i < pCreate->numOfColumns; ++i) {
|
||||
SField *pField = taosArrayGet(pCreate->pColumns, i);
|
||||
if (pField->type < 0) {
|
||||
terrno = TSDB_CODE_MND_INVALID_STB_OPTION;
|
||||
return -1;
|
||||
}
|
||||
if (pSchema->name[0] == 0) {
|
||||
if (pField->bytes <= 0) {
|
||||
terrno = TSDB_CODE_MND_INVALID_STB_OPTION;
|
||||
return -1;
|
||||
}
|
||||
if (pField->name[0] == 0) {
|
||||
terrno = TSDB_CODE_MND_INVALID_STB_OPTION;
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
for (int32_t i = 0; i < pCreate->numOfTags; ++i) {
|
||||
SField *pField = taosArrayGet(pCreate->pTags, i);
|
||||
if (pField->type < 0) {
|
||||
terrno = TSDB_CODE_MND_INVALID_STB_OPTION;
|
||||
return -1;
|
||||
}
|
||||
if (pField->bytes <= 0) {
|
||||
terrno = TSDB_CODE_MND_INVALID_STB_OPTION;
|
||||
return -1;
|
||||
}
|
||||
if (pField->name[0] == 0) {
|
||||
terrno = TSDB_CODE_MND_INVALID_STB_OPTION;
|
||||
return -1;
|
||||
}
|
||||
|
@ -404,7 +420,7 @@ static int32_t mndSetCreateStbRedoActions(SMnode *pMnode, STrans *pTrans, SDbObj
|
|||
if (pIter == NULL) break;
|
||||
if (pVgroup->dbUid != pDb->uid) continue;
|
||||
|
||||
void *pReq = mndBuildCreateStbReq(pMnode, pVgroup, pStb, &contLen);
|
||||
void *pReq = mndBuildVCreateStbReq(pMnode, pVgroup, pStb, &contLen);
|
||||
if (pReq == NULL) {
|
||||
sdbCancelFetch(pSdb, pIter);
|
||||
sdbRelease(pSdb, pVgroup);
|
||||
|
@ -440,7 +456,7 @@ static int32_t mndSetCreateStbUndoActions(SMnode *pMnode, STrans *pTrans, SDbObj
|
|||
if (pVgroup->dbUid != pDb->uid) continue;
|
||||
|
||||
int32_t contLen = 0;
|
||||
void *pReq = mndBuildDropStbReq(pMnode, pVgroup, pStb, &contLen);
|
||||
void *pReq = mndBuildVDropStbReq(pMnode, pVgroup, pStb, &contLen);
|
||||
if (pReq == NULL) {
|
||||
sdbCancelFetch(pSdb, pIter);
|
||||
sdbRelease(pSdb, pVgroup);
|
||||
|
@ -485,16 +501,23 @@ static int32_t mndCreateStb(SMnode *pMnode, SMnodeMsg *pReq, SMCreateStbReq *pCr
|
|||
return -1;
|
||||
}
|
||||
|
||||
memcpy(stbObj.pColumns, pCreate->pSchemas, stbObj.numOfColumns * sizeof(SSchema));
|
||||
memcpy(stbObj.pTags, pCreate->pSchemas + stbObj.numOfColumns, stbObj.numOfTags * sizeof(SSchema));
|
||||
|
||||
for (int32_t i = 0; i < stbObj.numOfColumns; ++i) {
|
||||
stbObj.pColumns[i].colId = stbObj.nextColId;
|
||||
SField *pField = taosArrayGet(pCreate->pColumns, i);
|
||||
SSchema *pSchema = &stbObj.pColumns[i];
|
||||
pSchema->type = pField->type;
|
||||
pSchema->bytes = pField->bytes;
|
||||
memcpy(pSchema->name, pField->name, TSDB_COL_NAME_LEN);
|
||||
pSchema->colId = stbObj.nextColId;
|
||||
stbObj.nextColId++;
|
||||
}
|
||||
|
||||
for (int32_t i = 0; i < stbObj.numOfTags; ++i) {
|
||||
stbObj.pTags[i].colId = stbObj.nextColId;
|
||||
SField *pField = taosArrayGet(pCreate->pTags, i);
|
||||
SSchema *pSchema = &stbObj.pTags[i];
|
||||
pSchema->type = pField->type;
|
||||
pSchema->bytes = pField->bytes;
|
||||
memcpy(pSchema->name, pField->name, TSDB_COL_NAME_LEN);
|
||||
pSchema->colId = stbObj.nextColId;
|
||||
stbObj.nextColId++;
|
||||
}
|
||||
|
||||
|
@ -520,56 +543,59 @@ CREATE_STB_OVER:
|
|||
|
||||
static int32_t mndProcessMCreateStbReq(SMnodeMsg *pReq) {
|
||||
SMnode *pMnode = pReq->pMnode;
|
||||
SMCreateStbReq *pCreate = pReq->rpcMsg.pCont;
|
||||
int32_t code = -1;
|
||||
SStbObj *pTopicStb = NULL;
|
||||
SStbObj *pStb = NULL;
|
||||
SDbObj *pDb = NULL;
|
||||
SMCreateStbReq createReq = {0};
|
||||
|
||||
mDebug("stb:%s, start to create", pCreate->name);
|
||||
if (tDeserializeSMCreateStbReq(pReq->rpcMsg.pCont, &createReq) == NULL) goto CREATE_STB_OVER;
|
||||
|
||||
if (mndCheckCreateStbReq(pCreate) != 0) {
|
||||
mError("stb:%s, failed to create since %s", pCreate->name, terrstr());
|
||||
return -1;
|
||||
}
|
||||
mDebug("stb:%s, start to create", createReq.name);
|
||||
if (mndCheckCreateStbReq(&createReq) != 0) goto CREATE_STB_OVER;
|
||||
|
||||
SStbObj *pStb = mndAcquireStb(pMnode, pCreate->name);
|
||||
pStb = mndAcquireStb(pMnode, createReq.name);
|
||||
if (pStb != NULL) {
|
||||
mndReleaseStb(pMnode, pStb);
|
||||
if (pCreate->igExists) {
|
||||
mDebug("stb:%s, already exist, ignore exist is set", pCreate->name);
|
||||
return 0;
|
||||
if (createReq.igExists) {
|
||||
mDebug("stb:%s, already exist, ignore exist is set", createReq.name);
|
||||
code = 0;
|
||||
goto CREATE_STB_OVER;
|
||||
} else {
|
||||
terrno = TSDB_CODE_MND_STB_ALREADY_EXIST;
|
||||
mError("stb:%s, failed to create since %s", pCreate->name, terrstr());
|
||||
return -1;
|
||||
goto CREATE_STB_OVER;
|
||||
}
|
||||
} else if (terrno != TSDB_CODE_MND_STB_NOT_EXIST) {
|
||||
mError("stb:%s, failed to create since %s", pCreate->name, terrstr());
|
||||
return -1;
|
||||
goto CREATE_STB_OVER;
|
||||
}
|
||||
|
||||
// topic should have different name with stb
|
||||
SStbObj *pTopicStb = mndAcquireStb(pMnode, pCreate->name);
|
||||
pTopicStb = mndAcquireStb(pMnode, createReq.name);
|
||||
if (pTopicStb != NULL) {
|
||||
mndReleaseStb(pMnode, pTopicStb);
|
||||
terrno = TSDB_CODE_MND_NAME_CONFLICT_WITH_TOPIC;
|
||||
mError("stb:%s, failed to create since %s", pCreate->name, terrstr());
|
||||
return -1;
|
||||
goto CREATE_STB_OVER;
|
||||
}
|
||||
|
||||
SDbObj *pDb = mndAcquireDbByStb(pMnode, pCreate->name);
|
||||
pDb = mndAcquireDbByStb(pMnode, createReq.name);
|
||||
if (pDb == NULL) {
|
||||
terrno = TSDB_CODE_MND_DB_NOT_SELECTED;
|
||||
mError("stb:%s, failed to create since %s", pCreate->name, terrstr());
|
||||
return -1;
|
||||
goto CREATE_STB_OVER;
|
||||
}
|
||||
|
||||
int32_t code = mndCreateStb(pMnode, pReq, pCreate, pDb);
|
||||
mndReleaseDb(pMnode, pDb);
|
||||
code = mndCreateStb(pMnode, pReq, &createReq, pDb);
|
||||
|
||||
CREATE_STB_OVER:
|
||||
if (code != 0) {
|
||||
mError("stb:%s, failed to create since %s", pCreate->name, terrstr());
|
||||
return -1;
|
||||
mError("stb:%s, failed to create since %s", createReq.name, terrstr());
|
||||
} else {
|
||||
code = TSDB_CODE_MND_ACTION_IN_PROGRESS;
|
||||
}
|
||||
|
||||
return TSDB_CODE_MND_ACTION_IN_PROGRESS;
|
||||
mndReleaseStb(pMnode, pStb);
|
||||
mndReleaseStb(pMnode, pTopicStb);
|
||||
mndReleaseDb(pMnode, pDb);
|
||||
taosArrayDestroy(createReq.pColumns);
|
||||
taosArrayDestroy(createReq.pTags);
|
||||
|
||||
return code;
|
||||
}
|
||||
|
||||
static int32_t mndProcessVCreateStbRsp(SMnodeMsg *pRsp) {
|
||||
|
@ -578,26 +604,23 @@ static int32_t mndProcessVCreateStbRsp(SMnodeMsg *pRsp) {
|
|||
}
|
||||
|
||||
static int32_t mndCheckAlterStbReq(SMAltertbReq *pAlter) {
|
||||
pAlter->numOfSchemas = htonl(pAlter->numOfSchemas);
|
||||
if (pAlter->numOfFields < 1 || pAlter->numOfFields != (int32_t)taosArrayGetSize(pAlter->pFields)) {
|
||||
terrno = TSDB_CODE_MND_INVALID_STB_OPTION;
|
||||
return -1;
|
||||
}
|
||||
|
||||
for (int32_t i = 0; i < pAlter->numOfSchemas; ++i) {
|
||||
SSchema *pSchema = &pAlter->pSchemas[i];
|
||||
pSchema->colId = htonl(pSchema->colId);
|
||||
pSchema->bytes = htonl(pSchema->bytes);
|
||||
for (int32_t i = 0; i < pAlter->numOfFields; ++i) {
|
||||
SField *pField = taosArrayGet(pAlter->pFields, i);
|
||||
|
||||
if (pSchema->type <= 0) {
|
||||
if (pField->type <= 0) {
|
||||
terrno = TSDB_CODE_MND_INVALID_STB_OPTION;
|
||||
return -1;
|
||||
}
|
||||
if (pSchema->colId < 0 || pSchema->colId >= (TSDB_MAX_COLUMNS + TSDB_MAX_TAGS)) {
|
||||
if (pField->bytes <= 0) {
|
||||
terrno = TSDB_CODE_MND_INVALID_STB_OPTION;
|
||||
return -1;
|
||||
}
|
||||
if (pSchema->bytes <= 0) {
|
||||
terrno = TSDB_CODE_MND_INVALID_STB_OPTION;
|
||||
return -1;
|
||||
}
|
||||
if (pSchema->name[0] == 0) {
|
||||
if (pField->name[0] == 0) {
|
||||
terrno = TSDB_CODE_MND_INVALID_STB_OPTION;
|
||||
return -1;
|
||||
}
|
||||
|
@ -639,7 +662,7 @@ static int32_t mndAllocStbSchemas(const SStbObj *pOld, SStbObj *pNew) {
|
|||
return 0;
|
||||
}
|
||||
|
||||
static int32_t mndAddSuperTableTag(const SStbObj *pOld, SStbObj *pNew, const SSchema *pSchemas, int32_t ntags) {
|
||||
static int32_t mndAddSuperTableTag(const SStbObj *pOld, SStbObj *pNew, SArray *pFields, int32_t ntags) {
|
||||
if (pOld->numOfTags + ntags > TSDB_MAX_TAGS) {
|
||||
terrno = TSDB_CODE_MND_TOO_MANY_TAGS;
|
||||
return -1;
|
||||
|
@ -650,33 +673,34 @@ static int32_t mndAddSuperTableTag(const SStbObj *pOld, SStbObj *pNew, const SSc
|
|||
return -1;
|
||||
}
|
||||
|
||||
for (int32_t i = 0; i < ntags; i++) {
|
||||
if (mndFindSuperTableColumnIndex(pOld, pSchemas[i].name) > 0) {
|
||||
terrno = TSDB_CODE_MND_TAG_ALREAY_EXIST;
|
||||
return -1;
|
||||
}
|
||||
|
||||
if (mndFindSuperTableTagIndex(pOld, pSchemas[i].name) > 0) {
|
||||
terrno = TSDB_CODE_MND_COLUMN_ALREAY_EXIST;
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
pNew->numOfTags = pNew->numOfTags + ntags;
|
||||
if (mndAllocStbSchemas(pOld, pNew) != 0) {
|
||||
return -1;
|
||||
}
|
||||
|
||||
memcpy(pNew->pTags + pOld->numOfTags, pSchemas, sizeof(SSchema) * ntags);
|
||||
for (int32_t i = 0; i < ntags; i++) {
|
||||
SField *pField = taosArrayGet(pFields, i);
|
||||
if (mndFindSuperTableColumnIndex(pOld, pField->name) > 0) {
|
||||
terrno = TSDB_CODE_MND_COLUMN_ALREADY_EXIST;
|
||||
return -1;
|
||||
}
|
||||
|
||||
for (int32_t i = pOld->numOfTags; i < pNew->numOfTags; i++) {
|
||||
SSchema *pSchema = &pNew->pTags[i];
|
||||
if (mndFindSuperTableTagIndex(pOld, pField->name) > 0) {
|
||||
terrno = TSDB_CODE_MND_TAG_ALREADY_EXIST;
|
||||
return -1;
|
||||
}
|
||||
|
||||
SSchema *pSchema = &pNew->pTags[pOld->numOfTags + i];
|
||||
pSchema->bytes = pField->bytes;
|
||||
pSchema->type = pField->type;
|
||||
memcpy(pSchema->name, pField->name, TSDB_COL_NAME_LEN);
|
||||
pSchema->colId = pNew->nextColId;
|
||||
pNew->nextColId++;
|
||||
|
||||
mDebug("stb:%s, start to add tag %s", pNew->name, pSchema->name);
|
||||
}
|
||||
|
||||
pNew->version++;
|
||||
mDebug("stb:%s, start to add tag %s", pNew->name, pSchemas[0].name);
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
@ -692,13 +716,25 @@ static int32_t mndDropSuperTableTag(const SStbObj *pOld, SStbObj *pNew, const ch
|
|||
}
|
||||
|
||||
memmove(pNew->pTags + tag, pNew->pTags + tag + 1, sizeof(SSchema) * (pNew->numOfTags - tag - 1));
|
||||
pNew->numOfTags--;
|
||||
|
||||
pNew->version++;
|
||||
mDebug("stb:%s, start to drop tag %s", pNew->name, tagName);
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int32_t mndAlterStbTagName(const SStbObj *pOld, SStbObj *pNew, const char *oldTagName, const char *newTagName) {
|
||||
static int32_t mndAlterStbTagName(const SStbObj *pOld, SStbObj *pNew, SArray *pFields) {
|
||||
if ((int32_t)taosArrayGetSize(pFields) != 2) {
|
||||
terrno = TSDB_CODE_MND_INVALID_STB_OPTION;
|
||||
return -1;
|
||||
}
|
||||
|
||||
SField *pField0 = taosArrayGet(pFields, 0);
|
||||
SField *pField1 = taosArrayGet(pFields, 1);
|
||||
|
||||
const char *oldTagName = pField0->name;
|
||||
const char *newTagName = pField1->name;
|
||||
|
||||
int32_t tag = mndFindSuperTableTagIndex(pOld, oldTagName);
|
||||
if (tag < 0) {
|
||||
terrno = TSDB_CODE_MND_TAG_NOT_EXIST;
|
||||
|
@ -706,13 +742,12 @@ static int32_t mndAlterStbTagName(const SStbObj *pOld, SStbObj *pNew, const char
|
|||
}
|
||||
|
||||
if (mndFindSuperTableTagIndex(pOld, newTagName) >= 0) {
|
||||
terrno = TSDB_CODE_MND_TAG_ALREAY_EXIST;
|
||||
terrno = TSDB_CODE_MND_TAG_ALREADY_EXIST;
|
||||
return -1;
|
||||
}
|
||||
|
||||
int32_t len = (int32_t)strlen(newTagName);
|
||||
if (len >= TSDB_COL_NAME_LEN) {
|
||||
terrno = TSDB_CODE_MND_INVALID_STB_OPTION;
|
||||
if (mndFindSuperTableColumnIndex(pOld, newTagName) >= 0) {
|
||||
terrno = TSDB_CODE_MND_COLUMN_ALREADY_EXIST;
|
||||
return -1;
|
||||
}
|
||||
|
||||
|
@ -728,104 +763,112 @@ static int32_t mndAlterStbTagName(const SStbObj *pOld, SStbObj *pNew, const char
|
|||
return 0;
|
||||
}
|
||||
|
||||
static int32_t mndAlterStbTagBytes(const SStbObj *pOld, SStbObj *pNew, const SSchema *pSchema) {
|
||||
int32_t tag = mndFindSuperTableTagIndex(pOld, pSchema->name);
|
||||
static int32_t mndAlterStbTagBytes(const SStbObj *pOld, SStbObj *pNew, const SField *pField) {
|
||||
int32_t tag = mndFindSuperTableTagIndex(pOld, pField->name);
|
||||
if (tag < 0) {
|
||||
terrno = TSDB_CODE_MND_TAG_NOT_EXIST;
|
||||
return -1;
|
||||
}
|
||||
|
||||
if (!(pSchema->type == TSDB_DATA_TYPE_BINARY || pSchema->type == TSDB_DATA_TYPE_NCHAR)) {
|
||||
terrno = TSDB_CODE_MND_INVALID_STB_OPTION;
|
||||
return -1;
|
||||
}
|
||||
|
||||
if (mndAllocStbSchemas(pOld, pNew) != 0) {
|
||||
return -1;
|
||||
}
|
||||
|
||||
SSchema *pTag = pNew->pTags + tag;
|
||||
if (pSchema->bytes <= pTag->bytes) {
|
||||
|
||||
if (!(pTag->type == TSDB_DATA_TYPE_BINARY || pTag->type == TSDB_DATA_TYPE_NCHAR)) {
|
||||
terrno = TSDB_CODE_MND_INVALID_STB_OPTION;
|
||||
return -1;
|
||||
}
|
||||
|
||||
if (pField->bytes <= pTag->bytes) {
|
||||
terrno = TSDB_CODE_MND_INVALID_ROW_BYTES;
|
||||
return -1;
|
||||
}
|
||||
|
||||
pTag->bytes = pSchema->bytes;
|
||||
pTag->bytes = pField->bytes;
|
||||
pNew->version++;
|
||||
|
||||
mDebug("stb:%s, start to modify tag len %s to %d", pNew->name, pSchema->name, pSchema->bytes);
|
||||
mDebug("stb:%s, start to modify tag len %s to %d", pNew->name, pField->name, pField->bytes);
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int32_t mndAddSuperTableColumn(const SStbObj *pOld, SStbObj *pNew, const SSchema *pSchemas, int32_t ncols) {
|
||||
static int32_t mndAddSuperTableColumn(const SStbObj *pOld, SStbObj *pNew, SArray *pFields, int32_t ncols) {
|
||||
if (pOld->numOfColumns + ncols + pOld->numOfTags > TSDB_MAX_COLUMNS) {
|
||||
terrno = TSDB_CODE_MND_TOO_MANY_COLUMNS;
|
||||
return -1;
|
||||
}
|
||||
|
||||
for (int32_t i = 0; i < ncols; i++) {
|
||||
if (mndFindSuperTableColumnIndex(pOld, pSchemas[i].name) > 0) {
|
||||
terrno = TSDB_CODE_MND_TAG_ALREAY_EXIST;
|
||||
return -1;
|
||||
}
|
||||
|
||||
if (mndFindSuperTableTagIndex(pOld, pSchemas[i].name) > 0) {
|
||||
terrno = TSDB_CODE_MND_COLUMN_ALREAY_EXIST;
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
pNew->numOfColumns = pNew->numOfColumns + ncols;
|
||||
if (mndAllocStbSchemas(pOld, pNew) != 0) {
|
||||
return -1;
|
||||
}
|
||||
|
||||
memcpy(pNew->pColumns + pOld->numOfColumns, pSchemas, sizeof(SSchema) * ncols);
|
||||
for (int32_t i = 0; i < ncols; i++) {
|
||||
SField *pField = taosArrayGet(pFields, i);
|
||||
if (mndFindSuperTableColumnIndex(pOld, pField->name) > 0) {
|
||||
terrno = TSDB_CODE_MND_COLUMN_ALREADY_EXIST;
|
||||
return -1;
|
||||
}
|
||||
|
||||
for (int32_t i = pOld->numOfColumns; i < pNew->numOfColumns; i++) {
|
||||
SSchema *pSchema = &pNew->pColumns[i];
|
||||
if (mndFindSuperTableTagIndex(pOld, pField->name) > 0) {
|
||||
terrno = TSDB_CODE_MND_TAG_ALREADY_EXIST;
|
||||
return -1;
|
||||
}
|
||||
|
||||
SSchema *pSchema = &pNew->pColumns[pOld->numOfColumns + i];
|
||||
pSchema->bytes = pField->bytes;
|
||||
pSchema->type = pField->type;
|
||||
memcpy(pSchema->name, pField->name, TSDB_COL_NAME_LEN);
|
||||
pSchema->colId = pNew->nextColId;
|
||||
pNew->nextColId++;
|
||||
|
||||
mDebug("stb:%s, start to add column %s", pNew->name, pSchema->name);
|
||||
}
|
||||
|
||||
pNew->version++;
|
||||
mDebug("stb:%s, start to add column %s", pNew->name, pSchemas[0].name);
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int32_t mndDropSuperTableColumn(const SStbObj *pOld, SStbObj *pNew, const char *colName) {
|
||||
int32_t col = mndFindSuperTableColumnIndex(pOld, colName);
|
||||
if (col <= 0) {
|
||||
if (col < 0) {
|
||||
terrno = TSDB_CODE_MND_COLUMN_NOT_EXIST;
|
||||
return -1;
|
||||
}
|
||||
|
||||
if (col == 0) {
|
||||
terrno = TSDB_CODE_MND_INVALID_STB_ALTER_OPTION;
|
||||
return -1;
|
||||
}
|
||||
|
||||
if (pOld->numOfColumns == 2) {
|
||||
terrno = TSDB_CODE_MND_INVALID_STB_ALTER_OPTION;
|
||||
return -1;
|
||||
}
|
||||
|
||||
if (mndAllocStbSchemas(pOld, pNew) != 0) {
|
||||
return -1;
|
||||
}
|
||||
|
||||
memmove(pNew->pColumns + col, pNew->pColumns + col + 1, sizeof(SSchema) * (pNew->numOfColumns - col - 1));
|
||||
pNew->numOfColumns--;
|
||||
|
||||
pNew->version++;
|
||||
mDebug("stb:%s, start to drop col %s", pNew->name, colName);
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int32_t mndAlterStbColumnBytes(const SStbObj *pOld, SStbObj *pNew, const SSchema *pSchema) {
|
||||
int32_t col = mndFindSuperTableColumnIndex(pOld, pSchema->name);
|
||||
static int32_t mndAlterStbColumnBytes(const SStbObj *pOld, SStbObj *pNew, const SField *pField) {
|
||||
int32_t col = mndFindSuperTableColumnIndex(pOld, pField->name);
|
||||
if (col < 0) {
|
||||
terrno = TSDB_CODE_MND_COLUMN_NOT_EXIST;
|
||||
return -1;
|
||||
}
|
||||
|
||||
if (!(pSchema->type == TSDB_DATA_TYPE_BINARY || pSchema->type == TSDB_DATA_TYPE_NCHAR)) {
|
||||
terrno = TSDB_CODE_MND_INVALID_STB_OPTION;
|
||||
return -1;
|
||||
}
|
||||
|
||||
uint32_t nLen = 0;
|
||||
for (int32_t i = 0; i < pOld->numOfColumns; ++i) {
|
||||
nLen += (pOld->pColumns[i].colId == col) ? pSchema->bytes : pOld->pColumns[i].bytes;
|
||||
nLen += (pOld->pColumns[i].colId == col) ? pField->bytes : pOld->pColumns[i].bytes;
|
||||
}
|
||||
|
||||
if (nLen > TSDB_MAX_BYTES_PER_ROW) {
|
||||
|
@ -838,15 +881,20 @@ static int32_t mndAlterStbColumnBytes(const SStbObj *pOld, SStbObj *pNew, const
|
|||
}
|
||||
|
||||
SSchema *pCol = pNew->pColumns + col;
|
||||
if (pSchema->bytes <= pCol->bytes) {
|
||||
if (!(pCol->type == TSDB_DATA_TYPE_BINARY || pCol->type == TSDB_DATA_TYPE_NCHAR)) {
|
||||
terrno = TSDB_CODE_MND_INVALID_STB_OPTION;
|
||||
return -1;
|
||||
}
|
||||
|
||||
if (pField->bytes <= pCol->bytes) {
|
||||
terrno = TSDB_CODE_MND_INVALID_ROW_BYTES;
|
||||
return -1;
|
||||
}
|
||||
|
||||
pCol->bytes = pSchema->bytes;
|
||||
pCol->bytes = pField->bytes;
|
||||
pNew->version++;
|
||||
|
||||
mDebug("stb:%s, start to modify col len %s to %d", pNew->name, pSchema->name, pSchema->bytes);
|
||||
mDebug("stb:%s, start to modify col len %s to %d", pNew->name, pField->name, pField->bytes);
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
@ -879,7 +927,7 @@ static int32_t mndSetAlterStbRedoActions(SMnode *pMnode, STrans *pTrans, SDbObj
|
|||
if (pIter == NULL) break;
|
||||
if (pVgroup->dbUid != pDb->uid) continue;
|
||||
|
||||
void *pReq = mndBuildCreateStbReq(pMnode, pVgroup, pStb, &contLen);
|
||||
void *pReq = mndBuildVCreateStbReq(pMnode, pVgroup, pStb, &contLen);
|
||||
if (pReq == NULL) {
|
||||
sdbCancelFetch(pSdb, pIter);
|
||||
sdbRelease(pSdb, pVgroup);
|
||||
|
@ -914,28 +962,30 @@ static int32_t mndAlterStb(SMnode *pMnode, SMnodeMsg *pReq, const SMAltertbReq *
|
|||
taosRUnLockLatch(&pOld->lock);
|
||||
|
||||
int32_t code = -1;
|
||||
STrans *pTrans = NULL;
|
||||
SField *pField0 = taosArrayGet(pAlter->pFields, 0);
|
||||
|
||||
switch (pAlter->alterType) {
|
||||
case TSDB_ALTER_TABLE_ADD_TAG_COLUMN:
|
||||
code = mndAddSuperTableTag(pOld, &stbObj, pAlter->pSchemas, 1);
|
||||
case TSDB_ALTER_TABLE_ADD_TAG:
|
||||
code = mndAddSuperTableTag(pOld, &stbObj, pAlter->pFields, pAlter->numOfFields);
|
||||
break;
|
||||
case TSDB_ALTER_TABLE_DROP_TAG_COLUMN:
|
||||
code = mndDropSuperTableTag(pOld, &stbObj, pAlter->pSchemas[0].name);
|
||||
case TSDB_ALTER_TABLE_DROP_TAG:
|
||||
code = mndDropSuperTableTag(pOld, &stbObj, pField0->name);
|
||||
break;
|
||||
case TSDB_ALTER_TABLE_UPDATE_TAG_NAME:
|
||||
code = mndAlterStbTagName(pOld, &stbObj, pAlter->pSchemas[0].name, pAlter->pSchemas[1].name);
|
||||
code = mndAlterStbTagName(pOld, &stbObj, pAlter->pFields);
|
||||
break;
|
||||
case TSDB_ALTER_TABLE_UPDATE_TAG_BYTES:
|
||||
code = mndAlterStbTagBytes(pOld, &stbObj, &pAlter->pSchemas[0]);
|
||||
code = mndAlterStbTagBytes(pOld, &stbObj, pField0);
|
||||
break;
|
||||
case TSDB_ALTER_TABLE_ADD_COLUMN:
|
||||
code = mndAddSuperTableColumn(pOld, &stbObj, pAlter->pSchemas, 1);
|
||||
code = mndAddSuperTableColumn(pOld, &stbObj, pAlter->pFields, pAlter->numOfFields);
|
||||
break;
|
||||
case TSDB_ALTER_TABLE_DROP_COLUMN:
|
||||
code = mndDropSuperTableColumn(pOld, &stbObj, pAlter->pSchemas[0].name);
|
||||
code = mndDropSuperTableColumn(pOld, &stbObj, pField0->name);
|
||||
break;
|
||||
case TSDB_ALTER_TABLE_UPDATE_COLUMN_BYTES:
|
||||
code = mndAlterStbColumnBytes(pOld, &stbObj, &pAlter->pSchemas[0]);
|
||||
code = mndAlterStbColumnBytes(pOld, &stbObj, pField0);
|
||||
break;
|
||||
default:
|
||||
terrno = TSDB_CODE_MND_INVALID_STB_OPTION;
|
||||
|
@ -945,7 +995,7 @@ static int32_t mndAlterStb(SMnode *pMnode, SMnodeMsg *pReq, const SMAltertbReq *
|
|||
if (code != 0) goto ALTER_STB_OVER;
|
||||
|
||||
code = -1;
|
||||
STrans *pTrans = mndTransCreate(pMnode, TRN_POLICY_RETRY, &pReq->rpcMsg);
|
||||
pTrans = mndTransCreate(pMnode, TRN_POLICY_RETRY, &pReq->rpcMsg);
|
||||
if (pTrans == NULL) goto ALTER_STB_OVER;
|
||||
|
||||
mDebug("trans:%d, used to alter stb:%s", pTrans->id, pAlter->name);
|
||||
|
@ -966,39 +1016,42 @@ ALTER_STB_OVER:
|
|||
|
||||
static int32_t mndProcessMAlterStbReq(SMnodeMsg *pReq) {
|
||||
SMnode *pMnode = pReq->pMnode;
|
||||
SMAltertbReq *pAlter = pReq->rpcMsg.pCont;
|
||||
int32_t code = -1;
|
||||
SDbObj *pDb = NULL;
|
||||
SStbObj *pStb = NULL;
|
||||
SMAltertbReq alterReq = {0};
|
||||
|
||||
mDebug("stb:%s, start to alter", pAlter->name);
|
||||
if (tDeserializeSMAlterStbReq(pReq->rpcMsg.pCont, &alterReq) == NULL) goto ALTER_STB_OVER;
|
||||
|
||||
if (mndCheckAlterStbReq(pAlter) != 0) {
|
||||
mError("stb:%s, failed to alter since %s", pAlter->name, terrstr());
|
||||
return -1;
|
||||
mDebug("stb:%s, start to alter", alterReq.name);
|
||||
if (mndCheckAlterStbReq(&alterReq) != 0) goto ALTER_STB_OVER;
|
||||
|
||||
pDb = mndAcquireDbByStb(pMnode, alterReq.name);
|
||||
if (pDb == NULL) {
|
||||
terrno = TSDB_CODE_MND_INVALID_DB;
|
||||
goto ALTER_STB_OVER;
|
||||
}
|
||||
|
||||
SStbObj *pStb = mndAcquireStb(pMnode, pAlter->name);
|
||||
pStb = mndAcquireStb(pMnode, alterReq.name);
|
||||
if (pStb == NULL) {
|
||||
terrno = TSDB_CODE_MND_STB_NOT_EXIST;
|
||||
mError("stb:%s, failed to alter since %s", pAlter->name, terrstr());
|
||||
return -1;
|
||||
goto ALTER_STB_OVER;
|
||||
}
|
||||
|
||||
SDbObj *pDb = mndAcquireDbByStb(pMnode, pAlter->name);
|
||||
if (pDb == NULL) {
|
||||
mndReleaseStb(pMnode, pStb);
|
||||
terrno = TSDB_CODE_MND_DB_NOT_SELECTED;
|
||||
mError("stb:%s, failed to alter since %s", pAlter->name, terrstr());
|
||||
return -1;
|
||||
}
|
||||
|
||||
int32_t code = mndAlterStb(pMnode, pReq, pAlter, pDb, pStb);
|
||||
mndReleaseStb(pMnode, pStb);
|
||||
code = mndAlterStb(pMnode, pReq, &alterReq, pDb, pStb);
|
||||
|
||||
ALTER_STB_OVER:
|
||||
if (code != 0) {
|
||||
mError("stb:%s, failed to alter since %s", pAlter->name, tstrerror(code));
|
||||
return code;
|
||||
mError("stb:%s, failed to alter since %s", alterReq.name, terrstr());
|
||||
} else {
|
||||
code = TSDB_CODE_MND_ACTION_IN_PROGRESS;
|
||||
}
|
||||
|
||||
return TSDB_CODE_MND_ACTION_IN_PROGRESS;
|
||||
mndReleaseStb(pMnode, pStb);
|
||||
mndReleaseDb(pMnode, pDb);
|
||||
taosArrayDestroy(alterReq.pFields);
|
||||
|
||||
return code;
|
||||
}
|
||||
|
||||
static int32_t mndProcessVAlterStbRsp(SMnodeMsg *pRsp) {
|
||||
|
@ -1036,7 +1089,7 @@ static int32_t mndSetDropStbRedoActions(SMnode *pMnode, STrans *pTrans, SDbObj *
|
|||
if (pVgroup->dbUid != pDb->uid) continue;
|
||||
|
||||
int32_t contLen = 0;
|
||||
void *pReq = mndBuildDropStbReq(pMnode, pVgroup, pStb, &contLen);
|
||||
void *pReq = mndBuildVDropStbReq(pMnode, pVgroup, pStb, &contLen);
|
||||
if (pReq == NULL) {
|
||||
sdbCancelFetch(pSdb, pIter);
|
||||
sdbRelease(pSdb, pVgroup);
|
||||
|
@ -1083,27 +1136,29 @@ DROP_STB_OVER:
|
|||
|
||||
static int32_t mndProcessMDropStbReq(SMnodeMsg *pReq) {
|
||||
SMnode *pMnode = pReq->pMnode;
|
||||
SMDropStbReq *pDrop = pReq->rpcMsg.pCont;
|
||||
|
||||
mDebug("stb:%s, start to drop", pDrop->name);
|
||||
SMDropStbReq dropReq = {0};
|
||||
tDeserializeSMDropStbReq(pReq->rpcMsg.pCont, &dropReq);
|
||||
|
||||
SStbObj *pStb = mndAcquireStb(pMnode, pDrop->name);
|
||||
mDebug("stb:%s, start to drop", dropReq.name);
|
||||
|
||||
SStbObj *pStb = mndAcquireStb(pMnode, dropReq.name);
|
||||
if (pStb == NULL) {
|
||||
if (pDrop->igNotExists) {
|
||||
mDebug("stb:%s, not exist, ignore not exist is set", pDrop->name);
|
||||
if (dropReq.igNotExists) {
|
||||
mDebug("stb:%s, not exist, ignore not exist is set", dropReq.name);
|
||||
return 0;
|
||||
} else {
|
||||
terrno = TSDB_CODE_MND_STB_NOT_EXIST;
|
||||
mError("stb:%s, failed to drop since %s", pDrop->name, terrstr());
|
||||
mError("stb:%s, failed to drop since %s", dropReq.name, terrstr());
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
SDbObj *pDb = mndAcquireDbByStb(pMnode, pDrop->name);
|
||||
SDbObj *pDb = mndAcquireDbByStb(pMnode, dropReq.name);
|
||||
if (pDb == NULL) {
|
||||
mndReleaseStb(pMnode, pStb);
|
||||
terrno = TSDB_CODE_MND_DB_NOT_SELECTED;
|
||||
mError("stb:%s, failed to drop since %s", pDrop->name, terrstr());
|
||||
mError("stb:%s, failed to drop since %s", dropReq.name, terrstr());
|
||||
return -1;
|
||||
}
|
||||
|
||||
|
@ -1112,7 +1167,7 @@ static int32_t mndProcessMDropStbReq(SMnodeMsg *pReq) {
|
|||
mndReleaseStb(pMnode, pStb);
|
||||
|
||||
if (code != 0) {
|
||||
mError("stb:%s, failed to drop since %s", pDrop->name, terrstr());
|
||||
mError("stb:%s, failed to drop since %s", dropReq.name, terrstr());
|
||||
return -1;
|
||||
}
|
||||
|
||||
|
@ -1165,6 +1220,7 @@ static int32_t mndProcessStbMetaReq(SMnodeMsg *pReq) {
|
|||
strcpy(pMeta->dbFName, pStb->db);
|
||||
strcpy(pMeta->tbName, pInfo->tbName);
|
||||
strcpy(pMeta->stbName, pInfo->tbName);
|
||||
pMeta->dbId = htobe64(pDb->uid);
|
||||
pMeta->numOfTags = htonl(pStb->numOfTags);
|
||||
pMeta->numOfColumns = htonl(pStb->numOfColumns);
|
||||
pMeta->precision = pDb->cfg.precision;
|
||||
|
|
File diff suppressed because it is too large
Load Diff
|
@ -76,7 +76,7 @@ static void mndCalMqRebalance(void *param, void *tmrId) {
|
|||
if (mndIsMaster(pMnode)) {
|
||||
SMqTmrMsg *pMsg = rpcMallocCont(sizeof(SMqTmrMsg));
|
||||
SRpcMsg rpcMsg = {.msgType = TDMT_MND_MQ_TIMER, .pCont = pMsg, .contLen = sizeof(SMqTmrMsg)};
|
||||
pMnode->putReqToMWriteQFp(pMnode->pDnode, &rpcMsg);
|
||||
pMnode->putReqToMReadQFp(pMnode->pDnode, &rpcMsg);
|
||||
}
|
||||
|
||||
taosTmrReset(mndCalMqRebalance, 3000, pMnode, pMnode->timer, &pMnode->mqTimer);
|
||||
|
@ -249,6 +249,7 @@ static int32_t mndSetOptions(SMnode *pMnode, const SMnodeOpt *pOption) {
|
|||
memcpy(&pMnode->replicas, pOption->replicas, sizeof(SReplica) * TSDB_MAX_REPLICA);
|
||||
pMnode->pDnode = pOption->pDnode;
|
||||
pMnode->putReqToMWriteQFp = pOption->putReqToMWriteQFp;
|
||||
pMnode->putReqToMReadQFp = pOption->putReqToMReadQFp;
|
||||
pMnode->sendReqToDnodeFp = pOption->sendReqToDnodeFp;
|
||||
pMnode->sendReqToMnodeFp = pOption->sendReqToMnodeFp;
|
||||
pMnode->sendRedirectRspFp = pOption->sendRedirectRspFp;
|
||||
|
|
File diff suppressed because it is too large
Load Diff
|
@ -143,7 +143,7 @@ int tsdbLoadBlockIdx(SReadH *pReadh);
|
|||
int tsdbSetReadTable(SReadH *pReadh, STable *pTable);
|
||||
int tsdbLoadBlockInfo(SReadH *pReadh, void *pTarget);
|
||||
int tsdbLoadBlockData(SReadH *pReadh, SBlock *pBlock, SBlockInfo *pBlockInfo);
|
||||
int tsdbLoadBlockDataCols(SReadH *pReadh, SBlock *pBlock, SBlockInfo *pBlkInfo, int16_t *colIds, int numOfColsIds);
|
||||
int tsdbLoadBlockDataCols(SReadH *pReadh, SBlock *pBlock, SBlockInfo *pBlkInfo, const int16_t *colIds, int numOfColsIds);
|
||||
int tsdbLoadBlockStatis(SReadH *pReadh, SBlock *pBlock);
|
||||
int tsdbEncodeSBlockIdx(void **buf, SBlockIdx *pIdx);
|
||||
void *tsdbDecodeSBlockIdx(void *buf, SBlockIdx *pIdx);
|
||||
|
|
|
@ -272,7 +272,7 @@ int tsdbLoadBlockData(SReadH *pReadh, SBlock *pBlock, SBlockInfo *pBlkInfo) {
|
|||
return 0;
|
||||
}
|
||||
|
||||
int tsdbLoadBlockDataCols(SReadH *pReadh, SBlock *pBlock, SBlockInfo *pBlkInfo, int16_t *colIds, int numOfColsIds) {
|
||||
int tsdbLoadBlockDataCols(SReadH *pReadh, SBlock *pBlock, SBlockInfo *pBlkInfo, const int16_t *colIds, int numOfColsIds) {
|
||||
ASSERT(pBlock->numOfSubBlocks > 0);
|
||||
int8_t update = pReadh->pRepo->config.update;
|
||||
|
||||
|
@ -580,7 +580,7 @@ static int tsdbCheckAndDecodeColumnData(SDataCol *pDataCol, void *content, int32
|
|||
return 0;
|
||||
}
|
||||
|
||||
static int tsdbLoadBlockDataColsImpl(SReadH *pReadh, SBlock *pBlock, SDataCols *pDataCols, int16_t *colIds,
|
||||
static int tsdbLoadBlockDataColsImpl(SReadH *pReadh, SBlock *pBlock, SDataCols *pDataCols, const int16_t *colIds,
|
||||
int numOfColIds) {
|
||||
ASSERT(pBlock->numOfSubBlocks == 0 || pBlock->numOfSubBlocks == 1);
|
||||
ASSERT(colIds[0] == PRIMARYKEY_TIMESTAMP_COL_ID);
|
||||
|
|
|
@ -48,18 +48,22 @@ enum {
|
|||
};
|
||||
|
||||
typedef struct SCtgDebug {
|
||||
int32_t lockDebug;
|
||||
bool lockDebug;
|
||||
bool cacheDebug;
|
||||
uint32_t showCachePeriodSec;
|
||||
} SCtgDebug;
|
||||
|
||||
|
||||
typedef struct SCtgTbMetaCache {
|
||||
SRWLatch stbLock;
|
||||
SHashObj *cache; //key:tbname, value:STableMeta
|
||||
SRWLatch metaLock; // RC between cache destroy and all other operations
|
||||
SHashObj *metaCache; //key:tbname, value:STableMeta
|
||||
SHashObj *stbCache; //key:suid, value:STableMeta*
|
||||
} SCtgTbMetaCache;
|
||||
|
||||
typedef struct SCtgDBCache {
|
||||
SRWLatch vgLock;
|
||||
uint64_t dbId;
|
||||
int8_t deleted;
|
||||
SDBVgroupInfo *vgInfo;
|
||||
SCtgTbMetaCache tbCache;
|
||||
|
@ -81,6 +85,7 @@ typedef struct SCtgRentMgmt {
|
|||
|
||||
typedef struct SCatalog {
|
||||
uint64_t clusterId;
|
||||
SRWLatch dbLock;
|
||||
SHashObj *dbCache; //key:dbname, value:SCtgDBCache
|
||||
SCtgRentMgmt dbRent;
|
||||
SCtgRentMgmt stbRent;
|
||||
|
@ -105,6 +110,8 @@ typedef struct SCatalogStat {
|
|||
} SCatalogStat;
|
||||
|
||||
typedef struct SCatalogMgmt {
|
||||
bool exit;
|
||||
SRWLatch lock;
|
||||
SHashObj *pCluster; //key: clusterId, value: SCatalog*
|
||||
SCatalogStat stat;
|
||||
SCatalogCfg cfg;
|
||||
|
@ -132,11 +139,8 @@ typedef uint32_t (*tableNameHashFp)(const char *, uint32_t);
|
|||
#define ctgDebug(param, ...) qDebug("CTG:%p " param, pCatalog, __VA_ARGS__)
|
||||
#define ctgTrace(param, ...) qTrace("CTG:%p " param, pCatalog, __VA_ARGS__)
|
||||
|
||||
#define CTG_ERR_RET(c) do { int32_t _code = c; if (_code != TSDB_CODE_SUCCESS) { terrno = _code; return _code; } } while (0)
|
||||
#define CTG_RET(c) do { int32_t _code = c; if (_code != TSDB_CODE_SUCCESS) { terrno = _code; } return _code; } while (0)
|
||||
#define CTG_ERR_JRET(c) do { code = c; if (code != TSDB_CODE_SUCCESS) { terrno = code; goto _return; } } while (0)
|
||||
|
||||
#define CTG_LOCK_DEBUG(...) do { if (gCTGDebug.lockDebug) { qDebug(__VA_ARGS__); } } while (0)
|
||||
#define CTG_CACHE_DEBUG(...) do { if (gCTGDebug.cacheDebug) { qDebug(__VA_ARGS__); } } while (0)
|
||||
|
||||
#define TD_RWLATCH_WRITE_FLAG_COPY 0x40000000
|
||||
|
||||
|
@ -173,6 +177,15 @@ typedef uint32_t (*tableNameHashFp)(const char *, uint32_t);
|
|||
} while (0)
|
||||
|
||||
|
||||
#define CTG_ERR_RET(c) do { int32_t _code = c; if (_code != TSDB_CODE_SUCCESS) { terrno = _code; return _code; } } while (0)
|
||||
#define CTG_RET(c) do { int32_t _code = c; if (_code != TSDB_CODE_SUCCESS) { terrno = _code; } return _code; } while (0)
|
||||
#define CTG_ERR_JRET(c) do { code = c; if (code != TSDB_CODE_SUCCESS) { terrno = code; goto _return; } } while (0)
|
||||
|
||||
#define CTG_API_ENTER() do { CTG_LOCK(CTG_READ, &ctgMgmt.lock); if (atomic_load_8(&ctgMgmt.exit)) { CTG_RET(TSDB_CODE_CTG_OUT_OF_SERVICE); } } while (0)
|
||||
#define CTG_API_LEAVE(c) do { int32_t __code = c; CTG_UNLOCK(CTG_READ, &ctgMgmt.lock); CTG_RET(__code); } while (0)
|
||||
|
||||
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
|
File diff suppressed because it is too large
Load Diff
|
@ -39,6 +39,7 @@ namespace {
|
|||
extern "C" int32_t ctgGetTableMetaFromCache(struct SCatalog *pCatalog, const SName *pTableName, STableMeta **pTableMeta,
|
||||
int32_t *exist);
|
||||
extern "C" int32_t ctgUpdateTableMetaCache(struct SCatalog *pCatalog, STableMetaOutput *output);
|
||||
extern "C" int32_t ctgDbgGetClusterCacheNum(struct SCatalog* pCatalog, int32_t type);
|
||||
|
||||
void ctgTestSetPrepareTableMeta();
|
||||
void ctgTestSetPrepareCTableMeta();
|
||||
|
@ -49,7 +50,7 @@ bool ctgTestStop = false;
|
|||
bool ctgTestEnableSleep = false;
|
||||
bool ctgTestDeadLoop = false;
|
||||
int32_t ctgTestPrintNum = 200000;
|
||||
int32_t ctgTestMTRunSec = 30;
|
||||
int32_t ctgTestMTRunSec = 5;
|
||||
|
||||
int32_t ctgTestCurrentVgVersion = 0;
|
||||
int32_t ctgTestVgVersion = 1;
|
||||
|
@ -107,6 +108,7 @@ void ctgTestInitLogFile() {
|
|||
const int32_t maxLogFileNum = 10;
|
||||
|
||||
tsAsyncLog = 0;
|
||||
qDebugFlag = 159;
|
||||
|
||||
char temp[128] = {0};
|
||||
sprintf(temp, "%s/%s", tsLogDir, defaultLogFileNamePrefix);
|
||||
|
@ -185,7 +187,6 @@ void ctgTestBuildDBVgroup(SDBVgroupInfo **pdbVgroup) {
|
|||
ctgTestCurrentVgVersion = dbVgroup->vgVersion;
|
||||
|
||||
dbVgroup->hashMethod = 0;
|
||||
dbVgroup->dbId = ctgTestDbId;
|
||||
dbVgroup->vgHash = taosHashInit(ctgTestVgNum, taosGetDefaultHashFunction(TSDB_DATA_TYPE_INT), true, HASH_ENTRY_LOCK);
|
||||
|
||||
vgNum = ctgTestGetVgNumFromVgVersion(dbVgroup->vgVersion);
|
||||
|
@ -209,6 +210,45 @@ void ctgTestBuildDBVgroup(SDBVgroupInfo **pdbVgroup) {
|
|||
*pdbVgroup = dbVgroup;
|
||||
}
|
||||
|
||||
|
||||
void ctgTestBuildSTableMetaRsp(STableMetaRsp *rspMsg) {
|
||||
strcpy(rspMsg->dbFName, ctgTestDbname);
|
||||
sprintf(rspMsg->tbName, "%s", ctgTestSTablename);
|
||||
sprintf(rspMsg->stbName, "%s", ctgTestSTablename);
|
||||
rspMsg->numOfTags = ctgTestTagNum;
|
||||
rspMsg->numOfColumns = ctgTestColNum;
|
||||
rspMsg->precision = 1 + 1;
|
||||
rspMsg->tableType = TSDB_SUPER_TABLE;
|
||||
rspMsg->update = 1 + 1;
|
||||
rspMsg->sversion = ctgTestSVersion + 1;
|
||||
rspMsg->tversion = ctgTestTVersion + 1;
|
||||
rspMsg->suid = ctgTestSuid + 1;
|
||||
rspMsg->tuid = ctgTestSuid + 1;
|
||||
rspMsg->vgId = 1;
|
||||
|
||||
SSchema *s = NULL;
|
||||
s = &rspMsg->pSchema[0];
|
||||
s->type = TSDB_DATA_TYPE_TIMESTAMP;
|
||||
s->colId = 1;
|
||||
s->bytes = 8;
|
||||
strcpy(s->name, "ts");
|
||||
|
||||
s = &rspMsg->pSchema[1];
|
||||
s->type = TSDB_DATA_TYPE_INT;
|
||||
s->colId = 2;
|
||||
s->bytes = 4;
|
||||
strcpy(s->name, "col1s");
|
||||
|
||||
s = &rspMsg->pSchema[2];
|
||||
s->type = TSDB_DATA_TYPE_BINARY;
|
||||
s->colId = 3;
|
||||
s->bytes = 12 + 1;
|
||||
strcpy(s->name, "tag1s");
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
|
||||
void ctgTestPrepareDbVgroups(void *shandle, SEpSet *pEpSet, SRpcMsg *pMsg, SRpcMsg *pRsp) {
|
||||
SUseDbRsp *rspMsg = NULL; // todo
|
||||
|
||||
|
@ -592,7 +632,7 @@ void *ctgTestGetDbVgroupThread(void *param) {
|
|||
return NULL;
|
||||
}
|
||||
|
||||
void *ctgTestSetDbVgroupThread(void *param) {
|
||||
void *ctgTestSetSameDbVgroupThread(void *param) {
|
||||
struct SCatalog *pCtg = (struct SCatalog *)param;
|
||||
int32_t code = 0;
|
||||
SDBVgroupInfo *dbVgroup = NULL;
|
||||
|
@ -600,7 +640,7 @@ void *ctgTestSetDbVgroupThread(void *param) {
|
|||
|
||||
while (!ctgTestStop) {
|
||||
ctgTestBuildDBVgroup(&dbVgroup);
|
||||
code = catalogUpdateDBVgroup(pCtg, ctgTestDbname, dbVgroup);
|
||||
code = catalogUpdateDBVgroup(pCtg, ctgTestDbname, ctgTestDbId, dbVgroup);
|
||||
if (code) {
|
||||
assert(0);
|
||||
}
|
||||
|
@ -616,6 +656,32 @@ void *ctgTestSetDbVgroupThread(void *param) {
|
|||
return NULL;
|
||||
}
|
||||
|
||||
|
||||
void *ctgTestSetDiffDbVgroupThread(void *param) {
|
||||
struct SCatalog *pCtg = (struct SCatalog *)param;
|
||||
int32_t code = 0;
|
||||
SDBVgroupInfo *dbVgroup = NULL;
|
||||
int32_t n = 0;
|
||||
|
||||
while (!ctgTestStop) {
|
||||
ctgTestBuildDBVgroup(&dbVgroup);
|
||||
code = catalogUpdateDBVgroup(pCtg, ctgTestDbname, ctgTestDbId++, dbVgroup);
|
||||
if (code) {
|
||||
assert(0);
|
||||
}
|
||||
|
||||
if (ctgTestEnableSleep) {
|
||||
usleep(rand() % 5);
|
||||
}
|
||||
if (++n % ctgTestPrintNum == 0) {
|
||||
printf("Set:%d\n", n);
|
||||
}
|
||||
}
|
||||
|
||||
return NULL;
|
||||
}
|
||||
|
||||
|
||||
void *ctgTestGetCtableMetaThread(void *param) {
|
||||
struct SCatalog *pCtg = (struct SCatalog *)param;
|
||||
int32_t code = 0;
|
||||
|
@ -681,6 +747,8 @@ TEST(tableMeta, normalTable) {
|
|||
void *mockPointer = (void *)0x1;
|
||||
SVgroupInfo vgInfo = {0};
|
||||
|
||||
ctgTestInitLogFile();
|
||||
|
||||
ctgTestSetPrepareDbVgroups();
|
||||
|
||||
initQueryModuleMsgHandle();
|
||||
|
@ -771,6 +839,8 @@ TEST(tableMeta, childTableCase) {
|
|||
void *mockPointer = (void *)0x1;
|
||||
SVgroupInfo vgInfo = {0};
|
||||
|
||||
ctgTestInitLogFile();
|
||||
|
||||
ctgTestSetPrepareDbVgroupsAndChildMeta();
|
||||
|
||||
initQueryModuleMsgHandle();
|
||||
|
@ -964,6 +1034,124 @@ TEST(tableMeta, superTableCase) {
|
|||
catalogDestroy();
|
||||
}
|
||||
|
||||
TEST(tableMeta, rmStbMeta) {
|
||||
struct SCatalog *pCtg = NULL;
|
||||
void *mockPointer = (void *)0x1;
|
||||
SVgroupInfo vgInfo = {0};
|
||||
|
||||
ctgTestInitLogFile();
|
||||
|
||||
ctgTestSetPrepareDbVgroupsAndSuperMeta();
|
||||
|
||||
initQueryModuleMsgHandle();
|
||||
|
||||
int32_t code = catalogInit(NULL);
|
||||
ASSERT_EQ(code, 0);
|
||||
|
||||
// sendCreateDbMsg(pConn->pTransporter, &pConn->pAppInfo->mgmtEp.epSet);
|
||||
code = catalogGetHandle(ctgTestClusterId, &pCtg);
|
||||
ASSERT_EQ(code, 0);
|
||||
|
||||
SName n = {.type = TSDB_TABLE_NAME_T, .acctId = 1};
|
||||
strcpy(n.dbname, "db1");
|
||||
strcpy(n.tname, ctgTestSTablename);
|
||||
|
||||
STableMeta *tableMeta = NULL;
|
||||
code = catalogGetTableMeta(pCtg, mockPointer, (const SEpSet *)mockPointer, &n, &tableMeta);
|
||||
ASSERT_EQ(code, 0);
|
||||
ASSERT_EQ(tableMeta->vgId, 0);
|
||||
ASSERT_EQ(tableMeta->tableType, TSDB_SUPER_TABLE);
|
||||
ASSERT_EQ(tableMeta->sversion, ctgTestSVersion);
|
||||
ASSERT_EQ(tableMeta->tversion, ctgTestTVersion);
|
||||
ASSERT_EQ(tableMeta->uid, ctgTestSuid);
|
||||
ASSERT_EQ(tableMeta->suid, ctgTestSuid);
|
||||
ASSERT_EQ(tableMeta->tableInfo.numOfColumns, ctgTestColNum);
|
||||
ASSERT_EQ(tableMeta->tableInfo.numOfTags, ctgTestTagNum);
|
||||
ASSERT_EQ(tableMeta->tableInfo.precision, 1);
|
||||
ASSERT_EQ(tableMeta->tableInfo.rowSize, 12);
|
||||
|
||||
code = catalogRemoveSTableMeta(pCtg, "1.db1", ctgTestSTablename, ctgTestSuid);
|
||||
ASSERT_EQ(code, 0);
|
||||
|
||||
ASSERT_EQ(ctgDbgGetClusterCacheNum(pCtg, CTG_DBG_DB_NUM), 1);
|
||||
ASSERT_EQ(ctgDbgGetClusterCacheNum(pCtg, CTG_DBG_META_NUM), 0);
|
||||
ASSERT_EQ(ctgDbgGetClusterCacheNum(pCtg, CTG_DBG_STB_NUM), 0);
|
||||
ASSERT_EQ(ctgDbgGetClusterCacheNum(pCtg, CTG_DBG_DB_RENT_NUM), 1);
|
||||
ASSERT_EQ(ctgDbgGetClusterCacheNum(pCtg, CTG_DBG_STB_RENT_NUM), 0);
|
||||
|
||||
catalogDestroy();
|
||||
}
|
||||
|
||||
TEST(tableMeta, updateStbMeta) {
|
||||
struct SCatalog *pCtg = NULL;
|
||||
void *mockPointer = (void *)0x1;
|
||||
SVgroupInfo vgInfo = {0};
|
||||
|
||||
ctgTestInitLogFile();
|
||||
|
||||
ctgTestSetPrepareDbVgroupsAndSuperMeta();
|
||||
|
||||
initQueryModuleMsgHandle();
|
||||
|
||||
int32_t code = catalogInit(NULL);
|
||||
ASSERT_EQ(code, 0);
|
||||
|
||||
// sendCreateDbMsg(pConn->pTransporter, &pConn->pAppInfo->mgmtEp.epSet);
|
||||
code = catalogGetHandle(ctgTestClusterId, &pCtg);
|
||||
ASSERT_EQ(code, 0);
|
||||
|
||||
SName n = {.type = TSDB_TABLE_NAME_T, .acctId = 1};
|
||||
strcpy(n.dbname, "db1");
|
||||
strcpy(n.tname, ctgTestSTablename);
|
||||
|
||||
STableMeta *tableMeta = NULL;
|
||||
code = catalogGetTableMeta(pCtg, mockPointer, (const SEpSet *)mockPointer, &n, &tableMeta);
|
||||
ASSERT_EQ(code, 0);
|
||||
ASSERT_EQ(tableMeta->vgId, 0);
|
||||
ASSERT_EQ(tableMeta->tableType, TSDB_SUPER_TABLE);
|
||||
ASSERT_EQ(tableMeta->sversion, ctgTestSVersion);
|
||||
ASSERT_EQ(tableMeta->tversion, ctgTestTVersion);
|
||||
ASSERT_EQ(tableMeta->uid, ctgTestSuid);
|
||||
ASSERT_EQ(tableMeta->suid, ctgTestSuid);
|
||||
ASSERT_EQ(tableMeta->tableInfo.numOfColumns, ctgTestColNum);
|
||||
ASSERT_EQ(tableMeta->tableInfo.numOfTags, ctgTestTagNum);
|
||||
ASSERT_EQ(tableMeta->tableInfo.precision, 1);
|
||||
ASSERT_EQ(tableMeta->tableInfo.rowSize, 12);
|
||||
|
||||
tfree(tableMeta);
|
||||
|
||||
STableMetaRsp rsp = {0};
|
||||
ctgTestBuildSTableMetaRsp(&rsp);
|
||||
|
||||
code = catalogUpdateSTableMeta(pCtg, &rsp);
|
||||
ASSERT_EQ(code, 0);
|
||||
|
||||
ASSERT_EQ(ctgDbgGetClusterCacheNum(pCtg, CTG_DBG_DB_NUM), 1);
|
||||
ASSERT_EQ(ctgDbgGetClusterCacheNum(pCtg, CTG_DBG_META_NUM), 1);
|
||||
ASSERT_EQ(ctgDbgGetClusterCacheNum(pCtg, CTG_DBG_STB_NUM), 1);
|
||||
ASSERT_EQ(ctgDbgGetClusterCacheNum(pCtg, CTG_DBG_DB_RENT_NUM), 1);
|
||||
ASSERT_EQ(ctgDbgGetClusterCacheNum(pCtg, CTG_DBG_STB_RENT_NUM), 1);
|
||||
|
||||
code = catalogGetTableMeta(pCtg, mockPointer, (const SEpSet *)mockPointer, &n, &tableMeta);
|
||||
ASSERT_EQ(code, 0);
|
||||
ASSERT_EQ(tableMeta->vgId, 0);
|
||||
ASSERT_EQ(tableMeta->tableType, TSDB_SUPER_TABLE);
|
||||
ASSERT_EQ(tableMeta->sversion, ctgTestSVersion + 1);
|
||||
ASSERT_EQ(tableMeta->tversion, ctgTestTVersion + 1);
|
||||
ASSERT_EQ(tableMeta->uid, ctgTestSuid + 1);
|
||||
ASSERT_EQ(tableMeta->suid, ctgTestSuid + 1);
|
||||
ASSERT_EQ(tableMeta->tableInfo.numOfColumns, ctgTestColNum);
|
||||
ASSERT_EQ(tableMeta->tableInfo.numOfTags, ctgTestTagNum);
|
||||
ASSERT_EQ(tableMeta->tableInfo.precision, 1 + 1);
|
||||
ASSERT_EQ(tableMeta->tableInfo.rowSize, 12);
|
||||
|
||||
tfree(tableMeta);
|
||||
|
||||
catalogDestroy();
|
||||
}
|
||||
|
||||
|
||||
|
||||
TEST(tableDistVgroup, normalTable) {
|
||||
struct SCatalog *pCtg = NULL;
|
||||
void *mockPointer = (void *)0x1;
|
||||
|
@ -1109,7 +1297,7 @@ TEST(dbVgroup, getSetDbVgroupCase) {
|
|||
taosArrayDestroy(vgList);
|
||||
|
||||
ctgTestBuildDBVgroup(&dbVgroup);
|
||||
code = catalogUpdateDBVgroup(pCtg, ctgTestDbname, dbVgroup);
|
||||
code = catalogUpdateDBVgroup(pCtg, ctgTestDbname, ctgTestDbId, dbVgroup);
|
||||
ASSERT_EQ(code, 0);
|
||||
|
||||
code = catalogGetTableHashVgroup(pCtg, mockPointer, (const SEpSet *)mockPointer, &n, &vgInfo);
|
||||
|
@ -1128,7 +1316,7 @@ TEST(dbVgroup, getSetDbVgroupCase) {
|
|||
catalogDestroy();
|
||||
}
|
||||
|
||||
TEST(multiThread, getSetDbVgroupCase) {
|
||||
TEST(multiThread, getSetRmSameDbVgroup) {
|
||||
struct SCatalog *pCtg = NULL;
|
||||
void *mockPointer = (void *)0x1;
|
||||
SVgroupInfo vgInfo = {0};
|
||||
|
@ -1159,10 +1347,10 @@ TEST(multiThread, getSetDbVgroupCase) {
|
|||
pthread_attr_init(&thattr);
|
||||
|
||||
pthread_t thread1, thread2;
|
||||
pthread_create(&(thread1), &thattr, ctgTestSetDbVgroupThread, pCtg);
|
||||
pthread_create(&(thread1), &thattr, ctgTestSetSameDbVgroupThread, pCtg);
|
||||
|
||||
sleep(1);
|
||||
pthread_create(&(thread1), &thattr, ctgTestGetDbVgroupThread, pCtg);
|
||||
pthread_create(&(thread2), &thattr, ctgTestGetDbVgroupThread, pCtg);
|
||||
|
||||
while (true) {
|
||||
if (ctgTestDeadLoop) {
|
||||
|
@ -1179,6 +1367,58 @@ TEST(multiThread, getSetDbVgroupCase) {
|
|||
catalogDestroy();
|
||||
}
|
||||
|
||||
TEST(multiThread, getSetRmDiffDbVgroup) {
|
||||
struct SCatalog *pCtg = NULL;
|
||||
void *mockPointer = (void *)0x1;
|
||||
SVgroupInfo vgInfo = {0};
|
||||
SVgroupInfo *pvgInfo = NULL;
|
||||
SDBVgroupInfo dbVgroup = {0};
|
||||
SArray *vgList = NULL;
|
||||
ctgTestStop = false;
|
||||
|
||||
ctgTestInitLogFile();
|
||||
|
||||
ctgTestSetPrepareDbVgroups();
|
||||
|
||||
initQueryModuleMsgHandle();
|
||||
|
||||
// sendCreateDbMsg(pConn->pTransporter, &pConn->pAppInfo->mgmtEp.epSet);
|
||||
|
||||
int32_t code = catalogInit(NULL);
|
||||
ASSERT_EQ(code, 0);
|
||||
|
||||
code = catalogGetHandle(ctgTestClusterId, &pCtg);
|
||||
ASSERT_EQ(code, 0);
|
||||
|
||||
SName n = {.type = TSDB_TABLE_NAME_T, .acctId = 1};
|
||||
strcpy(n.dbname, "db1");
|
||||
strcpy(n.tname, ctgTestTablename);
|
||||
|
||||
pthread_attr_t thattr;
|
||||
pthread_attr_init(&thattr);
|
||||
|
||||
pthread_t thread1, thread2;
|
||||
pthread_create(&(thread1), &thattr, ctgTestSetDiffDbVgroupThread, pCtg);
|
||||
|
||||
sleep(1);
|
||||
pthread_create(&(thread2), &thattr, ctgTestGetDbVgroupThread, pCtg);
|
||||
|
||||
while (true) {
|
||||
if (ctgTestDeadLoop) {
|
||||
sleep(1);
|
||||
} else {
|
||||
sleep(ctgTestMTRunSec);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
ctgTestStop = true;
|
||||
sleep(1);
|
||||
|
||||
catalogDestroy();
|
||||
}
|
||||
|
||||
|
||||
|
||||
TEST(multiThread, ctableMeta) {
|
||||
struct SCatalog *pCtg = NULL;
|
||||
|
|
|
@ -78,3 +78,11 @@ FuncDef setExecFuncs(FuncDef def, FExecGetEnv getEnv, FExecInit init, FExecProce
|
|||
int32_t registerFunc(FuncDef func) {
|
||||
|
||||
}
|
||||
|
||||
int32_t fmGetFuncResultType(FuncMgtHandle handle, SFunctionNode* pFunc) {
|
||||
return TSDB_CODE_SUCCESS;
|
||||
}
|
||||
|
||||
bool fmIsAggFunc(int32_t funcId) {
|
||||
return false;
|
||||
}
|
||||
|
|
|
@ -27,12 +27,10 @@ extern "C" {
|
|||
|
||||
extern SToken nil_token;
|
||||
|
||||
typedef struct STargetExprNode {
|
||||
ENodeType nodeType;
|
||||
char* p;
|
||||
uint32_t n;
|
||||
SNode* pNode;
|
||||
} STargetExprNode;
|
||||
SNode* createRawExprNode(SAstCreateContext* pCxt, const SToken* pToken, SNode* pNode);
|
||||
SNode* createRawExprNodeExt(SAstCreateContext* pCxt, const SToken* pStart, const SToken* pEnd, SNode* pNode);
|
||||
SNode* releaseRawExprNode(SAstCreateContext* pCxt, SNode* pNode);
|
||||
SToken getTokenFromRawExprNode(SAstCreateContext* pCxt, SNode* pNode);
|
||||
|
||||
SNodeList* createNodeList(SAstCreateContext* pCxt, SNode* pNode);
|
||||
SNodeList* addNodeToList(SAstCreateContext* pCxt, SNodeList* pList, SNode* pNode);
|
||||
|
|
|
@ -10,8 +10,8 @@ SCreateAcctReq* buildAcctManipulationMsg(SSqlInfo* pInfo, int32_t* outputLen, in
|
|||
SDropUserReq* buildDropUserMsg(SSqlInfo* pInfo, int32_t* outputLen, int64_t id, char* msgBuf, int32_t msgLen);
|
||||
SShowReq* buildShowMsg(SShowInfo* pShowInfo, SParseContext* pParseCtx, SMsgBuf* pMsgBuf);
|
||||
SCreateDbReq* buildCreateDbMsg(SCreateDbInfo* pCreateDbInfo, SParseContext *pCtx, SMsgBuf* pMsgBuf);
|
||||
SMCreateStbReq* buildCreateStbMsg(SCreateTableSql* pCreateTableSql, int32_t* len, SParseContext* pParseCtx, SMsgBuf* pMsgBuf);
|
||||
SMDropStbReq* buildDropStableMsg(SSqlInfo* pInfo, int32_t* len, SParseContext* pParseCtx, SMsgBuf* pMsgBuf);
|
||||
char* buildCreateStbReq(SCreateTableSql* pCreateTableSql, int32_t* len, SParseContext* pParseCtx, SMsgBuf* pMsgBuf);
|
||||
char* buildDropStableReq(SSqlInfo* pInfo, int32_t* len, SParseContext* pParseCtx, SMsgBuf* pMsgBuf);
|
||||
SCreateDnodeReq *buildCreateDnodeMsg(SSqlInfo* pInfo, int32_t* len, SMsgBuf* pMsgBuf);
|
||||
SDropDnodeReq *buildDropDnodeMsg(SSqlInfo* pInfo, int32_t* len, SMsgBuf* pMsgBuf);
|
||||
|
||||
|
|
|
@ -67,19 +67,19 @@ cmd ::= SHOW DATABASES.
|
|||
cmd ::= query_expression(A). { PARSER_TRACE; pCxt->pRootNode = A; }
|
||||
|
||||
/************************************************ literal *************************************************************/
|
||||
literal(A) ::= NK_INTEGER(B). { PARSER_TRACE; A = createValueNode(pCxt, TSDB_DATA_TYPE_BIGINT, &B); }
|
||||
literal(A) ::= NK_FLOAT(B). { PARSER_TRACE; A = createValueNode(pCxt, TSDB_DATA_TYPE_DOUBLE, &B); }
|
||||
literal(A) ::= NK_STRING(B). { PARSER_TRACE; A = createValueNode(pCxt, TSDB_DATA_TYPE_BINARY, &B); }
|
||||
literal(A) ::= NK_BOOL(B). { PARSER_TRACE; A = createValueNode(pCxt, TSDB_DATA_TYPE_BOOL, &B); }
|
||||
literal(A) ::= TIMESTAMP NK_STRING(B). { PARSER_TRACE; A = createValueNode(pCxt, TSDB_DATA_TYPE_TIMESTAMP, &B); }
|
||||
literal(A) ::= NK_INTEGER(B). { PARSER_TRACE; A = createRawExprNode(pCxt, &B, createValueNode(pCxt, TSDB_DATA_TYPE_BIGINT, &B)); }
|
||||
literal(A) ::= NK_FLOAT(B). { PARSER_TRACE; A = createRawExprNode(pCxt, &B, createValueNode(pCxt, TSDB_DATA_TYPE_DOUBLE, &B)); }
|
||||
literal(A) ::= NK_STRING(B). { PARSER_TRACE; A = createRawExprNode(pCxt, &B, createValueNode(pCxt, TSDB_DATA_TYPE_BINARY, &B)); }
|
||||
literal(A) ::= NK_BOOL(B). { PARSER_TRACE; A = createRawExprNode(pCxt, &B, createValueNode(pCxt, TSDB_DATA_TYPE_BOOL, &B)); }
|
||||
literal(A) ::= TIMESTAMP(B) NK_STRING(C). { PARSER_TRACE; A = createRawExprNodeExt(pCxt, &B, &C, createValueNode(pCxt, TSDB_DATA_TYPE_TIMESTAMP, &C)); }
|
||||
literal(A) ::= duration_literal(B). { PARSER_TRACE; A = B; }
|
||||
|
||||
duration_literal(A) ::= NK_VARIABLE(B). { PARSER_TRACE; A = createDurationValueNode(pCxt, &B); }
|
||||
duration_literal(A) ::= NK_VARIABLE(B). { PARSER_TRACE; A = createRawExprNode(pCxt, &B, createDurationValueNode(pCxt, &B)); }
|
||||
|
||||
%type literal_list { SNodeList* }
|
||||
%destructor literal_list { PARSER_DESTRUCTOR_TRACE; nodesDestroyList($$); }
|
||||
literal_list(A) ::= literal(B). { PARSER_TRACE; A = createNodeList(pCxt, B); }
|
||||
literal_list(A) ::= literal_list(B) NK_COMMA literal(C). { PARSER_TRACE; A = addNodeToList(pCxt, B, C); }
|
||||
literal_list(A) ::= literal(B). { PARSER_TRACE; A = createNodeList(pCxt, releaseRawExprNode(pCxt, B)); }
|
||||
literal_list(A) ::= literal_list(B) NK_COMMA literal(C). { PARSER_TRACE; A = addNodeToList(pCxt, B, releaseRawExprNode(pCxt, C)); }
|
||||
|
||||
/************************************************ names and identifiers ***********************************************/
|
||||
%type db_name { SToken }
|
||||
|
@ -111,37 +111,70 @@ expression(A) ::= literal(B).
|
|||
//expression(A) ::= NK_QUESTION(B). { PARSER_TRACE; A = B; }
|
||||
//expression(A) ::= pseudo_column(B). { PARSER_TRACE; A = B; }
|
||||
expression(A) ::= column_reference(B). { PARSER_TRACE; A = B; }
|
||||
expression(A) ::= function_name(B) NK_LP expression_list(C) NK_RP. { PARSER_TRACE; A = createFunctionNode(pCxt, &B, C); }
|
||||
expression(A) ::= function_name(B) NK_LP expression_list(C) NK_RP(D). { PARSER_TRACE; A = createRawExprNodeExt(pCxt, &B, &D, createFunctionNode(pCxt, &B, C)); }
|
||||
//expression(A) ::= cast_expression(B). { PARSER_TRACE; A = B; }
|
||||
//expression(A) ::= case_expression(B). { PARSER_TRACE; A = B; }
|
||||
expression(A) ::= subquery(B). { PARSER_TRACE; A = B; }
|
||||
expression(A) ::= NK_LP expression(B) NK_RP. { PARSER_TRACE; A = B; }
|
||||
expression(A) ::= NK_PLUS expression(B). { PARSER_TRACE; A = B; }
|
||||
expression(A) ::= NK_MINUS expression(B). { PARSER_TRACE; A = createOperatorNode(pCxt, OP_TYPE_SUB, B, NULL); }
|
||||
expression(A) ::= expression(B) NK_PLUS expression(C). { PARSER_TRACE; A = createOperatorNode(pCxt, OP_TYPE_ADD, B, C); }
|
||||
expression(A) ::= expression(B) NK_MINUS expression(C). { PARSER_TRACE; A = createOperatorNode(pCxt, OP_TYPE_SUB, B, C); }
|
||||
expression(A) ::= expression(B) NK_STAR expression(C). { PARSER_TRACE; A = createOperatorNode(pCxt, OP_TYPE_MULTI, B, C); }
|
||||
expression(A) ::= expression(B) NK_SLASH expression(C). { PARSER_TRACE; A = createOperatorNode(pCxt, OP_TYPE_DIV, B, C); }
|
||||
expression(A) ::= expression(B) NK_REM expression(C). { PARSER_TRACE; A = createOperatorNode(pCxt, OP_TYPE_MOD, B, C); }
|
||||
expression(A) ::= NK_LP(B) expression(C) NK_RP(D). { PARSER_TRACE; A = createRawExprNodeExt(pCxt, &B, &D, releaseRawExprNode(pCxt, C)); }
|
||||
expression(A) ::= NK_PLUS(B) expression(C). {
|
||||
PARSER_TRACE;
|
||||
SToken t = getTokenFromRawExprNode(pCxt, C);
|
||||
A = createRawExprNodeExt(pCxt, &B, &t, releaseRawExprNode(pCxt, C));
|
||||
}
|
||||
expression(A) ::= NK_MINUS(B) expression(C). {
|
||||
PARSER_TRACE;
|
||||
SToken t = getTokenFromRawExprNode(pCxt, C);
|
||||
A = createRawExprNodeExt(pCxt, &B, &t, createOperatorNode(pCxt, OP_TYPE_SUB, releaseRawExprNode(pCxt, C), NULL));
|
||||
}
|
||||
expression(A) ::= expression(B) NK_PLUS expression(C). {
|
||||
PARSER_TRACE;
|
||||
SToken s = getTokenFromRawExprNode(pCxt, B);
|
||||
SToken e = getTokenFromRawExprNode(pCxt, C);
|
||||
A = createRawExprNodeExt(pCxt, &s, &e, createOperatorNode(pCxt, OP_TYPE_ADD, releaseRawExprNode(pCxt, B), releaseRawExprNode(pCxt, C)));
|
||||
}
|
||||
expression(A) ::= expression(B) NK_MINUS expression(C). {
|
||||
PARSER_TRACE;
|
||||
SToken s = getTokenFromRawExprNode(pCxt, B);
|
||||
SToken e = getTokenFromRawExprNode(pCxt, C);
|
||||
A = createRawExprNodeExt(pCxt, &s, &e, createOperatorNode(pCxt, OP_TYPE_SUB, releaseRawExprNode(pCxt, B), releaseRawExprNode(pCxt, C)));
|
||||
}
|
||||
expression(A) ::= expression(B) NK_STAR expression(C). {
|
||||
PARSER_TRACE;
|
||||
SToken s = getTokenFromRawExprNode(pCxt, B);
|
||||
SToken e = getTokenFromRawExprNode(pCxt, C);
|
||||
A = createRawExprNodeExt(pCxt, &s, &e, createOperatorNode(pCxt, OP_TYPE_MULTI, releaseRawExprNode(pCxt, B), releaseRawExprNode(pCxt, C)));
|
||||
}
|
||||
expression(A) ::= expression(B) NK_SLASH expression(C). {
|
||||
PARSER_TRACE;
|
||||
SToken s = getTokenFromRawExprNode(pCxt, B);
|
||||
SToken e = getTokenFromRawExprNode(pCxt, C);
|
||||
A = createRawExprNodeExt(pCxt, &s, &e, createOperatorNode(pCxt, OP_TYPE_DIV, releaseRawExprNode(pCxt, B), releaseRawExprNode(pCxt, C)));
|
||||
}
|
||||
expression(A) ::= expression(B) NK_REM expression(C). {
|
||||
PARSER_TRACE;
|
||||
SToken s = getTokenFromRawExprNode(pCxt, B);
|
||||
SToken e = getTokenFromRawExprNode(pCxt, C);
|
||||
A = createRawExprNodeExt(pCxt, &s, &e, createOperatorNode(pCxt, OP_TYPE_MOD, releaseRawExprNode(pCxt, B), releaseRawExprNode(pCxt, C)));
|
||||
}
|
||||
|
||||
%type expression_list { SNodeList* }
|
||||
%destructor expression_list { PARSER_DESTRUCTOR_TRACE; nodesDestroyList($$); }
|
||||
expression_list(A) ::= expression(B). { PARSER_TRACE; A = createNodeList(pCxt, B); }
|
||||
expression_list(A) ::= expression_list(B) NK_COMMA expression(C). { PARSER_TRACE; A = addNodeToList(pCxt, B, C); }
|
||||
expression_list(A) ::= expression(B). { PARSER_TRACE; A = createNodeList(pCxt, releaseRawExprNode(pCxt, B)); }
|
||||
expression_list(A) ::= expression_list(B) NK_COMMA expression(C). { PARSER_TRACE; A = addNodeToList(pCxt, B, releaseRawExprNode(pCxt, C)); }
|
||||
|
||||
column_reference(A) ::= column_name(B). { PARSER_TRACE; A = createColumnNode(pCxt, NULL, &B); }
|
||||
column_reference(A) ::= table_name(B) NK_DOT column_name(C). { PARSER_TRACE; A = createColumnNode(pCxt, &B, &C); }
|
||||
column_reference(A) ::= column_name(B). { PARSER_TRACE; A = createRawExprNode(pCxt, &B, createColumnNode(pCxt, NULL, &B)); }
|
||||
column_reference(A) ::= table_name(B) NK_DOT column_name(C). { PARSER_TRACE; A = createRawExprNodeExt(pCxt, &B, &C, createColumnNode(pCxt, &B, &C)); }
|
||||
|
||||
//pseudo_column(A) ::= NK_NOW. { PARSER_TRACE; A = createFunctionNode(pCxt, NULL, NULL); }
|
||||
|
||||
/************************************************ predicate ***********************************************************/
|
||||
predicate(A) ::= expression(B) compare_op(C) expression(D). { PARSER_TRACE; A = createOperatorNode(pCxt, C, B, D); }
|
||||
predicate(A) ::= expression(B) compare_op(C) expression(D). { PARSER_TRACE; A = createOperatorNode(pCxt, C, releaseRawExprNode(pCxt, B), releaseRawExprNode(pCxt, D)); }
|
||||
//predicate(A) ::= expression(B) compare_op sub_type expression(B).
|
||||
predicate(A) ::= expression(B) BETWEEN expression(C) AND expression(D). { PARSER_TRACE; A = createBetweenAnd(pCxt, B, C, D); }
|
||||
predicate(A) ::= expression(B) NOT BETWEEN expression(C) AND expression(D). { PARSER_TRACE; A = createNotBetweenAnd(pCxt, C, B, D); }
|
||||
predicate(A) ::= expression(B) IS NULL. { PARSER_TRACE; A = createIsNullCondNode(pCxt, B, true); }
|
||||
predicate(A) ::= expression(B) IS NOT NULL. { PARSER_TRACE; A = createIsNullCondNode(pCxt, B, false); }
|
||||
predicate(A) ::= expression(B) in_op(C) in_predicate_value(D). { PARSER_TRACE; A = createOperatorNode(pCxt, C, B, D); }
|
||||
predicate(A) ::= expression(B) BETWEEN expression(C) AND expression(D). { PARSER_TRACE; A = createBetweenAnd(pCxt, releaseRawExprNode(pCxt, B), releaseRawExprNode(pCxt, C), releaseRawExprNode(pCxt, D)); }
|
||||
predicate(A) ::= expression(B) NOT BETWEEN expression(C) AND expression(D). { PARSER_TRACE; A = createNotBetweenAnd(pCxt, releaseRawExprNode(pCxt, C), releaseRawExprNode(pCxt, B), releaseRawExprNode(pCxt, D)); }
|
||||
predicate(A) ::= expression(B) IS NULL. { PARSER_TRACE; A = createIsNullCondNode(pCxt, releaseRawExprNode(pCxt, B), true); }
|
||||
predicate(A) ::= expression(B) IS NOT NULL. { PARSER_TRACE; A = createIsNullCondNode(pCxt, releaseRawExprNode(pCxt, B), false); }
|
||||
predicate(A) ::= expression(B) in_op(C) in_predicate_value(D). { PARSER_TRACE; A = createOperatorNode(pCxt, C, releaseRawExprNode(pCxt, B), D); }
|
||||
|
||||
%type compare_op { EOperatorType }
|
||||
%destructor compare_op { PARSER_DESTRUCTOR_TRACE; }
|
||||
|
@ -186,7 +219,7 @@ table_reference(A) ::= joined_table(B).
|
|||
|
||||
table_primary(A) ::= table_name(B) alias_opt(C). { PARSER_TRACE; A = createRealTableNode(pCxt, NULL, &B, &C); }
|
||||
table_primary(A) ::= db_name(B) NK_DOT table_name(C) alias_opt(D). { PARSER_TRACE; A = createRealTableNode(pCxt, &B, &C, &D); }
|
||||
table_primary(A) ::= subquery(B) alias_opt(C). { PARSER_TRACE; A = createTempTableNode(pCxt, B, &C); }
|
||||
table_primary(A) ::= subquery(B) alias_opt(C). { PARSER_TRACE; A = createTempTableNode(pCxt, releaseRawExprNode(pCxt, B), &C); }
|
||||
table_primary(A) ::= parenthesized_joined_table(B). { PARSER_TRACE; A = B; }
|
||||
|
||||
%type alias_opt { SToken }
|
||||
|
@ -236,9 +269,13 @@ select_list(A) ::= select_sublist(B).
|
|||
select_sublist(A) ::= select_item(B). { PARSER_TRACE; A = createNodeList(pCxt, B); }
|
||||
select_sublist(A) ::= select_sublist(B) NK_COMMA select_item(C). { PARSER_TRACE; A = addNodeToList(pCxt, B, C); }
|
||||
|
||||
select_item(A) ::= expression(B). { PARSER_TRACE; A = B; }
|
||||
select_item(A) ::= expression(B) column_alias(C). { PARSER_TRACE; A = setProjectionAlias(pCxt, B, &C); }
|
||||
select_item(A) ::= expression(B) AS column_alias(C). { PARSER_TRACE; A = setProjectionAlias(pCxt, B, &C); }
|
||||
select_item(A) ::= expression(B). {
|
||||
PARSER_TRACE;
|
||||
SToken t = getTokenFromRawExprNode(pCxt, B);
|
||||
A = setProjectionAlias(pCxt, releaseRawExprNode(pCxt, B), &t);
|
||||
}
|
||||
select_item(A) ::= expression(B) column_alias(C). { PARSER_TRACE; A = setProjectionAlias(pCxt, releaseRawExprNode(pCxt, B), &C); }
|
||||
select_item(A) ::= expression(B) AS column_alias(C). { PARSER_TRACE; A = setProjectionAlias(pCxt, releaseRawExprNode(pCxt, B), &C); }
|
||||
select_item(A) ::= table_name(B) NK_DOT NK_STAR(C). { PARSER_TRACE; A = createColumnNode(pCxt, &B, &C); }
|
||||
|
||||
where_clause_opt(A) ::= . { PARSER_TRACE; A = NULL; }
|
||||
|
@ -251,8 +288,8 @@ partition_by_clause_opt(A) ::= PARTITION BY expression_list(B).
|
|||
|
||||
twindow_clause_opt(A) ::= . { PARSER_TRACE; A = NULL; }
|
||||
twindow_clause_opt(A) ::=
|
||||
SESSION NK_LP column_reference(B) NK_COMMA NK_INTEGER(C) NK_RP. { PARSER_TRACE; A = createSessionWindowNode(pCxt, B, &C); }
|
||||
twindow_clause_opt(A) ::= STATE_WINDOW NK_LP column_reference(B) NK_RP. { PARSER_TRACE; A = createStateWindowNode(pCxt, B); }
|
||||
SESSION NK_LP column_reference(B) NK_COMMA NK_INTEGER(C) NK_RP. { PARSER_TRACE; A = createSessionWindowNode(pCxt, releaseRawExprNode(pCxt, B), &C); }
|
||||
twindow_clause_opt(A) ::= STATE_WINDOW NK_LP column_reference(B) NK_RP. { PARSER_TRACE; A = createStateWindowNode(pCxt, releaseRawExprNode(pCxt, B)); }
|
||||
twindow_clause_opt(A) ::=
|
||||
INTERVAL NK_LP duration_literal(B) NK_RP sliding_opt(C) fill_opt(D). { PARSER_TRACE; A = createIntervalWindowNode(pCxt, B, NULL, C, D); }
|
||||
twindow_clause_opt(A) ::=
|
||||
|
@ -317,7 +354,7 @@ limit_clause_opt(A) ::= LIMIT NK_INTEGER(B) OFFSET NK_INTEGER(C).
|
|||
limit_clause_opt(A) ::= LIMIT NK_INTEGER(C) NK_COMMA NK_INTEGER(B). { PARSER_TRACE; A = createLimitNode(pCxt, &B, &C); }
|
||||
|
||||
/************************************************ subquery ************************************************************/
|
||||
subquery(A) ::= NK_LP query_expression(B) NK_RP. { PARSER_TRACE; A = B; }
|
||||
subquery(A) ::= NK_LP(B) query_expression(C) NK_RP(D). { PARSER_TRACE; A = createRawExprNodeExt(pCxt, &B, &D, C); }
|
||||
|
||||
/************************************************ search_condition ****************************************************/
|
||||
search_condition(A) ::= boolean_value_expression(B). { PARSER_TRACE; A = B; }
|
||||
|
@ -330,7 +367,7 @@ sort_specification_list(A) ::=
|
|||
sort_specification_list(B) NK_COMMA sort_specification(C). { PARSER_TRACE; A = addNodeToList(pCxt, B, C); }
|
||||
|
||||
sort_specification(A) ::=
|
||||
expression(B) ordering_specification_opt(C) null_ordering_opt(D). { PARSER_TRACE; A = createOrderByExprNode(pCxt, B, C, D); }
|
||||
expression(B) ordering_specification_opt(C) null_ordering_opt(D). { PARSER_TRACE; A = createOrderByExprNode(pCxt, releaseRawExprNode(pCxt, B), C, D); }
|
||||
|
||||
%type ordering_specification_opt EOrder
|
||||
%destructor ordering_specification_opt { PARSER_DESTRUCTOR_TRACE; }
|
||||
|
|
|
@ -820,7 +820,7 @@ cmd ::= ALTER TABLE ids(X) cpxName(F) MODIFY COLUMN columnlist(A). {
|
|||
//////////////////////////////////ALTER TAGS statement/////////////////////////////////////
|
||||
cmd ::= ALTER TABLE ids(X) cpxName(Y) ADD TAG columnlist(A). {
|
||||
X.n += Y.n;
|
||||
SAlterTableInfo* pAlterTable = tSetAlterTableInfo(&X, A, NULL, TSDB_ALTER_TABLE_ADD_TAG_COLUMN, -1);
|
||||
SAlterTableInfo* pAlterTable = tSetAlterTableInfo(&X, A, NULL, TSDB_ALTER_TABLE_ADD_TAG, -1);
|
||||
setSqlInfo(pInfo, pAlterTable, NULL, TSDB_SQL_ALTER_TABLE);
|
||||
}
|
||||
cmd ::= ALTER TABLE ids(X) cpxName(Z) DROP TAG ids(Y). {
|
||||
|
@ -829,7 +829,7 @@ cmd ::= ALTER TABLE ids(X) cpxName(Z) DROP TAG ids(Y). {
|
|||
toTSDBType(Y.type);
|
||||
SArray* A = tListItemAppendToken(NULL, &Y, -1);
|
||||
|
||||
SAlterTableInfo* pAlterTable = tSetAlterTableInfo(&X, NULL, A, TSDB_ALTER_TABLE_DROP_TAG_COLUMN, -1);
|
||||
SAlterTableInfo* pAlterTable = tSetAlterTableInfo(&X, NULL, A, TSDB_ALTER_TABLE_DROP_TAG, -1);
|
||||
setSqlInfo(pInfo, pAlterTable, NULL, TSDB_SQL_ALTER_TABLE);
|
||||
}
|
||||
|
||||
|
@ -889,7 +889,7 @@ cmd ::= ALTER STABLE ids(X) cpxName(F) MODIFY COLUMN columnlist(A). {
|
|||
//////////////////////////////////ALTER TAGS statement/////////////////////////////////////
|
||||
cmd ::= ALTER STABLE ids(X) cpxName(Y) ADD TAG columnlist(A). {
|
||||
X.n += Y.n;
|
||||
SAlterTableInfo* pAlterTable = tSetAlterTableInfo(&X, A, NULL, TSDB_ALTER_TABLE_ADD_TAG_COLUMN, TSDB_SUPER_TABLE);
|
||||
SAlterTableInfo* pAlterTable = tSetAlterTableInfo(&X, A, NULL, TSDB_ALTER_TABLE_ADD_TAG, TSDB_SUPER_TABLE);
|
||||
setSqlInfo(pInfo, pAlterTable, NULL, TSDB_SQL_ALTER_TABLE);
|
||||
}
|
||||
cmd ::= ALTER STABLE ids(X) cpxName(Z) DROP TAG ids(Y). {
|
||||
|
@ -898,7 +898,7 @@ cmd ::= ALTER STABLE ids(X) cpxName(Z) DROP TAG ids(Y). {
|
|||
toTSDBType(Y.type);
|
||||
SArray* A = tListItemAppendToken(NULL, &Y, -1);
|
||||
|
||||
SAlterTableInfo* pAlterTable = tSetAlterTableInfo(&X, NULL, A, TSDB_ALTER_TABLE_DROP_TAG_COLUMN, TSDB_SUPER_TABLE);
|
||||
SAlterTableInfo* pAlterTable = tSetAlterTableInfo(&X, NULL, A, TSDB_ALTER_TABLE_DROP_TAG, TSDB_SUPER_TABLE);
|
||||
setSqlInfo(pInfo, pAlterTable, NULL, TSDB_SQL_ALTER_TABLE);
|
||||
}
|
||||
|
||||
|
|
|
@ -24,6 +24,14 @@
|
|||
} \
|
||||
} while (0)
|
||||
|
||||
/*
 * Guard for functions that expect a raw-expression wrapper node.
 * When `node` is NULL, or its node type is not QUERY_NODE_RAW_EXPR, the parse
 * context is flagged invalid and the enclosing function returns NULL.
 * Expects a variable named `pCxt` to be visible at the expansion site.
 */
#define CHECK_RAW_EXPR_NODE(node)                                      \
  do {                                                                 \
    if (!(NULL != (node) && QUERY_NODE_RAW_EXPR == nodeType(node))) {  \
      pCxt->valid = false;                                             \
      return NULL;                                                     \
    }                                                                  \
  } while (0)
|
||||
|
||||
SToken nil_token = { .type = TK_NIL, .n = 0, .z = NULL };
|
||||
|
||||
static bool checkDbName(SAstCreateContext* pCxt, const SToken* pDbName) {
|
||||
|
@ -50,6 +58,37 @@ static bool checkColumnName(SAstCreateContext* pCxt, const SToken* pColumnName)
|
|||
return pCxt->valid;
|
||||
}
|
||||
|
||||
/*
 * Wraps pNode in a newly made QUERY_NODE_RAW_EXPR node that records the text
 * span of pToken (start pointer and length).
 *
 * pCxt   - parse context, used by CHECK_OUT_OF_MEM.
 * pToken - token whose z/n describe the raw text of the expression.
 * pNode  - node to wrap; stored as-is in the wrapper.
 *
 * Returns the wrapper as an SNode*. If nodesMakeNode fails, CHECK_OUT_OF_MEM
 * takes its failure path (presumably flags the context and returns NULL -
 * confirm against the macro definition).
 */
SNode* createRawExprNode(SAstCreateContext* pCxt, const SToken* pToken, SNode* pNode) {
  SRawExprNode* pRaw = (SRawExprNode*)nodesMakeNode(QUERY_NODE_RAW_EXPR);
  CHECK_OUT_OF_MEM(pRaw);

  pRaw->pNode = pNode;
  pRaw->p = pToken->z;
  pRaw->n = pToken->n;

  return (SNode*)pRaw;
}
|
||||
|
||||
/*
 * Same as createRawExprNode, but the recorded text span runs from the first
 * character of pStart to the last character of pEnd.
 *
 * pCxt   - parse context, used by CHECK_OUT_OF_MEM.
 * pStart - token marking where the raw text begins.
 * pEnd   - token marking where the raw text ends (its own length included).
 * pNode  - node to wrap; stored as-is in the wrapper.
 *
 * Returns the wrapper as an SNode*. If nodesMakeNode fails, CHECK_OUT_OF_MEM
 * takes its failure path (presumably flags the context and returns NULL -
 * confirm against the macro definition).
 */
SNode* createRawExprNodeExt(SAstCreateContext* pCxt, const SToken* pStart, const SToken* pEnd, SNode* pNode) {
  SRawExprNode* pRaw = (SRawExprNode*)nodesMakeNode(QUERY_NODE_RAW_EXPR);
  CHECK_OUT_OF_MEM(pRaw);

  pRaw->pNode = pNode;
  pRaw->p = pStart->z;
  /* Length = distance from the start token to one past the end token. */
  pRaw->n = (pEnd->z + pEnd->n) - pStart->z;

  return (SNode*)pRaw;
}
|
||||
|
||||
/*
 * Unwraps a raw-expression node: hands back the node it carries and frees the
 * wrapper itself.
 *
 * pCxt  - parse context; marked invalid when pNode is not a usable wrapper.
 * pNode - expected to be a QUERY_NODE_RAW_EXPR node.
 *
 * Returns the wrapped node, or NULL (with pCxt->valid cleared) when pNode is
 * NULL or is not a raw-expression node.
 */
SNode* releaseRawExprNode(SAstCreateContext* pCxt, SNode* pNode) {
  if (NULL == pNode || QUERY_NODE_RAW_EXPR != nodeType(pNode)) {
    pCxt->valid = false;
    return NULL;
  }

  SNode* pWrapped = ((SRawExprNode*)pNode)->pNode;
  tfree(pNode);
  return pWrapped;
}
|
||||
|
||||
/*
 * Builds a token describing the raw text span recorded in a raw-expression
 * wrapper node. The wrapper is not modified or freed.
 *
 * pCxt  - parse context; marked invalid when pNode is not a usable wrapper.
 * pNode - expected to be a QUERY_NODE_RAW_EXPR node.
 *
 * Returns a token with type 0 whose z/n are the wrapper's p/n. When pNode is
 * NULL or is not a raw-expression node, pCxt->valid is cleared and an empty
 * token (z == NULL, n == 0) is returned instead of dereferencing pNode.
 */
SToken getTokenFromRawExprNode(SAstCreateContext* pCxt, SNode* pNode) {
  /*
   * Grammar actions call this before releaseRawExprNode, so a failed wrapper
   * creation (NULL) would otherwise be dereferenced here before any check.
   */
  if (NULL == pNode || QUERY_NODE_RAW_EXPR != nodeType(pNode)) {
    pCxt->valid = false;
    SToken empty = { .type = 0, .z = NULL, .n = 0 };
    return empty;
  }

  SRawExprNode* pRaw = (SRawExprNode*)pNode;
  SToken t = { .type = 0, .z = pRaw->p, .n = pRaw->n };
  return t;
}
|
||||
|
||||
SNodeList* createNodeList(SAstCreateContext* pCxt, SNode* pNode) {
|
||||
SNodeList* list = nodesMakeList();
|
||||
CHECK_OUT_OF_MEM(list);
|
||||
|
@ -79,14 +118,22 @@ SNode* createValueNode(SAstCreateContext* pCxt, int32_t dataType, const SToken*
|
|||
val->literal = strndup(pLiteral->z, pLiteral->n);
|
||||
CHECK_OUT_OF_MEM(val->literal);
|
||||
val->node.resType.type = dataType;
|
||||
val->node.resType.bytes = tDataTypes[TSDB_DATA_TYPE_BOOL].bytes;
|
||||
val->node.resType.bytes = tDataTypes[dataType].bytes;
|
||||
if (TSDB_DATA_TYPE_TIMESTAMP == dataType) {
|
||||
val->node.resType.precision = TSDB_TIME_PRECISION_MILLI;
|
||||
}
|
||||
return (SNode*)val;
|
||||
}
|
||||
|
||||
/*
 * Creates a QUERY_NODE_VALUE node for a duration literal.
 *
 * pCxt     - parse context, used by CHECK_OUT_OF_MEM.
 * pLiteral - token holding the literal text; its first n bytes are copied.
 *
 * The node is flagged as a duration and typed as BIGINT with millisecond
 * precision. Returns the node as an SNode*; on an allocation failure
 * CHECK_OUT_OF_MEM takes its failure path (presumably flags the context and
 * returns NULL - confirm against the macro definition).
 */
SNode* createDurationValueNode(SAstCreateContext* pCxt, const SToken* pLiteral) {
  SValueNode* pVal = (SValueNode*)nodesMakeNode(QUERY_NODE_VALUE);
  CHECK_OUT_OF_MEM(pVal);

  // todo
  pVal->literal = strndup(pLiteral->z, pLiteral->n);
  CHECK_OUT_OF_MEM(pVal->literal);

  pVal->node.resType.type = TSDB_DATA_TYPE_BIGINT;
  pVal->node.resType.bytes = tDataTypes[TSDB_DATA_TYPE_BIGINT].bytes;
  pVal->node.resType.precision = TSDB_TIME_PRECISION_MILLI;
  pVal->isDuration = true;

  return (SNode*)pVal;
}
|
||||
|
||||
|
|
|
@ -610,7 +610,7 @@ SAlterTableInfo *tSetAlterTableInfo(SToken *pTableName, SArray *pCols, SArray *p
|
|||
pAlterTable->type = type;
|
||||
pAlterTable->tableType = tableType;
|
||||
|
||||
if (type == TSDB_ALTER_TABLE_ADD_COLUMN || type == TSDB_ALTER_TABLE_ADD_TAG_COLUMN || type == TSDB_ALTER_TABLE_UPDATE_COLUMN_BYTES || type == TSDB_ALTER_TABLE_UPDATE_TAG_BYTES) {
|
||||
if (type == TSDB_ALTER_TABLE_ADD_COLUMN || type == TSDB_ALTER_TABLE_ADD_TAG || type == TSDB_ALTER_TABLE_UPDATE_COLUMN_BYTES || type == TSDB_ALTER_TABLE_UPDATE_TAG_BYTES) {
|
||||
pAlterTable->pAddColumns = pCols;
|
||||
assert(pVals == NULL);
|
||||
} else {
|
||||
|
|
|
@ -249,95 +249,38 @@ SCreateDbReq* buildCreateDbMsg(SCreateDbInfo* pCreateDbInfo, SParseContext *pCtx
|
|||
return pCreateMsg;
|
||||
}
|
||||
|
||||
SMCreateStbReq* buildCreateStbMsg(SCreateTableSql* pCreateTableSql, int32_t* len, SParseContext* pParseCtx, SMsgBuf* pMsgBuf) {
|
||||
SSchema* pSchema;
|
||||
char* buildCreateStbReq(SCreateTableSql* pCreateTableSql, int32_t* len, SParseContext* pParseCtx, SMsgBuf* pMsgBuf) {
|
||||
SMCreateStbReq createReq = {0};
|
||||
createReq.igExists = pCreateTableSql->existCheck ? 1 : 0;
|
||||
createReq.pColumns = pCreateTableSql->colInfo.pColumns;
|
||||
createReq.pTags = pCreateTableSql->colInfo.pTagColumns;
|
||||
createReq.numOfColumns = (int32_t)taosArrayGetSize(pCreateTableSql->colInfo.pColumns);
|
||||
createReq.numOfTags = (int32_t)taosArrayGetSize(pCreateTableSql->colInfo.pTagColumns);
|
||||
|
||||
int32_t numOfTags = 0;
|
||||
int32_t numOfCols = (int32_t) taosArrayGetSize(pCreateTableSql->colInfo.pColumns);
|
||||
if (pCreateTableSql->colInfo.pTagColumns != NULL) {
|
||||
numOfTags = (int32_t) taosArrayGetSize(pCreateTableSql->colInfo.pTagColumns);
|
||||
}
|
||||
|
||||
SMCreateStbReq* pCreateStbMsg = (SMCreateStbReq*)calloc(1, sizeof(SMCreateStbReq) + (numOfCols + numOfTags) * sizeof(SSchema));
|
||||
if (pCreateStbMsg == NULL) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
char* pMsg = NULL;
|
||||
#if 0
|
||||
int32_t tableType = pCreateTableSql->type;
|
||||
if (tableType != TSQL_CREATE_TABLE && tableType != TSQL_CREATE_STABLE) { // create by using super table, tags value
|
||||
SArray* list = pInfo->pCreateTableInfo->childTableInfo;
|
||||
|
||||
int32_t numOfTables = (int32_t)taosArrayGetSize(list);
|
||||
pCreateStbMsg->numOfTables = htonl(numOfTables);
|
||||
|
||||
pMsg = (char*)pCreateMsg;
|
||||
for (int32_t i = 0; i < numOfTables; ++i) {
|
||||
SCreateTableMsg* pCreate = (SCreateTableMsg*)pMsg;
|
||||
|
||||
pCreate->numOfColumns = htons(pCmd->numOfCols);
|
||||
pCreate->numOfTags = htons(pCmd->count);
|
||||
pMsg += sizeof(SCreateTableMsg);
|
||||
|
||||
SCreatedTableInfo* p = taosArrayGet(list, i);
|
||||
strcpy(pCreate->tableName, p->fullname);
|
||||
pCreate->igExists = (p->igExist) ? 1 : 0;
|
||||
|
||||
// use dbinfo from table id without modifying current db info
|
||||
pMsg = serializeTagData(&p->tagdata, pMsg);
|
||||
|
||||
int32_t len = (int32_t)(pMsg - (char*)pCreate);
|
||||
pCreate->len = htonl(len);
|
||||
}
|
||||
|
||||
} else {
|
||||
#endif
|
||||
// create (super) table
|
||||
SName n = {0};
|
||||
int32_t code = createSName(&n, &pCreateTableSql->name, pParseCtx, pMsgBuf);
|
||||
if (code != 0) {
|
||||
if (createSName(&n, &pCreateTableSql->name, pParseCtx, pMsgBuf) != 0) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
code = tNameExtractFullName(&n, pCreateStbMsg->name);
|
||||
if (code != 0) {
|
||||
if (tNameExtractFullName(&n, createReq.name) != 0) {
|
||||
buildInvalidOperationMsg(pMsgBuf, "invalid table name or database not specified");
|
||||
return NULL;
|
||||
}
|
||||
|
||||
pCreateStbMsg->igExists = pCreateTableSql->existCheck ? 1 : 0;
|
||||
pCreateStbMsg->numOfColumns = htonl(numOfCols);
|
||||
pCreateStbMsg->numOfTags = htonl(numOfTags);
|
||||
|
||||
pSchema = (SSchema*)pCreateStbMsg->pSchemas;
|
||||
for (int i = 0; i < numOfCols; ++i) {
|
||||
SField* pField = taosArrayGet(pCreateTableSql->colInfo.pColumns, i);
|
||||
pSchema->type = pField->type;
|
||||
pSchema->bytes = htonl(pField->bytes);
|
||||
strcpy(pSchema->name, pField->name);
|
||||
|
||||
pSchema++;
|
||||
int32_t tlen = tSerializeSMCreateStbReq(NULL, &createReq);
|
||||
void* req = malloc(tlen);
|
||||
if (req == NULL) {
|
||||
terrno = TSDB_CODE_OUT_OF_MEMORY;
|
||||
return NULL;
|
||||
}
|
||||
|
||||
for(int32_t i = 0; i < numOfTags; ++i) {
|
||||
SField* pField = taosArrayGet(pCreateTableSql->colInfo.pTagColumns, i);
|
||||
pSchema->type = pField->type;
|
||||
pSchema->bytes = htonl(pField->bytes);
|
||||
strcpy(pSchema->name, pField->name);
|
||||
|
||||
pSchema++;
|
||||
}
|
||||
|
||||
pMsg = (char*)pSchema;
|
||||
|
||||
int32_t msgLen = (int32_t)(pMsg - (char*)pCreateStbMsg);
|
||||
*len = msgLen;
|
||||
|
||||
return pCreateStbMsg;
|
||||
void* buf = req;
|
||||
tSerializeSMCreateStbReq(&buf, &createReq);
|
||||
*len = tlen;
|
||||
return req;
|
||||
}
|
||||
|
||||
SMDropStbReq* buildDropStableMsg(SSqlInfo* pInfo, int32_t* len, SParseContext* pParseCtx, SMsgBuf* pMsgBuf) {
|
||||
char* buildDropStableReq(SSqlInfo* pInfo, int32_t* len, SParseContext* pParseCtx, SMsgBuf* pMsgBuf) {
|
||||
SToken* tableName = taosArrayGet(pInfo->pMiscInfo->a, 0);
|
||||
|
||||
SName name = {0};
|
||||
|
@ -347,14 +290,23 @@ SMDropStbReq* buildDropStableMsg(SSqlInfo* pInfo, int32_t* len, SParseContext* p
|
|||
return NULL;
|
||||
}
|
||||
|
||||
SMDropStbReq *pDropTableMsg = (SMDropStbReq*) calloc(1, sizeof(SMDropStbReq));
|
||||
SMDropStbReq dropReq = {0};
|
||||
code = tNameExtractFullName(&name, dropReq.name);
|
||||
|
||||
code = tNameExtractFullName(&name, pDropTableMsg->name);
|
||||
assert(code == TSDB_CODE_SUCCESS && name.type == TSDB_TABLE_NAME_T);
|
||||
dropReq.igNotExists = pInfo->pMiscInfo->existsCheck ? 1 : 0;
|
||||
|
||||
pDropTableMsg->igNotExists = pInfo->pMiscInfo->existsCheck ? 1 : 0;
|
||||
*len = sizeof(SMDropStbReq);
|
||||
return pDropTableMsg;
|
||||
int32_t tlen = tSerializeSMDropStbReq(NULL, &dropReq);
|
||||
void* req = malloc(tlen);
|
||||
if (req == NULL) {
|
||||
terrno = TSDB_CODE_OUT_OF_MEMORY;
|
||||
return NULL;
|
||||
}
|
||||
|
||||
void* buf = req;
|
||||
tSerializeSMDropStbReq(&buf, &dropReq);
|
||||
*len = tlen;
|
||||
return req;
|
||||
}
|
||||
|
||||
SCreateDnodeReq *buildCreateDnodeMsg(SSqlInfo* pInfo, int32_t* len, SMsgBuf* pMsgBuf) {
|
||||
|
|
|
@ -924,13 +924,13 @@ SDclStmtInfo* qParserValidateDclSqlNode(SSqlInfo* pInfo, SParseContext* pCtx, ch
|
|||
goto _error;
|
||||
}
|
||||
|
||||
pDcl->pMsg = (char*)buildCreateStbMsg(pCreateTable, &pDcl->msgLen, pCtx, pMsgBuf);
|
||||
pDcl->pMsg = buildCreateStbReq(pCreateTable, &pDcl->msgLen, pCtx, pMsgBuf);
|
||||
pDcl->msgType = TDMT_MND_CREATE_STB;
|
||||
break;
|
||||
}
|
||||
|
||||
case TSDB_SQL_DROP_TABLE: {
|
||||
pDcl->pMsg = (char*)buildDropStableMsg(pInfo, &pDcl->msgLen, pCtx, pMsgBuf);
|
||||
pDcl->pMsg = buildDropStableReq(pInfo, &pDcl->msgLen, pCtx, pMsgBuf);
|
||||
if (pDcl->pMsg == NULL) {
|
||||
goto _error;
|
||||
}
|
||||
|
|
|
@ -1517,23 +1517,24 @@ static YYACTIONTYPE yy_reduce(
|
|||
{ PARSER_TRACE; pCxt->pRootNode = yymsp[0].minor.yy168; }
|
||||
break;
|
||||
case 2: /* literal ::= NK_INTEGER */
|
||||
{ PARSER_TRACE; yylhsminor.yy168 = createValueNode(pCxt, TSDB_DATA_TYPE_BIGINT, &yymsp[0].minor.yy0); }
|
||||
{ PARSER_TRACE; yylhsminor.yy168 = createRawExprNode(pCxt, &yymsp[0].minor.yy0, createValueNode(pCxt, TSDB_DATA_TYPE_BIGINT, &yymsp[0].minor.yy0)); }
|
||||
yymsp[0].minor.yy168 = yylhsminor.yy168;
|
||||
break;
|
||||
case 3: /* literal ::= NK_FLOAT */
|
||||
{ PARSER_TRACE; yylhsminor.yy168 = createValueNode(pCxt, TSDB_DATA_TYPE_DOUBLE, &yymsp[0].minor.yy0); }
|
||||
{ PARSER_TRACE; yylhsminor.yy168 = createRawExprNode(pCxt, &yymsp[0].minor.yy0, createValueNode(pCxt, TSDB_DATA_TYPE_DOUBLE, &yymsp[0].minor.yy0)); }
|
||||
yymsp[0].minor.yy168 = yylhsminor.yy168;
|
||||
break;
|
||||
case 4: /* literal ::= NK_STRING */
|
||||
{ PARSER_TRACE; yylhsminor.yy168 = createValueNode(pCxt, TSDB_DATA_TYPE_BINARY, &yymsp[0].minor.yy0); }
|
||||
{ PARSER_TRACE; yylhsminor.yy168 = createRawExprNode(pCxt, &yymsp[0].minor.yy0, createValueNode(pCxt, TSDB_DATA_TYPE_BINARY, &yymsp[0].minor.yy0)); }
|
||||
yymsp[0].minor.yy168 = yylhsminor.yy168;
|
||||
break;
|
||||
case 5: /* literal ::= NK_BOOL */
|
||||
{ PARSER_TRACE; yylhsminor.yy168 = createValueNode(pCxt, TSDB_DATA_TYPE_BOOL, &yymsp[0].minor.yy0); }
|
||||
{ PARSER_TRACE; yylhsminor.yy168 = createRawExprNode(pCxt, &yymsp[0].minor.yy0, createValueNode(pCxt, TSDB_DATA_TYPE_BOOL, &yymsp[0].minor.yy0)); }
|
||||
yymsp[0].minor.yy168 = yylhsminor.yy168;
|
||||
break;
|
||||
case 6: /* literal ::= TIMESTAMP NK_STRING */
|
||||
{ PARSER_TRACE; yymsp[-1].minor.yy168 = createValueNode(pCxt, TSDB_DATA_TYPE_TIMESTAMP, &yymsp[0].minor.yy0); }
|
||||
{ PARSER_TRACE; yylhsminor.yy168 = createRawExprNodeExt(pCxt, &yymsp[-1].minor.yy0, &yymsp[0].minor.yy0, createValueNode(pCxt, TSDB_DATA_TYPE_TIMESTAMP, &yymsp[0].minor.yy0)); }
|
||||
yymsp[-1].minor.yy168 = yylhsminor.yy168;
|
||||
break;
|
||||
case 7: /* literal ::= duration_literal */
|
||||
case 17: /* expression ::= literal */ yytestcase(yyruleno==17);
|
||||
|
@ -1545,7 +1546,6 @@ static YYACTIONTYPE yy_reduce(
|
|||
case 61: /* table_reference ::= table_primary */ yytestcase(yyruleno==61);
|
||||
case 62: /* table_reference ::= joined_table */ yytestcase(yyruleno==62);
|
||||
case 66: /* table_primary ::= parenthesized_joined_table */ yytestcase(yyruleno==66);
|
||||
case 82: /* select_item ::= expression */ yytestcase(yyruleno==82);
|
||||
case 110: /* query_expression_body ::= query_primary */ yytestcase(yyruleno==110);
|
||||
case 112: /* query_primary ::= query_specification */ yytestcase(yyruleno==112);
|
||||
case 124: /* search_condition ::= boolean_value_expression */ yytestcase(yyruleno==124);
|
||||
|
@ -1553,21 +1553,17 @@ static YYACTIONTYPE yy_reduce(
|
|||
yymsp[0].minor.yy168 = yylhsminor.yy168;
|
||||
break;
|
||||
case 8: /* duration_literal ::= NK_VARIABLE */
|
||||
{ PARSER_TRACE; yylhsminor.yy168 = createDurationValueNode(pCxt, &yymsp[0].minor.yy0); }
|
||||
{ PARSER_TRACE; yylhsminor.yy168 = createRawExprNode(pCxt, &yymsp[0].minor.yy0, createDurationValueNode(pCxt, &yymsp[0].minor.yy0)); }
|
||||
yymsp[0].minor.yy168 = yylhsminor.yy168;
|
||||
break;
|
||||
case 9: /* literal_list ::= literal */
|
||||
case 29: /* expression_list ::= expression */ yytestcase(yyruleno==29);
|
||||
case 80: /* select_sublist ::= select_item */ yytestcase(yyruleno==80);
|
||||
case 125: /* sort_specification_list ::= sort_specification */ yytestcase(yyruleno==125);
|
||||
{ PARSER_TRACE; yylhsminor.yy192 = createNodeList(pCxt, yymsp[0].minor.yy168); }
|
||||
{ PARSER_TRACE; yylhsminor.yy192 = createNodeList(pCxt, releaseRawExprNode(pCxt, yymsp[0].minor.yy168)); }
|
||||
yymsp[0].minor.yy192 = yylhsminor.yy192;
|
||||
break;
|
||||
case 10: /* literal_list ::= literal_list NK_COMMA literal */
|
||||
case 30: /* expression_list ::= expression_list NK_COMMA expression */ yytestcase(yyruleno==30);
|
||||
case 81: /* select_sublist ::= select_sublist NK_COMMA select_item */ yytestcase(yyruleno==81);
|
||||
case 126: /* sort_specification_list ::= sort_specification_list NK_COMMA sort_specification */ yytestcase(yyruleno==126);
|
||||
{ PARSER_TRACE; yylhsminor.yy192 = addNodeToList(pCxt, yymsp[-2].minor.yy192, yymsp[0].minor.yy168); }
|
||||
{ PARSER_TRACE; yylhsminor.yy192 = addNodeToList(pCxt, yymsp[-2].minor.yy192, releaseRawExprNode(pCxt, yymsp[0].minor.yy168)); }
|
||||
yymsp[-2].minor.yy192 = yylhsminor.yy192;
|
||||
break;
|
||||
case 11: /* db_name ::= NK_ID */
|
||||
|
@ -1580,74 +1576,106 @@ static YYACTIONTYPE yy_reduce(
|
|||
yymsp[0].minor.yy241 = yylhsminor.yy241;
|
||||
break;
|
||||
case 19: /* expression ::= function_name NK_LP expression_list NK_RP */
|
||||
{ PARSER_TRACE; yylhsminor.yy168 = createFunctionNode(pCxt, &yymsp[-3].minor.yy241, yymsp[-1].minor.yy192); }
|
||||
{ PARSER_TRACE; yylhsminor.yy168 = createRawExprNodeExt(pCxt, &yymsp[-3].minor.yy241, &yymsp[0].minor.yy0, createFunctionNode(pCxt, &yymsp[-3].minor.yy241, yymsp[-1].minor.yy192)); }
|
||||
yymsp[-3].minor.yy168 = yylhsminor.yy168;
|
||||
break;
|
||||
case 21: /* expression ::= NK_LP expression NK_RP */
|
||||
case 57: /* boolean_primary ::= NK_LP boolean_value_expression NK_RP */ yytestcase(yyruleno==57);
|
||||
case 70: /* parenthesized_joined_table ::= NK_LP joined_table NK_RP */ yytestcase(yyruleno==70);
|
||||
case 71: /* parenthesized_joined_table ::= NK_LP parenthesized_joined_table NK_RP */ yytestcase(yyruleno==71);
|
||||
case 123: /* subquery ::= NK_LP query_expression NK_RP */ yytestcase(yyruleno==123);
|
||||
{ PARSER_TRACE; yymsp[-2].minor.yy168 = yymsp[-1].minor.yy168; }
|
||||
{ PARSER_TRACE; yylhsminor.yy168 = createRawExprNodeExt(pCxt, &yymsp[-2].minor.yy0, &yymsp[0].minor.yy0, releaseRawExprNode(pCxt, yymsp[-1].minor.yy168)); }
|
||||
yymsp[-2].minor.yy168 = yylhsminor.yy168;
|
||||
break;
|
||||
case 22: /* expression ::= NK_PLUS expression */
|
||||
case 58: /* from_clause ::= FROM table_reference_list */ yytestcase(yyruleno==58);
|
||||
case 87: /* where_clause_opt ::= WHERE search_condition */ yytestcase(yyruleno==87);
|
||||
case 108: /* having_clause_opt ::= HAVING search_condition */ yytestcase(yyruleno==108);
|
||||
{ PARSER_TRACE; yymsp[-1].minor.yy168 = yymsp[0].minor.yy168; }
|
||||
{
|
||||
PARSER_TRACE;
|
||||
SToken t = getTokenFromRawExprNode(pCxt, yymsp[0].minor.yy168);
|
||||
yylhsminor.yy168 = createRawExprNodeExt(pCxt, &yymsp[-1].minor.yy0, &t, releaseRawExprNode(pCxt, yymsp[0].minor.yy168));
|
||||
}
|
||||
yymsp[-1].minor.yy168 = yylhsminor.yy168;
|
||||
break;
|
||||
case 23: /* expression ::= NK_MINUS expression */
|
||||
{ PARSER_TRACE; yymsp[-1].minor.yy168 = createOperatorNode(pCxt, OP_TYPE_SUB, yymsp[0].minor.yy168, NULL); }
|
||||
{
|
||||
PARSER_TRACE;
|
||||
SToken t = getTokenFromRawExprNode(pCxt, yymsp[0].minor.yy168);
|
||||
yylhsminor.yy168 = createRawExprNodeExt(pCxt, &yymsp[-1].minor.yy0, &t, createOperatorNode(pCxt, OP_TYPE_SUB, releaseRawExprNode(pCxt, yymsp[0].minor.yy168), NULL));
|
||||
}
|
||||
yymsp[-1].minor.yy168 = yylhsminor.yy168;
|
||||
break;
|
||||
case 24: /* expression ::= expression NK_PLUS expression */
|
||||
{ PARSER_TRACE; yylhsminor.yy168 = createOperatorNode(pCxt, OP_TYPE_ADD, yymsp[-2].minor.yy168, yymsp[0].minor.yy168); }
|
||||
{
|
||||
PARSER_TRACE;
|
||||
SToken s = getTokenFromRawExprNode(pCxt, yymsp[-2].minor.yy168);
|
||||
SToken e = getTokenFromRawExprNode(pCxt, yymsp[0].minor.yy168);
|
||||
yylhsminor.yy168 = createRawExprNodeExt(pCxt, &s, &e, createOperatorNode(pCxt, OP_TYPE_ADD, releaseRawExprNode(pCxt, yymsp[-2].minor.yy168), releaseRawExprNode(pCxt, yymsp[0].minor.yy168)));
|
||||
}
|
||||
yymsp[-2].minor.yy168 = yylhsminor.yy168;
|
||||
break;
|
||||
case 25: /* expression ::= expression NK_MINUS expression */
|
||||
{ PARSER_TRACE; yylhsminor.yy168 = createOperatorNode(pCxt, OP_TYPE_SUB, yymsp[-2].minor.yy168, yymsp[0].minor.yy168); }
|
||||
{
|
||||
PARSER_TRACE;
|
||||
SToken s = getTokenFromRawExprNode(pCxt, yymsp[-2].minor.yy168);
|
||||
SToken e = getTokenFromRawExprNode(pCxt, yymsp[0].minor.yy168);
|
||||
yylhsminor.yy168 = createRawExprNodeExt(pCxt, &s, &e, createOperatorNode(pCxt, OP_TYPE_SUB, releaseRawExprNode(pCxt, yymsp[-2].minor.yy168), releaseRawExprNode(pCxt, yymsp[0].minor.yy168)));
|
||||
}
|
||||
yymsp[-2].minor.yy168 = yylhsminor.yy168;
|
||||
break;
|
||||
case 26: /* expression ::= expression NK_STAR expression */
|
||||
{ PARSER_TRACE; yylhsminor.yy168 = createOperatorNode(pCxt, OP_TYPE_MULTI, yymsp[-2].minor.yy168, yymsp[0].minor.yy168); }
|
||||
{
|
||||
PARSER_TRACE;
|
||||
SToken s = getTokenFromRawExprNode(pCxt, yymsp[-2].minor.yy168);
|
||||
SToken e = getTokenFromRawExprNode(pCxt, yymsp[0].minor.yy168);
|
||||
yylhsminor.yy168 = createRawExprNodeExt(pCxt, &s, &e, createOperatorNode(pCxt, OP_TYPE_MULTI, releaseRawExprNode(pCxt, yymsp[-2].minor.yy168), releaseRawExprNode(pCxt, yymsp[0].minor.yy168)));
|
||||
}
|
||||
yymsp[-2].minor.yy168 = yylhsminor.yy168;
|
||||
break;
|
||||
case 27: /* expression ::= expression NK_SLASH expression */
|
||||
{ PARSER_TRACE; yylhsminor.yy168 = createOperatorNode(pCxt, OP_TYPE_DIV, yymsp[-2].minor.yy168, yymsp[0].minor.yy168); }
|
||||
{
|
||||
PARSER_TRACE;
|
||||
SToken s = getTokenFromRawExprNode(pCxt, yymsp[-2].minor.yy168);
|
||||
SToken e = getTokenFromRawExprNode(pCxt, yymsp[0].minor.yy168);
|
||||
yylhsminor.yy168 = createRawExprNodeExt(pCxt, &s, &e, createOperatorNode(pCxt, OP_TYPE_DIV, releaseRawExprNode(pCxt, yymsp[-2].minor.yy168), releaseRawExprNode(pCxt, yymsp[0].minor.yy168)));
|
||||
}
|
||||
yymsp[-2].minor.yy168 = yylhsminor.yy168;
|
||||
break;
|
||||
case 28: /* expression ::= expression NK_REM expression */
|
||||
{ PARSER_TRACE; yylhsminor.yy168 = createOperatorNode(pCxt, OP_TYPE_MOD, yymsp[-2].minor.yy168, yymsp[0].minor.yy168); }
|
||||
{
|
||||
PARSER_TRACE;
|
||||
SToken s = getTokenFromRawExprNode(pCxt, yymsp[-2].minor.yy168);
|
||||
SToken e = getTokenFromRawExprNode(pCxt, yymsp[0].minor.yy168);
|
||||
yylhsminor.yy168 = createRawExprNodeExt(pCxt, &s, &e, createOperatorNode(pCxt, OP_TYPE_MOD, releaseRawExprNode(pCxt, yymsp[-2].minor.yy168), releaseRawExprNode(pCxt, yymsp[0].minor.yy168)));
|
||||
}
|
||||
yymsp[-2].minor.yy168 = yylhsminor.yy168;
|
||||
break;
|
||||
case 31: /* column_reference ::= column_name */
|
||||
{ PARSER_TRACE; yylhsminor.yy168 = createColumnNode(pCxt, NULL, &yymsp[0].minor.yy241); }
|
||||
{ PARSER_TRACE; yylhsminor.yy168 = createRawExprNode(pCxt, &yymsp[0].minor.yy241, createColumnNode(pCxt, NULL, &yymsp[0].minor.yy241)); }
|
||||
yymsp[0].minor.yy168 = yylhsminor.yy168;
|
||||
break;
|
||||
case 32: /* column_reference ::= table_name NK_DOT column_name */
|
||||
{ PARSER_TRACE; yylhsminor.yy168 = createColumnNode(pCxt, &yymsp[-2].minor.yy241, &yymsp[0].minor.yy241); }
|
||||
{ PARSER_TRACE; yylhsminor.yy168 = createRawExprNodeExt(pCxt, &yymsp[-2].minor.yy241, &yymsp[0].minor.yy241, createColumnNode(pCxt, &yymsp[-2].minor.yy241, &yymsp[0].minor.yy241)); }
|
||||
yymsp[-2].minor.yy168 = yylhsminor.yy168;
|
||||
break;
|
||||
case 33: /* predicate ::= expression compare_op expression */
|
||||
case 38: /* predicate ::= expression in_op in_predicate_value */ yytestcase(yyruleno==38);
|
||||
{ PARSER_TRACE; yylhsminor.yy168 = createOperatorNode(pCxt, yymsp[-1].minor.yy228, yymsp[-2].minor.yy168, yymsp[0].minor.yy168); }
|
||||
{ PARSER_TRACE; yylhsminor.yy168 = createOperatorNode(pCxt, yymsp[-1].minor.yy228, releaseRawExprNode(pCxt, yymsp[-2].minor.yy168), releaseRawExprNode(pCxt, yymsp[0].minor.yy168)); }
|
||||
yymsp[-2].minor.yy168 = yylhsminor.yy168;
|
||||
break;
|
||||
case 34: /* predicate ::= expression BETWEEN expression AND expression */
|
||||
{ PARSER_TRACE; yylhsminor.yy168 = createBetweenAnd(pCxt, yymsp[-4].minor.yy168, yymsp[-2].minor.yy168, yymsp[0].minor.yy168); }
|
||||
{ PARSER_TRACE; yylhsminor.yy168 = createBetweenAnd(pCxt, releaseRawExprNode(pCxt, yymsp[-4].minor.yy168), releaseRawExprNode(pCxt, yymsp[-2].minor.yy168), releaseRawExprNode(pCxt, yymsp[0].minor.yy168)); }
|
||||
yymsp[-4].minor.yy168 = yylhsminor.yy168;
|
||||
break;
|
||||
case 35: /* predicate ::= expression NOT BETWEEN expression AND expression */
|
||||
{ PARSER_TRACE; yylhsminor.yy168 = createNotBetweenAnd(pCxt, yymsp[-2].minor.yy168, yymsp[-5].minor.yy168, yymsp[0].minor.yy168); }
|
||||
{ PARSER_TRACE; yylhsminor.yy168 = createNotBetweenAnd(pCxt, releaseRawExprNode(pCxt, yymsp[-2].minor.yy168), releaseRawExprNode(pCxt, yymsp[-5].minor.yy168), releaseRawExprNode(pCxt, yymsp[0].minor.yy168)); }
|
||||
yymsp[-5].minor.yy168 = yylhsminor.yy168;
|
||||
break;
|
||||
case 36: /* predicate ::= expression IS NULL */
|
||||
{ PARSER_TRACE; yylhsminor.yy168 = createIsNullCondNode(pCxt, yymsp[-2].minor.yy168, true); }
|
||||
{ PARSER_TRACE; yylhsminor.yy168 = createIsNullCondNode(pCxt, releaseRawExprNode(pCxt, yymsp[-2].minor.yy168), true); }
|
||||
yymsp[-2].minor.yy168 = yylhsminor.yy168;
|
||||
break;
|
||||
case 37: /* predicate ::= expression IS NOT NULL */
|
||||
{ PARSER_TRACE; yylhsminor.yy168 = createIsNullCondNode(pCxt, yymsp[-3].minor.yy168, false); }
|
||||
{ PARSER_TRACE; yylhsminor.yy168 = createIsNullCondNode(pCxt, releaseRawExprNode(pCxt, yymsp[-3].minor.yy168), false); }
|
||||
yymsp[-3].minor.yy168 = yylhsminor.yy168;
|
||||
break;
|
||||
case 38: /* predicate ::= expression in_op in_predicate_value */
|
||||
{ PARSER_TRACE; yylhsminor.yy168 = createOperatorNode(pCxt, yymsp[-1].minor.yy228, releaseRawExprNode(pCxt, yymsp[-2].minor.yy168), yymsp[0].minor.yy168); }
|
||||
yymsp[-2].minor.yy168 = yylhsminor.yy168;
|
||||
break;
|
||||
case 39: /* compare_op ::= NK_LT */
|
||||
{ PARSER_TRACE; yymsp[0].minor.yy228 = OP_TYPE_LOWER_THAN; }
|
||||
break;
|
||||
|
@ -1698,6 +1726,16 @@ static YYACTIONTYPE yy_reduce(
|
|||
{ PARSER_TRACE; yylhsminor.yy168 = createLogicConditionNode(pCxt, LOGIC_COND_TYPE_AND, yymsp[-2].minor.yy168, yymsp[0].minor.yy168); }
|
||||
yymsp[-2].minor.yy168 = yylhsminor.yy168;
|
||||
break;
|
||||
case 57: /* boolean_primary ::= NK_LP boolean_value_expression NK_RP */
|
||||
case 70: /* parenthesized_joined_table ::= NK_LP joined_table NK_RP */ yytestcase(yyruleno==70);
|
||||
case 71: /* parenthesized_joined_table ::= NK_LP parenthesized_joined_table NK_RP */ yytestcase(yyruleno==71);
|
||||
{ PARSER_TRACE; yymsp[-2].minor.yy168 = yymsp[-1].minor.yy168; }
|
||||
break;
|
||||
case 58: /* from_clause ::= FROM table_reference_list */
|
||||
case 87: /* where_clause_opt ::= WHERE search_condition */ yytestcase(yyruleno==87);
|
||||
case 108: /* having_clause_opt ::= HAVING search_condition */ yytestcase(yyruleno==108);
|
||||
{ PARSER_TRACE; yymsp[-1].minor.yy168 = yymsp[0].minor.yy168; }
|
||||
break;
|
||||
case 60: /* table_reference_list ::= table_reference_list NK_COMMA table_reference */
|
||||
{ PARSER_TRACE; yylhsminor.yy168 = createJoinTableNode(pCxt, JOIN_TYPE_INNER, yymsp[-2].minor.yy168, yymsp[0].minor.yy168, NULL); }
|
||||
yymsp[-2].minor.yy168 = yylhsminor.yy168;
|
||||
|
@ -1711,7 +1749,7 @@ static YYACTIONTYPE yy_reduce(
|
|||
yymsp[-3].minor.yy168 = yylhsminor.yy168;
|
||||
break;
|
||||
case 65: /* table_primary ::= subquery alias_opt */
|
||||
{ PARSER_TRACE; yylhsminor.yy168 = createTempTableNode(pCxt, yymsp[-1].minor.yy168, &yymsp[0].minor.yy241); }
|
||||
{ PARSER_TRACE; yylhsminor.yy168 = createTempTableNode(pCxt, releaseRawExprNode(pCxt, yymsp[-1].minor.yy168), &yymsp[0].minor.yy241); }
|
||||
yymsp[-1].minor.yy168 = yylhsminor.yy168;
|
||||
break;
|
||||
case 67: /* alias_opt ::= */
|
||||
|
@ -1758,12 +1796,30 @@ static YYACTIONTYPE yy_reduce(
|
|||
{ PARSER_TRACE; yylhsminor.yy192 = yymsp[0].minor.yy192; }
|
||||
yymsp[0].minor.yy192 = yylhsminor.yy192;
|
||||
break;
|
||||
case 80: /* select_sublist ::= select_item */
|
||||
case 125: /* sort_specification_list ::= sort_specification */ yytestcase(yyruleno==125);
|
||||
{ PARSER_TRACE; yylhsminor.yy192 = createNodeList(pCxt, yymsp[0].minor.yy168); }
|
||||
yymsp[0].minor.yy192 = yylhsminor.yy192;
|
||||
break;
|
||||
case 81: /* select_sublist ::= select_sublist NK_COMMA select_item */
|
||||
case 126: /* sort_specification_list ::= sort_specification_list NK_COMMA sort_specification */ yytestcase(yyruleno==126);
|
||||
{ PARSER_TRACE; yylhsminor.yy192 = addNodeToList(pCxt, yymsp[-2].minor.yy192, yymsp[0].minor.yy168); }
|
||||
yymsp[-2].minor.yy192 = yylhsminor.yy192;
|
||||
break;
|
||||
case 82: /* select_item ::= expression */
|
||||
{
|
||||
PARSER_TRACE;
|
||||
SToken t = getTokenFromRawExprNode(pCxt, yymsp[0].minor.yy168);
|
||||
yylhsminor.yy168 = setProjectionAlias(pCxt, releaseRawExprNode(pCxt, yymsp[0].minor.yy168), &t);
|
||||
}
|
||||
yymsp[0].minor.yy168 = yylhsminor.yy168;
|
||||
break;
|
||||
case 83: /* select_item ::= expression column_alias */
|
||||
{ PARSER_TRACE; yylhsminor.yy168 = setProjectionAlias(pCxt, yymsp[-1].minor.yy168, &yymsp[0].minor.yy241); }
|
||||
{ PARSER_TRACE; yylhsminor.yy168 = setProjectionAlias(pCxt, releaseRawExprNode(pCxt, yymsp[-1].minor.yy168), &yymsp[0].minor.yy241); }
|
||||
yymsp[-1].minor.yy168 = yylhsminor.yy168;
|
||||
break;
|
||||
case 84: /* select_item ::= expression AS column_alias */
|
||||
{ PARSER_TRACE; yylhsminor.yy168 = setProjectionAlias(pCxt, yymsp[-2].minor.yy168, &yymsp[0].minor.yy241); }
|
||||
{ PARSER_TRACE; yylhsminor.yy168 = setProjectionAlias(pCxt, releaseRawExprNode(pCxt, yymsp[-2].minor.yy168), &yymsp[0].minor.yy241); }
|
||||
yymsp[-2].minor.yy168 = yylhsminor.yy168;
|
||||
break;
|
||||
case 85: /* select_item ::= table_name NK_DOT NK_STAR */
|
||||
|
@ -1790,10 +1846,10 @@ static YYACTIONTYPE yy_reduce(
|
|||
{ PARSER_TRACE; yymsp[-2].minor.yy192 = yymsp[0].minor.yy192; }
|
||||
break;
|
||||
case 91: /* twindow_clause_opt ::= SESSION NK_LP column_reference NK_COMMA NK_INTEGER NK_RP */
|
||||
{ PARSER_TRACE; yymsp[-5].minor.yy168 = createSessionWindowNode(pCxt, yymsp[-3].minor.yy168, &yymsp[-1].minor.yy0); }
|
||||
{ PARSER_TRACE; yymsp[-5].minor.yy168 = createSessionWindowNode(pCxt, releaseRawExprNode(pCxt, yymsp[-3].minor.yy168), &yymsp[-1].minor.yy0); }
|
||||
break;
|
||||
case 92: /* twindow_clause_opt ::= STATE_WINDOW NK_LP column_reference NK_RP */
|
||||
{ PARSER_TRACE; yymsp[-3].minor.yy168 = createStateWindowNode(pCxt, yymsp[-1].minor.yy168); }
|
||||
{ PARSER_TRACE; yymsp[-3].minor.yy168 = createStateWindowNode(pCxt, releaseRawExprNode(pCxt, yymsp[-1].minor.yy168)); }
|
||||
break;
|
||||
case 93: /* twindow_clause_opt ::= INTERVAL NK_LP duration_literal NK_RP sliding_opt fill_opt */
|
||||
{ PARSER_TRACE; yymsp[-5].minor.yy168 = createIntervalWindowNode(pCxt, yymsp[-3].minor.yy168, NULL, yymsp[-1].minor.yy168, yymsp[0].minor.yy168); }
|
||||
|
@ -1850,8 +1906,12 @@ static YYACTIONTYPE yy_reduce(
|
|||
case 122: /* limit_clause_opt ::= LIMIT NK_INTEGER NK_COMMA NK_INTEGER */ yytestcase(yyruleno==122);
|
||||
{ PARSER_TRACE; yymsp[-3].minor.yy168 = createLimitNode(pCxt, &yymsp[0].minor.yy0, &yymsp[-2].minor.yy0); }
|
||||
break;
|
||||
case 123: /* subquery ::= NK_LP query_expression NK_RP */
|
||||
{ PARSER_TRACE; yylhsminor.yy168 = createRawExprNodeExt(pCxt, &yymsp[-2].minor.yy0, &yymsp[0].minor.yy0, yymsp[-1].minor.yy168); }
|
||||
yymsp[-2].minor.yy168 = yylhsminor.yy168;
|
||||
break;
|
||||
case 127: /* sort_specification ::= expression ordering_specification_opt null_ordering_opt */
|
||||
{ PARSER_TRACE; yylhsminor.yy168 = createOrderByExprNode(pCxt, yymsp[-2].minor.yy168, yymsp[-1].minor.yy10, yymsp[0].minor.yy177); }
|
||||
{ PARSER_TRACE; yylhsminor.yy168 = createOrderByExprNode(pCxt, releaseRawExprNode(pCxt, yymsp[-2].minor.yy168), yymsp[-1].minor.yy10, yymsp[0].minor.yy177); }
|
||||
yymsp[-2].minor.yy168 = yylhsminor.yy168;
|
||||
break;
|
||||
case 128: /* ordering_specification_opt ::= */
|
||||
|
|
|
@ -16,7 +16,10 @@
|
|||
#include "parserImpl.h"
|
||||
|
||||
#include "astCreateContext.h"
|
||||
#include "functionMgt.h"
|
||||
#include "parserInt.h"
|
||||
#include "tglobal.h"
|
||||
#include "ttime.h"
|
||||
#include "ttoken.h"
|
||||
|
||||
typedef void* (*FMalloc)(size_t);
|
||||
|
@ -240,6 +243,7 @@ typedef enum ESqlClause {
|
|||
|
||||
typedef struct STranslateContext {
|
||||
SParseContext* pParseCxt;
|
||||
FuncMgtHandle fmgt;
|
||||
int32_t errCode;
|
||||
SMsgBuf msgBuf;
|
||||
SArray* pNsLevel; // element is SArray*, the element of this subarray is STableNode*
|
||||
|
@ -251,21 +255,30 @@ static int32_t translateSubquery(STranslateContext* pCxt, SNode* pNode);
|
|||
|
||||
static char* getSyntaxErrFormat(int32_t errCode) {
|
||||
switch (errCode) {
|
||||
case TSDB_CODE_PARSER_INVALID_COLUMN:
|
||||
case TSDB_CODE_PAR_INVALID_COLUMN:
|
||||
return "Invalid column name : %s";
|
||||
case TSDB_CODE_PARSER_TABLE_NOT_EXIST:
|
||||
case TSDB_CODE_PAR_TABLE_NOT_EXIST:
|
||||
return "Table does not exist : %s";
|
||||
case TSDB_CODE_PARSER_AMBIGUOUS_COLUMN:
|
||||
case TSDB_CODE_PAR_AMBIGUOUS_COLUMN:
|
||||
return "Column ambiguously defined : %s";
|
||||
case TSDB_CODE_PARSER_WRONG_VALUE_TYPE:
|
||||
case TSDB_CODE_PAR_WRONG_VALUE_TYPE:
|
||||
return "Invalid value type : %s";
|
||||
case TSDB_CODE_PAR_FUNTION_PARA_NUM:
|
||||
return "Invalid number of arguments : %s";
|
||||
case TSDB_CODE_PAR_FUNTION_PARA_TYPE:
|
||||
return "Inconsistent datatypes : %s";
|
||||
case TSDB_CODE_PAR_ILLEGAL_USE_AGG_FUNCTION:
|
||||
return "There mustn't be aggregation";
|
||||
default:
|
||||
return "Unknown error";
|
||||
}
|
||||
}
|
||||
|
||||
static int32_t generateSyntaxErrMsg(STranslateContext* pCxt, int32_t errCode, const char* additionalInfo) {
|
||||
snprintf(pCxt->msgBuf.buf, pCxt->msgBuf.len, getSyntaxErrFormat(errCode), additionalInfo);
|
||||
static int32_t generateSyntaxErrMsg(STranslateContext* pCxt, int32_t errCode, ...) {
|
||||
va_list vArgList;
|
||||
va_start(vArgList, errCode);
|
||||
vsnprintf(pCxt->msgBuf.buf, pCxt->msgBuf.len, getSyntaxErrFormat(errCode), vArgList);
|
||||
va_end(vArgList);
|
||||
pCxt->errCode = errCode;
|
||||
return errCode;
|
||||
}
|
||||
|
@ -394,7 +407,7 @@ static bool translateColumnWithPrefix(STranslateContext* pCxt, SColumnNode* pCol
|
|||
if (findAndSetColumn(pCol, pTable)) {
|
||||
break;
|
||||
}
|
||||
generateSyntaxErrMsg(pCxt, TSDB_CODE_PARSER_INVALID_COLUMN, pCol->colName);
|
||||
generateSyntaxErrMsg(pCxt, TSDB_CODE_PAR_INVALID_COLUMN, pCol->colName);
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
@ -409,14 +422,14 @@ static bool translateColumnWithoutPrefix(STranslateContext* pCxt, SColumnNode* p
|
|||
STableNode* pTable = taosArrayGetP(pTables, i);
|
||||
if (findAndSetColumn(pCol, pTable)) {
|
||||
if (found) {
|
||||
generateSyntaxErrMsg(pCxt, TSDB_CODE_PARSER_AMBIGUOUS_COLUMN, pCol->colName);
|
||||
generateSyntaxErrMsg(pCxt, TSDB_CODE_PAR_AMBIGUOUS_COLUMN, pCol->colName);
|
||||
return false;
|
||||
}
|
||||
found = true;
|
||||
}
|
||||
}
|
||||
if (!found) {
|
||||
generateSyntaxErrMsg(pCxt, TSDB_CODE_PARSER_INVALID_COLUMN, pCol->colName);
|
||||
generateSyntaxErrMsg(pCxt, TSDB_CODE_PAR_INVALID_COLUMN, pCol->colName);
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
|
@ -429,8 +442,72 @@ static bool translateColumn(STranslateContext* pCxt, SColumnNode* pCol) {
|
|||
return translateColumnWithoutPrefix(pCxt, pCol);
|
||||
}
|
||||
|
||||
// check literal format
|
||||
/*
 * Copy the contents of a quoted literal into dst, dropping the surrounding
 * quote characters and collapsing escape sequences.
 *
 * src[0] is taken as the quote character. Between the quotes, a backslash, or
 * a quote character immediately followed by another quote character, is
 * dropped and the character after it is copied verbatim (so \\, \', \" and a
 * doubled quote each collapse to one character).
 *
 * src  quoted literal; bytes src[0] .. src[len - 1] may be read
 * len  length of src in bytes, including both quote characters
 * dst  output buffer; receives at most len - 2 characters plus a terminating
 *      NUL (exactly one NUL byte when len < 2)
 *
 * Returns the number of characters written to dst, excluding the NUL.
 */
static int32_t trimStringCopy(const char* src, int32_t len, char* dst) {
  int32_t j = 0;

  // A literal shorter than its two quote characters has no content. Checking
  // this up front (and using a signed index) keeps a zero or negative len from
  // being converted into a huge unsigned loop bound.
  if (len >= 2) {
    const char delim = src[0];
    for (int32_t k = 1; k < len - 1; ++k) {
      if (src[k] == '\\' || (src[k] == delim && src[k + 1] == delim)) {
        ++k;  // skip the escape marker, keep the character that follows it
      }
      dst[j++] = src[k];
    }
  }

  dst[j] = '\0';
  return j;
}
|
||||
|
||||
/*
 * Convert the literal text of a value node into its typed datum.
 *
 * Duration literals are parsed with parseAbsoluteDuration() into datum.i.
 * Otherwise the conversion is chosen by node.resType.type:
 *   BOOL       datum.b is true only for a case-insensitive "true"
 *   BIGINT     datum.i from strtoull(), base 10
 *   DOUBLE     datum.d from strtold()
 *   BINARY     datum.p is a newly allocated, unquoted copy of the literal
 *   TIMESTAMP  datum.u from taosParseTime() on the unquoted literal
 * NULL and every other type leave the datum untouched.
 *
 * Returns true on success. On failure an error is recorded in pCxt through
 * generateSyntaxErrMsg() and false is returned.
 */
static bool translateValue(STranslateContext* pCxt, SValueNode* pVal) {
  if (pVal->isDuration) {
    char unit = 0;
    if (parseAbsoluteDuration(pVal->literal, strlen(pVal->literal), &pVal->datum.i, &unit, pVal->node.resType.precision) != TSDB_CODE_SUCCESS) {
      generateSyntaxErrMsg(pCxt, TSDB_CODE_PAR_WRONG_VALUE_TYPE, pVal->literal);
      return false;
    }
    return true;
  }

  switch (pVal->node.resType.type) {
    case TSDB_DATA_TYPE_NULL:
      break;
    case TSDB_DATA_TYPE_BOOL:
      pVal->datum.b = (0 == strcasecmp(pVal->literal, "true"));
      break;
    case TSDB_DATA_TYPE_BIGINT:
      pVal->datum.i = strtoull(pVal->literal, NULL, 10);
      break;
    case TSDB_DATA_TYPE_DOUBLE:
      pVal->datum.d = strtold(pVal->literal, NULL);
      break;
    case TSDB_DATA_TYPE_BINARY: {
      int32_t n = strlen(pVal->literal);
      // n + 1 bytes: the unquoted text is never longer than the literal, and
      // the terminating NUL must fit even when the literal is empty.
      pVal->datum.p = calloc(1, n + 1);
      if (NULL == pVal->datum.p) {
        // NOTE(review): no out-of-memory code is visible from this function;
        // reported as an invalid value until a dedicated code is wired in.
        generateSyntaxErrMsg(pCxt, TSDB_CODE_PAR_WRONG_VALUE_TYPE, pVal->literal);
        return false;
      }
      trimStringCopy(pVal->literal, n, pVal->datum.p);
      break;
    }
    case TSDB_DATA_TYPE_TIMESTAMP: {
      int32_t n = strlen(pVal->literal);
      char*   tmp = calloc(1, n + 1);
      if (NULL == tmp) {
        // NOTE(review): same reporting caveat as the BINARY case above.
        generateSyntaxErrMsg(pCxt, TSDB_CODE_PAR_WRONG_VALUE_TYPE, pVal->literal);
        return false;
      }
      int32_t len = trimStringCopy(pVal->literal, n, tmp);
      int32_t code = taosParseTime(tmp, &pVal->datum.u, len, pVal->node.resType.precision, tsDaylight);
      // The scratch copy is only needed for parsing; release it on both paths.
      tfree(tmp);
      if (code != TSDB_CODE_SUCCESS) {
        generateSyntaxErrMsg(pCxt, TSDB_CODE_PAR_WRONG_VALUE_TYPE, pVal->literal);
        return false;
      }
      break;
    }
    default:
      break;
  }

  return true;
}
|
||||
|
||||
|
@ -440,7 +517,7 @@ static bool translateOperator(STranslateContext* pCxt, SOperatorNode* pOp) {
|
|||
if (nodesIsArithmeticOp(pOp)) {
|
||||
if (TSDB_DATA_TYPE_JSON == ldt.type || TSDB_DATA_TYPE_BLOB == ldt.type ||
|
||||
TSDB_DATA_TYPE_JSON == rdt.type || TSDB_DATA_TYPE_BLOB == rdt.type) {
|
||||
generateSyntaxErrMsg(pCxt, TSDB_CODE_PARSER_WRONG_VALUE_TYPE, ((SExprNode*)(pOp->pRight))->aliasName);
|
||||
generateSyntaxErrMsg(pCxt, TSDB_CODE_PAR_WRONG_VALUE_TYPE, ((SExprNode*)(pOp->pRight))->aliasName);
|
||||
return false;
|
||||
}
|
||||
pOp->node.resType.type = TSDB_DATA_TYPE_DOUBLE;
|
||||
|
@ -449,7 +526,7 @@ static bool translateOperator(STranslateContext* pCxt, SOperatorNode* pOp) {
|
|||
} else if (nodesIsComparisonOp(pOp)) {
|
||||
if (TSDB_DATA_TYPE_JSON == ldt.type || TSDB_DATA_TYPE_BLOB == ldt.type ||
|
||||
TSDB_DATA_TYPE_JSON == rdt.type || TSDB_DATA_TYPE_BLOB == rdt.type) {
|
||||
generateSyntaxErrMsg(pCxt, TSDB_CODE_PARSER_WRONG_VALUE_TYPE, ((SExprNode*)(pOp->pRight))->aliasName);
|
||||
generateSyntaxErrMsg(pCxt, TSDB_CODE_PAR_WRONG_VALUE_TYPE, ((SExprNode*)(pOp->pRight))->aliasName);
|
||||
return false;
|
||||
}
|
||||
pOp->node.resType.type = TSDB_DATA_TYPE_BOOL;
|
||||
|
@ -463,6 +540,15 @@ static bool translateOperator(STranslateContext* pCxt, SOperatorNode* pOp) {
|
|||
}
|
||||
|
||||
/*
 * Resolve the result type of a function node and validate where it is used.
 *
 * Fails when fmGetFuncResultType() reports an error (that code is recorded
 * together with the function name), or when an aggregate function appears
 * while the FROM or WHERE clause is being translated.
 *
 * Returns true when the function is acceptable, false after recording an
 * error in pCxt.
 */
static bool translateFunction(STranslateContext* pCxt, SFunctionNode* pFunc) {
  const int32_t rc = fmGetFuncResultType(pCxt->fmgt, pFunc);
  if (rc != TSDB_CODE_SUCCESS) {
    generateSyntaxErrMsg(pCxt, rc, pFunc->functionName);
    return false;
  }

  // Everything that is not an aggregate is allowed in any clause.
  if (!fmIsAggFunc(pFunc->funcId)) {
    return true;
  }

  switch (pCxt->currClause) {
    case SQL_CLAUSE_FROM:
    case SQL_CLAUSE_WHERE:
      generateSyntaxErrMsg(pCxt, TSDB_CODE_PAR_ILLEGAL_USE_AGG_FUNCTION);
      return false;
    default:
      return true;
  }
}
|
||||
|
||||
|
@ -504,7 +590,7 @@ static int32_t translateTable(STranslateContext* pCxt, SNode* pTable) {
|
|||
code = catalogGetTableMeta(pCxt->pParseCxt->pCatalog, pCxt->pParseCxt->pTransporter, &(pCxt->pParseCxt->mgmtEpSet),
|
||||
toName(pCxt->pParseCxt->acctId, pRealTable, &name), &(pRealTable->pMeta));
|
||||
if (TSDB_CODE_SUCCESS != code) {
|
||||
return generateSyntaxErrMsg(pCxt, TSDB_CODE_PARSER_TABLE_NOT_EXIST, pRealTable->table.tableName);
|
||||
return generateSyntaxErrMsg(pCxt, TSDB_CODE_PAR_TABLE_NOT_EXIST, pRealTable->table.tableName);
|
||||
}
|
||||
code = addNamespace(pCxt, pRealTable);
|
||||
break;
|
||||
|
|
|
@ -3206,7 +3206,7 @@ static void yy_reduce(
|
|||
case 286: /* cmd ::= ALTER TABLE ids cpxName ADD TAG columnlist */
|
||||
{
|
||||
yymsp[-4].minor.yy0.n += yymsp[-3].minor.yy0.n;
|
||||
SAlterTableInfo* pAlterTable = tSetAlterTableInfo(&yymsp[-4].minor.yy0, yymsp[0].minor.yy165, NULL, TSDB_ALTER_TABLE_ADD_TAG_COLUMN, -1);
|
||||
SAlterTableInfo* pAlterTable = tSetAlterTableInfo(&yymsp[-4].minor.yy0, yymsp[0].minor.yy165, NULL, TSDB_ALTER_TABLE_ADD_TAG, -1);
|
||||
setSqlInfo(pInfo, pAlterTable, NULL, TSDB_SQL_ALTER_TABLE);
|
||||
}
|
||||
break;
|
||||
|
@ -3217,7 +3217,7 @@ static void yy_reduce(
|
|||
toTSDBType(yymsp[0].minor.yy0.type);
|
||||
SArray* A = tListItemAppendToken(NULL, &yymsp[0].minor.yy0, -1);
|
||||
|
||||
SAlterTableInfo* pAlterTable = tSetAlterTableInfo(&yymsp[-4].minor.yy0, NULL, A, TSDB_ALTER_TABLE_DROP_TAG_COLUMN, -1);
|
||||
SAlterTableInfo* pAlterTable = tSetAlterTableInfo(&yymsp[-4].minor.yy0, NULL, A, TSDB_ALTER_TABLE_DROP_TAG, -1);
|
||||
setSqlInfo(pInfo, pAlterTable, NULL, TSDB_SQL_ALTER_TABLE);
|
||||
}
|
||||
break;
|
||||
|
@ -3282,7 +3282,7 @@ static void yy_reduce(
|
|||
case 294: /* cmd ::= ALTER STABLE ids cpxName ADD TAG columnlist */
|
||||
{
|
||||
yymsp[-4].minor.yy0.n += yymsp[-3].minor.yy0.n;
|
||||
SAlterTableInfo* pAlterTable = tSetAlterTableInfo(&yymsp[-4].minor.yy0, yymsp[0].minor.yy165, NULL, TSDB_ALTER_TABLE_ADD_TAG_COLUMN, TSDB_SUPER_TABLE);
|
||||
SAlterTableInfo* pAlterTable = tSetAlterTableInfo(&yymsp[-4].minor.yy0, yymsp[0].minor.yy165, NULL, TSDB_ALTER_TABLE_ADD_TAG, TSDB_SUPER_TABLE);
|
||||
setSqlInfo(pInfo, pAlterTable, NULL, TSDB_SQL_ALTER_TABLE);
|
||||
}
|
||||
break;
|
||||
|
@ -3293,7 +3293,7 @@ static void yy_reduce(
|
|||
toTSDBType(yymsp[0].minor.yy0.type);
|
||||
SArray* A = tListItemAppendToken(NULL, &yymsp[0].minor.yy0, -1);
|
||||
|
||||
SAlterTableInfo* pAlterTable = tSetAlterTableInfo(&yymsp[-4].minor.yy0, NULL, A, TSDB_ALTER_TABLE_DROP_TAG_COLUMN, TSDB_SUPER_TABLE);
|
||||
SAlterTableInfo* pAlterTable = tSetAlterTableInfo(&yymsp[-4].minor.yy0, NULL, A, TSDB_ALTER_TABLE_DROP_TAG, TSDB_SUPER_TABLE);
|
||||
setSqlInfo(pInfo, pAlterTable, NULL, TSDB_SQL_ALTER_TABLE);
|
||||
}
|
||||
break;
|
||||
|
|
|
@ -118,6 +118,53 @@ private:
|
|||
return "Unknown Data Type " + to_string(dt.type);
|
||||
}
|
||||
|
||||
void valueNodeToStr(const SValueNode* pVal, string& str, bool isProject) {
|
||||
switch (pVal->node.resType.type) {
|
||||
case TSDB_DATA_TYPE_NULL:
|
||||
str.append("null");
|
||||
break;
|
||||
case TSDB_DATA_TYPE_BOOL:
|
||||
str.append(pVal->datum.b ? "true" : "false");
|
||||
break;
|
||||
case TSDB_DATA_TYPE_TINYINT:
|
||||
case TSDB_DATA_TYPE_SMALLINT:
|
||||
case TSDB_DATA_TYPE_INT:
|
||||
case TSDB_DATA_TYPE_BIGINT:
|
||||
str.append(to_string(pVal->datum.i));
|
||||
break;
|
||||
case TSDB_DATA_TYPE_FLOAT:
|
||||
case TSDB_DATA_TYPE_DOUBLE:
|
||||
str.append(to_string(pVal->datum.d));
|
||||
break;
|
||||
case TSDB_DATA_TYPE_BINARY:
|
||||
case TSDB_DATA_TYPE_NCHAR:
|
||||
case TSDB_DATA_TYPE_VARCHAR:
|
||||
case TSDB_DATA_TYPE_VARBINARY:
|
||||
str.append(pVal->datum.p);
|
||||
break;
|
||||
case TSDB_DATA_TYPE_TIMESTAMP:
|
||||
str.append(to_string(pVal->datum.u));
|
||||
break;
|
||||
case TSDB_DATA_TYPE_UTINYINT:
|
||||
case TSDB_DATA_TYPE_USMALLINT:
|
||||
case TSDB_DATA_TYPE_UINT:
|
||||
case TSDB_DATA_TYPE_UBIGINT:
|
||||
str.append(to_string(pVal->datum.u));
|
||||
break;
|
||||
case TSDB_DATA_TYPE_JSON:
|
||||
case TSDB_DATA_TYPE_DECIMAL:
|
||||
case TSDB_DATA_TYPE_BLOB:
|
||||
str.append("JSON or DECIMAL or BLOB");
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
str.append(" [" + dataTypeToStr(pVal->node.resType) + "]");
|
||||
if (isProject) {
|
||||
str.append(" AS " + string(pVal->node.aliasName));
|
||||
}
|
||||
}
|
||||
|
||||
void nodeToStr(const SNode* node, string& str, bool isProject) {
|
||||
if (nullptr == node) {
|
||||
return;
|
||||
|
@ -142,12 +189,7 @@ private:
|
|||
break;
|
||||
}
|
||||
case QUERY_NODE_VALUE: {
|
||||
SValueNode* pVal = (SValueNode*)node;
|
||||
str.append(pVal->literal);
|
||||
str.append(" [" + dataTypeToStr(pVal->node.resType) + "]");
|
||||
if (isProject) {
|
||||
str.append(" AS " + string(pVal->node.aliasName));
|
||||
}
|
||||
valueNodeToStr((SValueNode*)node, str, isProject);
|
||||
break;
|
||||
}
|
||||
case QUERY_NODE_OPERATOR: {
|
||||
|
@ -391,10 +433,20 @@ TEST_F(NewParserTest, selectSimple) {
|
|||
ASSERT_TRUE(run());
|
||||
}
|
||||
|
||||
// Constant-only select lists: integer, floating point, single- and
// double-quoted strings, TIMESTAMP, boolean and duration literals. The second
// statement uses numeric literals with far more digits than the first.
TEST_F(NewParserTest, selectConstant) {
  setDatabase("root", "test");

  const char* const statements[] = {
      "SELECT 123, 20.4, 'abc', \"wxy\", TIMESTAMP '2022-02-09 17:30:20', true, false, 10s FROM t1",
      "SELECT 1234567890123456789012345678901234567890, 20.1234567890123456789012345678901234567890, 'abc', \"wxy\", TIMESTAMP '2022-02-09 17:30:20', true, false, 15s FROM t1",
  };

  for (const char* sql : statements) {
    bind(sql);
    ASSERT_TRUE(run());
  }
}
|
||||
|
||||
TEST_F(NewParserTest, selectExpression) {
|
||||
setDatabase("root", "test");
|
||||
|
||||
bind("SELECT c1 + 10, c2 FROM t1");
|
||||
bind("SELECT ts + 10s, c1 + 10, concat(c2, 'abc') FROM t1");
|
||||
ASSERT_TRUE(run());
|
||||
}
|
||||
|
||||
|
|
|
@ -119,9 +119,9 @@ int32_t queryProcessUseDBRsp(void* output, char *msg, int32_t msgSize) {
|
|||
return TSDB_CODE_TSC_OUT_OF_MEMORY;
|
||||
}
|
||||
|
||||
pOut->dbId = pRsp->uid;
|
||||
pOut->dbVgroup->vgVersion = pRsp->vgVersion;
|
||||
pOut->dbVgroup->hashMethod = pRsp->hashMethod;
|
||||
pOut->dbVgroup->dbId = pRsp->uid;
|
||||
pOut->dbVgroup->vgHash = taosHashInit(pRsp->vgNum, taosGetDefaultHashFunction(TSDB_DATA_TYPE_INT), true, HASH_ENTRY_LOCK);
|
||||
if (NULL == pOut->dbVgroup->vgHash) {
|
||||
qError("taosHashInit %d failed", pRsp->vgNum);
|
||||
|
@ -159,6 +159,7 @@ _return:
|
|||
}
|
||||
|
||||
static int32_t queryConvertTableMetaMsg(STableMetaRsp* pMetaMsg) {
|
||||
pMetaMsg->dbId = be64toh(pMetaMsg->dbId);
|
||||
pMetaMsg->numOfTags = ntohl(pMetaMsg->numOfTags);
|
||||
pMetaMsg->numOfColumns = ntohl(pMetaMsg->numOfColumns);
|
||||
pMetaMsg->sversion = ntohl(pMetaMsg->sversion);
|
||||
|
@ -259,6 +260,8 @@ int32_t queryProcessTableMetaRsp(void* output, char *msg, int32_t msgSize) {
|
|||
|
||||
strcpy(pOut->dbFName, pMetaMsg->dbFName);
|
||||
|
||||
pOut->dbId = pMetaMsg->dbId;
|
||||
|
||||
if (pMetaMsg->tableType == TSDB_CHILD_TABLE) {
|
||||
SET_META_TYPE_BOTH_TABLE(pOut->metaType);
|
||||
|
||||
|
|
|
@ -1,3 +1,4 @@
|
|||
|
||||
aux_source_directory(src SYNC_SRC)
|
||||
add_library(sync ${SYNC_SRC})
|
||||
|
||||
|
|
|
@ -1,146 +0,0 @@
|
|||
/*
|
||||
* Copyright (c) 2019 TAOS Data, Inc. <cli@taosdata.com>
|
||||
*
|
||||
* This program is free software: you can use, redistribute, and/or modify
|
||||
* it under the terms of the GNU Affero General Public License, version 3
|
||||
* or later ("AGPL"), as published by the Free Software Foundation.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful, but WITHOUT
|
||||
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||
* FITNESS FOR A PARTICULAR PURPOSE.
|
||||
*
|
||||
* You should have received a copy of the GNU Affero General Public License
|
||||
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#ifndef _TD_LIBS_SYNC_RAFT_H
|
||||
#define _TD_LIBS_SYNC_RAFT_H
|
||||
|
||||
#include "sync.h"
|
||||
#include "sync_type.h"
|
||||
#include "thash.h"
|
||||
#include "raft_message.h"
|
||||
#include "sync_raft_impl.h"
|
||||
#include "sync_raft_quorum.h"
|
||||
|
||||
typedef struct RaftLeaderState {
|
||||
|
||||
} RaftLeaderState;
|
||||
|
||||
typedef struct RaftCandidateState {
|
||||
/* true if in pre-vote phase */
|
||||
bool inPreVote;
|
||||
} RaftCandidateState;
|
||||
|
||||
typedef struct SSyncRaftIOMethods {
|
||||
// send SSyncMessage to node
|
||||
int (*send)(const SSyncMessage* pMsg, const SNodeInfo* pNode);
|
||||
} SSyncRaftIOMethods;
|
||||
|
||||
typedef int (*SyncRaftStepFp)(SSyncRaft* pRaft, const SSyncMessage* pMsg);
|
||||
typedef void (*SyncRaftTickFp)(SSyncRaft* pRaft);
|
||||
|
||||
struct SSyncRaft {
|
||||
// owner sync node
|
||||
SSyncNode* pNode;
|
||||
|
||||
// hash map nodeId -> SNodeInfo*
|
||||
SHashObj* nodeInfoMap;
|
||||
|
||||
SyncNodeId selfId;
|
||||
SyncGroupId selfGroupId;
|
||||
|
||||
SSyncRaftIOMethods io;
|
||||
|
||||
SSyncFSM fsm;
|
||||
SSyncLogStore logStore;
|
||||
SStateManager stateManager;
|
||||
|
||||
union {
|
||||
RaftLeaderState leaderState;
|
||||
RaftCandidateState candidateState;
|
||||
};
|
||||
|
||||
SyncTerm term;
|
||||
SyncNodeId voteFor;
|
||||
|
||||
SSyncRaftLog *log;
|
||||
|
||||
uint64_t maxMsgSize;
|
||||
uint64_t maxUncommittedSize;
|
||||
SSyncRaftProgressTracker *tracker;
|
||||
|
||||
ESyncState state;
|
||||
|
||||
// isLearner is true if the local raft node is a learner.
|
||||
bool isLearner;
|
||||
|
||||
/**
|
||||
* the leader id
|
||||
**/
|
||||
SyncNodeId leaderId;
|
||||
|
||||
/**
|
||||
* leadTransferee is id of the leader transfer target when its value is not zero.
|
||||
* Follow the procedure defined in raft thesis 3.10.
|
||||
**/
|
||||
SyncNodeId leadTransferee;
|
||||
|
||||
/**
|
||||
* Only one conf change may be pending (in the log, but not yet
|
||||
* applied) at a time. This is enforced via pendingConfIndex, which
|
||||
* is set to a value >= the log index of the latest pending
|
||||
* configuration change (if any). Config changes are only allowed to
|
||||
* be proposed if the leader's applied index is greater than this
|
||||
* value.
|
||||
**/
|
||||
SyncIndex pendingConfigIndex;
|
||||
|
||||
/**
|
||||
* an estimate of the size of the uncommitted tail of the Raft log. Used to
|
||||
* prevent unbounded log growth. Only maintained by the leader. Reset on
|
||||
* term changes.
|
||||
**/
|
||||
uint32_t uncommittedSize;
|
||||
|
||||
/**
|
||||
* number of ticks since it reached last electionTimeout when it is leader
|
||||
* or candidate.
|
||||
* number of ticks since it reached last electionTimeout or received a
|
||||
* valid message from current leader when it is a follower.
|
||||
**/
|
||||
uint16_t electionElapsed;
|
||||
|
||||
/**
|
||||
* number of ticks since it reached last heartbeatTimeout.
|
||||
* only leader keeps heartbeatElapsed.
|
||||
**/
|
||||
uint16_t heartbeatElapsed;
|
||||
|
||||
bool preVote;
|
||||
bool checkQuorum;
|
||||
|
||||
int heartbeatTimeout;
|
||||
int electionTimeout;
|
||||
|
||||
/**
|
||||
* randomizedElectionTimeout is a random number between
|
||||
* [electiontimeout, 2 * electiontimeout - 1]. It gets reset
|
||||
* when raft changes its state to follower or candidate.
|
||||
**/
|
||||
int randomizedElectionTimeout;
|
||||
bool disableProposalForwarding;
|
||||
|
||||
// current tick count since start up
|
||||
uint32_t currentTick;
|
||||
|
||||
SyncRaftStepFp stepFp;
|
||||
|
||||
SyncRaftTickFp tickFp;
|
||||
};
|
||||
|
||||
int32_t syncRaftStart(SSyncRaft* pRaft, const SSyncInfo* pInfo);
|
||||
int32_t syncRaftStep(SSyncRaft* pRaft, const SSyncMessage* pMsg);
|
||||
int32_t syncRaftTick(SSyncRaft* pRaft);
|
||||
|
||||
#endif /* _TD_LIBS_SYNC_RAFT_H */
|
|
@ -1,76 +0,0 @@
|
|||
/*
|
||||
* Copyright (c) 2019 TAOS Data, Inc. <cli@taosdata.com>
|
||||
*
|
||||
* This program is free software: you can use, redistribute, and/or modify
|
||||
* it under the terms of the GNU Affero General Public License, version 3
|
||||
* or later ("AGPL"), as published by the Free Software Foundation.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful, but WITHOUT
|
||||
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||
* FITNESS FOR A PARTICULAR PURPOSE.
|
||||
*
|
||||
* You should have received a copy of the GNU Affero General Public License
|
||||
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#ifndef _TD_LIBS_SYNC_RAFT_LOG_H
|
||||
#define _TD_LIBS_SYNC_RAFT_LOG_H
|
||||
|
||||
#include "sync.h"
|
||||
#include "sync_type.h"
|
||||
|
||||
typedef enum ESyncRaftEntryType {
|
||||
SYNC_ENTRY_TYPE_LOG = 1,
|
||||
} ESyncRaftEntryType;
|
||||
|
||||
struct SSyncRaftEntry {
|
||||
SyncTerm term;
|
||||
|
||||
SyncIndex index;
|
||||
|
||||
ESyncRaftEntryType type;
|
||||
|
||||
SSyncBuffer buffer;
|
||||
};
|
||||
|
||||
struct SSyncRaftLog {
|
||||
SyncIndex uncommittedConfigIndex;
|
||||
|
||||
SyncIndex commitIndex;
|
||||
|
||||
SyncIndex appliedIndex;
|
||||
};
|
||||
|
||||
SSyncRaftLog* syncRaftLogOpen();
|
||||
|
||||
SyncIndex syncRaftLogLastIndex(SSyncRaftLog* pLog);
|
||||
|
||||
SyncIndex syncRaftLogSnapshotIndex(SSyncRaftLog* pLog);
|
||||
|
||||
SyncTerm syncRaftLogLastTerm(SSyncRaftLog* pLog);
|
||||
|
||||
void syncRaftLogAppliedTo(SSyncRaftLog* pLog, SyncIndex appliedIndex);
|
||||
|
||||
bool syncRaftLogIsUptodate(SSyncRaftLog* pLog, SyncIndex index, SyncTerm term);
|
||||
|
||||
int syncRaftLogNumOfPendingConf(SSyncRaftLog* pLog);
|
||||
|
||||
bool syncRaftHasUnappliedLog(SSyncRaftLog* pLog);
|
||||
|
||||
SyncTerm syncRaftLogTermOf(SSyncRaftLog* pLog, SyncIndex index);
|
||||
|
||||
int syncRaftLogAppend(SSyncRaftLog* pLog, SSyncRaftEntry *pEntries, int n);
|
||||
|
||||
int syncRaftLogAcquire(SSyncRaftLog* pLog, SyncIndex index, int maxMsgSize,
|
||||
SSyncRaftEntry **ppEntries, int *n);
|
||||
|
||||
void syncRaftLogRelease(SSyncRaftLog* pLog, SyncIndex index,
|
||||
SSyncRaftEntry *pEntries, int n);
|
||||
|
||||
bool syncRaftLogMatchTerm();
|
||||
|
||||
/*
 * Report whether index lies strictly below the log's commitIndex.
 *
 * NOTE(review): an entry exactly at commitIndex is reported as not committed;
 * confirm whether commitIndex is meant to be inclusive.
 */
static FORCE_INLINE bool syncRaftLogIsCommitted(SSyncRaftLog* pLog, SyncIndex index) {
  const SyncIndex committed = pLog->commitIndex;
  return index < committed;
}
|
||||
|
||||
#endif /* _TD_LIBS_SYNC_RAFT_LOG_H */
|
|
@ -1,237 +0,0 @@
|
|||
/*
|
||||
* Copyright (c) 2019 TAOS Data, Inc. <cli@taosdata.com>
|
||||
*
|
||||
* This program is free software: you can use, redistribute, and/or modify
|
||||
* it under the terms of the GNU Affero General Public License, version 3
|
||||
* or later ("AGPL"), as published by the Free Software Foundation.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful, but WITHOUT
|
||||
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||
* FITNESS FOR A PARTICULAR PURPOSE.
|
||||
*
|
||||
* You should have received a copy of the GNU Affero General Public License
|
||||
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#ifndef _TD_LIBS_SYNC_RAFT_MESSAGE_H
|
||||
#define _TD_LIBS_SYNC_RAFT_MESSAGE_H
|
||||
|
||||
#include "sync.h"
|
||||
#include "sync_type.h"
|
||||
|
||||
/**
|
||||
* below define message type which handled by Raft.
|
||||
*
|
||||
* internal message, which communicate between threads, start with RAFT_MSG_INTERNAL_*.
|
||||
* internal message use pointer only and stack memory, need not to be decode/encode and free.
|
||||
*
|
||||
* outer messages, which start with RAFT_MSG_* and travel between cluster peers,
|
||||
* need to implement its decode/encode functions.
|
||||
**/
|
||||
typedef enum ESyncRaftMessageType {
|
||||
// client propose a cmd
|
||||
RAFT_MSG_INTERNAL_PROP = 1,
|
||||
|
||||
// node election timeout
|
||||
RAFT_MSG_INTERNAL_ELECTION = 2,
|
||||
|
||||
RAFT_MSG_VOTE = 3,
|
||||
RAFT_MSG_VOTE_RESP = 4,
|
||||
|
||||
RAFT_MSG_APPEND = 5,
|
||||
RAFT_MSG_APPEND_RESP = 6,
|
||||
} ESyncRaftMessageType;
|
||||
|
||||
typedef struct RaftMsgInternal_Prop {
|
||||
const SSyncBuffer *pBuf;
|
||||
bool isWeak;
|
||||
void* pData;
|
||||
} RaftMsgInternal_Prop;
|
||||
|
||||
typedef struct RaftMsgInternal_Election {
|
||||
|
||||
} RaftMsgInternal_Election;
|
||||
|
||||
typedef struct RaftMsg_Vote {
|
||||
ESyncRaftElectionType cType;
|
||||
SyncIndex lastIndex;
|
||||
SyncTerm lastTerm;
|
||||
} RaftMsg_Vote;
|
||||
|
||||
typedef struct RaftMsg_VoteResp {
|
||||
bool rejected;
|
||||
ESyncRaftElectionType cType;
|
||||
} RaftMsg_VoteResp;
|
||||
|
||||
typedef struct RaftMsg_Append_Entries {
|
||||
// index of the log entry preceding the new ones
|
||||
SyncIndex index;
|
||||
|
||||
// term of entry at prevIndex
|
||||
SyncTerm term;
|
||||
|
||||
// leader's commit index.
|
||||
SyncIndex commitIndex;
|
||||
|
||||
// size of the log entries array
|
||||
int nEntries;
|
||||
|
||||
// log entries array
|
||||
SSyncRaftEntry* entries;
|
||||
} RaftMsg_Append_Entries;
|
||||
|
||||
typedef struct RaftMsg_Append_Resp {
|
||||
SyncIndex index;
|
||||
} RaftMsg_Append_Resp;
|
||||
|
||||
typedef struct SSyncMessage {
|
||||
ESyncRaftMessageType msgType;
|
||||
SyncTerm term;
|
||||
SyncGroupId groupId;
|
||||
SyncNodeId from;
|
||||
|
||||
union {
|
||||
RaftMsgInternal_Prop propose;
|
||||
|
||||
RaftMsgInternal_Election election;
|
||||
|
||||
RaftMsg_Vote vote;
|
||||
RaftMsg_VoteResp voteResp;
|
||||
|
||||
RaftMsg_Append_Entries appendEntries;
|
||||
RaftMsg_Append_Resp appendResp;
|
||||
};
|
||||
} SSyncMessage;
|
||||
|
||||
/*
 * Fill a caller-provided message as an internal proposal
 * (RAFT_MSG_INTERNAL_PROP) with term 0. Every field that is not set here is
 * zeroed.
 *
 * pMsg    message to overwrite
 * pBuf    stored in propose.pBuf (pointer only, not copied)
 * pData   stored in propose.pData
 * isWeak  stored in propose.isWeak
 *
 * Returns pMsg.
 */
static FORCE_INLINE SSyncMessage* syncInitPropMsg(SSyncMessage* pMsg, const SSyncBuffer* pBuf, void* pData, bool isWeak) {
  SSyncMessage msg = {0};

  msg.msgType = RAFT_MSG_INTERNAL_PROP;
  msg.term = 0;
  msg.propose.pBuf = pBuf;
  msg.propose.isWeak = isWeak;
  msg.propose.pData = pData;

  *pMsg = msg;
  return pMsg;
}
|
||||
|
||||
/*
 * Fill a caller-provided message as an internal election trigger
 * (RAFT_MSG_INTERNAL_ELECTION) sent by node `from`, with term 0. Every field
 * that is not set here is zeroed.
 *
 * Returns pMsg.
 */
static FORCE_INLINE SSyncMessage* syncInitElectionMsg(SSyncMessage* pMsg, SyncNodeId from) {
  SSyncMessage msg = {0};

  msg.msgType = RAFT_MSG_INTERNAL_ELECTION;
  msg.term = 0;
  msg.from = from;

  *pMsg = msg;
  return pMsg;
}
|
||||
|
||||
/*
 * Allocate a RAFT_MSG_VOTE message with malloc() and populate its header
 * (groupId, from, term) and vote payload (cType, lastIndex, lastTerm).
 *
 * Returns the new message, or NULL when the allocation fails.
 * NOTE(review): presumably released through syncFreeMessage() - confirm.
 */
static FORCE_INLINE SSyncMessage* syncNewVoteMsg(SyncGroupId groupId, SyncNodeId from,
                                                 SyncTerm term, ESyncRaftElectionType cType,
                                                 SyncIndex lastIndex, SyncTerm lastTerm) {
  SSyncMessage* pMsg = (SSyncMessage*)malloc(sizeof(*pMsg));
  if (pMsg != NULL) {
    SSyncMessage msg = {0};

    msg.msgType = RAFT_MSG_VOTE;
    msg.groupId = groupId;
    msg.from = from;
    msg.term = term;
    msg.vote.cType = cType;
    msg.vote.lastIndex = lastIndex;
    msg.vote.lastTerm = lastTerm;

    *pMsg = msg;
  }
  return pMsg;
}
|
||||
|
||||
/*
 * Allocate a RAFT_MSG_VOTE_RESP message with malloc() and populate groupId,
 * from and the voteResp payload (cType, rejected). The term is not supplied by
 * the caller and is left at zero.
 *
 * Returns the new message, or NULL when the allocation fails.
 * NOTE(review): presumably released through syncFreeMessage() - confirm.
 */
static FORCE_INLINE SSyncMessage* syncNewVoteRespMsg(SyncGroupId groupId, SyncNodeId from,
                                                     ESyncRaftElectionType cType, bool rejected) {
  SSyncMessage* pMsg = (SSyncMessage*)malloc(sizeof(*pMsg));
  if (pMsg != NULL) {
    SSyncMessage msg = {0};

    msg.msgType = RAFT_MSG_VOTE_RESP;
    msg.groupId = groupId;
    msg.from = from;
    msg.voteResp.cType = cType;
    msg.voteResp.rejected = rejected;

    *pMsg = msg;
  }
  return pMsg;
}
|
||||
|
||||
/*
 * Allocate a RAFT_MSG_APPEND message with malloc() and populate its header
 * (groupId, from, term) and appendEntries payload. The entries pointer is
 * stored as given; the array itself is not copied.
 *
 * Returns the new message, or NULL when the allocation fails.
 * NOTE(review): presumably released through syncFreeMessage() - confirm.
 */
static FORCE_INLINE SSyncMessage* syncNewAppendMsg(SyncGroupId groupId, SyncNodeId from,
                                                   SyncTerm term, SyncIndex logIndex, SyncTerm logTerm,
                                                   SyncIndex commitIndex, int nEntries, SSyncRaftEntry* entries) {
  SSyncMessage* pMsg = (SSyncMessage*)malloc(sizeof(*pMsg));
  if (pMsg != NULL) {
    SSyncMessage msg = {0};

    msg.msgType = RAFT_MSG_APPEND;
    msg.groupId = groupId;
    msg.from = from;
    msg.term = term;
    msg.appendEntries.index = logIndex;
    msg.appendEntries.term = logTerm;
    msg.appendEntries.commitIndex = commitIndex;
    msg.appendEntries.nEntries = nEntries;
    msg.appendEntries.entries = entries;

    *pMsg = msg;
  }
  return pMsg;
}
|
||||
|
||||
/*
 * Allocate a RAFT_MSG_APPEND_RESP message with malloc(), set its header
 * (groupId, from, term) and leave the appendResp payload zeroed.
 *
 * Returns the new message, or NULL when the allocation fails.
 * NOTE(review): presumably released through syncFreeMessage() - confirm.
 */
static FORCE_INLINE SSyncMessage* syncNewEmptyAppendRespMsg(SyncGroupId groupId, SyncNodeId from, SyncTerm term) {
  SSyncMessage* pMsg = (SSyncMessage*)malloc(sizeof(*pMsg));
  if (pMsg != NULL) {
    SSyncMessage msg = {0};

    msg.msgType = RAFT_MSG_APPEND_RESP;
    msg.groupId = groupId;
    msg.from = from;
    msg.term = term;

    *pMsg = msg;
  }
  return pMsg;
}
|
||||
|
||||
/* True for the two RAFT_MSG_INTERNAL_* message types, false for all others. */
static FORCE_INLINE bool syncIsInternalMsg(ESyncRaftMessageType msgType) {
  switch (msgType) {
    case RAFT_MSG_INTERNAL_PROP:
    case RAFT_MSG_INTERNAL_ELECTION:
      return true;
    default:
      return false;
  }
}
|
||||
|
||||
static FORCE_INLINE bool syncIsPreVoteRespMsg(const SSyncMessage* pMsg) {
|
||||
return pMsg->msgType == RAFT_MSG_VOTE_RESP && pMsg->voteResp.cType == SYNC_RAFT_CAMPAIGN_PRE_ELECTION;
|
||||
}
|
||||
|
||||
static FORCE_INLINE bool syncIsPreVoteMsg(const SSyncMessage* pMsg) {
|
||||
return pMsg->msgType == RAFT_MSG_VOTE && pMsg->voteResp.cType == SYNC_RAFT_CAMPAIGN_PRE_ELECTION;
|
||||
}
|
||||
|
||||
void syncFreeMessage(const SSyncMessage* pMsg);
|
||||
|
||||
// message handlers
|
||||
int syncRaftHandleElectionMessage(SSyncRaft* pRaft, const SSyncMessage* pMsg);
|
||||
int syncRaftHandleVoteMessage(SSyncRaft* pRaft, const SSyncMessage* pMsg);
|
||||
int syncRaftHandleVoteRespMessage(SSyncRaft* pRaft, const SSyncMessage* pMsg);
|
||||
int syncRaftHandleAppendEntriesMessage(SSyncRaft* pRaft, const SSyncMessage* pMsg);
|
||||
|
||||
#endif /* _TD_LIBS_SYNC_RAFT_MESSAGE_H */
|
|
@ -1,30 +0,0 @@
|
|||
/*
|
||||
* Copyright (c) 2019 TAOS Data, Inc. <cli@taosdata.com>
|
||||
*
|
||||
* This program is free software: you can use, redistribute, and/or modify
|
||||
* it under the terms of the GNU Affero General Public License, version 3
|
||||
* or later ("AGPL"), as published by the Free Software Foundation.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful, but WITHOUT
|
||||
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||
* FITNESS FOR A PARTICULAR PURPOSE.
|
||||
*
|
||||
* You should have received a copy of the GNU Affero General Public License
|
||||
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#ifndef TD_SYNC_RAFT_REPLICATION_H
|
||||
#define TD_SYNC_RAFT_REPLICATION_H
|
||||
|
||||
#include "sync.h"
|
||||
#include "syncInt.h"
|
||||
#include "sync_type.h"
|
||||
|
||||
// syncRaftMaybeSendAppend sends an append RPC with new entries to the given peer,
|
||||
// if necessary. Returns true if a message was sent. The sendIfEmpty
|
||||
// argument controls whether messages with no entries will be sent
|
||||
// ("empty" messages are useful to convey updated Commit indexes, but
|
||||
// are undesirable when we're sending multiple messages in a batch).
|
||||
bool syncRaftMaybeSendAppend(SSyncRaft* pRaft, SSyncRaftProgress* progress, bool sendIfEmpty);
|
||||
|
||||
#endif /* TD_SYNC_RAFT_REPLICATION_H */
|
|
@ -1,115 +0,0 @@
|
|||
/*
|
||||
* Copyright (c) 2019 TAOS Data, Inc. <cli@taosdata.com>
|
||||
*
|
||||
* This program is free software: you can use, redistribute, and/or modify
|
||||
* it under the terms of the GNU Affero General Public License, version 3
|
||||
* or later ("AGPL"), as published by the Free Software Foundation.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful, but WITHOUT
|
||||
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||
* FITNESS FOR A PARTICULAR PURPOSE.
|
||||
*
|
||||
* You should have received a copy of the GNU Affero General Public License
|
||||
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#ifndef TD_SYNC_RAFT_UNSTABLE_LOG_H
|
||||
#define TD_SYNC_RAFT_UNSTABLE_LOG_H
|
||||
|
||||
#include "sync_type.h"
|
||||
|
||||
/* in-memory unstable raft log storage */
|
||||
struct SSyncRaftUnstableLog {
|
||||
#if 0
|
||||
/* Circular buffer of log entries */
|
||||
RaftEntry *entries;
|
||||
|
||||
/* size of Circular buffer */
|
||||
int size;
|
||||
|
||||
/* Indexes of used slots [front, back) */
|
||||
int front, back;
|
||||
|
||||
/* Index of first entry is offset + 1 */
|
||||
SyncIndex offset;
|
||||
|
||||
/* meta data of snapshot */
|
||||
SSyncRaftUnstableLog snapshot;
|
||||
#endif
|
||||
};
|
||||
|
||||
/**
|
||||
* return index of last in memory log, return 0 if log is empty
|
||||
**/
|
||||
//SyncIndex syncRaftLogLastIndex(SSyncRaftUnstableLog* pLog);
|
||||
|
||||
#if 0
|
||||
void raftLogInit(RaftLog* pLog);
|
||||
|
||||
void raftLogClose(RaftLog* pLog);
|
||||
|
||||
/**
|
||||
* At startup, when populating log entries loaded from disk,
|
||||
* init the raft in-memory log with the snapshot index, term, and log start index.
|
||||
**/
|
||||
/*
|
||||
void raftLogStart(RaftLog* pLog,
|
||||
RaftSnapshotMeta snapshot,
|
||||
SyncIndex startIndex);
|
||||
*/
|
||||
/**
|
||||
* Get the number of entries in the log.
|
||||
**/
|
||||
int raftLogNumEntries(const RaftLog* pLog);
|
||||
|
||||
|
||||
|
||||
/**
|
||||
* return last term of in memory log, return 0 if log is empty
|
||||
**/
|
||||
SyncTerm raftLogLastTerm(RaftLog* pLog);
|
||||
|
||||
/**
|
||||
* return term of log with the given index, return 0 if the term of index cannot be found
|
||||
* , errCode will save the error code.
|
||||
**/
|
||||
SyncTerm raftLogTermOf(RaftLog* pLog, SyncIndex index, RaftCode* errCode);
|
||||
|
||||
/**
|
||||
* Get the last index of the most recent snapshot. Return 0 if there are no *
|
||||
* snapshots.
|
||||
**/
|
||||
SyncIndex raftLogSnapshotIndex(RaftLog* pLog);
|
||||
|
||||
/* Append a new entry to the log. */
|
||||
int raftLogAppend(RaftLog* pLog,
|
||||
SyncTerm term,
|
||||
const SSyncBuffer *buf);
|
||||
|
||||
/**
|
||||
* acquire log from given index onwards.
|
||||
**/
|
||||
/*
|
||||
int raftLogAcquire(RaftLog* pLog,
|
||||
SyncIndex index,
|
||||
RaftEntry **ppEntries,
|
||||
int *n);
|
||||
|
||||
void raftLogRelease(RaftLog* pLog,
|
||||
SyncIndex index,
|
||||
RaftEntry *pEntries,
|
||||
int n);
|
||||
*/
|
||||
/* Delete all entries from the given index (included) onwards. */
|
||||
void raftLogTruncate(RaftLog* pLog, SyncIndex index);
|
||||
|
||||
/**
|
||||
* when taking a new snapshot, the function will update the last snapshot information and delete
|
||||
* all entries up to last_index - trailing (included). If the log contains no entry
|
||||
* at last_index - trailing, then no entry will be deleted.
|
||||
**/
|
||||
void raftLogSnapshot(RaftLog* pLog, SyncIndex index, SyncIndex trailing);
|
||||
|
||||
#endif
|
||||
|
||||
#endif /* TD_SYNC_RAFT_UNSTABLE_LOG_H */
|
|
@ -1,71 +0,0 @@
|
|||
/*
|
||||
* Copyright (c) 2019 TAOS Data, Inc. <cli@taosdata.com>
|
||||
*
|
||||
* This program is free software: you can use, redistribute, and/or modify
|
||||
* it under the terms of the GNU Affero General Public License, version 3
|
||||
* or later ("AGPL"), as published by the Free Software Foundation.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful, but WITHOUT
|
||||
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||
* FITNESS FOR A PARTICULAR PURPOSE.
|
||||
*
|
||||
* You should have received a copy of the GNU Affero General Public License
|
||||
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#ifndef _TD_LIBS_SYNC_INT_H
|
||||
#define _TD_LIBS_SYNC_INT_H
|
||||
|
||||
#include "thash.h"
|
||||
#include "os.h"
|
||||
#include "sync.h"
|
||||
#include "sync_type.h"
|
||||
#include "raft.h"
|
||||
#include "tlog.h"
|
||||
|
||||
#define TAOS_SYNC_MAX_WORKER 3
|
||||
|
||||
typedef struct SSyncWorker {
|
||||
pthread_t thread;
|
||||
} SSyncWorker;
|
||||
|
||||
struct SSyncNode {
|
||||
pthread_mutex_t mutex;
|
||||
int32_t refCount;
|
||||
SyncGroupId vgId;
|
||||
SSyncRaft raft;
|
||||
void* syncTimer;
|
||||
};
|
||||
|
||||
typedef struct SSyncManager {
|
||||
pthread_mutex_t mutex;
|
||||
|
||||
// sync server rpc
|
||||
void* serverRpc;
|
||||
// rpc server hash table based on FQDN:port key
|
||||
SHashObj* rpcServerTable;
|
||||
|
||||
// sync client rpc
|
||||
void* clientRpc;
|
||||
|
||||
// worker threads
|
||||
SSyncWorker worker[TAOS_SYNC_MAX_WORKER];
|
||||
|
||||
// vgroup hash table
|
||||
SHashObj* vgroupTable;
|
||||
|
||||
// timer manager
|
||||
void* syncTimerManager;
|
||||
|
||||
} SSyncManager;
|
||||
|
||||
extern SSyncManager* gSyncManager;
|
||||
|
||||
// Logging helpers for the sync module. Each macro calls taosPrintLog() only
// when its DEBUG_* bit is set in sDebugFlag. The fatal/error/warn/info
// variants pass 255 as the second argument; the debug/trace variants pass
// sDebugFlag itself. Every expansion is wrapped in do { ... } while(0) so it
// behaves as a single statement.
#define syncFatal(...) do { if (sDebugFlag & DEBUG_FATAL) { taosPrintLog("SYNC FATAL ", 255, __VA_ARGS__); }} while(0)
|
||||
#define syncError(...) do { if (sDebugFlag & DEBUG_ERROR) { taosPrintLog("SYNC ERROR ", 255, __VA_ARGS__); }} while(0)
|
||||
#define syncWarn(...) do { if (sDebugFlag & DEBUG_WARN) { taosPrintLog("SYNC WARN ", 255, __VA_ARGS__); }} while(0)
|
||||
#define syncInfo(...) do { if (sDebugFlag & DEBUG_INFO) { taosPrintLog("SYNC ", 255, __VA_ARGS__); }} while(0)
|
||||
#define syncDebug(...) do { if (sDebugFlag & DEBUG_DEBUG) { taosPrintLog("SYNC ", sDebugFlag, __VA_ARGS__); }} while(0)
|
||||
#define syncTrace(...) do { if (sDebugFlag & DEBUG_TRACE) { taosPrintLog("SYNC ", sDebugFlag, __VA_ARGS__); }} while(0)
|
||||
|
||||
#endif /* _TD_LIBS_SYNC_INT_H */
|
|
@ -1,25 +0,0 @@
|
|||
/*
|
||||
* Copyright (c) 2019 TAOS Data, Inc. <cli@taosdata.com>
|
||||
*
|
||||
* This program is free software: you can use, redistribute, and/or modify
|
||||
* it under the terms of the GNU Affero General Public License, version 3
|
||||
* or later ("AGPL"), as published by the Free Software Foundation.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful, but WITHOUT
|
||||
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||
* FITNESS FOR A PARTICULAR PURPOSE.
|
||||
*
|
||||
* You should have received a copy of the GNU Affero General Public License
|
||||
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#ifndef _TD_LIBS_SYNC_CONST_H
|
||||
#define _TD_LIBS_SYNC_CONST_H
|
||||
|
||||
#include "sync.h"
|
||||
|
||||
static int kSyncRaftMaxInflghtMsgs = 20;
|
||||
|
||||
static SyncIndex kMaxCommitIndex = UINT64_MAX;
|
||||
|
||||
#endif /* _TD_LIBS_SYNC_CONST_H */
|
|
@ -1,47 +0,0 @@
|
|||
/*
|
||||
* Copyright (c) 2019 TAOS Data, Inc. <cli@taosdata.com>
|
||||
*
|
||||
* This program is free software: you can use, redistribute, and/or modify
|
||||
* it under the terms of the GNU Affero General Public License, version 3
|
||||
* or later ("AGPL"), as published by the Free Software Foundation.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful, but WITHOUT
|
||||
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||
* FITNESS FOR A PARTICULAR PURPOSE.
|
||||
*
|
||||
* You should have received a copy of the GNU Affero General Public License
|
||||
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#ifndef TD_SYNC_RAFT_CONFIG_CHANGE_H
|
||||
#define TD_SYNC_RAFT_CONFIG_CHANGE_H
|
||||
|
||||
#include "sync_type.h"
|
||||
#include "sync_raft_proto.h"
|
||||
|
||||
/**
|
||||
* Changer facilitates configuration changes. It exposes methods to handle
|
||||
* simple and joint consensus while performing the proper validation that allows
|
||||
* refusing invalid configuration changes before they affect the active
|
||||
* configuration.
|
||||
**/
|
||||
struct SSyncRaftChanger {
|
||||
SSyncRaftProgressTracker* tracker;
|
||||
SyncIndex lastIndex;
|
||||
};
|
||||
|
||||
typedef int (*configChangeFp)(SSyncRaftChanger* changer, const SSyncConfChangeSingleArray* css,
|
||||
SSyncRaftProgressTrackerConfig* config, SSyncRaftProgressMap* progressMap);
|
||||
|
||||
// Simple carries out a series of configuration changes that (in aggregate)
|
||||
// mutates the incoming majority config Voters[0] by at most one. This method
|
||||
// will return an error if that is not the case, if the resulting quorum is
|
||||
// zero, or if the configuration is in a joint state (i.e. if there is an
|
||||
// outgoing configuration).
|
||||
int syncRaftChangerSimpleConfig(SSyncRaftChanger* changer, const SSyncConfChangeSingleArray* css,
|
||||
SSyncRaftProgressTrackerConfig* config, SSyncRaftProgressMap* progressMap);
|
||||
|
||||
int syncRaftChangerEnterJoint(SSyncRaftChanger* changer, bool autoLeave, const SSyncConfChangeSingleArray* css,
|
||||
SSyncRaftProgressTrackerConfig* config, SSyncRaftProgressMap* progressMap);
|
||||
|
||||
#endif /* TD_SYNC_RAFT_CONFIG_CHANGE_H */
|
|
@ -1,58 +0,0 @@
|
|||
/*
|
||||
* Copyright (c) 2019 TAOS Data, Inc. <cli@taosdata.com>
|
||||
*
|
||||
* This program is free software: you can use, redistribute, and/or modify
|
||||
* it under the terms of the GNU Affero General Public License, version 3
|
||||
* or later ("AGPL"), as published by the Free Software Foundation.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful, but WITHOUT
|
||||
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||
* FITNESS FOR A PARTICULAR PURPOSE.
|
||||
*
|
||||
* You should have received a copy of the GNU Affero General Public License
|
||||
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#ifndef _TD_LIBS_SYNC_RAFT_IMPL_H
|
||||
#define _TD_LIBS_SYNC_RAFT_IMPL_H
|
||||
|
||||
#include "sync.h"
|
||||
#include "sync_type.h"
|
||||
#include "raft_message.h"
|
||||
#include "sync_raft_quorum.h"
|
||||
|
||||
void syncRaftBecomeFollower(SSyncRaft* pRaft, SyncTerm term, SyncNodeId leaderId);
|
||||
void syncRaftBecomePreCandidate(SSyncRaft* pRaft);
|
||||
void syncRaftBecomeCandidate(SSyncRaft* pRaft);
|
||||
void syncRaftBecomeLeader(SSyncRaft* pRaft);
|
||||
|
||||
void syncRaftStartElection(SSyncRaft* pRaft, ESyncRaftElectionType cType);
|
||||
|
||||
void syncRaftCampaign(SSyncRaft* pRaft, ESyncRaftElectionType cType);
|
||||
|
||||
void syncRaftTriggerHeartbeat(SSyncRaft* pRaft);
|
||||
|
||||
void syncRaftRandomizedElectionTimeout(SSyncRaft* pRaft);
|
||||
bool syncRaftIsPromotable(SSyncRaft* pRaft);
|
||||
bool syncRaftIsPastElectionTimeout(SSyncRaft* pRaft);
|
||||
int syncRaftQuorum(SSyncRaft* pRaft);
|
||||
|
||||
bool syncRaftMaybeCommit(SSyncRaft* pRaft);
|
||||
|
||||
ESyncRaftVoteResult syncRaftPollVote(SSyncRaft* pRaft, SyncNodeId id,
|
||||
bool preVote, bool accept,
|
||||
int* rejectNum, int *granted);
|
||||
|
||||
static FORCE_INLINE bool syncRaftIsEmptyServerState(const SSyncServerState* serverState) {
|
||||
return serverState->commitIndex == 0 &&
|
||||
serverState->term == SYNC_NON_TERM &&
|
||||
serverState->voteFor == SYNC_NON_NODE_ID;
|
||||
}
|
||||
|
||||
void syncRaftLoadState(SSyncRaft* pRaft, const SSyncServerState* serverState);
|
||||
|
||||
void syncRaftBroadcastAppend(SSyncRaft* pRaft);
|
||||
|
||||
SNodeInfo* syncRaftGetNodeById(SSyncRaft *pRaft, SyncNodeId id);
|
||||
|
||||
#endif /* _TD_LIBS_SYNC_RAFT_IMPL_H */
|
|
@ -1,70 +0,0 @@
|
|||
/*
|
||||
* Copyright (c) 2019 TAOS Data, Inc. <cli@taosdata.com>
|
||||
*
|
||||
* This program is free software: you can use, redistribute, and/or modify
|
||||
* it under the terms of the GNU Affero General Public License, version 3
|
||||
* or later ("AGPL"), as published by the Free Software Foundation.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful, but WITHOUT
|
||||
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||
* FITNESS FOR A PARTICULAR PURPOSE.
|
||||
*
|
||||
* You should have received a copy of the GNU Affero General Public License
|
||||
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#ifndef TD_SYNC_RAFT_INFLIGHTS_H
|
||||
#define TD_SYNC_RAFT_INFLIGHTS_H
|
||||
|
||||
#include "sync.h"
|
||||
|
||||
// Inflights limits the number of MsgApp (represented by the largest index
|
||||
// contained within) sent to followers but not yet acknowledged by them. Callers
|
||||
// use Full() to check whether more messages can be sent, call Add() whenever
|
||||
// they are sending a new append, and release "quota" via FreeLE() whenever an
|
||||
// ack is received.
|
||||
typedef struct SSyncRaftInflights {
|
||||
// the starting index in the buffer
|
||||
int start;
|
||||
|
||||
// number of inflights in the buffer
|
||||
int count;
|
||||
|
||||
// the size of the buffer
|
||||
int size;
|
||||
|
||||
// buffer contains the index of the last entry
|
||||
// inside one message.
|
||||
SyncIndex* buffer;
|
||||
} SSyncRaftInflights;
|
||||
|
||||
SSyncRaftInflights* syncRaftOpenInflights(int size);
|
||||
void syncRaftCloseInflights(SSyncRaftInflights*);
|
||||
|
||||
// reset frees all inflights.
|
||||
// Drops every tracked inflight by zeroing both the window start position
// and the number of tracked entries. The buffer and its size are untouched.
static FORCE_INLINE void syncRaftInflightReset(SSyncRaftInflights* inflights) {
  inflights->start = 0;
  inflights->count = 0;
}
|
||||
|
||||
// Full returns true if no more messages can be sent at the moment.
|
||||
// Reports whether the inflight window is full, i.e. the number of tracked
// inflights equals the buffer size.
static FORCE_INLINE bool syncRaftInflightFull(SSyncRaftInflights* inflights) {
  const int tracked = inflights->count;
  return tracked == inflights->size;
}
|
||||
|
||||
// Add notifies the Inflights that a new message with the given index is being
|
||||
// dispatched. Full() must be called prior to Add() to verify that there is room
|
||||
// for one more message, and consecutive calls to Add() must provide a
|
||||
// monotonic sequence of indexes.
|
||||
void syncRaftInflightAdd(SSyncRaftInflights* inflights, SyncIndex inflightIndex);
|
||||
|
||||
// FreeLE frees the inflights smaller or equal to the given `to` flight.
|
||||
void syncRaftInflightFreeLE(SSyncRaftInflights* inflights, SyncIndex toIndex);
|
||||
|
||||
/**
|
||||
* syncRaftInflightFreeFirstOne releases the first inflight.
|
||||
* This is a no-op if nothing is inflight.
|
||||
**/
|
||||
void syncRaftInflightFreeFirstOne(SSyncRaftInflights* inflights);
|
||||
|
||||
#endif /* TD_SYNC_RAFT_INFLIGHTS_H */
|
|
@ -1,49 +0,0 @@
|
|||
/*
|
||||
* Copyright (c) 2019 TAOS Data, Inc. <cli@taosdata.com>
|
||||
*
|
||||
* This program is free software: you can use, redistribute, and/or modify
|
||||
* it under the terms of the GNU Affero General Public License, version 3
|
||||
* or later ("AGPL"), as published by the Free Software Foundation.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful, but WITHOUT
|
||||
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||
* FITNESS FOR A PARTICULAR PURPOSE.
|
||||
*
|
||||
* You should have received a copy of the GNU Affero General Public License
|
||||
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#ifndef _TD_LIBS_SYNC_RAFT_NODE_MAP_H
|
||||
#define _TD_LIBS_SYNC_RAFT_NODE_MAP_H
|
||||
|
||||
#include "thash.h"
|
||||
#include "sync.h"
|
||||
#include "sync_type.h"
|
||||
|
||||
struct SSyncRaftNodeMap {
|
||||
SHashObj* nodeIdMap;
|
||||
};
|
||||
|
||||
void syncRaftInitNodeMap(SSyncRaftNodeMap* nodeMap);
|
||||
void syncRaftFreeNodeMap(SSyncRaftNodeMap* nodeMap);
|
||||
|
||||
void syncRaftClearNodeMap(SSyncRaftNodeMap* nodeMap);
|
||||
|
||||
bool syncRaftIsInNodeMap(const SSyncRaftNodeMap* nodeMap, SyncNodeId nodeId);
|
||||
|
||||
void syncRaftCopyNodeMap(SSyncRaftNodeMap* from, SSyncRaftNodeMap* to);
|
||||
|
||||
void syncRaftUnionNodeMap(SSyncRaftNodeMap* nodeMap, SSyncRaftNodeMap* to);
|
||||
|
||||
void syncRaftAddToNodeMap(SSyncRaftNodeMap* nodeMap, SyncNodeId nodeId);
|
||||
|
||||
void syncRaftRemoveFromNodeMap(SSyncRaftNodeMap* nodeMap, SyncNodeId nodeId);
|
||||
|
||||
int32_t syncRaftNodeMapSize(const SSyncRaftNodeMap* nodeMap);
|
||||
|
||||
// return true if reach the end
|
||||
bool syncRaftIterateNodeMap(const SSyncRaftNodeMap* nodeMap, SyncNodeId *pId);
|
||||
|
||||
bool syncRaftIsAllNodeInProgressMap(SSyncRaftNodeMap* nodeMap, SSyncRaftProgressMap* progressMap);
|
||||
|
||||
#endif /* _TD_LIBS_SYNC_RAFT_NODE_MAP_H */
|
|
@ -1,259 +0,0 @@
|
|||
/*
|
||||
* Copyright (c) 2019 TAOS Data, Inc. <cli@taosdata.com>
|
||||
*
|
||||
* This program is free software: you can use, redistribute, and/or modify
|
||||
* it under the terms of the GNU Affero General Public License, version 3
|
||||
* or later ("AGPL"), as published by the Free Software Foundation.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful, but WITHOUT
|
||||
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||
* FITNESS FOR A PARTICULAR PURPOSE.
|
||||
*
|
||||
* You should have received a copy of the GNU Affero General Public License
|
||||
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#ifndef TD_SYNC_RAFT_PROGRESS_H
|
||||
#define TD_SYNC_RAFT_PROGRESS_H
|
||||
|
||||
#include "sync_type.h"
|
||||
#include "sync_raft_inflights.h"
|
||||
#include "thash.h"
|
||||
|
||||
/**
|
||||
* State defines how the leader should interact with the follower.
|
||||
*
|
||||
* When in PROGRESS_STATE_PROBE, leader sends at most one replication message
|
||||
* per heartbeat interval. It also probes actual progress of the follower.
|
||||
*
|
||||
* When in PROGRESS_STATE_REPLICATE, leader optimistically increases next
|
||||
* to the latest entry sent after sending replication message. This is
|
||||
* an optimized state for fast replicating log entries to the follower.
|
||||
*
|
||||
* When in PROGRESS_STATE_SNAPSHOT, leader should have sent out snapshot
|
||||
* before and stops sending any replication message.
|
||||
*
|
||||
* PROGRESS_STATE_PROBE is the initial state.
|
||||
**/
|
||||
typedef enum ESyncRaftProgressState {
|
||||
/**
|
||||
* StateProbe indicates a follower whose last index isn't known. Such a
|
||||
* follower is "probed" (i.e. an append sent periodically) to narrow down
|
||||
* its last index. In the ideal (and common) case, only one round of probing
|
||||
* is necessary as the follower will react with a hint. Followers that are
|
||||
* probed over extended periods of time are often offline.
|
||||
**/
|
||||
PROGRESS_STATE_PROBE = 0,
|
||||
|
||||
/**
|
||||
* StateReplicate is the steady state in which a follower eagerly receives
|
||||
* log entries to append to its log.
|
||||
**/
|
||||
PROGRESS_STATE_REPLICATE,
|
||||
|
||||
/**
|
||||
* StateSnapshot indicates a follower that needs log entries not available
|
||||
* from the leader's Raft log. Such a follower needs a full snapshot to
|
||||
* return to StateReplicate.
|
||||
**/
|
||||
PROGRESS_STATE_SNAPSHOT,
|
||||
} ESyncRaftProgressState;
|
||||
|
||||
static const char* kProgressStateString[] = {
|
||||
"Probe",
|
||||
"Replicate",
|
||||
"Snapshot",
|
||||
};
|
||||
|
||||
// Progress represents a follower’s progress in the view of the leader. Leader
|
||||
// maintains progresses of all followers, and sends entries to the follower
|
||||
// based on its progress.
|
||||
//
|
||||
// NB(tbg): Progress is basically a state machine whose transitions are mostly
|
||||
// strewn around `*raft.raft`. Additionally, some fields are only used when in a
|
||||
// certain State. All of this isn't ideal.
|
||||
struct SSyncRaftProgress {
|
||||
SyncGroupId groupId;
|
||||
|
||||
SyncNodeId id;
|
||||
|
||||
int16_t refCount;
|
||||
|
||||
SyncIndex nextIndex;
|
||||
|
||||
SyncIndex matchIndex;
|
||||
|
||||
// State defines how the leader should interact with the follower.
|
||||
//
|
||||
// When in StateProbe, leader sends at most one replication message
|
||||
// per heartbeat interval. It also probes actual progress of the follower.
|
||||
//
|
||||
// When in StateReplicate, leader optimistically increases next
|
||||
// to the latest entry sent after sending replication message. This is
|
||||
// an optimized state for fast replicating log entries to the follower.
|
||||
//
|
||||
// When in StateSnapshot, leader should have sent out snapshot
|
||||
// before and stops sending any replication message.
|
||||
ESyncRaftProgressState state;
|
||||
|
||||
// PendingSnapshot is used in StateSnapshot.
|
||||
// If there is a pending snapshot, the pendingSnapshot will be set to the
|
||||
// index of the snapshot. If pendingSnapshot is set, the replication process of
|
||||
// this Progress will be paused. raft will not resend snapshot until the pending one
|
||||
// is reported to be failed.
|
||||
SyncIndex pendingSnapshotIndex;
|
||||
|
||||
// RecentActive is true if the progress is recently active. Receiving any messages
|
||||
// from the corresponding follower indicates the progress is active.
|
||||
// RecentActive can be reset to false after an election timeout.
|
||||
//
|
||||
// TODO(tbg): the leader should always have this set to true.
|
||||
bool recentActive;
|
||||
|
||||
// ProbeSent is used while this follower is in StateProbe. When ProbeSent is
|
||||
// true, raft should pause sending replication message to this peer until
|
||||
// ProbeSent is reset. See ProbeAcked() and IsPaused().
|
||||
bool probeSent;
|
||||
|
||||
// Inflights is a sliding window for the inflight messages.
|
||||
// Each inflight message contains one or more log entries.
|
||||
// The max number of entries per message is defined in raft config as MaxSizePerMsg.
|
||||
// Thus inflight effectively limits both the number of inflight messages
|
||||
// and the bandwidth each Progress can use.
|
||||
// When inflights is Full, no more message should be sent.
|
||||
// When a leader sends out a message, the index of the last
|
||||
// entry should be added to inflights. The index MUST be added
|
||||
// into inflights in order.
|
||||
// When a leader receives a reply, the previous inflights should
|
||||
// be freed by calling inflights.FreeLE with the index of the last
|
||||
// received entry.
|
||||
SSyncRaftInflights* inflights;
|
||||
|
||||
// IsLearner is true if this progress is tracked for a learner.
|
||||
bool isLearner;
|
||||
};
|
||||
|
||||
struct SSyncRaftProgressMap {
|
||||
// map nodeId -> SSyncRaftProgress*
|
||||
SHashObj* progressMap;
|
||||
};
|
||||
|
||||
static FORCE_INLINE const char* syncRaftProgressStateString(const SSyncRaftProgress* progress) {
|
||||
return kProgressStateString[progress->state];
|
||||
}
|
||||
|
||||
void syncRaftResetProgress(SSyncRaft* pRaft, SSyncRaftProgress* progress);
|
||||
|
||||
// BecomeProbe transitions into StateProbe. Next is reset to Match+1 or,
|
||||
// optionally and if larger, the index of the pending snapshot.
|
||||
void syncRaftProgressBecomeProbe(SSyncRaftProgress* progress);
|
||||
|
||||
// BecomeReplicate transitions into StateReplicate, resetting Next to Match+1.
|
||||
void syncRaftProgressBecomeReplicate(SSyncRaftProgress* progress);
|
||||
|
||||
// MaybeUpdate is called when an MsgAppResp arrives from the follower, with the
|
||||
// index acked by it. The method returns false if the given n index comes from
|
||||
// an outdated message. Otherwise it updates the progress and returns true.
|
||||
bool syncRaftProgressMaybeUpdate(SSyncRaftProgress* progress, SyncIndex lastIndex);
|
||||
|
||||
// OptimisticUpdate signals that appends all the way up to and including index n
|
||||
// are in-flight. As a result, Next is increased to n+1.
|
||||
static FORCE_INLINE void syncRaftProgressOptimisticNextIndex(SSyncRaftProgress* progress, SyncIndex nextIndex) {
  // The stored next index is one past the index passed in.
  const SyncIndex following = nextIndex + 1;
  progress->nextIndex = following;
}
|
||||
|
||||
// MaybeDecrTo adjusts the Progress to the receipt of a MsgApp rejection. The
|
||||
// arguments are the index of the append message rejected by the follower, and
|
||||
// the hint that we want to decrease to.
|
||||
//
|
||||
// Rejections can happen spuriously as messages are sent out of order or
|
||||
// duplicated. In such cases, the rejection pertains to an index that the
|
||||
// Progress already knows were previously acknowledged, and false is returned
|
||||
// without changing the Progress.
|
||||
//
|
||||
// If the rejection is genuine, Next is lowered sensibly, and the Progress is
|
||||
// cleared for sending log entries.
|
||||
bool syncRaftProgressMaybeDecrTo(SSyncRaftProgress* progress,
|
||||
SyncIndex rejected, SyncIndex matchHint);
|
||||
|
||||
// IsPaused returns whether sending log entries to this node has been throttled.
|
||||
// This is done when a node has rejected recent MsgApps, is currently waiting
|
||||
// for a snapshot, or has reached the MaxInflightMsgs limit. In normal
|
||||
// operation, this is false. A throttled node will be contacted less frequently
|
||||
// until it has reached a state in which it's able to accept a steady stream of
|
||||
// log entries again.
|
||||
bool syncRaftProgressIsPaused(SSyncRaftProgress* progress);
|
||||
|
||||
// Accessor: returns the progress's nextIndex field.
static FORCE_INLINE SyncIndex syncRaftProgressNextIndex(SSyncRaftProgress* progress) {
  const SyncIndex next = progress->nextIndex;
  return next;
}
|
||||
|
||||
static FORCE_INLINE ESyncRaftProgressState syncRaftProgressInReplicate(SSyncRaftProgress* progress) {
|
||||
return progress->state == PROGRESS_STATE_REPLICATE;
|
||||
}
|
||||
|
||||
static FORCE_INLINE ESyncRaftProgressState syncRaftProgressInSnapshot(SSyncRaftProgress* progress) {
|
||||
return progress->state == PROGRESS_STATE_SNAPSHOT;
|
||||
}
|
||||
|
||||
static FORCE_INLINE ESyncRaftProgressState syncRaftProgressInProbe(SSyncRaftProgress* progress) {
|
||||
return progress->state == PROGRESS_STATE_PROBE;
|
||||
}
|
||||
|
||||
// Accessor: returns the progress's recentActive flag.
static FORCE_INLINE bool syncRaftProgressRecentActive(SSyncRaftProgress* progress) {
  const bool active = progress->recentActive;
  return active;
}
|
||||
|
||||
void syncRaftInitProgressMap(SSyncRaftProgressMap* progressMap);
|
||||
void syncRaftFreeProgressMap(SSyncRaftProgressMap* progressMap);
|
||||
|
||||
void syncRaftClearProgressMap(SSyncRaftProgressMap* progressMap);
|
||||
void syncRaftCopyProgressMap(SSyncRaftProgressMap* from, SSyncRaftProgressMap* to);
|
||||
|
||||
SSyncRaftProgress* syncRaftFindProgressByNodeId(const SSyncRaftProgressMap* progressMap, SyncNodeId id);
|
||||
|
||||
int syncRaftAddToProgressMap(SSyncRaftProgressMap* progressMap, SSyncRaftProgress* progress);
|
||||
|
||||
void syncRaftRemoveFromProgressMap(SSyncRaftProgressMap* progressMap, SyncNodeId id);
|
||||
|
||||
bool syncRaftIsInProgressMap(SSyncRaftProgressMap* progressMap, SyncNodeId id);
|
||||
|
||||
/**
|
||||
* return true if progress's log is up-to-date
|
||||
**/
|
||||
bool syncRaftProgressIsUptodate(SSyncRaft* pRaft, SSyncRaftProgress* progress);
|
||||
|
||||
// BecomeSnapshot moves the Progress to StateSnapshot with the specified pending
|
||||
// snapshot index.
|
||||
void syncRaftProgressBecomeSnapshot(SSyncRaftProgress* progress, SyncIndex snapshotIndex);
|
||||
|
||||
void syncRaftCopyProgress(const SSyncRaftProgress* from, SSyncRaftProgress* to);
|
||||
|
||||
// return true if reach the end
|
||||
bool syncRaftIterateProgressMap(const SSyncRaftProgressMap* progressMap, SSyncRaftProgress *pProgress);
|
||||
|
||||
bool syncRaftVisitProgressMap(SSyncRaftProgressMap* progressMap, visitProgressFp fp, void* arg);
|
||||
|
||||
#if 0
|
||||
|
||||
void syncRaftProgressAbortSnapshot(SSyncRaft* pRaft, int i);
|
||||
|
||||
|
||||
|
||||
SyncIndex syncRaftProgressMatchIndex(SSyncRaft* pRaft, int i);
|
||||
|
||||
void syncRaftProgressUpdateLastSend(SSyncRaft* pRaft, int i);
|
||||
|
||||
void syncRaftProgressUpdateSnapshotLastSend(SSyncRaft* pRaft, int i);
|
||||
|
||||
bool syncRaftProgressResetRecentRecv(SSyncRaft* pRaft, int i);
|
||||
|
||||
void syncRaftProgressMarkRecentRecv(SSyncRaft* pRaft, int i);
|
||||
|
||||
|
||||
|
||||
void syncRaftProgressAbortSnapshot(SSyncRaft* pRaft, int i);
|
||||
|
||||
#endif
|
||||
|
||||
#endif /* TD_SYNC_RAFT_PROGRESS_H */
|
|
@ -1,132 +0,0 @@
|
|||
/*
|
||||
* Copyright (c) 2019 TAOS Data, Inc. <cli@taosdata.com>
|
||||
*
|
||||
* This program is free software: you can use, redistribute, and/or modify
|
||||
* it under the terms of the GNU Affero General Public License, version 3
|
||||
* or later ("AGPL"), as published by the Free Software Foundation.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful, but WITHOUT
|
||||
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||
* FITNESS FOR A PARTICULAR PURPOSE.
|
||||
*
|
||||
* You should have received a copy of the GNU Affero General Public License
|
||||
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#ifndef _TD_LIBS_SYNC_RAFT_PROGRESS_TRACKER_H
|
||||
#define _TD_LIBS_SYNC_RAFT_PROGRESS_TRACKER_H
|
||||
|
||||
#include "sync_type.h"
|
||||
#include "sync_raft_quorum.h"
|
||||
#include "sync_raft_quorum_joint.h"
|
||||
#include "sync_raft_progress.h"
|
||||
#include "sync_raft_proto.h"
|
||||
#include "thash.h"
|
||||
|
||||
// Config reflects the configuration tracked in a ProgressTracker.
|
||||
struct SSyncRaftProgressTrackerConfig {
|
||||
SSyncRaftQuorumJointConfig voters;
|
||||
|
||||
// autoLeave is true if the configuration is joint and a transition to the
|
||||
// incoming configuration should be carried out automatically by Raft when
|
||||
// this is possible. If false, the configuration will be joint until the
|
||||
// application initiates the transition manually.
|
||||
bool autoLeave;
|
||||
|
||||
// Learners is a set of IDs corresponding to the learners active in the
|
||||
// current configuration.
|
||||
//
|
||||
// Invariant: Learners and Voters do not intersect, i.e. if a peer is in
|
||||
// either half of the joint config, it can't be a learner; if it is a
|
||||
// learner it can't be in either half of the joint config. This invariant
|
||||
// simplifies the implementation since it allows peers to have clarity about
|
||||
// its current role without taking into account joint consensus.
|
||||
SSyncRaftNodeMap learners;
|
||||
|
||||
// When we turn a voter into a learner during a joint consensus transition,
|
||||
// we cannot add the learner directly when entering the joint state. This is
|
||||
// because this would violate the invariant that the intersection of
|
||||
// voters and learners is empty. For example, assume a Voter is removed and
|
||||
// immediately re-added as a learner (or in other words, it is demoted):
|
||||
//
|
||||
// Initially, the configuration will be
|
||||
//
|
||||
// voters: {1 2 3}
|
||||
// learners: {}
|
||||
//
|
||||
// and we want to demote 3. Entering the joint configuration, we naively get
|
||||
//
|
||||
// voters: {1 2} & {1 2 3}
|
||||
// learners: {3}
|
||||
//
|
||||
// but this violates the invariant (3 is both voter and learner). Instead,
|
||||
// we get
|
||||
//
|
||||
// voters: {1 2} & {1 2 3}
|
||||
// learners: {}
|
||||
// next_learners: {3}
|
||||
//
|
||||
// Where 3 is now still purely a voter, but we are remembering the intention
|
||||
// to make it a learner upon transitioning into the final configuration:
|
||||
//
|
||||
// voters: {1 2}
|
||||
// learners: {3}
|
||||
// next_learners: {}
|
||||
//
|
||||
// Note that next_learners is not used while adding a learner that is not
|
||||
// also a voter in the joint config. In this case, the learner is added
|
||||
// right away when entering the joint configuration, so that it is caught up
|
||||
// as soon as possible.
|
||||
SSyncRaftNodeMap learnersNext;
|
||||
};
|
||||
|
||||
struct SSyncRaftProgressTracker {
|
||||
SSyncRaftProgressTrackerConfig config;
|
||||
|
||||
SSyncRaftProgressMap progressMap;
|
||||
|
||||
// nodeid -> ESyncRaftVoteType map
|
||||
SHashObj* votesMap;
|
||||
|
||||
int maxInflightMsgs;
|
||||
|
||||
SSyncRaft* pRaft;
|
||||
};
|
||||
|
||||
SSyncRaftProgressTracker* syncRaftOpenProgressTracker(SSyncRaft* pRaft);
|
||||
|
||||
void syncRaftInitTrackConfig(SSyncRaftProgressTrackerConfig* config);
|
||||
void syncRaftFreeTrackConfig(SSyncRaftProgressTrackerConfig* config);
|
||||
|
||||
void syncRaftFreeTrackConfig(SSyncRaftProgressTrackerConfig* config);
|
||||
|
||||
// ResetVotes prepares for a new round of vote counting via recordVote.
|
||||
void syncRaftResetVotes(SSyncRaftProgressTracker*);
|
||||
|
||||
void syncRaftProgressVisit(SSyncRaftProgressTracker*, visitProgressFp visit, void* arg);
|
||||
|
||||
// RecordVote records that the node with the given id voted for this Raft
|
||||
// instance if v == true (and declined it otherwise).
|
||||
void syncRaftRecordVote(SSyncRaftProgressTracker* tracker, SyncNodeId id, bool grant);
|
||||
|
||||
void syncRaftCopyTrackerConfig(const SSyncRaftProgressTrackerConfig* from, SSyncRaftProgressTrackerConfig* to);
|
||||
|
||||
int syncRaftCheckTrackerConfigInProgress(SSyncRaftProgressTrackerConfig* config, SSyncRaftProgressMap* progressMap);
|
||||
|
||||
// TallyVotes returns the number of granted and rejected Votes, and whether the
|
||||
// election outcome is known.
|
||||
ESyncRaftVoteResult syncRaftTallyVotes(SSyncRaftProgressTracker* tracker, int* rejected, int *granted);
|
||||
|
||||
void syncRaftConfigState(SSyncRaftProgressTracker* tracker, SSyncConfigState* cs);
|
||||
|
||||
// Committed returns the largest log index known to be committed based on what
|
||||
// the voting members of the group have acknowledged.
|
||||
SyncIndex syncRaftCommittedIndex(SSyncRaftProgressTracker* tracker);
|
||||
|
||||
// QuorumActive returns true if the quorum is active from the view of the local
|
||||
// raft state machine. Otherwise, it returns false.
|
||||
bool syncRaftQuorumActive(SSyncRaftProgressTracker* tracker);
|
||||
|
||||
bool syncRaftIsInNodeMap(const SSyncRaftNodeMap* nodeMap, SyncNodeId nodeId);
|
||||
|
||||
#endif /* _TD_LIBS_SYNC_RAFT_PROGRESS_TRACKER_H */
|
|
@ -1,77 +0,0 @@
|
|||
/*
|
||||
* Copyright (c) 2019 TAOS Data, Inc. <cli@taosdata.com>
|
||||
*
|
||||
* This program is free software: you can use, redistribute, and/or modify
|
||||
* it under the terms of the GNU Affero General Public License, version 3
|
||||
* or later ("AGPL"), as published by the Free Software Foundation.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful, but WITHOUT
|
||||
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||
* FITNESS FOR A PARTICULAR PURPOSE.
|
||||
*
|
||||
* You should have received a copy of the GNU Affero General Public License
|
||||
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#ifndef TD_SYNC_RAFT_PROTO_H
|
||||
#define TD_SYNC_RAFT_PROTO_H
|
||||
|
||||
#include "sync_type.h"
|
||||
#include "sync_raft_node_map.h"
|
||||
|
||||
/**
 * Kind of a single membership change operation.
 **/
typedef enum ESyncRaftConfChangeType {
  SYNC_RAFT_Conf_AddNode        = 0,  // add a node
  SYNC_RAFT_Conf_RemoveNode     = 1,  // remove a node
  SYNC_RAFT_Conf_UpdateNode     = 2,  // update a node
  SYNC_RAFT_Conf_AddLearnerNode = 3,  // add a node as learner
} ESyncRaftConfChangeType;
|
||||
|
||||
// ConfChangeSingle is an individual configuration change operation. Multiple
|
||||
// such operations can be carried out atomically via a ConfChangeV2.
|
||||
typedef struct SSyncConfChangeSingle {
|
||||
ESyncRaftConfChangeType type;
|
||||
SyncNodeId nodeId;
|
||||
} SSyncConfChangeSingle;
|
||||
|
||||
typedef struct SSyncConfChangeSingleArray {
|
||||
int n;
|
||||
SSyncConfChangeSingle* changes;
|
||||
} SSyncConfChangeSingleArray;
|
||||
|
||||
typedef struct SSyncConfigState {
|
||||
// The voters in the incoming config. (If the configuration is not joint,
|
||||
// then the outgoing config is empty).
|
||||
SSyncRaftNodeMap voters;
|
||||
|
||||
// The learners in the incoming config.
|
||||
SSyncRaftNodeMap learners;
|
||||
|
||||
// The voters in the outgoing config.
|
||||
SSyncRaftNodeMap votersOutgoing;
|
||||
|
||||
// The nodes that will become learners when the outgoing config is removed.
|
||||
// These nodes are necessarily currently in nodes_joint (or they would have
|
||||
// been added to the incoming config right away).
|
||||
SSyncRaftNodeMap learnersNext;
|
||||
|
||||
// If set, the config is joint and Raft will automatically transition into
|
||||
// the final config (i.e. remove the outgoing config) when this is safe.
|
||||
bool autoLeave;
|
||||
} SSyncConfigState;
|
||||
|
||||
static FORCE_INLINE bool syncRaftConfArrayIsEmpty(const SSyncConfChangeSingleArray* ary) {
|
||||
return ary->n == 0;
|
||||
}
|
||||
|
||||
// Puts the array into its empty state: no entries and no backing storage.
static FORCE_INLINE void syncRaftInitConfArray(SSyncConfChangeSingleArray* ary) {
  ary->n = 0;
  ary->changes = NULL;
}
|
||||
|
||||
// Releases the storage behind the array and returns it to the empty state.
//
// free(NULL) is a no-op, so no NULL guard is needed. The pointer and count are
// reset afterwards so that a second call is harmless and
// syncRaftConfArrayIsEmpty() reports the truth once the entries are gone.
static FORCE_INLINE void syncRaftFreeConfArray(SSyncConfChangeSingleArray* ary) {
  free(ary->changes);
  ary->changes = NULL;
  ary->n = 0;
}
|
||||
|
||||
#endif /* TD_SYNC_RAFT_PROTO_H */
|
|
@ -1,40 +0,0 @@
|
|||
/*
|
||||
* Copyright (c) 2019 TAOS Data, Inc. <cli@taosdata.com>
|
||||
*
|
||||
* This program is free software: you can use, redistribute, and/or modify
|
||||
* it under the terms of the GNU Affero General Public License, version 3
|
||||
* or later ("AGPL"), as published by the Free Software Foundation.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful, but WITHOUT
|
||||
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||
* FITNESS FOR A PARTICULAR PURPOSE.
|
||||
*
|
||||
* You should have received a copy of the GNU Affero General Public License
|
||||
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#ifndef TD_SYNC_RAFT_QUORUM_H
|
||||
#define TD_SYNC_RAFT_QUORUM_H
|
||||
|
||||
/**
 * ESyncRaftVoteResult is the outcome of a vote.
 **/
typedef enum {
  // The decision still depends on future votes: neither "yes" nor "no" has
  // reached quorum yet.
  SYNC_RAFT_VOTE_PENDING = 1,

  // The quorum has voted "no".
  SYNC_RAFT_VOTE_LOST = 2,

  // The quorum has voted "yes".
  SYNC_RAFT_VOTE_WON = 3,
} ESyncRaftVoteResult;
|
||||
|
||||
#endif /* TD_SYNC_RAFT_QUORUM_H */
|
|
@ -1,84 +0,0 @@
|
|||
/*
|
||||
* Copyright (c) 2019 TAOS Data, Inc. <cli@taosdata.com>
|
||||
*
|
||||
* This program is free software: you can use, redistribute, and/or modify
|
||||
* it under the terms of the GNU Affero General Public License, version 3
|
||||
* or later ("AGPL"), as published by the Free Software Foundation.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful, but WITHOUT
|
||||
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||
* FITNESS FOR A PARTICULAR PURPOSE.
|
||||
*
|
||||
* You should have received a copy of the GNU Affero General Public License
|
||||
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#ifndef _TD_LIBS_SYNC_RAFT_QUORUM_JOINT_H
|
||||
#define _TD_LIBS_SYNC_RAFT_QUORUM_JOINT_H
|
||||
|
||||
#include "taosdef.h"
|
||||
#include "sync.h"
|
||||
#include "sync_type.h"
|
||||
#include "sync_raft_node_map.h"
|
||||
#include "thash.h"
|
||||
|
||||
// JointConfig is a configuration of two groups of (possibly overlapping)
|
||||
// majority configurations. Decisions require the support of both majorities.
|
||||
typedef struct SSyncRaftQuorumJointConfig {
|
||||
SSyncRaftNodeMap outgoing;
|
||||
SSyncRaftNodeMap incoming;
|
||||
} SSyncRaftQuorumJointConfig;
|
||||
|
||||
// IDs returns a newly initialized map representing the set of voters present
|
||||
// in the joint configuration.
|
||||
void syncRaftJointConfigIDs(SSyncRaftQuorumJointConfig* config, SSyncRaftNodeMap* nodeMap);
|
||||
|
||||
// CommittedIndex returns the largest committed index for the given joint
|
||||
// quorum. An index is jointly committed if it is committed in both constituent
|
||||
// majorities.
|
||||
SyncIndex syncRaftJointConfigCommittedIndex(const SSyncRaftQuorumJointConfig* config, matchAckIndexerFp indexer, void* arg);
|
||||
|
||||
// VoteResult takes a mapping of voters to yes/no (true/false) votes and returns
|
||||
// a result indicating whether the vote is pending, lost, or won. A joint quorum
|
||||
// requires both majority quorums to vote in favor.
|
||||
ESyncRaftVoteType syncRaftVoteResult(SSyncRaftQuorumJointConfig* config, SHashObj* votesMap);
|
||||
|
||||
void syncRaftInitQuorumJointConfig(SSyncRaftQuorumJointConfig* config);
|
||||
|
||||
// Reports whether node `id` is a member of the outgoing half of the joint
// configuration.
static FORCE_INLINE bool syncRaftJointConfigInOutgoing(const SSyncRaftQuorumJointConfig* config, SyncNodeId id) {
  return syncRaftIsInNodeMap(&(config->outgoing), id);
}
|
||||
|
||||
// Reports whether node `id` is a member of the incoming half of the joint
// configuration.
static FORCE_INLINE bool syncRaftJointConfigInIncoming(const SSyncRaftQuorumJointConfig* config, SyncNodeId id) {
  return syncRaftIsInNodeMap(&(config->incoming), id);
}
|
||||
|
||||
// NOTE(review): going by the names, these add node `id` to, respectively
// remove it from, the incoming half of the joint configuration — the
// definitions are not visible here, confirm.
void syncRaftJointConfigAddToIncoming(SSyncRaftQuorumJointConfig* config, SyncNodeId id);
void syncRaftJointConfigRemoveFromIncoming(SSyncRaftQuorumJointConfig* config, SyncNodeId id);
|
||||
|
||||
static FORCE_INLINE const SSyncRaftNodeMap* syncRaftJointConfigIncoming(const SSyncRaftQuorumJointConfig* config) {
|
||||
return &config->incoming;
|
||||
}
|
||||
|
||||
static FORCE_INLINE const SSyncRaftNodeMap* syncRaftJointConfigOutgoing(const SSyncRaftQuorumJointConfig* config) {
|
||||
return &config->outgoing;
|
||||
}
|
||||
|
||||
// Clears the outgoing half of the joint configuration via
// syncRaftClearNodeMap.
static FORCE_INLINE void syncRaftJointConfigClearOutgoing(SSyncRaftQuorumJointConfig* config) {
  syncRaftClearNodeMap(&(config->outgoing));
}
|
||||
|
||||
static FORCE_INLINE bool syncRaftJointConfigIsIncomingEmpty(const SSyncRaftQuorumJointConfig* config) {
|
||||
return syncRaftNodeMapSize(&config->incoming) == 0;
|
||||
}
|
||||
|
||||
static FORCE_INLINE bool syncRaftJointConfigIsOutgoingEmpty(const SSyncRaftQuorumJointConfig* config) {
|
||||
return syncRaftNodeMapSize(&config->outgoing) == 0;
|
||||
}
|
||||
|
||||
// Reports whether node `id` is a member of the outgoing half of the joint
// configuration. Performs the same check as syncRaftJointConfigInOutgoing.
static FORCE_INLINE bool syncRaftJointConfigIsInOutgoing(const SSyncRaftQuorumJointConfig* config, SyncNodeId id) {
  return syncRaftIsInNodeMap(&(config->outgoing), id);
}
|
||||
|
||||
#endif /* _TD_LIBS_SYNC_RAFT_QUORUM_JOINT_H */
|
|
@ -1,36 +0,0 @@
|
|||
/*
|
||||
* Copyright (c) 2019 TAOS Data, Inc. <cli@taosdata.com>
|
||||
*
|
||||
* This program is free software: you can use, redistribute, and/or modify
|
||||
* it under the terms of the GNU Affero General Public License, version 3
|
||||
* or later ("AGPL"), as published by the Free Software Foundation.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful, but WITHOUT
|
||||
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||
* FITNESS FOR A PARTICULAR PURPOSE.
|
||||
*
|
||||
* You should have received a copy of the GNU Affero General Public License
|
||||
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#ifndef _TD_LIBS_SYNC_RAFT_QUORUM_MAJORITY_H
|
||||
#define _TD_LIBS_SYNC_RAFT_QUORUM_MAJORITY_H
|
||||
|
||||
#include "sync.h"
|
||||
#include "sync_type.h"
|
||||
#include "sync_raft_quorum.h"
|
||||
#include "thash.h"
|
||||
|
||||
/**
|
||||
* syncRaftMajorityVoteResult takes a mapping of voters to yes/no (true/false) votes and returns
|
||||
* a result indicating whether the vote is pending (i.e. neither a quorum of
|
||||
* yes/no has been reached), won (a quorum of yes has been reached), or lost (a
|
||||
* quorum of no has been reached).
|
||||
**/
|
||||
ESyncRaftVoteResult syncRaftMajorityVoteResult(SSyncRaftNodeMap* config, SHashObj* votesMap);
|
||||
|
||||
// CommittedIndex computes the committed index from those supplied via the
|
||||
// provided AckedIndexer (for the active config).
|
||||
SyncIndex syncRaftMajorityConfigCommittedIndex(const SSyncRaftNodeMap* config, matchAckIndexerFp indexer, void* arg);
|
||||
|
||||
#endif /* _TD_LIBS_SYNC_RAFT_QUORUM_MAJORITY_H */
|
|
@ -1,33 +0,0 @@
|
|||
/*
|
||||
* Copyright (c) 2019 TAOS Data, Inc. <cli@taosdata.com>
|
||||
*
|
||||
* This program is free software: you can use, redistribute, and/or modify
|
||||
* it under the terms of the GNU Affero General Public License, version 3
|
||||
* or later ("AGPL"), as published by the Free Software Foundation.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful, but WITHOUT
|
||||
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||
* FITNESS FOR A PARTICULAR PURPOSE.
|
||||
*
|
||||
* You should have received a copy of the GNU Affero General Public License
|
||||
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#ifndef TD_SYNC_RAFT_RESTORE_H
|
||||
#define TD_SYNC_RAFT_RESTORE_H
|
||||
|
||||
#include "sync_type.h"
|
||||
#include "sync_raft_proto.h"
|
||||
|
||||
// syncRaftRestoreConfig takes a Changer (which must represent an empty configuration), and
|
||||
// runs a sequence of changes enacting the configuration described in the
|
||||
// ConfState.
|
||||
//
|
||||
// TODO(tbg) it's silly that this takes a Changer. Unravel this by making sure
|
||||
// the Changer only needs a ProgressMap (not a whole Tracker) at which point
|
||||
// this can just take LastIndex and MaxInflight directly instead and cook up
|
||||
// the results from that alone.
|
||||
int syncRaftRestoreConfig(SSyncRaftChanger* changer, const SSyncConfigState* cs,
|
||||
SSyncRaftProgressTrackerConfig* config, SSyncRaftProgressMap* progressMap);
|
||||
|
||||
#endif /* TD_SYNC_RAFT_RESTORE_H */
|
|
@ -1,91 +0,0 @@
|
|||
/*
|
||||
* Copyright (c) 2019 TAOS Data, Inc. <cli@taosdata.com>
|
||||
*
|
||||
* This program is free software: you can use, redistribute, and/or modify
|
||||
* it under the terms of the GNU Affero General Public License, version 3
|
||||
* or later ("AGPL"), as published by the Free Software Foundation.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful, but WITHOUT
|
||||
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||
* FITNESS FOR A PARTICULAR PURPOSE.
|
||||
*
|
||||
* You should have received a copy of the GNU Affero General Public License
|
||||
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#ifndef _TD_LIBS_SYNC_TYPE_H
|
||||
#define _TD_LIBS_SYNC_TYPE_H
|
||||
|
||||
#include <stdint.h>
|
||||
#include "sync.h"
|
||||
#include "osMath.h"
|
||||
|
||||
// Sentinel node id meaning "no node". Parenthesized so the negative literal
// always expands as a single operand.
#define SYNC_NON_NODE_ID (-1)

// Sentinel term meaning "no term".
#define SYNC_NON_TERM 0
|
||||
|
||||
// Scalar types for time values and tick counters.
typedef int32_t  SyncTime;
typedef uint32_t SyncTick;

// Forward declarations of the raft data structures, so that headers can refer
// to them through pointers without pulling in the full definitions.
typedef struct SSyncRaft                      SSyncRaft;
typedef struct SSyncRaftProgress              SSyncRaftProgress;
typedef struct SSyncRaftProgressMap           SSyncRaftProgressMap;
typedef struct SSyncRaftProgressTrackerConfig SSyncRaftProgressTrackerConfig;
typedef struct SSyncRaftNodeMap               SSyncRaftNodeMap;
typedef struct SSyncRaftProgressTracker       SSyncRaftProgressTracker;
typedef struct SSyncRaftChanger               SSyncRaftChanger;
typedef struct SSyncRaftLog                   SSyncRaftLog;
typedef struct SSyncRaftEntry                 SSyncRaftEntry;
|
||||
|
||||
// TMIN / TMAX are intentionally not defined in this header; the former local
// definitions were disabled with `#if 0` and have been removed as dead code.
// NOTE(review): presumably they are supplied by osMath.h — confirm.
|
||||
|
||||
|
||||
typedef struct SSyncServerState {
|
||||
SyncNodeId voteFor;
|
||||
SyncTerm term;
|
||||
SyncIndex commitIndex;
|
||||
} SSyncServerState;
|
||||
|
||||
typedef struct SSyncClusterConfig {
|
||||
// Log index number of current cluster config.
|
||||
SyncIndex index;
|
||||
|
||||
// Log index number of previous cluster config.
|
||||
SyncIndex prevIndex;
|
||||
|
||||
// current cluster
|
||||
const SSyncCluster* cluster;
|
||||
} SSyncClusterConfig;
|
||||
|
||||
/**
 * The kind of campaign a node is running.
 **/
typedef enum {
  SYNC_RAFT_CAMPAIGN_PRE_ELECTION = 0,  // pre-election campaign
  SYNC_RAFT_CAMPAIGN_ELECTION     = 1,  // election campaign
  SYNC_RAFT_CAMPAIGN_TRANSFER     = 2,  // transfer campaign
} ESyncRaftElectionType;
|
||||
|
||||
/**
 * The answer a node gives to a vote request.
 **/
typedef enum {
  SYNC_RAFT_VOTE_RESP_GRANT  = 1,  // the vote request is granted
  SYNC_RAFT_VOTE_RESP_REJECT = 2,  // the vote request is rejected
} ESyncRaftVoteType;
|
||||
|
||||
typedef void (*visitProgressFp)(SSyncRaftProgress* progress, void* arg);
|
||||
|
||||
typedef void (*matchAckIndexerFp)(SyncNodeId id, void* arg, SyncIndex* index);
|
||||
|
||||
#endif /* _TD_LIBS_SYNC_TYPE_H */
|
|
@ -1,325 +0,0 @@
|
|||
/*
|
||||
* Copyright (c) 2019 TAOS Data, Inc. <cli@taosdata.com>
|
||||
*
|
||||
* This program is free software: you can use, redistribute, and/or modify
|
||||
* it under the terms of the GNU Affero General Public License, version 3
|
||||
* or later ("AGPL"), as published by the Free Software Foundation.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful, but WITHOUT
|
||||
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||
* FITNESS FOR A PARTICULAR PURPOSE.
|
||||
*
|
||||
* You should have received a copy of the GNU Affero General Public License
|
||||
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#include "raft.h"
|
||||
#include "sync_raft_impl.h"
|
||||
#include "raft_log.h"
|
||||
#include "sync_raft_restore.h"
|
||||
#include "raft_replication.h"
|
||||
#include "sync_raft_config_change.h"
|
||||
#include "sync_raft_progress_tracker.h"
|
||||
#include "syncInt.h"
|
||||
|
||||
#define RAFT_READ_LOG_MAX_NUM 100
|
||||
|
||||
static int deserializeServerStateFromBuffer(SSyncServerState* server, const char* buffer, int n);
|
||||
static int deserializeClusterStateFromBuffer(SSyncConfigState* cluster, const char* buffer, int n);
|
||||
|
||||
static void switchToConfig(SSyncRaft* pRaft, const SSyncRaftProgressTrackerConfig* config,
|
||||
const SSyncRaftProgressMap* progressMap, SSyncConfigState* cs);
|
||||
|
||||
static void abortLeaderTransfer(SSyncRaft* pRaft);
|
||||
|
||||
static bool preHandleMessage(SSyncRaft* pRaft, const SSyncMessage* pMsg);
|
||||
static bool preHandleNewTermMessage(SSyncRaft* pRaft, const SSyncMessage* pMsg);
|
||||
static bool preHandleOldTermMessage(SSyncRaft* pRaft, const SSyncMessage* pMsg);
|
||||
|
||||
/**
 * Initialises the raft instance `pRaft` and restores its state.
 *
 * The instance is zeroed, the fsm / log store / state manager interfaces are
 * copied from `pInfo`, the node map, progress tracker and raft log are
 * created, the persisted server and cluster state are read through the state
 * manager, the configuration is restored into the tracker, and the node
 * finally becomes a follower at its restored term.
 *
 * NOTE(review): the memset below clears every field of *pRaft, including any
 * pNode the caller may have set beforehand — confirm that this is intended.
 *
 * TODO: resources created early (nodeInfoMap, tracker, log) are not released
 * when a later step fails.
 *
 * @param pRaft  raft instance to initialise
 * @param pInfo  start-up information (interfaces, vgId, appliedIndex)
 * @return 0 on success, -1 on failure
 **/
int32_t syncRaftStart(SSyncRaft* pRaft, const SSyncInfo* pInfo) {
  SSyncServerState               serverState;
  SSyncConfigState               confState;
  SSyncRaftProgressTrackerConfig config;
  SSyncRaftProgressMap           progressMap;
  SSyncRaftChanger               changer;
  SStateManager*                 stateManager;
  char*                          buf = NULL;
  int                            n = 0;

  // The deserializers are not guaranteed to fill their output, so start from
  // a well-defined state instead of reading indeterminate stack contents.
  memset(&serverState, 0, sizeof(serverState));
  memset(&confState, 0, sizeof(confState));

  memset(pRaft, 0, sizeof(SSyncRaft));

  memcpy(&pRaft->fsm, &pInfo->fsm, sizeof(SSyncFSM));
  memcpy(&pRaft->logStore, &pInfo->logStore, sizeof(SSyncLogStore));
  memcpy(&pRaft->stateManager, &pInfo->stateManager, sizeof(SStateManager));

  stateManager = &(pRaft->stateManager);

  pRaft->nodeInfoMap = taosHashInit(TSDB_MAX_REPLICA, taosGetDefaultHashFunction(TSDB_DATA_TYPE_INT), true, HASH_ENTRY_LOCK);
  if (pRaft->nodeInfoMap == NULL) {
    return -1;
  }

  // init progress tracker
  pRaft->tracker = syncRaftOpenProgressTracker(pRaft);
  if (pRaft->tracker == NULL) {
    return -1;
  }

  // open raft log
  if ((pRaft->log = syncRaftLogOpen()) == NULL) {
    return -1;
  }

  // read server state
  if (stateManager->readServerState(stateManager, &buf, &n) != 0) {
    syncError("readServerState for vgid %d fail", pInfo->vgId);
    return -1;
  }
  if (deserializeServerStateFromBuffer(&serverState, buf, n) != 0) {
    syncError("deserializeServerStateFromBuffer for vgid %d fail", pInfo->vgId);
    free(buf);  // do not leak the blob on the error path
    return -1;
  }
  free(buf);
  buf = NULL;
  //assert(initIndex <= serverState.commitIndex);

  // read config state
  if (stateManager->readClusterState(stateManager, &buf, &n) != 0) {
    syncError("readClusterState for vgid %d fail", pInfo->vgId);
    return -1;
  }
  if (deserializeClusterStateFromBuffer(&confState, buf, n) != 0) {
    syncError("deserializeClusterStateFromBuffer for vgid %d fail", pInfo->vgId);
    free(buf);  // do not leak the blob on the error path
    return -1;
  }
  free(buf);
  buf = NULL;

  changer = (SSyncRaftChanger) {
    .tracker = pRaft->tracker,
    .lastIndex = syncRaftLogLastIndex(pRaft->log),
  };

  if (syncRaftRestoreConfig(&changer, &confState, &config, &progressMap) < 0) {
    syncError("syncRaftRestoreConfig for vgid %d fail", pInfo->vgId);
    return -1;
  }

  // save restored config and progress map to tracker
  syncRaftCopyProgressMap(&progressMap, &pRaft->tracker->progressMap);
  syncRaftCopyTrackerConfig(&config, &pRaft->tracker->config);

  // free progress map and config
  syncRaftFreeProgressMap(&progressMap);
  syncRaftFreeTrackConfig(&config);

  if (!syncRaftIsEmptyServerState(&serverState)) {
    syncRaftLoadState(pRaft, &serverState);
  }

  if (pInfo->appliedIndex > 0) {
    syncRaftLogAppliedTo(pRaft->log, pInfo->appliedIndex);
  }

  syncRaftBecomeFollower(pRaft, pRaft->term, SYNC_NON_NODE_ID);

  syncInfo("[%d:%d] restore vgid %d state: snapshot index success",
    pRaft->selfGroupId, pRaft->selfId, pInfo->vgId);
  return 0;
}
|
||||
|
||||
/**
 * Feeds one message into the raft state machine.
 *
 * The message first goes through the term pre-check. Unless that check says
 * the message needs no further handling, it is dispatched: internal election
 * and vote messages have dedicated handlers, everything else goes to the
 * current step function. The message is freed in every case.
 *
 * @return always 0
 **/
int32_t syncRaftStep(SSyncRaft* pRaft, const SSyncMessage* pMsg) {
  syncDebug("from %d, type:%d, term:%" PRId64 ", state:%d",
    pMsg->from, pMsg->msgType, pMsg->term, pRaft->state);

  const bool handled = preHandleMessage(pRaft, pMsg);
  if (!handled) {
    if (pMsg->msgType == RAFT_MSG_INTERNAL_ELECTION) {
      syncRaftHandleElectionMessage(pRaft, pMsg);
    } else if (pMsg->msgType == RAFT_MSG_VOTE) {
      syncRaftHandleVoteMessage(pRaft, pMsg);
    } else {
      pRaft->stepFp(pRaft, pMsg);
    }
  }

  syncFreeMessage(pMsg);
  return 0;
}
|
||||
|
||||
/**
 * Advances the raft clock by one tick and runs the current tick function.
 *
 * @return always 0
 **/
int32_t syncRaftTick(SSyncRaft* pRaft) {
  ++pRaft->currentTick;
  pRaft->tickFp(pRaft);
  return 0;
}
|
||||
|
||||
// Placeholder: reports success without reading `buffer` or writing `server`.
static int deserializeServerStateFromBuffer(SSyncServerState* server, const char* buffer, int n) {
  (void)server;
  (void)buffer;
  (void)n;
  return 0;
}
|
||||
|
||||
// Placeholder: reports success without reading `buffer` or writing `cluster`.
static int deserializeClusterStateFromBuffer(SSyncConfigState* cluster, const char* buffer, int n) {
  (void)cluster;
  (void)buffer;
  (void)n;
  return 0;
}
|
||||
|
||||
// Progress visitor: forwards the visited progress entry to
// syncRaftMaybeSendAppend with `false` as its last argument. `arg` is the
// raft instance supplied to syncRaftProgressVisit.
static void visitProgressMaybeSendAppend(SSyncRaftProgress* progress, void* arg) {
  syncRaftMaybeSendAppend(arg, progress, false);
}
|
||||
|
||||
// switchToConfig reconfigures this node to use the provided configuration. It
|
||||
// updates the in-memory state and, when necessary, carries out additional
|
||||
// actions such as reacting to the removal of nodes or changed quorum
|
||||
// requirements.
|
||||
//
|
||||
// The inputs usually result from restoring a ConfState or applying a ConfChange.
|
||||
static void switchToConfig(SSyncRaft* pRaft, const SSyncRaftProgressTrackerConfig* config,
|
||||
const SSyncRaftProgressMap* progressMap, SSyncConfigState* cs) {
|
||||
SyncNodeId selfId = pRaft->selfId;
|
||||
int i;
|
||||
bool exist;
|
||||
SSyncRaftProgress* progress = NULL;
|
||||
|
||||
syncRaftConfigState(pRaft->tracker, cs);
|
||||
progress = syncRaftFindProgressByNodeId(&pRaft->tracker->progressMap, selfId);
|
||||
exist = (progress != NULL);
|
||||
|
||||
// Update whether the node itself is a learner, resetting to false when the
|
||||
// node is removed.
|
||||
if (exist) {
|
||||
pRaft->isLearner = progress->isLearner;
|
||||
} else {
|
||||
pRaft->isLearner = false;
|
||||
}
|
||||
|
||||
if ((!exist || pRaft->isLearner) && pRaft->state == TAOS_SYNC_STATE_LEADER) {
|
||||
// This node is leader and was removed or demoted. We prevent demotions
|
||||
// at the time writing but hypothetically we handle them the same way as
|
||||
// removing the leader: stepping down into the next Term.
|
||||
//
|
||||
// TODO(tbg): step down (for sanity) and ask follower with largest Match
|
||||
// to TimeoutNow (to avoid interruption). This might still drop some
|
||||
// proposals but it's better than nothing.
|
||||
//
|
||||
// TODO(tbg): test this branch. It is untested at the time of writing.
|
||||
return;
|
||||
}
|
||||
|
||||
// The remaining steps only make sense if this node is the leader and there
|
||||
// are other nodes.
|
||||
if (pRaft->state != TAOS_SYNC_STATE_LEADER || syncRaftNodeMapSize(&cs->voters) == 0) {
|
||||
return;
|
||||
}
|
||||
|
||||
if (syncRaftMaybeCommit(pRaft)) {
|
||||
// If the configuration change means that more entries are committed now,
|
||||
// broadcast/append to everyone in the updated config.
|
||||
syncRaftBroadcastAppend(pRaft);
|
||||
} else {
|
||||
// Otherwise, still probe the newly added replicas; there's no reason to
|
||||
// let them wait out a heartbeat interval (or the next incoming
|
||||
// proposal).
|
||||
syncRaftProgressVisit(pRaft->tracker, visitProgressMaybeSendAppend, pRaft);
|
||||
|
||||
// If the the leadTransferee was removed or demoted, abort the leadership transfer.
|
||||
SyncNodeId leadTransferee = pRaft->leadTransferee;
|
||||
if (leadTransferee != SYNC_NON_NODE_ID) {
|
||||
if (!syncRaftIsInNodeMap(&pRaft->tracker->config.voters.incoming, leadTransferee) &&
|
||||
!syncRaftIsInNodeMap(&pRaft->tracker->config.voters.outgoing, leadTransferee)) {
|
||||
abortLeaderTransfer(pRaft);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Cancels a pending leadership transfer by resetting the transfer target to
// "no node".
static void abortLeaderTransfer(SSyncRaft* pRaft) {
  pRaft->leadTransferee = (SYNC_NON_NODE_ID);
}
|
||||
|
||||
/**
|
||||
* pre-handle message, return true means no need to continue
|
||||
* Handle the message term, which may result in our stepping down to a follower.
|
||||
**/
|
||||
static bool preHandleMessage(SSyncRaft* pRaft, const SSyncMessage* pMsg) {
|
||||
// local message?
|
||||
if (pMsg->term == 0) {
|
||||
return false;
|
||||
}
|
||||
|
||||
if (pMsg->term > pRaft->term) {
|
||||
return preHandleNewTermMessage(pRaft, pMsg);
|
||||
} else if (pMsg->term < pRaft->term) {
|
||||
return preHandleOldTermMessage(pRaft, pMsg);
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
/**
 * Handles a message whose term is newer than ours.
 *
 * Our term is left alone in two cases:
 *  - the message is a PreVote request: we never change our term in response
 *    to a PreVote;
 *  - the message is a PreVote response that was not rejected: pre-vote
 *    requests are sent with a term in our future, and if the pre-vote is
 *    granted we increment our term ourselves once we get a quorum. If it is
 *    rejected, the term comes from the node that rejected our vote, so we
 *    should become a follower at the new term.
 * In every other case this node becomes a follower at the message's term.
 *
 * @return always false, i.e. the message is still to be processed
 **/
static bool preHandleNewTermMessage(SSyncRaft* pRaft, const SSyncMessage* pMsg) {
  ESyncRaftMessageType msgType = pMsg->msgType;

  // TODO: a vote request does not tell us who the leader is.
  SyncNodeId leaderId = (msgType == RAFT_MSG_VOTE) ? SYNC_NON_NODE_ID : pMsg->from;

  bool keepTerm = syncIsPreVoteMsg(pMsg) ||
                  (syncIsPreVoteRespMsg(pMsg) && !pMsg->voteResp.rejected);
  if (!keepTerm) {
    syncInfo("[%d:%d] [term:%" PRId64 "] received a %d message with higher term from %d [term:%" PRId64 "]",
      pRaft->selfGroupId, pRaft->selfId, pRaft->term, msgType, pMsg->from, pMsg->term);
    syncRaftBecomeFollower(pRaft, pMsg->term, leaderId);
  }

  return false;
}
|
||||
|
||||
/**
 * Handles a message whose term is older than ours.
 *
 * With checkQuorum enabled, an append message from a leader at a lower term
 * is answered with an empty append response. Such a message may simply have
 * been delayed in the network, but it can also mean that this node advanced
 * its term during a network partition and can now neither win an election
 * nor rejoin the majority on the old term. Without checkQuorum this is
 * resolved by incrementing term numbers in response to MsgVote with a higher
 * term; with checkQuorum we may not advance the term on MsgVote and must
 * generate other messages to advance the term. Together the two features
 * minimize the disruption caused by nodes that were removed from the
 * cluster's configuration: a removed node sends MsgVotes (or MsgPreVotes),
 * which are ignored, but it receives no MsgApp or MsgHeartbeat, so it does
 * not create disruptive term increases.
 *
 * Every other old-term message is logged and ignored.
 *
 * @return always true, i.e. the message needs no further handling
 **/
static bool preHandleOldTermMessage(SSyncRaft* pRaft, const SSyncMessage* pMsg) {
  const bool replyToStaleLeader = pRaft->checkQuorum && pMsg->msgType == RAFT_MSG_APPEND;

  if (!replyToStaleLeader) {
    // ignore other cases
    syncInfo("[%d:%d] [term:%" PRId64 "] ignored a %d message with lower term from %d [term:%" PRId64 "]",
      pRaft->selfGroupId, pRaft->selfId, pRaft->term, pMsg->msgType, pMsg->from, pMsg->term);
    return true;
  }

  SNodeInfo* pSender = syncRaftGetNodeById(pRaft, pMsg->from);
  if (pSender == NULL) {
    return true;
  }

  SSyncMessage* pReply = syncNewEmptyAppendRespMsg(pRaft->selfGroupId, pRaft->selfId, pRaft->term);
  if (pReply != NULL) {
    pRaft->io.send(pReply, pSender);
  }

  return true;
}
|
|
@ -1,48 +0,0 @@
|
|||
/*
|
||||
* Copyright (c) 2019 TAOS Data, Inc. <cli@taosdata.com>
|
||||
*
|
||||
* This program is free software: you can use, redistribute, and/or modify
|
||||
* it under the terms of the GNU Affero General Public License, version 3
|
||||
* or later ("AGPL"), as published by the Free Software Foundation.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful, but WITHOUT
|
||||
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||
* FITNESS FOR A PARTICULAR PURPOSE.
|
||||
*
|
||||
* You should have received a copy of the GNU Affero General Public License
|
||||
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#include "syncInt.h"
|
||||
#include "raft.h"
|
||||
#include "raft_log.h"
|
||||
#include "sync_raft_impl.h"
|
||||
#include "raft_message.h"
|
||||
|
||||
/**
 * Handle an append-entries message from a peer and answer it with an
 * append response.
 *
 * If the sender is unknown or the response cannot be allocated the message
 * is dropped. When the incoming index is already committed, the response
 * carries the local commit index; otherwise the receipt is only logged.
 *
 * @return always 0.
 */
int syncRaftHandleAppendEntriesMessage(SSyncRaft* pRaft, const SSyncMessage* pMsg) {
  const RaftMsg_Append_Entries* pAppend = &pMsg->appendEntries;

  SNodeInfo* pSender = syncRaftGetNodeById(pRaft, pMsg->from);
  if (pSender == NULL) {
    return 0;
  }

  SSyncMessage* pReply = syncNewEmptyAppendRespMsg(pRaft->selfGroupId, pRaft->selfId, pRaft->term);
  if (pReply == NULL) {
    return 0;
  }

  if (syncRaftLogIsCommitted(pRaft->log, pAppend->index)) {
    // Already-committed entries are ignored; just report our commit index.
    pReply->appendResp.index = pRaft->log->commitIndex;
  } else {
    syncInfo("[%d:%d] recv append from %d index %" PRId64"",
             pRaft->selfGroupId, pRaft->selfId, pMsg->from, pAppend->index);
  }

  pRaft->io.send(pReply, pSender);
  return 0;
}
|
|
@ -1,29 +0,0 @@
|
|||
/*
|
||||
* Copyright (c) 2019 TAOS Data, Inc. <cli@taosdata.com>
|
||||
*
|
||||
* This program is free software: you can use, redistribute, and/or modify
|
||||
* it under the terms of the GNU Affero General Public License, version 3
|
||||
* or later ("AGPL"), as published by the Free Software Foundation.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful, but WITHOUT
|
||||
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||
* FITNESS FOR A PARTICULAR PURPOSE.
|
||||
*
|
||||
* You should have received a copy of the GNU Affero General Public License
|
||||
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#include "syncInt.h"
|
||||
#include "raft.h"
|
||||
#include "raft_log.h"
|
||||
#include "raft_message.h"
|
||||
|
||||
/**
 * Start an election in response to an election message: a pre-election
 * when pRaft->preVote is set, a regular election otherwise.
 *
 * @return always 0.
 */
int syncRaftHandleElectionMessage(SSyncRaft* pRaft, const SSyncMessage* pMsg) {
  if (!pRaft->preVote) {
    syncRaftStartElection(pRaft, SYNC_RAFT_CAMPAIGN_ELECTION);
    return 0;
  }

  syncRaftStartElection(pRaft, SYNC_RAFT_CAMPAIGN_PRE_ELECTION);
  return 0;
}
|
|
@ -1,61 +0,0 @@
|
|||
/*
|
||||
* Copyright (c) 2019 TAOS Data, Inc. <cli@taosdata.com>
|
||||
*
|
||||
* This program is free software: you can use, redistribute, and/or modify
|
||||
* it under the terms of the GNU Affero General Public License, version 3
|
||||
* or later ("AGPL"), as published by the Free Software Foundation.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful, but WITHOUT
|
||||
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||
* FITNESS FOR A PARTICULAR PURPOSE.
|
||||
*
|
||||
* You should have received a copy of the GNU Affero General Public License
|
||||
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#include "syncInt.h"
|
||||
#include "raft.h"
|
||||
#include "sync_raft_impl.h"
|
||||
#include "raft_log.h"
|
||||
#include "raft_message.h"
|
||||
|
||||
static bool canGrantVoteMessage(SSyncRaft* pRaft, const SSyncMessage* pMsg);
|
||||
|
||||
/**
 * Answer a vote request from a peer.
 *
 * The decision is taken by canGrantVoteMessage(); the response carries the
 * campaign type of the request and a "rejected" flag that is the negation
 * of the decision. Requests from unknown nodes are dropped.
 *
 * @return always 0.
 */
int syncRaftHandleVoteMessage(SSyncRaft* pRaft, const SSyncMessage* pMsg) {
  SNodeInfo* pCandidate = syncRaftGetNodeById(pRaft, pMsg->from);
  if (pCandidate == NULL) {
    return 0;
  }

  // Local log position, used for the log line below.
  SyncIndex ourLastIndex = syncRaftLogLastIndex(pRaft->log);
  SyncTerm  ourLastTerm = syncRaftLogLastTerm(pRaft->log);

  bool granted = canGrantVoteMessage(pRaft, pMsg);

  SSyncMessage* pReply = syncNewVoteRespMsg(pRaft->selfGroupId, pRaft->selfId, pMsg->vote.cType, !granted);
  if (pReply == NULL) {
    return 0;
  }

  const char* verdict = granted ? "grant" : "reject";
  syncInfo("[%d:%d] [logterm: %" PRId64 ", index: %" PRId64 ", vote: %d] %s for %d"
           "[logterm: %" PRId64 ", index: %" PRId64 "] at term %" PRId64 "",
           pRaft->selfGroupId, pRaft->selfId, ourLastTerm, ourLastIndex, pRaft->voteFor,
           verdict,
           pMsg->from, pMsg->vote.lastTerm, pMsg->vote.lastIndex, pRaft->term);

  pRaft->io.send(pReply, pCandidate);
  return 0;
}
|
||||
|
||||
/**
 * Decide whether a vote request may be granted.
 *
 * A vote is possible when one of the following holds:
 *  - the request repeats a vote we have already cast for the sender;
 *  - we have not voted and know of no leader;
 *  - the request is a pre-election for a term newer than ours.
 * In addition the candidate's log must be up to date according to
 * syncRaftLogIsUptodate().
 */
static bool canGrantVoteMessage(SSyncRaft* pRaft, const SSyncMessage* pMsg) {
  bool repeatedVote = (pRaft->voteFor == pMsg->from);
  bool freeToVote = (pRaft->voteFor == SYNC_NON_NODE_ID && pRaft->leaderId == SYNC_NON_NODE_ID);
  bool futurePreVote = (pMsg->vote.cType == SYNC_RAFT_CAMPAIGN_PRE_ELECTION && pMsg->term > pRaft->term);

  if (!repeatedVote && !freeToVote && !futurePreVote) {
    return false;
  }

  // ...and we believe the candidate is up to date.
  return syncRaftLogIsUptodate(pRaft->log, pMsg->vote.lastIndex, pMsg->vote.lastTerm);
}
|
|
@ -1,60 +0,0 @@
|
|||
/*
|
||||
* Copyright (c) 2019 TAOS Data, Inc. <cli@taosdata.com>
|
||||
*
|
||||
* This program is free software: you can use, redistribute, and/or modify
|
||||
* it under the terms of the GNU Affero General Public License, version 3
|
||||
* or later ("AGPL"), as published by the Free Software Foundation.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful, but WITHOUT
|
||||
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||
* FITNESS FOR A PARTICULAR PURPOSE.
|
||||
*
|
||||
* You should have received a copy of the GNU Affero General Public License
|
||||
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#include "syncInt.h"
|
||||
#include "raft.h"
|
||||
#include "sync_raft_impl.h"
|
||||
#include "raft_message.h"
|
||||
|
||||
/**
 * Process a vote response while campaigning.
 *
 * Responses from unknown nodes, or received while this node is not a
 * candidate, are logged and ignored. Otherwise the vote is recorded through
 * syncRaftPollVote() and the outcome is acted upon:
 *  - won in pre-vote: start the real election;
 *  - won in election: become leader and broadcast append;
 *  - lost: step down to follower, keeping the current term.
 *
 * @return always 0.
 */
int syncRaftHandleVoteRespMessage(SSyncRaft* pRaft, const SSyncMessage* pMsg) {
  int granted = 0;
  int rejected = 0;

  SNodeInfo* pNode = syncRaftGetNodeById(pRaft, pMsg->from);
  if (pNode == NULL) {
    syncError("[%d:%d] recv vote resp from unknown server %d", pRaft->selfGroupId, pRaft->selfId, pMsg->from);
    return 0;
  }

  // A response arriving when we are no longer a candidate is ignored here
  // instead of being asserted on, so that this check is reachable in all builds.
  if (pRaft->state != TAOS_SYNC_STATE_CANDIDATE) {
    syncError("[%d:%d] is not candidate, ignore vote resp", pRaft->selfGroupId, pRaft->selfId);
    return 0;
  }

  // NOTE(review): the out-parameter order (&rejected, &granted) is kept from
  // the original call; confirm against the syncRaftPollVote prototype.
  ESyncRaftVoteResult result = syncRaftPollVote(pRaft, pMsg->from,
                                                pMsg->voteResp.cType == SYNC_RAFT_CAMPAIGN_PRE_ELECTION,
                                                !pMsg->voteResp.rejected, &rejected, &granted);

  // The quorum value was never computed in this function, so it is no longer
  // printed (it used to be read uninitialized).
  syncInfo("[%d:%d] has received %d votes and %d vote rejections",
           pRaft->selfGroupId, pRaft->selfId, granted, rejected);

  if (result == SYNC_RAFT_VOTE_WON) {
    if (pRaft->candidateState.inPreVote) {
      syncRaftCampaign(pRaft, SYNC_RAFT_CAMPAIGN_ELECTION);
    } else {
      syncRaftBecomeLeader(pRaft);
      syncRaftBroadcastAppend(pRaft);
    }
  } else if (result == SYNC_RAFT_VOTE_LOST) {
    // A pre-vote response may carry the future term of the pre-candidate;
    // our own term is reused when stepping down.
    syncRaftBecomeFollower(pRaft, pRaft->term, SYNC_NON_NODE_ID);
  }

  return 0;
}
|
|
@ -1,66 +0,0 @@
|
|||
/*
|
||||
* Copyright (c) 2019 TAOS Data, Inc. <cli@taosdata.com>
|
||||
*
|
||||
* This program is free software: you can use, redistribute, and/or modify
|
||||
* it under the terms of the GNU Affero General Public License, version 3
|
||||
* or later ("AGPL"), as published by the Free Software Foundation.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful, but WITHOUT
|
||||
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||
* FITNESS FOR A PARTICULAR PURPOSE.
|
||||
*
|
||||
* You should have received a copy of the GNU Affero General Public License
|
||||
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#include "raft_log.h"
|
||||
|
||||
SSyncRaftLog* syncRaftLogOpen() {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
SyncIndex syncRaftLogLastIndex(SSyncRaftLog* pLog) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
SyncIndex syncRaftLogSnapshotIndex(SSyncRaftLog* pLog) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
SyncTerm syncRaftLogLastTerm(SSyncRaftLog* pLog) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
void syncRaftLogAppliedTo(SSyncRaftLog* pLog, SyncIndex appliedIndex) {
|
||||
|
||||
}
|
||||
|
||||
bool syncRaftLogIsUptodate(SSyncRaftLog* pLog, SyncIndex index, SyncTerm term) {
|
||||
return true;
|
||||
}
|
||||
|
||||
int syncRaftLogNumOfPendingConf(SSyncRaftLog* pLog) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
bool syncRaftHasUnappliedLog(SSyncRaftLog* pLog) {
|
||||
return pLog->commitIndex > pLog->appliedIndex;
|
||||
}
|
||||
|
||||
SyncTerm syncRaftLogTermOf(SSyncRaftLog* pLog, SyncIndex index) {
|
||||
return SYNC_NON_TERM;
|
||||
}
|
||||
|
||||
int syncRaftLogAppend(SSyncRaftLog* pLog, SSyncRaftEntry *pEntries, int n) {
|
||||
|
||||
}
|
||||
|
||||
int syncRaftLogAcquire(SSyncRaftLog* pLog, SyncIndex index, int maxMsgSize,
|
||||
SSyncRaftEntry **ppEntries, int *n) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
void syncRaftLogRelease(SSyncRaftLog* pLog, SyncIndex index,
|
||||
SSyncRaftEntry *pEntries, int n) {
|
||||
return;
|
||||
}
|
|
@ -1,22 +0,0 @@
|
|||
/*
|
||||
* Copyright (c) 2019 TAOS Data, Inc. <cli@taosdata.com>
|
||||
*
|
||||
* This program is free software: you can use, redistribute, and/or modify
|
||||
* it under the terms of the GNU Affero General Public License, version 3
|
||||
* or later ("AGPL"), as published by the Free Software Foundation.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful, but WITHOUT
|
||||
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||
* FITNESS FOR A PARTICULAR PURPOSE.
|
||||
*
|
||||
* You should have received a copy of the GNU Affero General Public License
|
||||
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#include "raft_message.h"
|
||||
|
||||
void syncFreeMessage(const SSyncMessage* pMsg) {
|
||||
if (!syncIsInternalMsg(pMsg->msgType)) {
|
||||
free((SSyncMessage*)pMsg);
|
||||
}
|
||||
}
|
|
@ -1,110 +0,0 @@
|
|||
/*
|
||||
* Copyright (c) 2019 TAOS Data, Inc. <cli@taosdata.com>
|
||||
*
|
||||
* This program is free software: you can use, redistribute, and/or modify
|
||||
* it under the terms of the GNU Affero General Public License, version 3
|
||||
* or later ("AGPL"), as published by the Free Software Foundation.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful, but WITHOUT
|
||||
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||
* FITNESS FOR A PARTICULAR PURPOSE.
|
||||
*
|
||||
* You should have received a copy of the GNU Affero General Public License
|
||||
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#include "raft.h"
|
||||
#include "raft_log.h"
|
||||
#include "sync_raft_progress.h"
|
||||
#include "syncInt.h"
|
||||
#include "raft_replication.h"
|
||||
|
||||
static bool sendSnapshot(SSyncRaft* pRaft, SSyncRaftProgress* progress);
|
||||
static bool sendAppendEntries(SSyncRaft* pRaft, SSyncRaftProgress* progress,
|
||||
SyncIndex prevIndex, SyncTerm prevTerm,
|
||||
SSyncRaftEntry *entries, int nEntry);
|
||||
|
||||
// maybeSendAppend sends an append RPC with new entries to the given peer,
|
||||
// if necessary. Returns true if a message was sent. The sendIfEmpty
|
||||
// argument controls whether messages with no entries will be sent
|
||||
// ("empty" messages are useful to convey updated Commit indexes, but
|
||||
// are undesirable when we're sending multiple messages in a batch).
|
||||
bool syncRaftMaybeSendAppend(SSyncRaft* pRaft, SSyncRaftProgress* progress, bool sendIfEmpty) {
  assert(pRaft->state == TAOS_SYNC_STATE_LEADER);
  SyncNodeId nodeId = progress->id;

  if (syncRaftProgressIsPaused(progress)) {
    syncInfo("node [%d:%d] paused", pRaft->selfGroupId, nodeId);
    return false;
  }

  SyncIndex nextIndex = syncRaftProgressNextIndex(progress);
  SyncIndex prevIndex = nextIndex - 1;
  SyncTerm  prevTerm = syncRaftLogTermOf(pRaft->log, prevIndex);

  // Start from an empty result so the checks below are well defined even
  // when the acquire call fails without writing its out-parameters.
  SSyncRaftEntry *entries = NULL;
  int nEntry = 0;
  int ret = syncRaftLogAcquire(pRaft->log, nextIndex, pRaft->maxMsgSize, &entries, &nEntry);

  if (nEntry == 0 && !sendIfEmpty) {
    return false;
  }

  if (ret != 0 || prevTerm == SYNC_NON_TERM) {
    // The entries cannot be sent as an append; fall back to a snapshot.
    // Entries that were acquired are handed back first, mirroring the
    // release done on the failure path of sendAppendEntries().
    if (ret == 0 && nEntry > 0) {
      syncRaftLogRelease(pRaft->log, nextIndex, entries, nEntry);
    }
    return sendSnapshot(pRaft, progress);
  }

  return sendAppendEntries(pRaft, progress, prevIndex, prevTerm, entries, nEntry);
}
|
||||
|
||||
/**
 * Snapshot fallback for a peer. No snapshot is built or sent here yet; the
 * result only reflects whether syncRaftProgressRecentActive() reports the
 * peer as recently active.
 */
static bool sendSnapshot(SSyncRaft* pRaft, SSyncRaftProgress* progress) {
  bool peerActive = syncRaftProgressRecentActive(progress) ? true : false;
  return peerActive;
}
|
||||
|
||||
static bool sendAppendEntries(SSyncRaft* pRaft, SSyncRaftProgress* progress,
|
||||
SyncIndex prevIndex, SyncTerm prevTerm,
|
||||
SSyncRaftEntry *entries, int nEntry) {
|
||||
SNodeInfo* pNode = syncRaftGetNodeById(pRaft, progress->id);
|
||||
if (pNode == NULL) {
|
||||
return false;
|
||||
}
|
||||
SyncIndex lastIndex;
|
||||
SyncTerm logTerm = prevTerm;
|
||||
|
||||
SSyncMessage* msg = syncNewAppendMsg(pRaft->selfGroupId, pRaft->selfId, pRaft->term,
|
||||
prevIndex, prevTerm, pRaft->log->commitIndex,
|
||||
nEntry, entries);
|
||||
|
||||
if (msg == NULL) {
|
||||
goto err_release_log;
|
||||
}
|
||||
|
||||
if (nEntry != 0) {
|
||||
switch (progress->state) {
|
||||
// optimistically increase the next when in StateReplicate
|
||||
case PROGRESS_STATE_REPLICATE:
|
||||
lastIndex = entries[nEntry - 1].index;
|
||||
syncRaftProgressOptimisticNextIndex(progress, lastIndex);
|
||||
syncRaftInflightAdd(progress->inflights, lastIndex);
|
||||
break;
|
||||
case PROGRESS_STATE_PROBE:
|
||||
progress->probeSent = true;
|
||||
break;
|
||||
default:
|
||||
syncFatal("[%d:%d] is sending append in unhandled state %s",
|
||||
pRaft->selfGroupId, pRaft->selfId, syncRaftProgressStateString(progress));
|
||||
break;
|
||||
}
|
||||
}
|
||||
pRaft->io.send(msg, pNode);
|
||||
return true;
|
||||
|
||||
err_release_log:
|
||||
syncRaftLogRelease(pRaft->log, prevIndex + 1, entries, nEntry);
|
||||
return false;
|
||||
}
|
|
@ -1,23 +0,0 @@
|
|||
/*
|
||||
* Copyright (c) 2019 TAOS Data, Inc. <cli@taosdata.com>
|
||||
*
|
||||
* This program is free software: you can use, redistribute, and/or modify
|
||||
* it under the terms of the GNU Affero General Public License, version 3
|
||||
* or later ("AGPL"), as published by the Free Software Foundation.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful, but WITHOUT
|
||||
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||
* FITNESS FOR A PARTICULAR PURPOSE.
|
||||
*
|
||||
* You should have received a copy of the GNU Affero General Public License
|
||||
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#include "sync.h"
|
||||
#include "raft_unstable_log.h"
|
||||
|
||||
/*
|
||||
SyncIndex syncRaftLogLastIndex(SSyncRaftUnstableLog* pLog) {
|
||||
return 0;
|
||||
}
|
||||
*/
|
|
@ -1,302 +1 @@
|
|||
/*
|
||||
* Copyright (c) 2019 TAOS Data, Inc. <jhtao@taosdata.com>
|
||||
*
|
||||
* This program is free software: you can use, redistribute, and/or modify
|
||||
* it under the terms of the GNU Affero General Public License, version 3
|
||||
* or later ("AGPL"), as published by the Free Software Foundation.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful, but WITHOUT
|
||||
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||
* FITNESS FOR A PARTICULAR PURPOSE.
|
||||
*
|
||||
* You should have received a copy of the GNU Affero General Public License
|
||||
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#include "syncInt.h"
|
||||
#include "trpc.h"
|
||||
#include "ttimer.h"
|
||||
|
||||
SSyncManager* gSyncManager = NULL;
|
||||
|
||||
#define SYNC_TICK_TIMER 50
|
||||
#define SYNC_ACTIVITY_TIMER 5
|
||||
#define SYNC_SERVER_WORKER 2
|
||||
|
||||
static void syncProcessRsp(void *parent, SRpcMsg *pMsg, SEpSet *pEpSet);
|
||||
static void syncProcessReqMsg(void *parent, SRpcMsg *pMsg, SEpSet *pEpSet);
|
||||
|
||||
static int syncInitRpcServer(SSyncManager* syncManager, const SSyncCluster* pSyncCfg);
|
||||
static int syncInitRpcClient(SSyncManager* syncManager);
|
||||
static int syncOpenWorkerPool(SSyncManager* syncManager);
|
||||
static int syncCloseWorkerPool(SSyncManager* syncManager);
|
||||
static void *syncWorkerMain(void *argv);
|
||||
static void syncNodeTick(void *param, void *tmrId);
|
||||
|
||||
int32_t syncInit() {
|
||||
if (gSyncManager != NULL) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
gSyncManager = (SSyncManager*)calloc(sizeof(SSyncManager), 0);
|
||||
if (gSyncManager == NULL) {
|
||||
syncError("malloc SSyncManager fail");
|
||||
return -1;
|
||||
}
|
||||
|
||||
pthread_mutex_init(&gSyncManager->mutex, NULL);
|
||||
|
||||
// init client rpc
|
||||
if (syncInitRpcClient(gSyncManager) != 0) {
|
||||
syncCleanUp();
|
||||
return -1;
|
||||
}
|
||||
|
||||
// init sync timer manager
|
||||
gSyncManager->syncTimerManager = taosTmrInit(1000, 50, 10000, "SYNC");
|
||||
if (gSyncManager->syncTimerManager == NULL) {
|
||||
syncCleanUp();
|
||||
return -1;
|
||||
}
|
||||
|
||||
// init worker pool
|
||||
if (syncOpenWorkerPool(gSyncManager) != 0) {
|
||||
syncCleanUp();
|
||||
return -1;
|
||||
}
|
||||
|
||||
// init vgroup hash table
|
||||
gSyncManager->vgroupTable = taosHashInit(TSDB_MIN_VNODES, taosGetDefaultHashFunction(TSDB_DATA_TYPE_INT), true, HASH_ENTRY_LOCK);
|
||||
if (gSyncManager->vgroupTable == NULL) {
|
||||
syncCleanUp();
|
||||
return -1;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
void syncCleanUp() {
|
||||
if (gSyncManager == NULL) {
|
||||
return;
|
||||
}
|
||||
pthread_mutex_lock(&gSyncManager->mutex);
|
||||
if (gSyncManager->vgroupTable) {
|
||||
taosHashCleanup(gSyncManager->vgroupTable);
|
||||
}
|
||||
if (gSyncManager->clientRpc) {
|
||||
rpcClose(gSyncManager->clientRpc);
|
||||
syncInfo("sync inter-sync rpc client is closed");
|
||||
}
|
||||
if (gSyncManager->syncTimerManager) {
|
||||
taosTmrCleanUp(gSyncManager->syncTimerManager);
|
||||
}
|
||||
syncCloseWorkerPool(gSyncManager);
|
||||
pthread_mutex_unlock(&gSyncManager->mutex);
|
||||
pthread_mutex_destroy(&gSyncManager->mutex);
|
||||
free(gSyncManager);
|
||||
gSyncManager = NULL;
|
||||
}
|
||||
|
||||
SSyncNode* syncStart(const SSyncInfo* pInfo) {
|
||||
pthread_mutex_lock(&gSyncManager->mutex);
|
||||
|
||||
SSyncNode **ppNode = taosHashGet(gSyncManager->vgroupTable, &pInfo->vgId, sizeof(SyncGroupId*));
|
||||
if (ppNode != NULL) {
|
||||
syncInfo("vgroup %d already exist", pInfo->vgId);
|
||||
pthread_mutex_unlock(&gSyncManager->mutex);
|
||||
return *ppNode;
|
||||
}
|
||||
|
||||
// init rpc server
|
||||
if (syncInitRpcServer(gSyncManager, &pInfo->syncCfg) != 0) {
|
||||
pthread_mutex_unlock(&gSyncManager->mutex);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
SSyncNode *pNode = (SSyncNode*)malloc(sizeof(SSyncNode));
|
||||
if (pNode == NULL) {
|
||||
syncError("malloc vgroup %d node fail", pInfo->vgId);
|
||||
pthread_mutex_unlock(&gSyncManager->mutex);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
pNode->syncTimer = taosTmrStart(syncNodeTick, SYNC_TICK_TIMER, (void*)((int64_t)pInfo->vgId), gSyncManager->syncTimerManager);
|
||||
|
||||
// start raft
|
||||
pNode->raft.pNode = pNode;
|
||||
if (syncRaftStart(&pNode->raft, pInfo) != 0) {
|
||||
syncError("raft start at %d node fail", pInfo->vgId);
|
||||
pthread_mutex_unlock(&gSyncManager->mutex);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
pthread_mutex_init(&pNode->mutex, NULL);
|
||||
|
||||
taosHashPut(gSyncManager->vgroupTable, &pInfo->vgId, sizeof(SyncGroupId), &pNode, sizeof(SSyncNode *));
|
||||
|
||||
pthread_mutex_unlock(&gSyncManager->mutex);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
void syncStop(const SSyncNode* pNode) {
|
||||
pthread_mutex_lock(&gSyncManager->mutex);
|
||||
|
||||
SSyncNode **ppNode = taosHashGet(gSyncManager->vgroupTable, &pNode->vgId, sizeof(SyncGroupId*));
|
||||
if (ppNode == NULL) {
|
||||
syncInfo("vgroup %d not exist", pNode->vgId);
|
||||
pthread_mutex_unlock(&gSyncManager->mutex);
|
||||
return;
|
||||
}
|
||||
assert(*ppNode == pNode);
|
||||
taosTmrStop(pNode->syncTimer);
|
||||
|
||||
taosHashRemove(gSyncManager->vgroupTable, &pNode->vgId, sizeof(SyncGroupId));
|
||||
pthread_mutex_unlock(&gSyncManager->mutex);
|
||||
|
||||
pthread_mutex_destroy(&((*ppNode)->mutex));
|
||||
free(*ppNode);
|
||||
}
|
||||
|
||||
/**
 * Submit a proposal to the raft state machine of a sync node.
 *
 * The proposal message is built on the stack with syncInitPropMsg() and
 * stepped into the raft instance while the node mutex is held.
 *
 * @return the result of syncRaftStep().
 */
int32_t syncPropose(SSyncNode* syncNode, const SSyncBuffer* pBuf, void* pData, bool isWeak) {
  SSyncMessage propMsg;
  int32_t      code;

  pthread_mutex_lock(&syncNode->mutex);
  code = syncRaftStep(&syncNode->raft, syncInitPropMsg(&propMsg, pBuf, pData, isWeak));
  pthread_mutex_unlock(&syncNode->mutex);

  return code;
}
|
||||
|
||||
void syncReconfig(const SSyncNode* pNode, const SSyncCluster* pCfg) {}
|
||||
|
||||
int32_t syncAddNode(SSyncNode syncNode, const SNodeInfo *pNode) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
int32_t syncRemoveNode(SSyncNode syncNode, const SNodeInfo *pNode) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
// process rpc rsp message from other sync server
|
||||
static void syncProcessRsp(void *parent, SRpcMsg *pMsg, SEpSet *pEpSet) {
|
||||
|
||||
}
|
||||
|
||||
// process rpc message from other sync server
|
||||
static void syncProcessReqMsg(void *parent, SRpcMsg *pMsg, SEpSet *pEpSet) {
|
||||
|
||||
}
|
||||
|
||||
/**
 * Make sure an RPC server exists for this node's own endpoint.
 *
 * Servers are kept in syncManager->rpcServerTable keyed by "fqdn:port"; the
 * table is created on first use. If a server for the endpoint is already
 * registered nothing is opened.
 *
 * @param syncManager  the sync manager owning the server table.
 * @param pSyncCfg     cluster configuration; selfIndex selects this node.
 * @return 0 on success (including "already exists"), -1 on failure.
 */
static int syncInitRpcServer(SSyncManager* syncManager, const SSyncCluster* pSyncCfg) {
  if (syncManager->rpcServerTable == NULL) {
    syncManager->rpcServerTable = taosHashInit(TSDB_MIN_VNODES, taosGetDefaultHashFunction(TSDB_DATA_TYPE_BINARY), true, HASH_ENTRY_LOCK);
    if (syncManager->rpcServerTable == NULL) {
      syncError("init sync rpc server hash table error");
      return -1;
    }
  }

  assert(pSyncCfg->selfIndex < pSyncCfg->replica && pSyncCfg->selfIndex >= 0);

  // Use this node's own entry. Indexing with `replica` (the element count)
  // read one element past the configured nodes.
  const SNodeInfo* pNode = &(pSyncCfg->nodeInfo[pSyncCfg->selfIndex]);

  char buffer[156] = {'\0'};
  snprintf(buffer, sizeof(buffer), "%s:%d", &(pNode->nodeFqdn[0]), pNode->nodePort);
  size_t len = strlen(buffer);

  void** ppRpcServer = taosHashGet(syncManager->rpcServerTable, buffer, len);
  if (ppRpcServer != NULL) {
    // already inited
    syncInfo("sync rpc server for %s already exist", buffer);
    return 0;
  }

  SRpcInit rpcInit;
  memset(&rpcInit, 0, sizeof(rpcInit));
  rpcInit.localPort    = pNode->nodePort;
  rpcInit.label        = "sync-server";
  rpcInit.numOfThreads = SYNC_SERVER_WORKER;
  rpcInit.cfp          = syncProcessReqMsg;
  rpcInit.sessions     = TSDB_MAX_VNODES << 4;
  rpcInit.connType     = TAOS_CONN_SERVER;
  rpcInit.idleTime     = SYNC_ACTIVITY_TIMER * 1000;

  void* rpcServer = rpcOpen(&rpcInit);
  if (rpcServer == NULL) {
    syncInfo("rpcOpen for sync rpc server for %s fail", buffer);
    return -1;
  }

  // Store the handle itself (a pointer-sized value), which is what the
  // lookup above expects to read back. Previously `len` bytes were copied
  // from the memory the handle points to.
  taosHashPut(syncManager->rpcServerTable, buffer, len, &rpcServer, sizeof(void*));
  syncInfo("sync rpc server for %s init success", buffer);

  return 0;
}
|
||||
|
||||
/**
 * Open the RPC client endpoint used to talk to other sync servers and store
 * it in syncManager->clientRpc.
 *
 * @return 0 on success, -1 when rpcOpen() fails.
 */
static int syncInitRpcClient(SSyncManager* syncManager) {
  char secret[TSDB_PASSWORD_LEN] = "secret";

  SRpcInit clientCfg = {0};
  clientCfg.label        = "sync-client";
  clientCfg.numOfThreads = 1;
  clientCfg.cfp          = syncProcessRsp;
  clientCfg.sessions     = TSDB_MAX_VNODES << 4;
  clientCfg.connType     = TAOS_CONN_CLIENT;
  clientCfg.idleTime     = SYNC_ACTIVITY_TIMER * 1000;
  clientCfg.user         = "t";
  clientCfg.ckey         = "key";
  clientCfg.secret       = secret;

  syncManager->clientRpc = rpcOpen(&clientCfg);
  if (syncManager->clientRpc != NULL) {
    syncInfo("sync inter-sync rpc client is initialized");
    return 0;
  }

  syncError("failed to init sync rpc client");
  return -1;
}
|
||||
|
||||
/**
 * Create the sync worker threads (TAOS_SYNC_MAX_WORKER joinable threads
 * running syncWorkerMain, one per slot of syncManager->worker).
 *
 * @return 0 when every thread was created, -1 on the first failure.
 */
static int syncOpenWorkerPool(SSyncManager* syncManager) {
  int code = 0;
  pthread_attr_t thattr;

  pthread_attr_init(&thattr);
  pthread_attr_setdetachstate(&thattr, PTHREAD_CREATE_JOINABLE);

  for (int i = 0; i < TAOS_SYNC_MAX_WORKER; ++i) {
    SSyncWorker* pWorker = &(syncManager->worker[i]);

    // pthread_create reports failure through its return value, not errno.
    int err = pthread_create(&(pWorker->thread), &thattr, syncWorkerMain, pWorker);
    if (err != 0) {
      syncError("failed to create sync worker since %s", strerror(err));
      code = -1;
      break;
    }
  }

  // Destroyed on every path, including the failure path.
  pthread_attr_destroy(&thattr);

  return code;
}
|
||||
|
||||
/* Shut down the sync worker pool. Not implemented: always returns 0. */
static int syncCloseWorkerPool(SSyncManager* syncManager) {
  int code = 0;
  return code;
}
|
||||
|
||||
static void *syncWorkerMain(void *argv) {
|
||||
SSyncWorker* pWorker = (SSyncWorker *)argv;
|
||||
|
||||
taosBlockSIGPIPE();
|
||||
setThreadName("syncWorker");
|
||||
|
||||
return NULL;
|
||||
}
|
||||
|
||||
static void syncNodeTick(void *param, void *tmrId) {
|
||||
SyncGroupId vgId = (SyncGroupId)((int64_t)param);
|
||||
SSyncNode **ppNode = taosHashGet(gSyncManager->vgroupTable, &vgId, sizeof(SyncGroupId*));
|
||||
if (ppNode == NULL) {
|
||||
return;
|
||||
}
|
||||
SSyncNode *pNode = *ppNode;
|
||||
|
||||
pthread_mutex_lock(&pNode->mutex);
|
||||
syncRaftTick(&pNode->raft);
|
||||
pthread_mutex_unlock(&pNode->mutex);
|
||||
|
||||
pNode->syncTimer = taosTmrStart(syncNodeTick, SYNC_TICK_TIMER, (void*)(int64_t)pNode->vgId, gSyncManager->syncTimerManager);
|
||||
}
|
||||
#include "sync.h"
|
|
@ -1,409 +0,0 @@
|
|||
/*
|
||||
* Copyright (c) 2019 TAOS Data, Inc. <cli@taosdata.com>
|
||||
*
|
||||
* This program is free software: you can use, redistribute, and/or modify
|
||||
* it under the terms of the GNU Affero General Public License, version 3
|
||||
* or later ("AGPL"), as published by the Free Software Foundation.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful, but WITHOUT
|
||||
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||
* FITNESS FOR A PARTICULAR PURPOSE.
|
||||
*
|
||||
* You should have received a copy of the GNU Affero General Public License
|
||||
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#include "raft.h"
|
||||
#include "syncInt.h"
|
||||
#include "sync_raft_config_change.h"
|
||||
#include "sync_raft_progress.h"
|
||||
#include "sync_raft_progress_tracker.h"
|
||||
#include "sync_raft_quorum_joint.h"
|
||||
|
||||
static int checkAndCopy(SSyncRaftChanger* changer, SSyncRaftProgressTrackerConfig* config, SSyncRaftProgressMap* progressMap);
|
||||
static int checkAndReturn(SSyncRaftProgressTrackerConfig* config, SSyncRaftProgressMap* progressMap);
|
||||
static int checkInvariants(SSyncRaftProgressTrackerConfig* config, SSyncRaftProgressMap* progressMap);
|
||||
static int checkInvariants(SSyncRaftProgressTrackerConfig* config, SSyncRaftProgressMap* progressMap);
|
||||
static bool hasJointConfig(const SSyncRaftProgressTrackerConfig* config);
|
||||
static int applyConfig(SSyncRaftChanger* changer, SSyncRaftProgressTrackerConfig* config,
|
||||
SSyncRaftProgressMap* progressMap, const SSyncConfChangeSingleArray* css);
|
||||
|
||||
static int symDiff(const SSyncRaftNodeMap* l, const SSyncRaftNodeMap* r);
|
||||
|
||||
static void initProgress(SSyncRaftChanger* changer, SSyncRaftProgressTrackerConfig* config,
|
||||
SSyncRaftProgressMap* progressMap, SyncNodeId id, bool isLearner);
|
||||
|
||||
static void nilAwareDelete(SSyncRaftNodeMap* nodeMap, SyncNodeId id);
|
||||
static void nilAwareAdd(SSyncRaftNodeMap* nodeMap, SyncNodeId id);
|
||||
|
||||
static void makeVoter(SSyncRaftChanger* changer, SSyncRaftProgressTrackerConfig* config,
|
||||
SSyncRaftProgressMap* progressMap, SyncNodeId id);
|
||||
static void makeLearner(SSyncRaftChanger* changer, SSyncRaftProgressTrackerConfig* config,
|
||||
SSyncRaftProgressMap* progressMap, SyncNodeId id);
|
||||
static void removeNodeId(SSyncRaftChanger* changer, SSyncRaftProgressTrackerConfig* config,
|
||||
SSyncRaftProgressMap* progressMap, SyncNodeId id);
|
||||
|
||||
// EnterJoint verifies that the outgoing (=right) majority config of the joint
|
||||
// config is empty and initializes it with a copy of the incoming (=left)
|
||||
// majority config. That is, it transitions from
|
||||
//
|
||||
// (1 2 3)&&()
|
||||
// to
|
||||
// (1 2 3)&&(1 2 3).
|
||||
//
|
||||
// The supplied changes are then applied to the incoming majority config,
|
||||
// resulting in a joint configuration that in terms of the Raft thesis[1]
|
||||
// (Section 4.3) corresponds to `C_{new,old}`.
|
||||
//
|
||||
// [1]: https://github.com/ongardie/dissertation/blob/master/online-trim.pdf
|
||||
int syncRaftChangerEnterJoint(SSyncRaftChanger* changer, bool autoLeave, const SSyncConfChangeSingleArray* css,
|
||||
SSyncRaftProgressTrackerConfig* config, SSyncRaftProgressMap* progressMap) {
|
||||
int ret;
|
||||
|
||||
ret = checkAndCopy(changer, config, progressMap);
|
||||
if (ret != 0) {
|
||||
return ret;
|
||||
}
|
||||
|
||||
if (hasJointConfig(config)) {
|
||||
syncError("config is already joint");
|
||||
return -1;
|
||||
}
|
||||
|
||||
if(syncRaftJointConfigIsIncomingEmpty(&config->voters) == 0) {
|
||||
// We allow adding nodes to an empty config for convenience (testing and
|
||||
// bootstrap), but you can't enter a joint state.
|
||||
syncError("can't make a zero-voter config joint");
|
||||
return -1;
|
||||
}
|
||||
|
||||
// Clear the outgoing config.
|
||||
syncRaftJointConfigClearOutgoing(&config->voters);
|
||||
|
||||
// Copy incoming to outgoing.
|
||||
syncRaftCopyNodeMap(&config->voters.incoming, &config->voters.outgoing);
|
||||
|
||||
ret = applyConfig(changer, config, progressMap, css);
|
||||
if (ret != 0) {
|
||||
return ret;
|
||||
}
|
||||
|
||||
config->autoLeave = autoLeave;
|
||||
return checkAndReturn(config, progressMap);
|
||||
}
|
||||
|
||||
// Simple carries out a series of configuration changes that (in aggregate)
|
||||
// mutates the incoming majority config Voters[0] by at most one. This method
|
||||
// will return an error if that is not the case, if the resulting quorum is
|
||||
// zero, or if the configuration is in a joint state (i.e. if there is an
|
||||
// outgoing configuration).
|
||||
int syncRaftChangerSimpleConfig(SSyncRaftChanger* changer, const SSyncConfChangeSingleArray* css,
|
||||
SSyncRaftProgressTrackerConfig* config, SSyncRaftProgressMap* progressMap) {
|
||||
int ret;
|
||||
|
||||
ret = checkAndCopy(changer, config, progressMap);
|
||||
if (ret != 0) {
|
||||
return ret;
|
||||
}
|
||||
|
||||
if (hasJointConfig(config)) {
|
||||
syncError("can't apply simple config change in joint config");
|
||||
return -1;
|
||||
}
|
||||
|
||||
ret = applyConfig(changer, config, progressMap, css);
|
||||
if (ret != 0) {
|
||||
return ret;
|
||||
}
|
||||
|
||||
int n = symDiff(syncRaftJointConfigIncoming(&changer->tracker->config.voters),
|
||||
syncRaftJointConfigIncoming(&config->voters));
|
||||
if (n > 1) {
|
||||
syncError("more than one voter changed without entering joint config");
|
||||
return -1;
|
||||
}
|
||||
|
||||
return checkAndReturn(config, progressMap);
|
||||
}
|
||||
|
||||
// apply a change to the configuration. By convention, changes to voters are
|
||||
// always made to the incoming majority config Voters[0]. Voters[1] is either
|
||||
// empty or preserves the outgoing majority configuration while in a joint state.
|
||||
static int applyConfig(SSyncRaftChanger* changer, SSyncRaftProgressTrackerConfig* config,
|
||||
SSyncRaftProgressMap* progressMap, const SSyncConfChangeSingleArray* css) {
|
||||
int i;
|
||||
|
||||
for (i = 0; i < css->n; ++i) {
|
||||
const SSyncConfChangeSingle* cs = &(css->changes[i]);
|
||||
if (cs->nodeId == SYNC_NON_NODE_ID) {
|
||||
continue;
|
||||
}
|
||||
|
||||
ESyncRaftConfChangeType type = cs->type;
|
||||
switch (type) {
|
||||
case SYNC_RAFT_Conf_AddNode:
|
||||
makeVoter(changer, config, progressMap, cs->nodeId);
|
||||
break;
|
||||
case SYNC_RAFT_Conf_AddLearnerNode:
|
||||
makeLearner(changer, config, progressMap, cs->nodeId);
|
||||
break;
|
||||
case SYNC_RAFT_Conf_RemoveNode:
|
||||
removeNodeId(changer, config, progressMap, cs->nodeId);
|
||||
break;
|
||||
case SYNC_RAFT_Conf_UpdateNode:
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (syncRaftJointConfigIsIncomingEmpty(&config->voters)) {
|
||||
syncError("removed all voters");
|
||||
return -1;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
// makeVoter adds or promotes the given ID to be a voter in the incoming
|
||||
// majority config.
|
||||
static void makeVoter(SSyncRaftChanger* changer, SSyncRaftProgressTrackerConfig* config,
|
||||
SSyncRaftProgressMap* progressMap, SyncNodeId id) {
|
||||
SSyncRaftProgress* progress = syncRaftFindProgressByNodeId(progressMap, id);
|
||||
if (progress == NULL) {
|
||||
initProgress(changer, config, progressMap, id, false);
|
||||
return;
|
||||
}
|
||||
|
||||
progress->isLearner = false;
|
||||
nilAwareDelete(&config->learners, id);
|
||||
nilAwareDelete(&config->learnersNext, id);
|
||||
syncRaftJointConfigAddToIncoming(&config->voters, id);
|
||||
}
|
||||
|
||||
// makeLearner makes the given ID a learner or stages it to be a learner once
|
||||
// an active joint configuration is exited.
|
||||
//
|
||||
// The former happens when the peer is not a part of the outgoing config, in
|
||||
// which case we either add a new learner or demote a voter in the incoming
|
||||
// config.
|
||||
//
|
||||
// The latter case occurs when the configuration is joint and the peer is a
|
||||
// voter in the outgoing config. In that case, we do not want to add the peer
|
||||
// as a learner because then we'd have to track a peer as a voter and learner
|
||||
// simultaneously. Instead, we add the learner to LearnersNext, so that it will
|
||||
// be added to Learners the moment the outgoing config is removed by
|
||||
// LeaveJoint().
|
||||
static void makeLearner(SSyncRaftChanger* changer, SSyncRaftProgressTrackerConfig* config,
|
||||
SSyncRaftProgressMap* progressMap, SyncNodeId id) {
|
||||
SSyncRaftProgress* progress = syncRaftFindProgressByNodeId(progressMap, id);
|
||||
if (progress == NULL) {
|
||||
initProgress(changer, config, progressMap, id, true);
|
||||
return;
|
||||
}
|
||||
|
||||
if (progress->isLearner) {
|
||||
return;
|
||||
}
|
||||
// Remove any existing voter in the incoming config...
|
||||
removeNodeId(changer, config, progressMap, id);
|
||||
|
||||
// ... but save the Progress.
|
||||
syncRaftAddToProgressMap(progressMap, progress);
|
||||
|
||||
// Use LearnersNext if we can't add the learner to Learners directly, i.e.
|
||||
// if the peer is still tracked as a voter in the outgoing config. It will
|
||||
// be turned into a learner in LeaveJoint().
|
||||
//
|
||||
// Otherwise, add a regular learner right away.
|
||||
bool inInOutgoing = syncRaftJointConfigIsInOutgoing(&config->voters, id);
|
||||
if (inInOutgoing) {
|
||||
nilAwareAdd(&config->learnersNext, id);
|
||||
} else {
|
||||
nilAwareAdd(&config->learners, id);
|
||||
progress->isLearner = true;
|
||||
}
|
||||
}
|
||||
|
||||
// removeNodeId this peer as a voter or learner from the incoming config.
|
||||
static void removeNodeId(SSyncRaftChanger* changer, SSyncRaftProgressTrackerConfig* config,
|
||||
SSyncRaftProgressMap* progressMap, SyncNodeId id) {
|
||||
SSyncRaftProgress* progress = syncRaftFindProgressByNodeId(progressMap, id);
|
||||
if (progress == NULL) {
|
||||
return;
|
||||
}
|
||||
|
||||
syncRaftJointConfigRemoveFromIncoming(&config->voters, id);
|
||||
nilAwareDelete(&config->learners, id);
|
||||
nilAwareDelete(&config->learnersNext, id);
|
||||
|
||||
// If the peer is still a voter in the outgoing config, keep the Progress.
|
||||
bool inInOutgoing = syncRaftJointConfigIsInOutgoing(&config->voters, id);
|
||||
if (!inInOutgoing) {
|
||||
syncRaftRemoveFromProgressMap(progressMap, id);
|
||||
}
|
||||
}
|
||||
|
||||
// initProgress initializes a new progress for the given node or learner.
|
||||
static void initProgress(SSyncRaftChanger* changer, SSyncRaftProgressTrackerConfig* config,
|
||||
SSyncRaftProgressMap* progressMap, SyncNodeId id, bool isLearner) {
|
||||
if (!isLearner) {
|
||||
syncRaftJointConfigAddToIncoming(&config->voters, id);
|
||||
} else {
|
||||
nilAwareAdd(&config->learners, id);
|
||||
}
|
||||
|
||||
SSyncRaftProgress* pProgress = (SSyncRaftProgress*)malloc(sizeof(SSyncRaftProgress));
|
||||
assert (pProgress != NULL);
|
||||
*pProgress = (SSyncRaftProgress) {
|
||||
// Initializing the Progress with the last index means that the follower
|
||||
// can be probed (with the last index).
|
||||
//
|
||||
// TODO(tbg): seems awfully optimistic. Using the first index would be
|
||||
// better. The general expectation here is that the follower has no log
|
||||
// at all (and will thus likely need a snapshot), though the app may
|
||||
// have applied a snapshot out of band before adding the replica (thus
|
||||
// making the first index the better choice).
|
||||
.id = id,
|
||||
.groupId = changer->tracker->pRaft->selfGroupId,
|
||||
.nextIndex = changer->lastIndex,
|
||||
.matchIndex = 0,
|
||||
.state = PROGRESS_STATE_PROBE,
|
||||
.pendingSnapshotIndex = 0,
|
||||
.probeSent = false,
|
||||
.inflights = syncRaftOpenInflights(changer->tracker->maxInflightMsgs),
|
||||
.isLearner = isLearner,
|
||||
// When a node is first added, we should mark it as recently active.
|
||||
// Otherwise, CheckQuorum may cause us to step down if it is invoked
|
||||
// before the added node has had a chance to communicate with us.
|
||||
.recentActive = true,
|
||||
.refCount = 0,
|
||||
};
|
||||
|
||||
syncRaftAddToProgressMap(progressMap, pProgress);
|
||||
}
|
||||
|
||||
// checkInvariants makes sure that the config and progress are compatible with
|
||||
// each other. This is used to check both what the Changer is initialized with,
|
||||
// as well as what it returns.
|
||||
static int checkInvariants(SSyncRaftProgressTrackerConfig* config, SSyncRaftProgressMap* progressMap) {
|
||||
int ret = syncRaftCheckTrackerConfigInProgress(config, progressMap);
|
||||
if (ret != 0) {
|
||||
return ret;
|
||||
}
|
||||
|
||||
// Any staged learner was staged because it could not be directly added due
|
||||
// to a conflicting voter in the outgoing config.
|
||||
SyncNodeId* pNodeId = NULL;
|
||||
while (!syncRaftIterateNodeMap(&config->learnersNext, pNodeId)) {
|
||||
SyncNodeId nodeId = *pNodeId;
|
||||
if (!syncRaftJointConfigInOutgoing(&config->voters, nodeId)) {
|
||||
syncError("[%d] is in LearnersNext, but not outgoing", nodeId);
|
||||
return -1;
|
||||
}
|
||||
SSyncRaftProgress* progress = syncRaftFindProgressByNodeId(progressMap, nodeId);
|
||||
assert(progress);
|
||||
assert(progress->id == nodeId);
|
||||
if (progress->isLearner) {
|
||||
syncError("[%d:%d] is in LearnersNext, but is already marked as learner", progress->groupId, nodeId);
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
// Conversely Learners and Voters doesn't intersect at all.
|
||||
pNodeId = NULL;
|
||||
while (!syncRaftIterateNodeMap(&config->learners, pNodeId)) {
|
||||
SyncNodeId nodeId = *pNodeId;
|
||||
if (syncRaftJointConfigInOutgoing(&config->voters, nodeId)) {
|
||||
syncError("%d is in Learners and outgoing", nodeId);
|
||||
return -1;
|
||||
}
|
||||
SSyncRaftProgress* progress = syncRaftFindProgressByNodeId(progressMap, nodeId);
|
||||
assert(progress);
|
||||
assert(progress->id == nodeId);
|
||||
|
||||
if (!progress->isLearner) {
|
||||
syncError("[%d:%d] is in Learners, but is not marked as learner", progress->groupId, nodeId);
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
if (!hasJointConfig(config)) {
|
||||
// We enforce that empty maps are nil instead of zero.
|
||||
if (syncRaftNodeMapSize(&config->learnersNext) > 0) {
|
||||
syncError("cfg.LearnersNext must be nil when not joint");
|
||||
return -1;
|
||||
}
|
||||
if (config->autoLeave) {
|
||||
syncError("AutoLeave must be false when not joint");
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
// checkAndCopy copies the tracker's config and progress map (deeply enough for
|
||||
// the purposes of the Changer) and returns those copies. It returns an error
|
||||
// if checkInvariants does.
|
||||
static int checkAndCopy(SSyncRaftChanger* changer, SSyncRaftProgressTrackerConfig* config, SSyncRaftProgressMap* progressMap) {
|
||||
syncRaftCopyTrackerConfig(&changer->tracker->config, config);
|
||||
syncRaftClearProgressMap(progressMap);
|
||||
|
||||
SSyncRaftProgress* pProgress = NULL;
|
||||
while (!syncRaftIterateProgressMap(&changer->tracker->progressMap, pProgress)) {
|
||||
syncRaftAddToProgressMap(progressMap, pProgress);
|
||||
}
|
||||
|
||||
return checkAndReturn(config, progressMap);
|
||||
}
|
||||
|
||||
// checkAndReturn calls checkInvariants on the input and returns either the
|
||||
// resulting error or the input.
|
||||
static int checkAndReturn(SSyncRaftProgressTrackerConfig* config, SSyncRaftProgressMap* progressMap) {
|
||||
if (checkInvariants(config, progressMap) != 0) {
|
||||
return -1;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static bool hasJointConfig(const SSyncRaftProgressTrackerConfig* config) {
|
||||
return !syncRaftJointConfigIsOutgoingEmpty(&config->voters);
|
||||
}
|
||||
|
||||
// symdiff returns the count of the symmetric difference between the sets of
|
||||
// uint64s, i.e. len( (l - r) \union (r - l)).
|
||||
static int symDiff(const SSyncRaftNodeMap* l, const SSyncRaftNodeMap* r) {
|
||||
int n;
|
||||
int i;
|
||||
int j0, j1;
|
||||
const SSyncRaftNodeMap* pairs[2][2] = {
|
||||
{l, r}, // count elems in l but not in r
|
||||
{r, l}, // count elems in r but not in l
|
||||
};
|
||||
|
||||
for (n = 0, i = 0; i < 2; ++i) {
|
||||
const SSyncRaftNodeMap** pp = pairs[i];
|
||||
|
||||
const SSyncRaftNodeMap* p0 = pp[0];
|
||||
const SSyncRaftNodeMap* p1 = pp[1];
|
||||
SyncNodeId* pNodeId;
|
||||
while (!syncRaftIterateNodeMap(p0, pNodeId)) {
|
||||
if (!syncRaftIsInNodeMap(p1, *pNodeId)) {
|
||||
n+=1;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return n;
|
||||
}
|
||||
|
||||
// nilAwareDelete deletes from a map, nil'ing the map itself if it is empty after.
|
||||
static void nilAwareDelete(SSyncRaftNodeMap* nodeMap, SyncNodeId id) {
|
||||
syncRaftRemoveFromNodeMap(nodeMap, id);
|
||||
}
|
||||
|
||||
// nilAwareAdd populates a map entry, creating the map if necessary.
|
||||
static void nilAwareAdd(SSyncRaftNodeMap* nodeMap, SyncNodeId id) {
|
||||
syncRaftAddToNodeMap(nodeMap, id);
|
||||
}
|
|
@ -1,114 +0,0 @@
|
|||
/*
|
||||
* Copyright (c) 2019 TAOS Data, Inc. <cli@taosdata.com>
|
||||
*
|
||||
* This program is free software: you can use, redistribute, and/or modify
|
||||
* it under the terms of the GNU Affero General Public License, version 3
|
||||
* or later ("AGPL"), as published by the Free Software Foundation.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful, but WITHOUT
|
||||
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||
* FITNESS FOR A PARTICULAR PURPOSE.
|
||||
*
|
||||
* You should have received a copy of the GNU Affero General Public License
|
||||
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#include "syncInt.h"
|
||||
#include "raft.h"
|
||||
#include "raft_log.h"
|
||||
#include "raft_message.h"
|
||||
#include "sync_raft_progress_tracker.h"
|
||||
|
||||
// syncRaftStartElection starts a campaign of type `cType` unless one of the
// preconditions fails: the node is already leader, the node is not
// promotable, or the log has pending configuration entries together with
// unapplied entries. Each refusal is logged and the function returns without
// changing state.
void syncRaftStartElection(SSyncRaft* pRaft, ESyncRaftElectionType cType) {
  if (pRaft->state == TAOS_SYNC_STATE_LEADER) {
    syncDebug("[%d:%d] ignoring RAFT_MSG_INTERNAL_ELECTION because already leader", pRaft->selfGroupId, pRaft->selfId);
    return;
  }

  if (!syncRaftIsPromotable(pRaft)) {
    syncWarn("[%d:%d] is unpromotable and can not syncRaftCampaign", pRaft->selfGroupId, pRaft->selfId);
    return;
  }

  // No election while a configuration change is still waiting to be applied.
  bool blockedByConf = syncRaftLogNumOfPendingConf(pRaft->log) > 0 && syncRaftHasUnappliedLog(pRaft->log);
  if (blockedByConf) {
    syncWarn("[%d:%d] cannot syncRaftStartElection at term %" PRId64 " since there are still pending configuration changes to apply",
             pRaft->selfGroupId, pRaft->selfId, pRaft->term);
    return;
  }

  syncInfo("[%d:%d] is starting a new election at term %" PRId64 "", pRaft->selfGroupId, pRaft->selfId, pRaft->term);
  syncRaftCampaign(pRaft, cType);
}
|
||||
|
||||
// syncRaftCampaign transitions the raft instance to candidate state. This must only be
|
||||
// called after verifying that this is a legitimate transition.
|
||||
void syncRaftCampaign(SSyncRaft* pRaft, ESyncRaftElectionType cType) {
|
||||
bool preVote;
|
||||
SyncTerm term;
|
||||
|
||||
if (syncRaftIsPromotable(pRaft)) {
|
||||
syncDebug("[%d:%d] is unpromotable; syncRaftCampaign() should have been called", pRaft->selfGroupId, pRaft->selfId);
|
||||
return;
|
||||
}
|
||||
|
||||
if (cType == SYNC_RAFT_CAMPAIGN_PRE_ELECTION) {
|
||||
syncRaftBecomePreCandidate(pRaft);
|
||||
preVote = true;
|
||||
// PreVote RPCs are sent for the next term before we've incremented r.Term.
|
||||
term = pRaft->term + 1;
|
||||
} else {
|
||||
syncRaftBecomeCandidate(pRaft);
|
||||
term = pRaft->term;
|
||||
preVote = false;
|
||||
}
|
||||
|
||||
int quorum = syncRaftQuorum(pRaft);
|
||||
ESyncRaftVoteResult result = syncRaftPollVote(pRaft, pRaft->selfId, preVote, true, NULL, NULL);
|
||||
if (result == SYNC_RAFT_VOTE_WON) {
|
||||
// We won the election after voting for ourselves (which must mean that
|
||||
// this is a single-node cluster). Advance to the next state.
|
||||
if (cType == SYNC_RAFT_CAMPAIGN_PRE_ELECTION) {
|
||||
syncRaftStartElection(pRaft, SYNC_RAFT_CAMPAIGN_ELECTION);
|
||||
} else {
|
||||
syncRaftBecomeLeader(pRaft);
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
// broadcast vote message to other peers
|
||||
int i;
|
||||
SyncIndex lastIndex = syncRaftLogLastIndex(pRaft->log);
|
||||
SyncTerm lastTerm = syncRaftLogLastTerm(pRaft->log);
|
||||
SSyncRaftNodeMap nodeMap;
|
||||
syncRaftJointConfigIDs(&pRaft->tracker->config.voters, &nodeMap);
|
||||
SyncNodeId *pNodeId = NULL;
|
||||
while (!syncRaftIterateNodeMap(&nodeMap, pNodeId)) {
|
||||
SyncNodeId nodeId = *pNodeId;
|
||||
if (nodeId == SYNC_NON_NODE_ID) {
|
||||
continue;
|
||||
}
|
||||
|
||||
if (nodeId == pRaft->selfId) {
|
||||
continue;
|
||||
}
|
||||
|
||||
SNodeInfo* pNode = syncRaftGetNodeById(pRaft, nodeId);
|
||||
if (pNode == NULL) {
|
||||
continue;
|
||||
}
|
||||
|
||||
SSyncMessage* pMsg = syncNewVoteMsg(pRaft->selfGroupId, pRaft->selfId,
|
||||
term, cType, lastIndex, lastTerm);
|
||||
if (pMsg == NULL) {
|
||||
continue;
|
||||
}
|
||||
|
||||
syncInfo("[%d:%d] [logterm: %" PRId64 ", index: %" PRId64 "] sent vote request to %d at term %" PRId64 "",
|
||||
pRaft->selfGroupId, pRaft->selfId, lastTerm,
|
||||
lastIndex, nodeId, pRaft->term);
|
||||
|
||||
pRaft->io.send(pMsg, pNode);
|
||||
}
|
||||
}
|
|
@ -1,369 +0,0 @@
|
|||
/*
|
||||
* Copyright (c) 2019 TAOS Data, Inc. <cli@taosdata.com>
|
||||
*
|
||||
* This program is free software: you can use, redistribute, and/or modify
|
||||
* it under the terms of the GNU Affero General Public License, version 3
|
||||
* or later ("AGPL"), as published by the Free Software Foundation.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful, but WITHOUT
|
||||
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||
* FITNESS FOR A PARTICULAR PURPOSE.
|
||||
*
|
||||
* You should have received a copy of the GNU Affero General Public License
|
||||
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#include "raft.h"
|
||||
#include "sync_raft_impl.h"
|
||||
#include "raft_log.h"
|
||||
#include "raft_replication.h"
|
||||
#include "sync_raft_progress_tracker.h"
|
||||
#include "syncInt.h"
|
||||
|
||||
static int convertClear(SSyncRaft* pRaft);
|
||||
static int stepFollower(SSyncRaft* pRaft, const SSyncMessage* pMsg);
|
||||
static int stepCandidate(SSyncRaft* pRaft, const SSyncMessage* pMsg);
|
||||
static int stepLeader(SSyncRaft* pRaft, const SSyncMessage* pMsg);
|
||||
|
||||
static bool increaseUncommittedSize(SSyncRaft* pRaft, SSyncRaftEntry* entries, int n);
|
||||
|
||||
static int triggerAll(SSyncRaft* pRaft);
|
||||
|
||||
static void tickElection(SSyncRaft* pRaft);
|
||||
static void tickHeartbeat(SSyncRaft* pRaft);
|
||||
|
||||
static void appendEntries(SSyncRaft* pRaft, SSyncRaftEntry* entries, int n);
|
||||
|
||||
static void abortLeaderTransfer(SSyncRaft* pRaft);
|
||||
|
||||
static void resetRaft(SSyncRaft* pRaft, SyncTerm term);
|
||||
|
||||
// syncRaftBecomeFollower switches the instance to follower state at `term`
// with `leaderId` as the known leader. The leader id is assigned after
// resetRaft(), because the reset clears it.
void syncRaftBecomeFollower(SSyncRaft* pRaft, SyncTerm term, SyncNodeId leaderId) {
  convertClear(pRaft);

  pRaft->stepFp = stepFollower;
  resetRaft(pRaft, term);
  pRaft->tickFp = tickElection;
  pRaft->leaderId = leaderId;
  pRaft->state = TAOS_SYNC_STATE_FOLLOWER;
  syncInfo("[%d:%d] became follower at term %" PRId64 "", pRaft->selfGroupId, pRaft->selfId, pRaft->term);
}
|
||||
|
||||
// syncRaftBecomePreCandidate switches the instance to candidate state with the
// pre-vote flag set. Only the step/tick handlers, the state and the flag are
// touched; in particular neither the term nor the recorded vote changes.
void syncRaftBecomePreCandidate(SSyncRaft* pRaft) {
  convertClear(pRaft);

  pRaft->candidateState.inPreVote = true;
  pRaft->state = TAOS_SYNC_STATE_CANDIDATE;
  pRaft->tickFp = tickElection;
  pRaft->stepFp = stepCandidate;

  syncInfo("[%d:%d] became pre-candidate at term %" PRId64 "", pRaft->selfGroupId, pRaft->selfId, pRaft->term);
}
|
||||
|
||||
// syncRaftBecomeCandidate switches the instance to candidate state for a real
// election: the term is bumped by one and the node votes for itself.
void syncRaftBecomeCandidate(SSyncRaft* pRaft) {
  convertClear(pRaft);

  pRaft->candidateState.inPreVote = false;
  pRaft->stepFp = stepCandidate;

  // Moving to the next term; the reset clears the previous vote on a term change.
  SyncTerm nextTerm = pRaft->term + 1;
  resetRaft(pRaft, nextTerm);

  pRaft->tickFp = tickElection;
  // Must happen after the reset, which would otherwise wipe the vote.
  pRaft->voteFor = pRaft->selfId;
  pRaft->state = TAOS_SYNC_STATE_CANDIDATE;

  syncInfo("[%d:%d] became candidate at term %" PRId64 "", pRaft->selfGroupId, pRaft->selfId, pRaft->term);
}
|
||||
|
||||
// syncRaftBecomeLeader switches the instance to leader state at the current
// term, puts its own progress into replicate mode and appends an empty
// (no-op) entry to the log. Must not be called while in follower state.
void syncRaftBecomeLeader(SSyncRaft* pRaft) {
  assert(pRaft->state != TAOS_SYNC_STATE_FOLLOWER);

  pRaft->stepFp = stepLeader;
  resetRaft(pRaft, pRaft->term);
  // Leaders tick the heartbeat handler, not the election handler.
  pRaft->tickFp = tickHeartbeat;
  // resetRaft() cleared leaderId; a leader is its own leader.
  pRaft->leaderId = pRaft->selfId;
  pRaft->state = TAOS_SYNC_STATE_LEADER;

  SSyncRaftProgress* progress = syncRaftFindProgressByNodeId(&pRaft->tracker->progressMap, pRaft->selfId);
  assert(progress != NULL);
  // Followers enter replicate mode when they've been successfully probed
  // (perhaps after having received a snapshot as a result). The leader is
  // trivially in this state.
  syncRaftProgressBecomeReplicate(progress);

  // Conservatively set the pendingConfigIndex to the last index in the
  // log. There may or may not be a pending config change, but it's
  // safe to delay any future proposals until we commit all our
  // pending log entries, and scanning the entire tail of the log
  // could be expensive.
  SyncIndex lastIndex = syncRaftLogLastIndex(pRaft->log);
  pRaft->pendingConfigIndex = lastIndex;

  // After becoming leader, append a no-op entry.
  SSyncRaftEntry* entry = (SSyncRaftEntry*)malloc(sizeof(SSyncRaftEntry));
  if (entry == NULL) {
    return;
  }
  *entry = (SSyncRaftEntry) {
    .buffer = (SSyncBuffer) {
      .data = NULL,
      .len = 0,
    }
  };
  appendEntries(pRaft, entry, 1);
  //syncRaftTriggerHeartbeat(pRaft);
  syncInfo("[%d:%d] became leader at term %" PRId64 "", pRaft->selfGroupId, pRaft->selfId, pRaft->term);
}
|
||||
|
||||
// syncRaftTriggerHeartbeat forwards to triggerAll(); its result is discarded.
void syncRaftTriggerHeartbeat(SSyncRaft* pRaft) {
  (void)triggerAll(pRaft);
}
|
||||
|
||||
// syncRaftRandomizedElectionTimeout draws a new randomized election timeout,
// computed as 3 plus taosRand() modulo 4 (intended range: 3 to 6 ticks).
void syncRaftRandomizedElectionTimeout(SSyncRaft* pRaft) {
  enum { MIN_ELECTION_TICKS = 3, ELECTION_TICK_SPREAD = 4 };

  pRaft->randomizedElectionTimeout = taosRand() % ELECTION_TICK_SPREAD + MIN_ELECTION_TICKS;
}
|
||||
|
||||
// syncRaftIsPromotable reports whether this node has a valid id of its own,
// i.e. selfId differs from SYNC_NON_NODE_ID.
bool syncRaftIsPromotable(SSyncRaft* pRaft) {
  if (pRaft->selfId == SYNC_NON_NODE_ID) {
    return false;
  }
  return true;
}
|
||||
|
||||
// syncRaftIsPastElectionTimeout reports whether the elapsed election ticks
// have reached the current randomized election timeout.
bool syncRaftIsPastElectionTimeout(SSyncRaft* pRaft) {
  bool stillWaiting = pRaft->electionElapsed < pRaft->randomizedElectionTimeout;
  return !stillWaiting;
}
|
||||
|
||||
// syncRaftQuorum currently always returns 0.
// TODO: the previous, disabled implementation computed
// pRaft->cluster.replica / 2 + 1.
int syncRaftQuorum(SSyncRaft* pRaft) {
  (void)pRaft;
  return 0;
}
|
||||
|
||||
// syncRaftPollVote records the vote (`grant` = granted or rejected) received
// from node `id` in the tracker and returns the tally reported by
// syncRaftTallyVotes(), which also receives `rejected` and `granted`.
//
// NOTE(review): for an unknown node the function returns the boolean `true`
// although the return type is ESyncRaftVoteResult; which enum value that maps
// to is not visible here and should be confirmed.
ESyncRaftVoteResult syncRaftPollVote(SSyncRaft* pRaft, SyncNodeId id,
                                     bool preVote, bool grant,
                                     int* rejected, int *granted) {
  SNodeInfo* voter = syncRaftGetNodeById(pRaft, id);
  if (voter == NULL) {
    return true;
  }

  if (!grant) {
    syncInfo("[%d:%d] received rejection (pre-vote %d) from %d at term %" PRId64 "",
             pRaft->selfGroupId, pRaft->selfId, preVote, id, pRaft->term);
  } else {
    syncInfo("[%d:%d] received grant (pre-vote %d) from %d at term %" PRId64 "",
             pRaft->selfGroupId, pRaft->selfId, preVote, id, pRaft->term);
  }

  syncRaftRecordVote(pRaft->tracker, voter->nodeId, grant);
  return syncRaftTallyVotes(pRaft->tracker, rejected, granted);
}
|
||||
/*
|
||||
if (accept) {
|
||||
syncInfo("[%d:%d] received (pre-vote %d) from %d at term %" PRId64 "",
|
||||
pRaft->selfGroupId, pRaft->selfId, preVote, id, pRaft->term);
|
||||
} else {
|
||||
syncInfo("[%d:%d] received rejection from %d at term %" PRId64 "",
|
||||
pRaft->selfGroupId, pRaft->selfId, id, pRaft->term);
|
||||
}
|
||||
|
||||
int voteIndex = syncRaftGetNodeById(pRaft, id);
|
||||
assert(voteIndex < pRaft->cluster.replica && voteIndex >= 0);
|
||||
assert(pRaft->candidateState.votes[voteIndex] == SYNC_RAFT_VOTE_RESP_UNKNOWN);
|
||||
|
||||
pRaft->candidateState.votes[voteIndex] = accept ? SYNC_RAFT_VOTE_RESP_GRANT : SYNC_RAFT_VOTE_RESP_REJECT;
|
||||
int granted = 0, rejected = 0;
|
||||
int i;
|
||||
for (i = 0; i < pRaft->cluster.replica; ++i) {
|
||||
if (pRaft->candidateState.votes[i] == SYNC_RAFT_VOTE_RESP_GRANT) granted++;
|
||||
else if (pRaft->candidateState.votes[i] == SYNC_RAFT_VOTE_RESP_REJECT) rejected++;
|
||||
}
|
||||
|
||||
if (rejectNum) *rejectNum = rejected;
|
||||
return granted;
|
||||
*/
|
||||
|
||||
// syncRaftLoadState restores commit index, term and vote from `serverState`.
// The stored commit index must lie between the log's current commit index and
// its last index (both inclusive); otherwise a fatal message is logged and
// nothing is changed.
void syncRaftLoadState(SSyncRaft* pRaft, const SSyncServerState* serverState) {
  SyncIndex commitIndex = serverState->commitIndex;
  SyncIndex lastIndex = syncRaftLogLastIndex(pRaft->log);

  if (commitIndex < pRaft->log->commitIndex || commitIndex > lastIndex) {
    syncFatal("[%d:%d] state.commit %"PRId64" is out of range [%" PRId64 ",%" PRId64 "]",
              pRaft->selfGroupId, pRaft->selfId, commitIndex, pRaft->log->commitIndex, lastIndex);
    return;
  }

  pRaft->log->commitIndex = commitIndex;
  pRaft->term = serverState->term;
  pRaft->voteFor = serverState->voteFor;
}
|
||||
|
||||
// visitProgressSendAppend is the per-progress callback used when broadcasting
// appends: `arg` is the SSyncRaft instance, and every progress except the
// node's own is handed to syncRaftMaybeSendAppend().
static void visitProgressSendAppend(SSyncRaftProgress* progress, void* arg) {
  SSyncRaft* pRaft = (SSyncRaft*)arg;

  if (progress->id != pRaft->selfId) {
    syncRaftMaybeSendAppend(pRaft, progress, true);
  }
}
|
||||
|
||||
// bcastAppend sends RPC, with entries to all peers that are not up-to-date
|
||||
// according to the progress recorded in r.prs.
|
||||
void syncRaftBroadcastAppend(SSyncRaft* pRaft) {
|
||||
syncRaftProgressVisit(pRaft->tracker, visitProgressSendAppend, pRaft);
|
||||
}
|
||||
|
||||
// syncRaftGetNodeById looks up the node info registered for `id` in
// pRaft->nodeInfoMap. Returns NULL when the id is unknown.
SNodeInfo* syncRaftGetNodeById(SSyncRaft *pRaft, SyncNodeId id) {
  // The key is the id itself, so the key length is the size of the id, not
  // the size of a pointer to it.
  SNodeInfo **ppNode = taosHashGet(pRaft->nodeInfoMap, &id, sizeof(id));
  if (ppNode != NULL) {
    return *ppNode;
  }

  return NULL;
}
|
||||
|
||||
// convertClear is called at the start of state transitions. It is currently a
// placeholder without any work to do and always returns 0.
static int convertClear(SSyncRaft* pRaft) {
  (void)pRaft;
  // A non-void function must return a value on every path.
  return 0;
}
|
||||
|
||||
// stepFollower is the message handler installed for followers. It is a stub:
// the message is ignored and 0 is returned.
static int stepFollower(SSyncRaft* pRaft, const SSyncMessage* pMsg) {
  (void)pRaft;
  (void)pMsg;
  return 0;
}
|
||||
|
||||
// stepCandidate is the message handler installed for (pre-)candidates.
//  - internal proposals are ignored;
//  - vote responses go to syncRaftHandleVoteRespMessage();
//  - an append message makes the node a follower of the sender at the
//    message's term before the message is handled;
//  - everything else is ignored.
// Always returns 0.
static int stepCandidate(SSyncRaft* pRaft, const SSyncMessage* pMsg) {
  switch (pMsg->msgType) {
    case RAFT_MSG_INTERNAL_PROP:
      break;

    case RAFT_MSG_VOTE_RESP:
      syncRaftHandleVoteRespMessage(pRaft, pMsg);
      break;

    case RAFT_MSG_APPEND:
      syncRaftBecomeFollower(pRaft, pMsg->term, pMsg->from);
      syncRaftHandleAppendEntriesMessage(pRaft, pMsg);
      break;

    default:
      break;
  }

  return 0;
}
|
||||
|
||||
// stepLeader is the message handler installed for leaders. At present it only
// calls convertClear() and reports success; the message is not inspected.
static int stepLeader(SSyncRaft* pRaft, const SSyncMessage* pMsg) {
  (void)pMsg;
  convertClear(pRaft);
  return 0;
}
|
||||
|
||||
// tickElection is run by followers and candidates after r.electionTimeout.
|
||||
static void tickElection(SSyncRaft* pRaft) {
|
||||
pRaft->electionElapsed += 1;
|
||||
|
||||
if (!syncRaftIsPromotable(pRaft)) {
|
||||
return;
|
||||
}
|
||||
|
||||
if (!syncRaftIsPastElectionTimeout(pRaft)) {
|
||||
return;
|
||||
}
|
||||
|
||||
// election timeout
|
||||
pRaft->electionElapsed = 0;
|
||||
SSyncMessage msg;
|
||||
syncRaftStep(pRaft, syncInitElectionMsg(&msg, pRaft->selfId));
|
||||
}
|
||||
|
||||
// tickHeartbeat is run by leaders to send a MsgBeat after r.heartbeatTimeout.
|
||||
static void tickHeartbeat(SSyncRaft* pRaft) {
|
||||
|
||||
}
|
||||
|
||||
// TODO
|
||||
static bool increaseUncommittedSize(SSyncRaft* pRaft, SSyncRaftEntry* entries, int n) {
|
||||
return false;
|
||||
}
|
||||
|
||||
// appendEntries stamps the `n` entries with the current term and consecutive
// indexes following the log's last index, appends them to the log, advances
// the leader's own progress to the new last index and tries to commit.
// If increaseUncommittedSize() refuses the entries, the proposal is dropped
// and nothing is appended.
static void appendEntries(SSyncRaft* pRaft, SSyncRaftEntry* entries, int n) {
  SyncIndex lastIndex = syncRaftLogLastIndex(pRaft->log);
  SyncTerm term = pRaft->term;
  int i;

  for (i = 0; i < n; ++i) {
    entries[i].term = term;
    entries[i].index = lastIndex + 1 + i;
  }

  // Track the size of this uncommitted proposal.
  if (!increaseUncommittedSize(pRaft, entries, n)) {
    // Drop the proposal.
    return;
  }

  syncRaftLogAppend(pRaft->log, entries, n);

  SSyncRaftProgress* progress = syncRaftFindProgressByNodeId(&pRaft->tracker->progressMap, pRaft->selfId);
  assert(progress != NULL);
  // The leader now matches up to the last entry just appended, not the last
  // index from before the append.
  syncRaftProgressMaybeUpdate(progress, lastIndex + n);
  // Regardless of syncRaftMaybeCommit's return, our caller will call bcastAppend.
  syncRaftMaybeCommit(pRaft);
}
|
||||
|
||||
// syncRaftMaybeCommit attempts to advance the commit index. Returns true if
|
||||
// the commit index changed (in which case the caller should call
|
||||
// r.bcastAppend).
|
||||
bool syncRaftMaybeCommit(SSyncRaft* pRaft) {
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
/**
|
||||
* trigger I/O requests for newly appended log entries or heartbeats.
|
||||
**/
|
||||
static int triggerAll(SSyncRaft* pRaft) {
|
||||
#if 0
|
||||
assert(pRaft->state == TAOS_SYNC_STATE_LEADER);
|
||||
int i;
|
||||
|
||||
for (i = 0; i < pRaft->cluster.replica; ++i) {
|
||||
if (i == pRaft->cluster.selfIndex) {
|
||||
continue;
|
||||
}
|
||||
|
||||
syncRaftMaybeSendAppend(pRaft, pRaft->tracker->progressMap.progress[i], true);
|
||||
}
|
||||
#endif
|
||||
return 0;
|
||||
}
|
||||
|
||||
// abortLeaderTransfer clears the leadership transfer target by resetting
// leadTransferee to SYNC_NON_NODE_ID.
static void abortLeaderTransfer(SSyncRaft* pRaft) {
  const SyncNodeId noTransferee = SYNC_NON_NODE_ID;
  pRaft->leadTransferee = noTransferee;
}
|
||||
|
||||
// resetProgress is a progress-visitor callback: `arg` is the SSyncRaft
// instance, and the visited progress is passed to syncRaftResetProgress().
static void resetProgress(SSyncRaftProgress* progress, void* arg) {
  SSyncRaft* pRaft = (SSyncRaft*)arg;
  syncRaftResetProgress(pRaft, progress);
}
|
||||
|
||||
// resetRaft brings the instance into a clean state for `term`.
// When the term actually changes, the recorded vote is cleared as well.
// Independently of that, it forgets the leader, restarts the election and
// heartbeat counters, draws a new election timeout, aborts any leadership
// transfer, resets the votes and every progress of the tracker, and clears
// the pending-config index and the uncommitted size.
static void resetRaft(SSyncRaft* pRaft, SyncTerm term) {
  bool termChanged = (pRaft->term != term);
  if (termChanged) {
    pRaft->term = term;
    pRaft->voteFor = SYNC_NON_NODE_ID;
  }

  pRaft->leaderId = SYNC_NON_NODE_ID;

  // Timers start over.
  pRaft->heartbeatElapsed = 0;
  pRaft->electionElapsed = 0;
  syncRaftRandomizedElectionTimeout(pRaft);

  abortLeaderTransfer(pRaft);

  // Voting and replication bookkeeping start over.
  syncRaftResetVotes(pRaft->tracker);
  syncRaftProgressVisit(pRaft->tracker, resetProgress, pRaft);

  pRaft->uncommittedSize = 0;
  pRaft->pendingConfigIndex = 0;
}
|
|
@ -1,97 +0,0 @@
|
|||
/*
|
||||
* Copyright (c) 2019 TAOS Data, Inc. <cli@taosdata.com>
|
||||
*
|
||||
* This program is free software: you can use, redistribute, and/or modify
|
||||
* it under the terms of the GNU Affero General Public License, version 3
|
||||
* or later ("AGPL"), as published by the Free Software Foundation.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful, but WITHOUT
|
||||
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||
* FITNESS FOR A PARTICULAR PURPOSE.
|
||||
*
|
||||
* You should have received a copy of the GNU Affero General Public License
|
||||
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#include "sync_raft_inflights.h"
|
||||
|
||||
SSyncRaftInflights* syncRaftOpenInflights(int size) {
|
||||
SSyncRaftInflights* inflights = (SSyncRaftInflights*)malloc(sizeof(SSyncRaftInflights));
|
||||
if (inflights == NULL) {
|
||||
return NULL;
|
||||
}
|
||||
SyncIndex* buffer = (SyncIndex*)malloc(sizeof(SyncIndex) * size);
|
||||
if (buffer == NULL) {
|
||||
free(inflights);
|
||||
return NULL;
|
||||
}
|
||||
*inflights = (SSyncRaftInflights) {
|
||||
.buffer = buffer,
|
||||
.count = 0,
|
||||
.size = 0,
|
||||
.start = 0,
|
||||
};
|
||||
|
||||
return inflights;
|
||||
}
|
||||
|
||||
// syncRaftCloseInflights releases an inflight window together with its
// buffer. Passing NULL is allowed and does nothing, so the result of a failed
// syncRaftOpenInflights() can be closed safely.
void syncRaftCloseInflights(SSyncRaftInflights* inflights) {
  if (inflights == NULL) {
    return;
  }
  free(inflights->buffer);
  free(inflights);
}
|
||||
|
||||
// Add notifies the Inflights that a new message with the given index is being
|
||||
// dispatched. Full() must be called prior to Add() to verify that there is room
|
||||
// for one more message, and consecutive calls to add Add() must provide a
|
||||
// monotonic sequence of indexes.
|
||||
void syncRaftInflightAdd(SSyncRaftInflights* inflights, SyncIndex inflightIndex) {
|
||||
assert(!syncRaftInflightFull(inflights));
|
||||
|
||||
int next = inflights->start + inflights->count;
|
||||
int size = inflights->size;
|
||||
|
||||
if (next >= size) {
|
||||
next -= size;
|
||||
}
|
||||
|
||||
inflights->buffer[next] = inflightIndex;
|
||||
inflights->count++;
|
||||
}
|
||||
|
||||
// FreeLE frees the inflights smaller or equal to the given `to` flight.
|
||||
void syncRaftInflightFreeLE(SSyncRaftInflights* inflights, SyncIndex toIndex) {
|
||||
if (inflights->count == 0 || toIndex < inflights->buffer[inflights->start]) {
|
||||
// out of the left side of the window
|
||||
return;
|
||||
}
|
||||
|
||||
int i, idx;
|
||||
for (i = 0, idx = inflights->start; i < inflights->count; i++) {
|
||||
if (toIndex < inflights->buffer[idx]) { // found the first large inflight
|
||||
break;
|
||||
}
|
||||
|
||||
// increase index and maybe rotate
|
||||
int size = inflights->size;
|
||||
idx++;
|
||||
if (idx >= size) {
|
||||
idx -= size;
|
||||
}
|
||||
}
|
||||
|
||||
// free i inflights and set new start index
|
||||
inflights->count -= i;
|
||||
inflights->start = idx;
|
||||
assert(inflights->count >= 0);
|
||||
if (inflights->count == 0) {
|
||||
// inflights is empty, reset the start index so that we don't grow the
|
||||
// buffer unnecessarily.
|
||||
inflights->start = 0;
|
||||
}
|
||||
}
|
||||
|
||||
// FreeFirstOne releases the first inflight. This is a no-op if nothing is
|
||||
// inflight.
|
||||
void syncRaftInflightFreeFirstOne(SSyncRaftInflights* inflights) {
|
||||
syncRaftInflightFreeLE(inflights, inflights->buffer[inflights->start]);
|
||||
}
|
|
@ -1,82 +0,0 @@
|
|||
/*
|
||||
* Copyright (c) 2019 TAOS Data, Inc. <cli@taosdata.com>
|
||||
*
|
||||
* This program is free software: you can use, redistribute, and/or modify
|
||||
* it under the terms of the GNU Affero General Public License, version 3
|
||||
* or later ("AGPL"), as published by the Free Software Foundation.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful, but WITHOUT
|
||||
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||
* FITNESS FOR A PARTICULAR PURPOSE.
|
||||
*
|
||||
* You should have received a copy of the GNU Affero General Public License
|
||||
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#include "sync_raft_node_map.h"
|
||||
#include "sync_type.h"
|
||||
#include "sync_raft_progress.h"
|
||||
|
||||
// Create the hash table backing a node map, sized for TSDB_MAX_REPLICA
// entries and using the default hash function for TSDB_DATA_TYPE_INT keys.
// The result of taosHashInit() is stored without being checked.
void syncRaftInitNodeMap(SSyncRaftNodeMap* nodeMap) {
  nodeMap->nodeIdMap = taosHashInit(TSDB_MAX_REPLICA,
                                    taosGetDefaultHashFunction(TSDB_DATA_TYPE_INT),
                                    true,
                                    HASH_ENTRY_LOCK);
}
|
||||
|
||||
// Destroy the hash table owned by the node map. The SSyncRaftNodeMap
// structure itself is not freed.
void syncRaftFreeNodeMap(SSyncRaftNodeMap* nodeMap) {
  taosHashCleanup(nodeMap->nodeIdMap);
}
|
||||
|
||||
// Remove every node id from the map while keeping the hash table usable.
void syncRaftClearNodeMap(SSyncRaftNodeMap* nodeMap) {
  taosHashClear(nodeMap->nodeIdMap);
}
|
||||
|
||||
// Report whether `nodeId` has an entry in the node map.
//
// NOTE(review): the key length is sizeof(SyncNodeId*), matching
// syncRaftAddToNodeMap(); if SyncNodeId is narrower than a pointer this reads
// past `nodeId` - confirm and fix together with the insert/remove paths.
bool syncRaftIsInNodeMap(const SSyncRaftNodeMap* nodeMap, SyncNodeId nodeId) {
  return taosHashGet(nodeMap->nodeIdMap, &nodeId, sizeof(SyncNodeId*)) != NULL;
}
|
||||
|
||||
// Insert every node id stored in `from` into `to`. Entries already in `to`
// are not removed first.
//
// The table is walked with taosHashIterate() directly: the previous loop kept
// its cursor in a local pointer that stayed NULL, and stored the address of
// that local as both key and value. Each id is inserted through
// syncRaftAddToNodeMap() so both maps share one key convention.
//
// NOTE(review): assumes each entry's data begins with the SyncNodeId written
// by syncRaftAddToNodeMap() - confirm.
void syncRaftCopyNodeMap(SSyncRaftNodeMap* from, SSyncRaftNodeMap* to) {
  void* pIter = taosHashIterate(from->nodeIdMap, NULL);
  while (pIter != NULL) {
    SyncNodeId id = *(SyncNodeId*)pIter;
    syncRaftAddToNodeMap(to, id);
    pIter = taosHashIterate(from->nodeIdMap, pIter);
  }
}
|
||||
|
||||
// Advance an iteration over the node map. Returns true when taosHashIterate()
// yields nothing more, otherwise writes the next id through `pId` and returns
// false.
//
// NOTE(review): `pId` is used both as the cursor handed to taosHashIterate()
// and as the output location, and the stored entry is dereferenced twice.
// Callers in this file pass a NULL `pId`, which would be dereferenced on the
// first hit - this interface needs a redesign (separate cursor and output).
bool syncRaftIterateNodeMap(const SSyncRaftNodeMap* nodeMap, SyncNodeId *pId) {
  SyncNodeId **ppNext = taosHashIterate(nodeMap->nodeIdMap, pId);
  if (ppNext != NULL) {
    *pId = *(*ppNext);
    return false;
  }

  return true;
}
|
||||
|
||||
// Return true when every node id in `nodeMap` has an entry in `progressMap`
// (trivially true for an empty node map).
//
// The table is walked with taosHashIterate() directly; the previous loop
// dereferenced a cursor pointer that was never set to anything but NULL.
//
// NOTE(review): assumes each entry's data begins with the SyncNodeId written
// by syncRaftAddToNodeMap() - confirm.
bool syncRaftIsAllNodeInProgressMap(SSyncRaftNodeMap* nodeMap, SSyncRaftProgressMap* progressMap) {
  bool allPresent = true;

  // The walk always runs to the end instead of returning early, so the hash
  // iteration is never abandoned half-way.
  void* pIter = taosHashIterate(nodeMap->nodeIdMap, NULL);
  while (pIter != NULL) {
    SyncNodeId id = *(SyncNodeId*)pIter;
    if (!syncRaftIsInProgressMap(progressMap, id)) {
      allPresent = false;
    }
    pIter = taosHashIterate(nodeMap->nodeIdMap, pIter);
  }

  return allPresent;
}
|
||||
|
||||
// Merge the ids of `nodeMap` into `to`. This simply delegates to
// syncRaftCopyNodeMap().
void syncRaftUnionNodeMap(SSyncRaftNodeMap* nodeMap, SSyncRaftNodeMap* to) {
  syncRaftCopyNodeMap(nodeMap, to);
}
|
||||
|
||||
// Insert `nodeId` into the node map, using the id as both key and value.
//
// NOTE(review): key and value lengths are sizeof(SyncNodeId*) while the
// source object is a SyncNodeId; if the id type is narrower than a pointer
// this reads past `nodeId` - confirm and fix together with the lookup/remove
// paths.
void syncRaftAddToNodeMap(SSyncRaftNodeMap* nodeMap, SyncNodeId nodeId) {
  taosHashPut(nodeMap->nodeIdMap,
              &nodeId, sizeof(SyncNodeId*),
              &nodeId, sizeof(SyncNodeId*));
}
|
||||
|
||||
// Remove `nodeId` from the node map. The key length mirrors the one used by
// syncRaftAddToNodeMap().
void syncRaftRemoveFromNodeMap(SSyncRaftNodeMap* nodeMap, SyncNodeId nodeId) {
  taosHashRemove(nodeMap->nodeIdMap, &nodeId, sizeof(SyncNodeId*));
}
|
||||
|
||||
int32_t syncRaftNodeMapSize(const SSyncRaftNodeMap* nodeMap) {
|
||||
return taosHashGetSize(nodeMap->nodeIdMap);
|
||||
}
|
|
@ -1,260 +0,0 @@
|
|||
/*
|
||||
* Copyright (c) 2019 TAOS Data, Inc. <cli@taosdata.com>
|
||||
*
|
||||
* This program is free software: you can use, redistribute, and/or modify
|
||||
* it under the terms of the GNU Affero General Public License, version 3
|
||||
* or later ("AGPL"), as published by the Free Software Foundation.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful, but WITHOUT
|
||||
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||
* FITNESS FOR A PARTICULAR PURPOSE.
|
||||
*
|
||||
* You should have received a copy of the GNU Affero General Public License
|
||||
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#include "raft.h"
|
||||
#include "raft_log.h"
|
||||
#include "sync_raft_progress.h"
|
||||
#include "sync_raft_progress_tracker.h"
|
||||
#include "sync.h"
|
||||
#include "syncInt.h"
|
||||
|
||||
static void copyProgress(SSyncRaftProgress* progress, void* arg);
|
||||
|
||||
static void refProgress(SSyncRaftProgress* progress);
|
||||
static void unrefProgress(SSyncRaftProgress* progress, void*);
|
||||
|
||||
static void resetProgressState(SSyncRaftProgress* progress, ESyncRaftProgressState state);
|
||||
static void probeAcked(SSyncRaftProgress* progress);
|
||||
|
||||
static void resumeProgress(SSyncRaftProgress* progress);
|
||||
|
||||
// Reset `progress` to the probing state with a fresh inflight window.
//
// matchIndex becomes the last log index for the local node (progress->id ==
// pRaft->selfId) and 0 for every other node; nextIndex becomes last log
// index + 1. The node id and reference count survive the reset.
//
// If the new window cannot be allocated the progress is left completely
// untouched.
void syncRaftResetProgress(SSyncRaft* pRaft, SSyncRaftProgress* progress) {
  // Allocate first: closing the old window before a failed allocation would
  // leave progress->inflights pointing at freed memory.
  SSyncRaftInflights* inflights = syncRaftOpenInflights(pRaft->tracker->maxInflightMsgs);
  if (inflights == NULL) {
    return;
  }

  if (progress->inflights) {
    syncRaftCloseInflights(progress->inflights);
  }

  SyncIndex lastIndex = syncRaftLogLastIndex(pRaft->log);

  // The right-hand side is fully evaluated before the store, so reading
  // progress->id / progress->refCount inside the initializer is safe. Both
  // are carried over explicitly; a compound literal zeroes every member it
  // does not name, which would otherwise erase the map key and the refcount.
  *progress = (SSyncRaftProgress) {
    .id         = progress->id,
    .refCount   = progress->refCount,
    .matchIndex = progress->id == pRaft->selfId ? lastIndex : 0,
    .nextIndex  = lastIndex + 1,
    .inflights  = inflights,
    .isLearner  = false,
    .state      = PROGRESS_STATE_PROBE,
  };
}
|
||||
|
||||
// MaybeUpdate is called when an MsgAppResp arrives from the follower, with the
|
||||
// index acked by it. The method returns false if the given n index comes from
|
||||
// an outdated message. Otherwise it updates the progress and returns true.
|
||||
bool syncRaftProgressMaybeUpdate(SSyncRaftProgress* progress, SyncIndex lastIndex) {
|
||||
bool updated = false;
|
||||
|
||||
if (progress->matchIndex < lastIndex) {
|
||||
progress->matchIndex = lastIndex;
|
||||
updated = true;
|
||||
probeAcked(progress);
|
||||
}
|
||||
|
||||
progress->nextIndex = TMAX(progress->nextIndex, lastIndex + 1);
|
||||
|
||||
return updated;
|
||||
}
|
||||
|
||||
// MaybeDecrTo adjusts the Progress to the receipt of a MsgApp rejection. The
|
||||
// arguments are the index of the append message rejected by the follower, and
|
||||
// the hint that we want to decrease to.
|
||||
//
|
||||
// Rejections can happen spuriously as messages are sent out of order or
|
||||
// duplicated. In such cases, the rejection pertains to an index that the
|
||||
// Progress already knows were previously acknowledged, and false is returned
|
||||
// without changing the Progress.
|
||||
//
|
||||
// If the rejection is genuine, Next is lowered sensibly, and the Progress is
|
||||
// cleared for sending log entries.
|
||||
bool syncRaftProgressMaybeDecrTo(SSyncRaftProgress* progress,
|
||||
SyncIndex rejected, SyncIndex matchHint) {
|
||||
if (progress->state == PROGRESS_STATE_REPLICATE) {
|
||||
// The rejection must be stale if the progress has matched and "rejected"
|
||||
// is smaller than "match".
|
||||
if (rejected <= progress->matchIndex) {
|
||||
syncDebug("match index is up to date,ignore");
|
||||
return false;
|
||||
}
|
||||
|
||||
// Directly decrease next to match + 1.
|
||||
//
|
||||
// TODO(tbg): why not use matchHint if it's larger?
|
||||
progress->nextIndex = progress->matchIndex + 1;
|
||||
return true;
|
||||
}
|
||||
|
||||
// The rejection must be stale if "rejected" does not match next - 1. This
|
||||
// is because non-replicating followers are probed one entry at a time.
|
||||
if (rejected != progress->nextIndex - 1) {
|
||||
syncDebug("rejected index %" PRId64 " different from next index %" PRId64 " -> ignore"
|
||||
, rejected, progress->nextIndex);
|
||||
return false;
|
||||
}
|
||||
|
||||
progress->nextIndex = TMAX(TMIN(rejected, matchHint + 1), 1);
|
||||
|
||||
progress->probeSent = false;
|
||||
return true;
|
||||
}
|
||||
|
||||
// IsPaused returns whether sending log entries to this node has been throttled.
|
||||
// This is done when a node has rejected recent MsgApps, is currently waiting
|
||||
// for a snapshot, or has reached the MaxInflightMsgs limit. In normal
|
||||
// operation, this is false. A throttled node will be contacted less frequently
|
||||
// until it has reached a state in which it's able to accept a steady stream of
|
||||
// log entries again.
|
||||
bool syncRaftProgressIsPaused(SSyncRaftProgress* progress) {
|
||||
switch (progress->state) {
|
||||
case PROGRESS_STATE_PROBE:
|
||||
return progress->probeSent;
|
||||
case PROGRESS_STATE_REPLICATE:
|
||||
return syncRaftInflightFull(progress->inflights);
|
||||
case PROGRESS_STATE_SNAPSHOT:
|
||||
return true;
|
||||
default:
|
||||
syncFatal("error sync state:%d", progress->state);
|
||||
}
|
||||
}
|
||||
|
||||
// Look up the progress registered under `id`; returns NULL when the map has
// no such entry. The map stores SSyncRaftProgress pointers, so the hash entry
// is dereferenced once.
//
// NOTE(review): the key length is sizeof(SyncNodeId*), matching
// syncRaftAddToProgressMap() - confirm it should not be sizeof(SyncNodeId).
SSyncRaftProgress* syncRaftFindProgressByNodeId(const SSyncRaftProgressMap* progressMap, SyncNodeId id) {
  SSyncRaftProgress** ppEntry =
      (SSyncRaftProgress**)taosHashGet(progressMap->progressMap, &id, sizeof(SyncNodeId*));

  return (ppEntry != NULL) ? *ppEntry : NULL;
}
|
||||
|
||||
// Register `progress` in the map under progress->id, taking one reference on
// it. The map stores the pointer, not a copy of the structure.
//
// Always returns 0. As before, the result of taosHashPut() is not inspected.
int syncRaftAddToProgressMap(SSyncRaftProgressMap* progressMap, SSyncRaftProgress* progress) {
  refProgress(progress);
  taosHashPut(progressMap->progressMap, &progress->id, sizeof(SyncNodeId*), &progress, sizeof(SSyncRaftProgress*));

  // The function is declared to return int, so it must return a value.
  return 0;
}
|
||||
|
||||
// Drop the entry registered under `id`, releasing the reference the map held
// on the progress. Does nothing when the id is not present.
void syncRaftRemoveFromProgressMap(SSyncRaftProgressMap* progressMap, SyncNodeId id) {
  SSyncRaftProgress** ppEntry =
      (SSyncRaftProgress**)taosHashGet(progressMap->progressMap, &id, sizeof(SyncNodeId*));

  if (ppEntry != NULL) {
    // Release the reference first; the hash entry itself is removed after.
    unrefProgress(*ppEntry, NULL);
    taosHashRemove(progressMap->progressMap, &id, sizeof(SyncNodeId*));
  }
}
|
||||
|
||||
// Report whether a progress is registered under `id`.
bool syncRaftIsInProgressMap(SSyncRaftProgressMap* progressMap, SyncNodeId id) {
  const void* entry = taosHashGet(progressMap->progressMap, &id, sizeof(SyncNodeId*));
  return entry != NULL;
}
|
||||
|
||||
// A progress is up to date when its nextIndex is exactly one past the last
// index of the local log.
bool syncRaftProgressIsUptodate(SSyncRaft* pRaft, SSyncRaftProgress* progress) {
  return progress->nextIndex == syncRaftLogLastIndex(pRaft->log) + 1;
}
|
||||
|
||||
// BecomeProbe transitions into StateProbe. Next is reset to Match+1 or,
|
||||
// optionally and if larger, the index of the pending snapshot.
|
||||
void syncRaftProgressBecomeProbe(SSyncRaftProgress* progress) {
|
||||
// If the original state is StateSnapshot, progress knows that
|
||||
// the pending snapshot has been sent to this peer successfully, then
|
||||
// probes from pendingSnapshot + 1.
|
||||
if (progress->state == PROGRESS_STATE_SNAPSHOT) {
|
||||
SyncIndex pendingSnapshotIndex = progress->pendingSnapshotIndex;
|
||||
resetProgressState(progress, PROGRESS_STATE_PROBE);
|
||||
progress->nextIndex = TMAX(progress->matchIndex + 1, pendingSnapshotIndex + 1);
|
||||
} else {
|
||||
resetProgressState(progress, PROGRESS_STATE_PROBE);
|
||||
progress->nextIndex = progress->matchIndex + 1;
|
||||
}
|
||||
}
|
||||
|
||||
// BecomeReplicate transitions into StateReplicate, resetting Next to Match+1.
|
||||
void syncRaftProgressBecomeReplicate(SSyncRaftProgress* progress) {
|
||||
resetProgressState(progress, PROGRESS_STATE_REPLICATE);
|
||||
progress->nextIndex = progress->matchIndex + 1;
|
||||
}
|
||||
|
||||
// BecomeSnapshot moves the Progress to StateSnapshot with the specified pending
|
||||
// snapshot index.
|
||||
void syncRaftProgressBecomeSnapshot(SSyncRaftProgress* progress, SyncIndex snapshotIndex) {
|
||||
resetProgressState(progress, PROGRESS_STATE_SNAPSHOT);
|
||||
progress->pendingSnapshotIndex = snapshotIndex;
|
||||
}
|
||||
|
||||
// Copy the whole progress structure into `out`. This is a shallow copy:
// pointer members such as `inflights` end up shared between both objects.
void syncRaftCopyProgress(const SSyncRaftProgress* progress, SSyncRaftProgress* out) {
  *out = *progress;
}
|
||||
|
||||
// Create the hash table backing a progress map, sized for TSDB_MAX_REPLICA
// entries. The result of taosHashInit() is stored without being checked.
void syncRaftInitProgressMap(SSyncRaftProgressMap* progressMap) {
  progressMap->progressMap = taosHashInit(TSDB_MAX_REPLICA,
                                          taosGetDefaultHashFunction(TSDB_DATA_TYPE_INT),
                                          true,
                                          HASH_ENTRY_LOCK);
}
|
||||
|
||||
// Release the map's reference on every registered progress, then destroy the
// hash table itself.
void syncRaftFreeProgressMap(SSyncRaftProgressMap* progressMap) {
  syncRaftVisitProgressMap(progressMap, unrefProgress, NULL);

  taosHashCleanup(progressMap->progressMap);
}
|
||||
|
||||
// Remove every entry from the progress map while keeping the table usable.
//
// NOTE(review): unlike syncRaftFreeProgressMap(), the entries are not passed
// to unrefProgress() first - confirm the references are released elsewhere.
void syncRaftClearProgressMap(SSyncRaftProgressMap* progressMap) {
  taosHashClear(progressMap->progressMap);
}
|
||||
|
||||
// Register every progress of `from` in `to` as well. The progress objects
// are shared, not duplicated: copyProgress() re-adds the same pointer, which
// takes one more reference on it.
void syncRaftCopyProgressMap(SSyncRaftProgressMap* from, SSyncRaftProgressMap* to) {
  syncRaftVisitProgressMap(from, copyProgress, to);
}
|
||||
|
||||
// Advance an iteration over the progress map. Returns true when
// taosHashIterate() yields nothing more; otherwise copies the next progress
// structure into `*pProgress` and returns false.
//
// NOTE(review): `pProgress` doubles as the cursor handed to taosHashIterate()
// and as the destination of a by-value copy, so it must be non-NULL yet also
// a valid cursor - this interface looks inconsistent and needs a redesign.
bool syncRaftIterateProgressMap(const SSyncRaftProgressMap* progressMap, SSyncRaftProgress *pProgress) {
  SSyncRaftProgress **ppNext = taosHashIterate(progressMap->progressMap, pProgress);
  if (ppNext != NULL) {
    *pProgress = *(*ppNext);
    return false;
  }

  return true;
}
|
||||
|
||||
// Call `fp(progress, arg)` once for every progress registered in the map.
// Always returns true.
//
// The table is walked with taosHashIterate() directly. The previous loop
// passed an uninitialized pointer to syncRaftIterateProgressMap() and had no
// return statement despite the bool return type.
//
// Each hash entry holds the SSyncRaftProgress pointer stored by
// syncRaftAddToProgressMap(), so the callback receives the registered object
// itself (required by unrefProgress and copyProgress).
bool syncRaftVisitProgressMap(SSyncRaftProgressMap* progressMap, visitProgressFp fp, void* arg) {
  void* pIter = taosHashIterate(progressMap->progressMap, NULL);
  while (pIter != NULL) {
    SSyncRaftProgress* progress = *(SSyncRaftProgress**)pIter;
    fp(progress, arg);
    pIter = taosHashIterate(progressMap->progressMap, pIter);
  }

  return true;
}
|
||||
|
||||
// Visitor used by syncRaftCopyProgressMap(): `arg` is the destination map,
// and the visited progress is registered there too. The progress must
// already be referenced (asserted).
static void copyProgress(SSyncRaftProgress* progress, void* arg) {
  SSyncRaftProgressMap* target = (SSyncRaftProgressMap*)arg;

  assert(progress->refCount > 0);
  syncRaftAddToProgressMap(target, progress);
}
|
||||
|
||||
// Take one reference on the progress.
static void refProgress(SSyncRaftProgress* progress) {
  progress->refCount++;
}
|
||||
|
||||
// Drop one reference on the progress and free the structure when the count
// reaches zero. `arg` is unused; it is there so the function can be passed to
// syncRaftVisitProgressMap().
//
// NOTE(review): progress->inflights is not released here - confirm who owns
// the inflight window when the last reference goes away.
static void unrefProgress(SSyncRaftProgress* progress, void* arg) {
  (void)arg;

  progress->refCount--;
  assert(progress->refCount >= 0);

  if (progress->refCount != 0) {
    return;
  }
  free(progress);
}
|
||||
|
||||
// ResetState moves the Progress into the specified State, resetting ProbeSent,
|
||||
// PendingSnapshot, and Inflights.
|
||||
static void resetProgressState(SSyncRaftProgress* progress, ESyncRaftProgressState state) {
|
||||
progress->probeSent = false;
|
||||
progress->pendingSnapshotIndex = 0;
|
||||
progress->state = state;
|
||||
syncRaftInflightReset(progress->inflights);
|
||||
}
|
||||
|
||||
// ProbeAcked is called when this peer has accepted an append. It resets
|
||||
// ProbeSent to signal that additional append messages should be sent without
|
||||
// further delay.
|
||||
static void probeAcked(SSyncRaftProgress* progress) {
|
||||
progress->probeSent = false;
|
||||
}
|
|
@ -1,156 +0,0 @@
|
|||
/*
|
||||
* Copyright (c) 2019 TAOS Data, Inc. <cli@taosdata.com>
|
||||
*
|
||||
* This program is free software: you can use, redistribute, and/or modify
|
||||
* it under the terms of the GNU Affero General Public License, version 3
|
||||
* or later ("AGPL"), as published by the Free Software Foundation.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful, but WITHOUT
|
||||
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||
* FITNESS FOR A PARTICULAR PURPOSE.
|
||||
*
|
||||
* You should have received a copy of the GNU Affero General Public License
|
||||
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#include "raft.h"
|
||||
#include "sync_const.h"
|
||||
#include "sync_raft_progress_tracker.h"
|
||||
#include "sync_raft_proto.h"
|
||||
|
||||
// Allocate and initialize a progress tracker bound to `pRaft`.
//
// Returns NULL when the tracker or its votes table cannot be allocated.
//
// The structure is zero-allocated and the progress map is initialized here:
// syncRaftProgressVisit(), syncRaftTallyVotes() and syncRaftQuorumActive()
// all read tracker->progressMap, which was previously left as uninitialized
// heap memory.
SSyncRaftProgressTracker* syncRaftOpenProgressTracker(SSyncRaft* pRaft) {
  SSyncRaftProgressTracker* tracker = (SSyncRaftProgressTracker*)calloc(1, sizeof(SSyncRaftProgressTracker));
  if (tracker == NULL) {
    return NULL;
  }

  tracker->votesMap = taosHashInit(TSDB_MAX_REPLICA, taosGetDefaultHashFunction(TSDB_DATA_TYPE_INT), true, HASH_ENTRY_LOCK);
  if (tracker->votesMap == NULL) {
    free(tracker);
    return NULL;
  }

  syncRaftInitTrackConfig(&tracker->config);
  syncRaftInitProgressMap(&tracker->progressMap);
  tracker->pRaft = pRaft;
  tracker->maxInflightMsgs = kSyncRaftMaxInflghtMsgs;

  return tracker;
}
|
||||
|
||||
// Initialize an empty tracker configuration: both voter sets of the joint
// quorum, both learner sets, and autoLeave switched off.
void syncRaftInitTrackConfig(SSyncRaftProgressTrackerConfig* config) {
  // learner sets
  syncRaftInitNodeMap(&config->learners);
  syncRaftInitNodeMap(&config->learnersNext);

  // incoming/outgoing voter sets
  syncRaftInitQuorumJointConfig(&config->voters);

  config->autoLeave = false;
}
|
||||
|
||||
// Destroy the four node maps owned by a tracker configuration. The config
// structure itself is not freed.
void syncRaftFreeTrackConfig(SSyncRaftProgressTrackerConfig* config) {
  // learner sets
  syncRaftFreeNodeMap(&config->learners);
  syncRaftFreeNodeMap(&config->learnersNext);

  // voter sets of the joint quorum
  syncRaftFreeNodeMap(&config->voters.incoming);
  syncRaftFreeNodeMap(&config->voters.outgoing);
}
|
||||
|
||||
// ResetVotes prepares for a new round of vote counting via recordVote.
|
||||
void syncRaftResetVotes(SSyncRaftProgressTracker* tracker) {
|
||||
taosHashClear(tracker->votesMap);
|
||||
}
|
||||
|
||||
// Apply `visit(progress, arg)` to every progress tracked by `tracker`.
void syncRaftProgressVisit(SSyncRaftProgressTracker* tracker, visitProgressFp visit, void* arg) {
  SSyncRaftProgressMap* tracked = &tracker->progressMap;
  syncRaftVisitProgressMap(tracked, visit, arg);
}
|
||||
|
||||
// RecordVote records that the node with the given id voted for this Raft
|
||||
// instance if v == true (and declined it otherwise).
|
||||
void syncRaftRecordVote(SSyncRaftProgressTracker* tracker, SyncNodeId id, bool grant) {
|
||||
ESyncRaftVoteType* pType = taosHashGet(tracker->votesMap, &id, sizeof(SyncNodeId*));
|
||||
if (pType != NULL) {
|
||||
return;
|
||||
}
|
||||
|
||||
taosHashPut(tracker->votesMap, &id, sizeof(SyncNodeId), &grant, sizeof(bool*));
|
||||
}
|
||||
|
||||
// Copy the whole configuration structure from `from` into `to`.
//
// NOTE(review): this is a shallow copy, so the hash tables referenced by the
// node maps inside the config end up shared by both structures - confirm
// that is intended before freeing either side.
void syncRaftCopyTrackerConfig(const SSyncRaftProgressTrackerConfig* from, SSyncRaftProgressTrackerConfig* to) {
  *to = *from;
}
|
||||
|
||||
// Verify that every node named by the configuration (incoming voters,
// outgoing voters, learners, learnersNext) has an entry in `progressMap`.
// Returns 0 when all are present and -1 otherwise.
//
// NB: the empty config is intentionally accepted. An initial config has to be
// created somehow (for example during bootstrap or in tests), so
// transitioning from an empty config to any legal non-empty one is allowed.
int syncRaftCheckTrackerConfigInProgress(SSyncRaftProgressTrackerConfig* config, SSyncRaftProgressMap* progressMap) {
  SSyncRaftNodeMap* nodeSets[] = {
    &config->voters.incoming,
    &config->voters.outgoing,
    &config->learners,
    &config->learnersNext,
  };
  const int numSets = (int)(sizeof(nodeSets) / sizeof(nodeSets[0]));

  for (int k = 0; k < numSets; ++k) {
    if (!syncRaftIsAllNodeInProgressMap(nodeSets[k], progressMap)) {
      return -1;
    }
  }

  return 0;
}
|
||||
|
||||
// TallyVotes returns the number of granted and rejected Votes, and whether the
|
||||
// election outcome is known.
|
||||
ESyncRaftVoteResult syncRaftTallyVotes(SSyncRaftProgressTracker* tracker, int* rejected, int *granted) {
|
||||
SSyncRaftProgress* progress = NULL;
|
||||
int r, g;
|
||||
|
||||
// Make sure to populate granted/rejected correctly even if the Votes slice
|
||||
// contains members no longer part of the configuration. This doesn't really
|
||||
// matter in the way the numbers are used (they're informational), but might
|
||||
// as well get it right.
|
||||
while (!syncRaftIterateProgressMap(&tracker->progressMap, progress)) {
|
||||
if (progress->id == SYNC_NON_NODE_ID) {
|
||||
continue;
|
||||
}
|
||||
|
||||
bool* v = taosHashGet(tracker->votesMap, &progress->id, sizeof(SyncNodeId*));
|
||||
if (v == NULL) {
|
||||
continue;
|
||||
}
|
||||
|
||||
if (*v) {
|
||||
g++;
|
||||
} else {
|
||||
r++;
|
||||
}
|
||||
}
|
||||
|
||||
if (rejected) *rejected = r;
|
||||
if (granted) *granted = g;
|
||||
return syncRaftVoteResult(&(tracker->config.voters), tracker->votesMap);
|
||||
}
|
||||
|
||||
// Export the tracker's current configuration into `cs`: the four node sets
// are copied with syncRaftCopyNodeMap() and the autoLeave flag is carried
// over.
void syncRaftConfigState(SSyncRaftProgressTracker* tracker, SSyncConfigState* cs) {
  SSyncRaftProgressTrackerConfig* cfg = &tracker->config;

  syncRaftCopyNodeMap(&cfg->voters.incoming, &cs->voters);
  syncRaftCopyNodeMap(&cfg->voters.outgoing, &cs->votersOutgoing);
  syncRaftCopyNodeMap(&cfg->learners, &cs->learners);
  syncRaftCopyNodeMap(&cfg->learnersNext, &cs->learnersNext);

  cs->autoLeave = cfg->autoLeave;
}
|
||||
|
||||
// Indexer callback for the quorum code: `arg` is the tracker. It writes the
// matchIndex of node `id` to `*index`, or 0 when the node has no progress
// entry.
static void matchAckIndexer(SyncNodeId id, void* arg, SyncIndex* index) {
  SSyncRaftProgressTracker* tracker = (SSyncRaftProgressTracker*)arg;
  SSyncRaftProgress* found = syncRaftFindProgressByNodeId(&tracker->progressMap, id);

  *index = (found != NULL) ? found->matchIndex : 0;
}
|
||||
|
||||
// Committed returns the largest log index known to be committed based on what
|
||||
// the voting members of the group have acknowledged.
|
||||
SyncIndex syncRaftCommittedIndex(SSyncRaftProgressTracker* tracker) {
|
||||
return syncRaftJointConfigCommittedIndex(&tracker->config.voters, matchAckIndexer, tracker);
|
||||
}
|
||||
|
||||
// Visitor used by syncRaftQuorumActive(): `arg` is a votes table. The
// progress's recentActive flag is stored in it as that node's "vote".
static void visitProgressActive(SSyncRaftProgress* progress, void* arg) {
  SHashObj* activeVotes = (SHashObj*)arg;

  taosHashPut(activeVotes,
              &progress->id, sizeof(SyncNodeId),
              &progress->recentActive, sizeof(bool));
}
|
||||
|
||||
// QuorumActive returns true if the quorum is active from the view of the local
|
||||
// raft state machine. Otherwise, it returns false.
|
||||
bool syncRaftQuorumActive(SSyncRaftProgressTracker* tracker) {
|
||||
SHashObj* votesMap = taosHashInit(TSDB_MAX_REPLICA, taosGetDefaultHashFunction(TSDB_DATA_TYPE_INT), true, HASH_ENTRY_LOCK);
|
||||
syncRaftVisitProgressMap(&tracker->progressMap, visitProgressActive, votesMap);
|
||||
|
||||
return syncRaftVoteResult(&tracker->config.voters, votesMap) == SYNC_RAFT_VOTE_WON;
|
||||
}
|
|
@ -1,75 +0,0 @@
|
|||
/*
|
||||
* Copyright (c) 2019 TAOS Data, Inc. <cli@taosdata.com>
|
||||
*
|
||||
* This program is free software: you can use, redistribute, and/or modify
|
||||
* it under the terms of the GNU Affero General Public License, version 3
|
||||
* or later ("AGPL"), as published by the Free Software Foundation.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful, but WITHOUT
|
||||
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||
* FITNESS FOR A PARTICULAR PURPOSE.
|
||||
*
|
||||
* You should have received a copy of the GNU Affero General Public License
|
||||
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#include "sync_raft_node_map.h"
|
||||
#include "sync_raft_quorum_majority.h"
|
||||
#include "sync_raft_quorum_joint.h"
|
||||
#include "sync_raft_quorum.h"
|
||||
|
||||
/**
|
||||
* syncRaftVoteResult takes a mapping of voters to yes/no (true/false) votes and returns
|
||||
* a result indicating whether the vote is pending, lost, or won. A joint quorum
|
||||
* requires both majority quorums to vote in favor.
|
||||
**/
|
||||
ESyncRaftVoteType syncRaftVoteResult(SSyncRaftQuorumJointConfig* config, SHashObj* votesMap) {
|
||||
ESyncRaftVoteResult r1 = syncRaftMajorityVoteResult(&(config->incoming), votesMap);
|
||||
ESyncRaftVoteResult r2 = syncRaftMajorityVoteResult(&(config->outgoing), votesMap);
|
||||
|
||||
if (r1 == r2) {
|
||||
// If they agree, return the agreed state.
|
||||
return r1;
|
||||
}
|
||||
|
||||
if (r1 == SYNC_RAFT_VOTE_LOST || r2 == SYNC_RAFT_VOTE_LOST) {
|
||||
// If either config has lost, loss is the only possible outcome.
|
||||
return SYNC_RAFT_VOTE_LOST;
|
||||
}
|
||||
|
||||
// One side won, the other one is pending, so the whole outcome is.
|
||||
return SYNC_RAFT_VOTE_PENDING;
|
||||
}
|
||||
|
||||
// Initialize both halves (incoming and outgoing voter sets) of a joint
// quorum configuration.
void syncRaftInitQuorumJointConfig(SSyncRaftQuorumJointConfig* config) {
  SSyncRaftNodeMap* in  = &config->incoming;
  SSyncRaftNodeMap* out = &config->outgoing;

  syncRaftInitNodeMap(in);
  syncRaftInitNodeMap(out);
}
|
||||
|
||||
// Destroy both halves (incoming and outgoing voter sets) of a joint quorum
// configuration.
void syncRaftFreeQuorumJointConfig(SSyncRaftQuorumJointConfig* config) {
  SSyncRaftNodeMap* in  = &config->incoming;
  SSyncRaftNodeMap* out = &config->outgoing;

  syncRaftFreeNodeMap(in);
  syncRaftFreeNodeMap(out);
}
|
||||
|
||||
// Add node `id` to the incoming voter set.
void syncRaftJointConfigAddToIncoming(SSyncRaftQuorumJointConfig* config, SyncNodeId id) {
  SSyncRaftNodeMap* in = &config->incoming;
  syncRaftAddToNodeMap(in, id);
}
|
||||
|
||||
// Remove node `id` from the incoming voter set.
void syncRaftJointConfigRemoveFromIncoming(SSyncRaftQuorumJointConfig* config, SyncNodeId id) {
  SSyncRaftNodeMap* in = &config->incoming;
  syncRaftRemoveFromNodeMap(in, id);
}
|
||||
|
||||
// Collect the ids of all voters of the joint configuration into `nodeMap`:
// the incoming set is copied first, then the outgoing set is merged in.
void syncRaftJointConfigIDs(SSyncRaftQuorumJointConfig* config, SSyncRaftNodeMap* nodeMap) {
  SSyncRaftNodeMap* in  = &config->incoming;
  SSyncRaftNodeMap* out = &config->outgoing;

  syncRaftCopyNodeMap(in, nodeMap);
  syncRaftUnionNodeMap(out, nodeMap);
}
|
||||
|
||||
// Return the commit index of a joint configuration: the smaller of the commit
// indexes of the incoming and the outgoing majority, since an entry must be
// committed in both halves.
SyncIndex syncRaftJointConfigCommittedIndex(const SSyncRaftQuorumJointConfig* config, matchAckIndexerFp indexer, void* arg) {
  const SyncIndex incoming = syncRaftMajorityConfigCommittedIndex(&config->incoming, indexer, arg);
  const SyncIndex outgoing = syncRaftMajorityConfigCommittedIndex(&config->outgoing, indexer, arg);

  if (incoming < outgoing) {
    return incoming;
  }
  return outgoing;
}
|
|
@ -1,121 +0,0 @@
|
|||
/*
|
||||
* Copyright (c) 2019 TAOS Data, Inc. <cli@taosdata.com>
|
||||
*
|
||||
* This program is free software: you can use, redistribute, and/or modify
|
||||
* it under the terms of the GNU Affero General Public License, version 3
|
||||
* or later ("AGPL"), as published by the Free Software Foundation.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful, but WITHOUT
|
||||
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||
* FITNESS FOR A PARTICULAR PURPOSE.
|
||||
*
|
||||
* You should have received a copy of the GNU Affero General Public License
|
||||
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#include "sync_const.h"
|
||||
#include "sync_raft_quorum.h"
|
||||
#include "sync_raft_quorum_majority.h"
|
||||
#include "sync_raft_node_map.h"
|
||||
|
||||
// VoteResult takes a mapping of voters to yes/no (true/false) votes and returns
|
||||
// a result indicating whether the vote is pending (i.e. neither a quorum of
|
||||
// yes/no has been reached), won (a quorum of yes has been reached), or lost (a
|
||||
// quorum of no has been reached).
|
||||
ESyncRaftVoteResult syncRaftMajorityVoteResult(SSyncRaftNodeMap* config, SHashObj* votesMap) {
|
||||
int n = syncRaftNodeMapSize(config);
|
||||
if (n == 0) {
|
||||
// By convention, the elections on an empty config win. This comes in
|
||||
// handy with joint quorums because it'll make a half-populated joint
|
||||
// quorum behave like a majority quorum.
|
||||
return SYNC_RAFT_VOTE_WON;
|
||||
}
|
||||
|
||||
int i, g, r, missing;
|
||||
i = g = r = missing = 0;
|
||||
SyncNodeId* pId = NULL;
|
||||
while (!syncRaftIterateNodeMap(config, pId)) {
|
||||
const bool* v = (const bool*)taosHashGet(votesMap, pId, sizeof(SyncNodeId*));
|
||||
if (v == NULL) {
|
||||
missing += 1;
|
||||
continue;
|
||||
}
|
||||
|
||||
if (*v) {
|
||||
g +=1;
|
||||
} else {
|
||||
r += 1;
|
||||
}
|
||||
}
|
||||
|
||||
int quorum = n / 2 + 1;
|
||||
if (g >= quorum) {
|
||||
return SYNC_RAFT_VOTE_WON;
|
||||
}
|
||||
if (g + missing >= quorum) {
|
||||
return SYNC_RAFT_VOTE_PENDING;
|
||||
}
|
||||
|
||||
return SYNC_RAFT_VOTE_LOST;
|
||||
}
|
||||
|
||||
int compSyncIndex(const void * elem1, const void * elem2) {
|
||||
SyncIndex index1 = *((SyncIndex*)elem1);
|
||||
SyncIndex index2 = *((SyncIndex*)elem1);
|
||||
if (index1 > index2) return 1;
|
||||
if (index1 < index2) return -1;
|
||||
return 0;
|
||||
}
|
||||
|
||||
// Return the largest index acknowledged by a majority of the voters in
// `config`. `indexer(id, arg, &index)` supplies the acknowledged index of
// each voter.
//
// An empty config yields kMaxCommitIndex, so a joint quorum with one empty
// half behaves like its other half. If the scratch buffer cannot be allocated
// the function returns 0 ("nothing committed"): returning kMaxCommitIndex
// there would let the joint quorum ignore this half entirely.
//
// NOTE(review): assumes each node map entry's data begins with the SyncNodeId
// written by syncRaftAddToNodeMap() - confirm.
SyncIndex syncRaftMajorityConfigCommittedIndex(const SSyncRaftNodeMap* config, matchAckIndexerFp indexer, void* arg) {
  int n = syncRaftNodeMapSize(config);
  if (n == 0) {
    return kMaxCommitIndex;
  }

  // Collect the indexes on the stack when they fit into TSDB_MAX_REPLICA
  // slots and on the heap otherwise. Both buffers start zeroed, so a slot
  // that is never filled counts as "nothing acknowledged yet".
  SyncIndex  srk[TSDB_MAX_REPLICA] = {0};
  SyncIndex* srt = srk;
  if (n > TSDB_MAX_REPLICA) {
    srt = (SyncIndex*)calloc((size_t)n, sizeof(SyncIndex));
    if (srt == NULL) {
      return 0;
    }
  }

  // Walk the member ids directly; the previous loop dereferenced a cursor
  // pointer that was always NULL. The fill is bounded by n in case the map
  // changes between the size query and the walk.
  int filled = 0;
  void* pIter = taosHashIterate(config->nodeIdMap, NULL);
  while (pIter != NULL) {
    if (filled < n) {
      SyncIndex acked = 0;
      indexer(*(SyncNodeId*)pIter, arg, &acked);
      srt[filled++] = acked;
    }
    pIter = taosHashIterate(config->nodeIdMap, pIter);
  }

  // Sort ascending.
  qsort(srt, n, sizeof(SyncIndex), compSyncIndex);

  // The value acked by a quorum is the one n/2+1 positions from the end of
  // the sorted array (accounting for zero-indexing).
  SyncIndex committed = srt[n - (n/2 + 1)];

  if (srt != srk) {
    free(srt);
  }

  return committed;
}
|
|
@ -1,180 +0,0 @@
|
|||
/*
|
||||
* Copyright (c) 2019 TAOS Data, Inc. <cli@taosdata.com>
|
||||
*
|
||||
* This program is free software: you can use, redistribute, and/or modify
|
||||
* it under the terms of the GNU Affero General Public License, version 3
|
||||
* or later ("AGPL"), as published by the Free Software Foundation.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful, but WITHOUT
|
||||
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||
* FITNESS FOR A PARTICULAR PURPOSE.
|
||||
*
|
||||
* You should have received a copy of the GNU Affero General Public License
|
||||
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#include "sync_raft_config_change.h"
|
||||
#include "sync_raft_restore.h"
|
||||
#include "sync_raft_progress_tracker.h"
|
||||
|
||||
static void addToConfChangeSingleArray(SSyncConfChangeSingleArray* out, int* i, const SSyncRaftNodeMap* nodeMap, ESyncRaftConfChangeType t);
|
||||
static int toConfChangeSingle(const SSyncConfigState* cs, SSyncConfChangeSingleArray* out, SSyncConfChangeSingleArray* in);
|
||||
|
||||
// syncRaftRestoreConfig takes a Changer (which must represent an empty configuration), and
|
||||
// runs a sequence of changes enacting the configuration described in the
|
||||
// ConfState.
|
||||
//
|
||||
// TODO(tbg) it's silly that this takes a Changer. Unravel this by making sure
|
||||
// the Changer only needs a ProgressMap (not a whole Tracker) at which point
|
||||
// this can just take LastIndex and MaxInflight directly instead and cook up
|
||||
// the results from that alone.
|
||||
int syncRaftRestoreConfig(SSyncRaftChanger* changer, const SSyncConfigState* cs,
|
||||
SSyncRaftProgressTrackerConfig* config, SSyncRaftProgressMap* progressMap) {
|
||||
SSyncConfChangeSingleArray outgoing;
|
||||
SSyncConfChangeSingleArray incoming;
|
||||
SSyncConfChangeSingleArray css;
|
||||
SSyncRaftProgressTracker* tracker = changer->tracker;
|
||||
int i, ret;
|
||||
|
||||
syncRaftInitConfArray(&outgoing);
|
||||
syncRaftInitConfArray(&incoming);
|
||||
|
||||
syncRaftInitTrackConfig(config);
|
||||
syncRaftInitProgressMap(progressMap);
|
||||
|
||||
ret = toConfChangeSingle(cs, &outgoing, &incoming);
|
||||
if (ret != 0) {
|
||||
goto out;
|
||||
}
|
||||
|
||||
if (syncRaftConfArrayIsEmpty(&outgoing)) {
|
||||
// No outgoing config, so just apply the incoming changes one by one.
|
||||
for (i = 0; i < incoming.n; ++i) {
|
||||
css = (SSyncConfChangeSingleArray) {
|
||||
.n = 1,
|
||||
.changes = &incoming.changes[i],
|
||||
};
|
||||
ret = syncRaftChangerSimpleConfig(changer, &css, config, progressMap);
|
||||
if (ret != 0) {
|
||||
goto out;
|
||||
}
|
||||
|
||||
syncRaftCopyTrackerConfig(config, &changer->tracker->config);
|
||||
syncRaftCopyProgressMap(progressMap, &changer->tracker->progressMap);
|
||||
}
|
||||
} else {
|
||||
// The ConfState describes a joint configuration.
|
||||
//
|
||||
// First, apply all of the changes of the outgoing config one by one, so
|
||||
// that it temporarily becomes the incoming active config. For example,
|
||||
// if the config is (1 2 3)&(2 3 4), this will establish (2 3 4)&().
|
||||
for (i = 0; i < outgoing.n; ++i) {
|
||||
css = (SSyncConfChangeSingleArray) {
|
||||
.n = 1,
|
||||
.changes = &outgoing.changes[i],
|
||||
};
|
||||
ret = syncRaftChangerSimpleConfig(changer, &css, config, progressMap);
|
||||
if (ret != 0) {
|
||||
goto out;
|
||||
}
|
||||
syncRaftCopyTrackerConfig(config, &changer->tracker->config);
|
||||
syncRaftCopyProgressMap(progressMap, &changer->tracker->progressMap);
|
||||
}
|
||||
|
||||
ret = syncRaftChangerEnterJoint(changer, cs->autoLeave, &incoming, config, progressMap);
|
||||
if (ret != 0) {
|
||||
goto out;
|
||||
}
|
||||
}
|
||||
|
||||
out:
|
||||
syncRaftFreeConfArray(&incoming);
|
||||
syncRaftFreeConfArray(&outgoing);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
// Appends one change of type `t` to `out->changes` for every node id that
// syncRaftIterateNodeMap yields from `nodeMap`. Writing starts at index `*i`,
// and `*i` is advanced by one per appended entry. The loop ends as soon as
// syncRaftIterateNodeMap returns non-zero.
//
// No bounds check is done here: the caller must have sized `out->changes` for
// all entries that will be appended.
//
// NOTE(review): `cursor` starts as NULL, is handed to syncRaftIterateNodeMap
// by value, and is then dereferenced. Unless syncRaftIterateNodeMap is a macro
// that rebinds its argument, this reads through NULL on the first iteration --
// confirm the iterator's contract.
static void addToConfChangeSingleArray(SSyncConfChangeSingleArray* out, int* i, const SSyncRaftNodeMap* nodeMap, ESyncRaftConfChangeType t) {
  SyncNodeId* cursor = NULL;

  for (;;) {
    if (syncRaftIterateNodeMap(nodeMap, cursor)) {
      break;
    }

    SSyncConfChangeSingle change = {
      .type = t,
      .nodeId = *cursor,
    };
    out->changes[*i] = change;
    ++(*i);
  }
}
|
||||
|
||||
// toConfChangeSingle translates a conf state into 1) a slice of operations creating
|
||||
// first the config that will become the outgoing one, and then the incoming one, and
|
||||
// b) another slice that, when applied to the config resulted from 1), represents the
|
||||
// ConfState.
|
||||
static int toConfChangeSingle(const SSyncConfigState* cs, SSyncConfChangeSingleArray* out, SSyncConfChangeSingleArray* in) {
|
||||
int i;
|
||||
|
||||
out->n = syncRaftNodeMapSize(&cs->votersOutgoing);
|
||||
out->changes = (SSyncConfChangeSingle*)malloc(sizeof(SSyncConfChangeSingle) * out->n);
|
||||
if (out->changes == NULL) {
|
||||
out->n = 0;
|
||||
return -1;
|
||||
}
|
||||
in->n = syncRaftNodeMapSize(&cs->votersOutgoing) +
|
||||
syncRaftNodeMapSize(&cs->voters) +
|
||||
syncRaftNodeMapSize(&cs->learners) +
|
||||
syncRaftNodeMapSize(&cs->learnersNext);
|
||||
out->changes = (SSyncConfChangeSingle*)malloc(sizeof(SSyncConfChangeSingle) * in->n);
|
||||
if (in->changes == NULL) {
|
||||
in->n = 0;
|
||||
return -1;
|
||||
}
|
||||
|
||||
// Example to follow along this code:
|
||||
// voters=(1 2 3) learners=(5) outgoing=(1 2 4 6) learners_next=(4)
|
||||
//
|
||||
// This means that before entering the joint config, the configuration
|
||||
// had voters (1 2 4 6) and perhaps some learners that are already gone.
|
||||
// The new set of voters is (1 2 3), i.e. (1 2) were kept around, and (4 6)
|
||||
// are no longer voters; however 4 is poised to become a learner upon leaving
|
||||
// the joint state.
|
||||
// We can't tell whether 5 was a learner before entering the joint config,
|
||||
// but it doesn't matter (we'll pretend that it wasn't).
|
||||
//
|
||||
// The code below will construct
|
||||
// outgoing = add 1; add 2; add 4; add 6
|
||||
// incoming = remove 1; remove 2; remove 4; remove 6
|
||||
// add 1; add 2; add 3;
|
||||
// add-learner 5;
|
||||
// add-learner 4;
|
||||
//
|
||||
// So, when starting with an empty config, after applying 'outgoing' we have
|
||||
//
|
||||
// quorum=(1 2 4 6)
|
||||
//
|
||||
// From which we enter a joint state via 'incoming'
|
||||
//
|
||||
// quorum=(1 2 3)&&(1 2 4 6) learners=(5) learners_next=(4)
|
||||
//
|
||||
// as desired.
|
||||
|
||||
// If there are outgoing voters, first add them one by one so that the
|
||||
// (non-joint) config has them all.
|
||||
i = 0;
|
||||
addToConfChangeSingleArray(out, &i, &cs->votersOutgoing, SYNC_RAFT_Conf_AddNode);
|
||||
assert(i == out->n);
|
||||
|
||||
// We're done constructing the outgoing slice, now on to the incoming one
|
||||
// (which will apply on top of the config created by the outgoing slice).
|
||||
i = 0;
|
||||
|
||||
// First, we'll remove all of the outgoing voters.
|
||||
addToConfChangeSingleArray(in, &i, &cs->votersOutgoing, SYNC_RAFT_Conf_RemoveNode);
|
||||
|
||||
// Then we'll add the incoming voters and learners.
|
||||
addToConfChangeSingleArray(in, &i, &cs->voters, SYNC_RAFT_Conf_AddNode);
|
||||
addToConfChangeSingleArray(in, &i, &cs->learners, SYNC_RAFT_Conf_AddLearnerNode);
|
||||
addToConfChangeSingleArray(in, &i, &cs->learnersNext, SYNC_RAFT_Conf_AddLearnerNode);
|
||||
assert(i == in->n);
|
||||
|
||||
return 0;
|
||||
}
|
|
@ -14,17 +14,18 @@ target_link_libraries(
|
|||
PUBLIC common
|
||||
)
|
||||
if (${BUILD_WITH_UV_TRANS})
|
||||
if (${BUILD_WITH_UV})
|
||||
target_include_directories(
|
||||
transport
|
||||
PUBLIC "${CMAKE_SOURCE_DIR}/contrib/libuv/include"
|
||||
)
|
||||
|
||||
#LINK_DIRECTORIES("${CMAKE_SOURCE_DIR}/debug/contrib/libuv")
|
||||
target_link_libraries(
|
||||
transport
|
||||
PUBLIC uv_a
|
||||
)
|
||||
add_definitions(-DUSE_UV)
|
||||
endif(${BUILD_WITH_UV})
|
||||
endif(${BUILD_WITH_UV_TRANS})
|
||||
|
||||
if (${BUILD_TEST})
|
||||
|
|
|
@ -1383,7 +1383,7 @@ static void rpcSendMsgToPeer(SRpcConn *pConn, void *msg, int msgLen) {
|
|||
static void rpcProcessConnError(void *param, void *id) {
|
||||
SRpcReqContext *pContext = (SRpcReqContext *)param;
|
||||
SRpcInfo * pRpc = pContext->pRpc;
|
||||
SRpcMsg rpcMsg;
|
||||
SRpcMsg rpcMsg = {0};
|
||||
|
||||
if (pRpc == NULL) {
|
||||
return;
|
||||
|
|
|
@ -35,6 +35,7 @@ void* rpcOpen(const SRpcInit* pInit) {
|
|||
pRpc->connType = pInit->connType;
|
||||
pRpc->idleTime = pInit->idleTime;
|
||||
pRpc->tcphandle = (*taosInitHandle[pRpc->connType])(0, pInit->localPort, pRpc->label, pRpc->numOfThreads, NULL, pRpc);
|
||||
pRpc->parent = pInit->parent;
|
||||
|
||||
return pRpc;
|
||||
}
|
||||
|
|
|
@ -123,11 +123,13 @@ static void clientHandleResp(SCliConn* conn) {
|
|||
rpcMsg.code = pHead->code;
|
||||
rpcMsg.msgType = pHead->msgType;
|
||||
rpcMsg.ahandle = pCtx->ahandle;
|
||||
|
||||
tDebug("client conn %p %s received from %s:%d", conn, TMSG_INFO(pHead->msgType), pMsg->ctx->ip, pMsg->ctx->port);
|
||||
if (pCtx->pSem == NULL) {
|
||||
tDebug("conn %p handle resp", conn);
|
||||
(pRpc->cfp)(NULL, &rpcMsg, NULL);
|
||||
tTrace("client conn(sync) %p handle resp", conn);
|
||||
(pRpc->cfp)(pRpc->parent, &rpcMsg, NULL);
|
||||
} else {
|
||||
tDebug("conn %p handle resp", conn);
|
||||
tTrace("client conn(sync) %p handle resp", conn);
|
||||
memcpy((char*)pCtx->pRsp, (char*)&rpcMsg, sizeof(rpcMsg));
|
||||
tsem_post(pCtx->pSem);
|
||||
}
|
||||
|
@ -154,7 +156,7 @@ static void clientHandleExcept(SCliConn* pConn) {
|
|||
clientConnDestroy(pConn, true);
|
||||
return;
|
||||
}
|
||||
tDebug("conn %p start to destroy", pConn);
|
||||
tDebug("client conn %p start to destroy", pConn);
|
||||
SCliMsg* pMsg = pConn->data;
|
||||
|
||||
destroyUserdata(&pMsg->msg);
|
||||
|
@ -166,7 +168,7 @@ static void clientHandleExcept(SCliConn* pConn) {
|
|||
rpcMsg.code = TSDB_CODE_RPC_NETWORK_UNAVAIL;
|
||||
if (pCtx->pSem == NULL) {
|
||||
// SRpcInfo* pRpc = pMsg->ctx->pRpc;
|
||||
(pCtx->pTransInst->cfp)(NULL, &rpcMsg, NULL);
|
||||
(pCtx->pTransInst->cfp)(pCtx->pTransInst->parent, &rpcMsg, NULL);
|
||||
} else {
|
||||
memcpy((char*)(pCtx->pRsp), (char*)(&rpcMsg), sizeof(rpcMsg));
|
||||
// SRpcMsg rpcMsg
|
||||
|
@ -184,7 +186,7 @@ static void clientTimeoutCb(uv_timer_t* handle) {
|
|||
SCliThrdObj* pThrd = handle->data;
|
||||
SRpcInfo* pRpc = pThrd->pTransInst;
|
||||
int64_t currentTime = pThrd->nextTimeout;
|
||||
tDebug("timeout, try to remove expire conn from conn pool");
|
||||
tDebug("client conn timeout, try to remove expire conn from conn pool");
|
||||
|
||||
SConnList* p = taosHashIterate((SHashObj*)pThrd->pool, NULL);
|
||||
while (p != NULL) {
|
||||
|
@ -253,7 +255,7 @@ static void addConnToPool(void* pool, char* ip, uint32_t port, SCliConn* conn) {
|
|||
|
||||
tstrncpy(key, ip, strlen(ip));
|
||||
tstrncpy(key + strlen(key), (char*)(&port), sizeof(port));
|
||||
tDebug("conn %p added to conn pool, read buf cap: %d", conn, conn->readBuf.cap);
|
||||
tDebug("client conn %p added to conn pool, read buf cap: %d", conn, conn->readBuf.cap);
|
||||
|
||||
SRpcInfo* pRpc = ((SCliThrdObj*)conn->hostThrd)->pTransInst;
|
||||
|
||||
|
@ -294,10 +296,10 @@ static void clientReadCb(uv_stream_t* handle, ssize_t nread, const uv_buf_t* buf
|
|||
pBuf->len += nread;
|
||||
if (clientReadComplete(pBuf)) {
|
||||
uv_read_stop((uv_stream_t*)conn->stream);
|
||||
tDebug("conn %p read complete", conn);
|
||||
tDebug("client conn %p read complete", conn);
|
||||
clientHandleResp(conn);
|
||||
} else {
|
||||
tDebug("conn %p read partial packet, continue to read", conn);
|
||||
tDebug("client conn %p read partial packet, continue to read", conn);
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
@ -309,7 +311,7 @@ static void clientReadCb(uv_stream_t* handle, ssize_t nread, const uv_buf_t* buf
|
|||
return;
|
||||
}
|
||||
if (nread < 0 || nread == UV_EOF) {
|
||||
tError("conn %p read error: %s", conn, uv_err_name(nread));
|
||||
tError("client conn %p read error: %s", conn, uv_err_name(nread));
|
||||
clientHandleExcept(conn);
|
||||
}
|
||||
// tDebug("Read error %s\n", uv_err_name(nread));
|
||||
|
@ -320,9 +322,9 @@ static void clientConnDestroy(SCliConn* conn, bool clear) {
|
|||
//
|
||||
conn->ref--;
|
||||
if (conn->ref == 0) {
|
||||
tDebug("conn %p remove from conn pool", conn);
|
||||
tDebug("client conn %p remove from conn pool", conn);
|
||||
QUEUE_REMOVE(&conn->conn);
|
||||
tDebug("conn %p remove from conn pool successfully", conn);
|
||||
tDebug("client conn %p remove from conn pool successfully", conn);
|
||||
if (clear) {
|
||||
uv_close((uv_handle_t*)conn->stream, clientDestroy);
|
||||
}
|
||||
|
@ -334,7 +336,7 @@ static void clientDestroy(uv_handle_t* handle) {
|
|||
|
||||
free(conn->stream);
|
||||
free(conn->writeReq);
|
||||
tDebug("conn %p destroy successfully", conn);
|
||||
tDebug("client conn %p destroy successfully", conn);
|
||||
free(conn);
|
||||
|
||||
// clientConnDestroy(conn, false);
|
||||
|
@ -343,7 +345,7 @@ static void clientDestroy(uv_handle_t* handle) {
|
|||
static void clientWriteCb(uv_write_t* req, int status) {
|
||||
SCliConn* pConn = req->data;
|
||||
if (status == 0) {
|
||||
tDebug("conn %p data already was written out", pConn);
|
||||
tDebug("client conn %p data already was written out", pConn);
|
||||
SCliMsg* pMsg = pConn->data;
|
||||
if (pMsg == NULL) {
|
||||
// handle
|
||||
|
@ -351,7 +353,7 @@ static void clientWriteCb(uv_write_t* req, int status) {
|
|||
}
|
||||
destroyUserdata(&pMsg->msg);
|
||||
} else {
|
||||
tError("conn %p failed to write: %s", pConn, uv_err_name(status));
|
||||
tError("client conn %p failed to write: %s", pConn, uv_err_name(status));
|
||||
clientHandleExcept(pConn);
|
||||
return;
|
||||
}
|
||||
|
@ -370,7 +372,7 @@ static void clientWrite(SCliConn* pConn) {
|
|||
pHead->msgLen = (int32_t)htonl((uint32_t)msgLen);
|
||||
|
||||
uv_buf_t wb = uv_buf_init((char*)pHead, msgLen);
|
||||
tDebug("conn %p data write out, msgType : %d, len: %d", pConn, pHead->msgType, msgLen);
|
||||
tDebug("conn %p %s is send to %s:%d", pConn, TMSG_INFO(pHead->msgType), pCliMsg->ctx->ip, pCliMsg->ctx->port);
|
||||
uv_write(pConn->writeReq, (uv_stream_t*)pConn->stream, &wb, 1, clientWriteCb);
|
||||
}
|
||||
static void clientConnCb(uv_connect_t* req, int status) {
|
||||
|
@ -378,11 +380,11 @@ static void clientConnCb(uv_connect_t* req, int status) {
|
|||
SCliConn* pConn = req->data;
|
||||
if (status != 0) {
|
||||
// tError("failed to connect server(%s, %d), errmsg: %s", pCtx->ip, pCtx->port, uv_strerror(status));
|
||||
tError("conn %p failed to connect server: %s", pConn, uv_strerror(status));
|
||||
tError("client conn %p failed to connect server: %s", pConn, uv_strerror(status));
|
||||
clientHandleExcept(pConn);
|
||||
return;
|
||||
}
|
||||
tDebug("conn %p create", pConn);
|
||||
tDebug("client conn %p create", pConn);
|
||||
|
||||
assert(pConn->stream == req->handle);
|
||||
clientWrite(pConn);
|
||||
|
@ -400,14 +402,14 @@ static void clientHandleQuit(SCliMsg* pMsg, SCliThrdObj* pThrd) {
|
|||
static void clientHandleReq(SCliMsg* pMsg, SCliThrdObj* pThrd) {
|
||||
uint64_t et = taosGetTimestampUs();
|
||||
uint64_t el = et - pMsg->st;
|
||||
tDebug("msg tran time cost: %" PRIu64 "", el);
|
||||
tDebug("client msg tran time cost: %" PRIu64 "", el);
|
||||
et = taosGetTimestampUs();
|
||||
|
||||
STransConnCtx* pCtx = pMsg->ctx;
|
||||
SCliConn* conn = getConnFromPool(pThrd->pool, pCtx->ip, pCtx->port);
|
||||
if (conn != NULL) {
|
||||
// impl later
|
||||
tDebug("conn %p get from conn pool", conn);
|
||||
tDebug("client get conn %p from pool", conn);
|
||||
conn->data = pMsg;
|
||||
conn->writeReq->data = conn;
|
||||
transDestroyBuffer(&conn->readBuf);
|
||||
|
|
|
@ -33,7 +33,7 @@ typedef struct SSrvConn {
|
|||
void* hostThrd;
|
||||
void* pSrvMsg;
|
||||
|
||||
struct sockaddr peername;
|
||||
struct sockaddr_in addr;
|
||||
|
||||
// SRpcMsg sendMsg;
|
||||
// del later
|
||||
|
@ -236,14 +236,6 @@ static void uvHandleReq(SSrvConn* pConn) {
|
|||
assert(transIsReq(pHead->msgType));
|
||||
|
||||
SRpcInfo* pRpc = (SRpcInfo*)p->shandle;
|
||||
// auth here
|
||||
// auth should not do in rpc thread
|
||||
|
||||
// int8_t code = uvAuthMsg(pConn, (char*)pHead, p->msgLen);
|
||||
// if (code != 0) {
|
||||
// terrno = code;
|
||||
// return;
|
||||
//}
|
||||
pHead->code = htonl(pHead->code);
|
||||
|
||||
int32_t dlen = 0;
|
||||
|
@ -266,6 +258,8 @@ static void uvHandleReq(SSrvConn* pConn) {
|
|||
|
||||
transClearBuffer(&pConn->readBuf);
|
||||
pConn->ref++;
|
||||
tDebug("%p %s received from %s:%d", pConn, TMSG_INFO(rpcMsg.msgType), inet_ntoa(pConn->addr.sin_addr),
|
||||
ntohs(pConn->addr.sin_port));
|
||||
(*(pRpc->cfp))(pRpc->parent, &rpcMsg, NULL);
|
||||
// uv_timer_start(pConn->pTimer, uvHandleActivityTimeout, pRpc->idleTime * 10000, 0);
|
||||
// auth
|
||||
|
@ -278,12 +272,12 @@ void uvOnReadCb(uv_stream_t* cli, ssize_t nread, const uv_buf_t* buf) {
|
|||
SConnBuffer* pBuf = &conn->readBuf;
|
||||
if (nread > 0) {
|
||||
pBuf->len += nread;
|
||||
tDebug("conn %p read summroy, total read: %d, current read: %d", conn, pBuf->len, (int)nread);
|
||||
tTrace("conn %p read summary, total read: %d, current read: %d", conn, pBuf->len, (int)nread);
|
||||
if (readComplete(pBuf)) {
|
||||
tDebug("conn %p alread read complete packet", conn);
|
||||
tTrace("conn %p alread read complete packet", conn);
|
||||
uvHandleReq(conn);
|
||||
} else {
|
||||
tDebug("conn %p read partial packet, continue to read", conn);
|
||||
tTrace("conn %p read partial packet, continue to read", conn);
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
@ -338,6 +332,7 @@ static void uvPrepareSendData(SSrvMsg* smsg, uv_buf_t* wb) {
|
|||
// impl later;
|
||||
tDebug("conn %p prepare to send resp", smsg->pConn);
|
||||
SRpcMsg* pMsg = &smsg->msg;
|
||||
SSrvConn* pConn = smsg->pConn;
|
||||
if (pMsg->pCont == 0) {
|
||||
pMsg->pCont = (void*)rpcMallocCont(0);
|
||||
pMsg->contLen = 0;
|
||||
|
@ -350,6 +345,9 @@ static void uvPrepareSendData(SSrvMsg* smsg, uv_buf_t* wb) {
|
|||
if (transCompressMsg(msg, len, NULL)) {
|
||||
// impl later
|
||||
}
|
||||
tDebug("%p start to send %s to %s:%d", pConn, TMSG_INFO(pHead->msgType), inet_ntoa(pConn->addr.sin_addr),
|
||||
ntohs(pConn->addr.sin_port));
|
||||
|
||||
pHead->msgLen = htonl(len);
|
||||
wb->base = msg;
|
||||
wb->len = len;
|
||||
|
@ -489,8 +487,8 @@ void uvOnConnectionCb(uv_stream_t* q, ssize_t nread, const uv_buf_t* buf) {
|
|||
uv_os_fd_t fd;
|
||||
uv_fileno((const uv_handle_t*)pConn->pTcp, &fd);
|
||||
tDebug("conn %p created, fd: %d", pConn, fd);
|
||||
int namelen = sizeof(pConn->peername);
|
||||
if (0 != uv_tcp_getpeername(pConn->pTcp, &pConn->peername, &namelen)) {
|
||||
int addrlen = sizeof(pConn->addr);
|
||||
if (0 != uv_tcp_getpeername(pConn->pTcp, (struct sockaddr*)&pConn->addr, &addrlen)) {
|
||||
tError("failed to get peer name");
|
||||
destroyConn(pConn, true);
|
||||
} else {
|
||||
|
@ -717,6 +715,9 @@ void taosCloseServer(void* arg) {
|
|||
}
|
||||
|
||||
void rpcSendResponse(const SRpcMsg* pMsg) {
|
||||
if (pMsg->handle == NULL) {
|
||||
return;
|
||||
}
|
||||
SSrvConn* pConn = pMsg->handle;
|
||||
SWorkThrdObj* pThrd = pConn->hostThrd;
|
||||
|
||||
|
@ -728,18 +729,18 @@ void rpcSendResponse(const SRpcMsg* pMsg) {
|
|||
// QUEUE_PUSH(&pThrd->msg, &srvMsg->q);
|
||||
// pthread_mutex_unlock(&pThrd->msgMtx);
|
||||
|
||||
tDebug("conn %p start to send resp", pConn);
|
||||
tTrace("conn %p start to send resp", pConn);
|
||||
transSendAsync(pThrd->asyncPool, &srvMsg->q);
|
||||
// uv_async_send(pThrd->workerAsync);
|
||||
}
|
||||
|
||||
int rpcGetConnInfo(void* thandle, SRpcConnInfo* pInfo) {
|
||||
SSrvConn* pConn = thandle;
|
||||
struct sockaddr* pPeerName = &pConn->peername;
|
||||
// struct sockaddr* pPeerName = &pConn->peername;
|
||||
|
||||
struct sockaddr_in caddr = *(struct sockaddr_in*)(pPeerName);
|
||||
pInfo->clientIp = (uint32_t)(caddr.sin_addr.s_addr);
|
||||
pInfo->clientPort = ntohs(caddr.sin_port);
|
||||
struct sockaddr_in addr = pConn->addr;
|
||||
pInfo->clientIp = (uint32_t)(addr.sin_addr.s_addr);
|
||||
pInfo->clientPort = ntohs(addr.sin_port);
|
||||
|
||||
tstrncpy(pInfo->user, pConn->user, sizeof(pInfo->user));
|
||||
return 0;
|
||||
|
|
|
@ -75,6 +75,35 @@ static bool walkNode(SNode* pNode, ETraversalOrder order, FQueryNodeWalker walke
|
|||
case QUERY_NODE_ORDER_BY_EXPR:
|
||||
res = walkNode(((SOrderByExprNode*)pNode)->pExpr, order, walker, pContext);
|
||||
break;
|
||||
case QUERY_NODE_STATE_WINDOW:
|
||||
res = walkNode(((SStateWindowNode*)pNode)->pCol, order, walker, pContext);
|
||||
break;
|
||||
case QUERY_NODE_SESSION_WINDOW:
|
||||
res = walkNode(((SSessionWindowNode*)pNode)->pCol, order, walker, pContext);
|
||||
break;
|
||||
case QUERY_NODE_INTERVAL_WINDOW: {
|
||||
SIntervalWindowNode* pInterval = (SIntervalWindowNode*)pNode;
|
||||
res = walkNode(pInterval->pInterval, order, walker, pContext);
|
||||
if (res) {
|
||||
res = walkNode(pInterval->pOffset, order, walker, pContext);
|
||||
}
|
||||
if (res) {
|
||||
res = walkNode(pInterval->pSliding, order, walker, pContext);
|
||||
}
|
||||
if (res) {
|
||||
res = walkNode(pInterval->pFill, order, walker, pContext);
|
||||
}
|
||||
break;
|
||||
}
|
||||
case QUERY_NODE_NODE_LIST:
|
||||
res = walkList(((SNodeListNode*)pNode)->pNodeList, order, walker, pContext);
|
||||
break;
|
||||
case QUERY_NODE_FILL:
|
||||
res = walkNode(((SFillNode*)pNode)->pValues, order, walker, pContext);
|
||||
break;
|
||||
case QUERY_NODE_RAW_EXPR:
|
||||
res = walkNode(((SRawExprNode*)pNode)->pNode, order, walker, pContext);
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
|
|
@ -58,6 +58,12 @@ SNode* nodesMakeNode(ENodeType type) {
|
|||
return makeNode(type, sizeof(SSessionWindowNode));
|
||||
case QUERY_NODE_INTERVAL_WINDOW:
|
||||
return makeNode(type, sizeof(SIntervalWindowNode));
|
||||
case QUERY_NODE_NODE_LIST:
|
||||
return makeNode(type, sizeof(SNodeListNode));
|
||||
case QUERY_NODE_FILL:
|
||||
return makeNode(type, sizeof(SFillNode));
|
||||
case QUERY_NODE_RAW_EXPR:
|
||||
return makeNode(type, sizeof(SRawExprNode));
|
||||
case QUERY_NODE_SET_OPERATOR:
|
||||
return makeNode(type, sizeof(SSetOperator));
|
||||
case QUERY_NODE_SELECT_STMT:
|
||||
|
|
|
@ -204,7 +204,12 @@ void* taosArrayGetLast(const SArray* pArray) {
|
|||
return TARRAY_GET_ELEM(pArray, pArray->size - 1);
|
||||
}
|
||||
|
||||
size_t taosArrayGetSize(const SArray* pArray) { return pArray->size; }
|
||||
size_t taosArrayGetSize(const SArray* pArray) {
|
||||
if (pArray == NULL) {
|
||||
return 0;
|
||||
}
|
||||
return pArray->size;
|
||||
}
|
||||
|
||||
void taosArraySetSize(SArray* pArray, size_t size) {
|
||||
assert(size <= pArray->capacity);
|
||||
|
@ -296,7 +301,7 @@ SArray* taosArrayDup(const SArray* pSrc) {
|
|||
}
|
||||
|
||||
void taosArrayClear(SArray* pArray) {
|
||||
assert( pArray != NULL );
|
||||
if (pArray == NULL) return;
|
||||
pArray->size = 0;
|
||||
}
|
||||
|
||||
|
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue