enh: check snapshot receiver and sender by signature
This commit is contained in:
parent
eb4e2aa58f
commit
e901adfdf3
|
@ -79,8 +79,6 @@ int32_t snapshotSenderStart(SSyncSnapshotSender *pSender) {
|
||||||
int8_t started = atomic_val_compare_exchange_8(&pSender->start, false, true);
|
int8_t started = atomic_val_compare_exchange_8(&pSender->start, false, true);
|
||||||
if (started) return 0;
|
if (started) return 0;
|
||||||
|
|
||||||
taosMsleep(1);
|
|
||||||
|
|
||||||
pSender->seq = SYNC_SNAPSHOT_SEQ_BEGIN;
|
pSender->seq = SYNC_SNAPSHOT_SEQ_BEGIN;
|
||||||
pSender->ack = SYNC_SNAPSHOT_SEQ_INVALID;
|
pSender->ack = SYNC_SNAPSHOT_SEQ_INVALID;
|
||||||
pSender->pReader = NULL;
|
pSender->pReader = NULL;
|
||||||
|
@ -394,6 +392,14 @@ bool snapshotReceiverIsStart(SSyncSnapshotReceiver *pReceiver) {
|
||||||
return (pReceiver != NULL ? atomic_load_8(&pReceiver->start) : false);
|
return (pReceiver != NULL ? atomic_load_8(&pReceiver->start) : false);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static int32_t snapshotReceiverSignatureCmp(SSyncSnapshotReceiver *pReceiver, SyncSnapshotSend *pMsg) {
|
||||||
|
if (pReceiver->term < pMsg->term) return -1;
|
||||||
|
if (pReceiver->term > pMsg->term) return 1;
|
||||||
|
if (pReceiver->startTime < pMsg->startTime) return -1;
|
||||||
|
if (pReceiver->startTime > pMsg->startTime) return 1;
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
static int32_t snapshotReceiverStartWriter(SSyncSnapshotReceiver *pReceiver, SyncSnapshotSend *pBeginMsg) {
|
static int32_t snapshotReceiverStartWriter(SSyncSnapshotReceiver *pReceiver, SyncSnapshotSend *pBeginMsg) {
|
||||||
if (pReceiver->pWriter != NULL) {
|
if (pReceiver->pWriter != NULL) {
|
||||||
sRError(pReceiver, "vgId:%d, snapshot receiver writer is not null", pReceiver->pSyncNode->vgId);
|
sRError(pReceiver, "vgId:%d, snapshot receiver writer is not null", pReceiver->pSyncNode->vgId);
|
||||||
|
@ -434,7 +440,7 @@ void snapshotReceiverStart(SSyncSnapshotReceiver *pReceiver, SyncSnapshotSend *p
|
||||||
if (started) return;
|
if (started) return;
|
||||||
|
|
||||||
pReceiver->ack = SYNC_SNAPSHOT_SEQ_PREP_SNAPSHOT;
|
pReceiver->ack = SYNC_SNAPSHOT_SEQ_PREP_SNAPSHOT;
|
||||||
pReceiver->term = raftStoreGetTerm(pReceiver->pSyncNode);
|
pReceiver->term = pPreMsg->term;
|
||||||
pReceiver->fromId = pPreMsg->srcId;
|
pReceiver->fromId = pPreMsg->srcId;
|
||||||
pReceiver->startTime = pPreMsg->startTime;
|
pReceiver->startTime = pPreMsg->startTime;
|
||||||
ASSERT(pReceiver->startTime);
|
ASSERT(pReceiver->startTime);
|
||||||
|
@ -585,18 +591,25 @@ static int32_t syncNodeOnSnapshotPrep(SSyncNode *pSyncNode, SyncSnapshotSend *pM
|
||||||
|
|
||||||
if (snapshotReceiverIsStart(pReceiver)) {
|
if (snapshotReceiverIsStart(pReceiver)) {
|
||||||
// already start
|
// already start
|
||||||
if (pMsg->startTime > pReceiver->startTime) {
|
int32_t order = 0;
|
||||||
sRInfo(pReceiver, "snapshot receiver startTime:%" PRId64 " > msg startTime:%" PRId64 " start receiver",
|
if ((order = snapshotReceiverSignatureCmp(pReceiver, pMsg)) < 0) {
|
||||||
pReceiver->startTime, pMsg->startTime);
|
sRInfo(pReceiver,
|
||||||
|
"received a new snapshot preparation. restart receiver"
|
||||||
|
"receiver signature: (%" PRId64 ", %" PRId64 "), msg signature:(%" PRId64 ", %" PRId64 ")",
|
||||||
|
pReceiver->term, pReceiver->startTime, pMsg->term, pMsg->startTime);
|
||||||
goto _START_RECEIVER;
|
goto _START_RECEIVER;
|
||||||
} else if (pMsg->startTime == pReceiver->startTime) {
|
} else if (order == 0) {
|
||||||
sRInfo(pReceiver, "snapshot receiver startTime:%" PRId64 " == msg startTime:%" PRId64 " send reply",
|
sRInfo(pReceiver,
|
||||||
pReceiver->startTime, pMsg->startTime);
|
"received a duplicate snapshot preparation. send reply"
|
||||||
|
"receiver signature: (%" PRId64 ", %" PRId64 "), msg signature:(%" PRId64 ", %" PRId64 ")",
|
||||||
|
pReceiver->term, pReceiver->startTime, pMsg->term, pMsg->startTime);
|
||||||
goto _SEND_REPLY;
|
goto _SEND_REPLY;
|
||||||
} else {
|
} else {
|
||||||
// ignore
|
// ignore
|
||||||
sRError(pReceiver, "snapshot receiver startTime:%" PRId64 " < msg startTime:%" PRId64 " ignore",
|
sRError(pReceiver,
|
||||||
pReceiver->startTime, pMsg->startTime);
|
"received a stale snapshot preparation. ignore"
|
||||||
|
"receiver signature: (%" PRId64 ", %" PRId64 "), msg signature:(%" PRId64 ", %" PRId64 ")",
|
||||||
|
pReceiver->term, pReceiver->startTime, pMsg->term, pMsg->startTime);
|
||||||
terrno = TSDB_CODE_SYN_MISMATCHED_SIGNATURE;
|
terrno = TSDB_CODE_SYN_MISMATCHED_SIGNATURE;
|
||||||
code = terrno;
|
code = terrno;
|
||||||
goto _SEND_REPLY;
|
goto _SEND_REPLY;
|
||||||
|
@ -703,10 +716,9 @@ static int32_t syncNodeOnSnapshotBegin(SSyncNode *pSyncNode, SyncSnapshotSend *p
|
||||||
goto _SEND_REPLY;
|
goto _SEND_REPLY;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (pReceiver->startTime != pMsg->startTime) {
|
if (snapshotReceiverSignatureCmp(pReceiver, pMsg) != 0) {
|
||||||
sRError(pReceiver, "snapshot receiver begin failed since startTime:%" PRId64 " not equal to msg startTime:%" PRId64,
|
|
||||||
pReceiver->startTime, pMsg->startTime);
|
|
||||||
terrno = TSDB_CODE_SYN_MISMATCHED_SIGNATURE;
|
terrno = TSDB_CODE_SYN_MISMATCHED_SIGNATURE;
|
||||||
|
sRError(pReceiver, "snapshot receiver begin failed since %s", terrstr());
|
||||||
goto _SEND_REPLY;
|
goto _SEND_REPLY;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -759,10 +771,9 @@ static int32_t syncNodeOnSnapshotReceive(SSyncNode *pSyncNode, SyncSnapshotSend
|
||||||
int64_t timeNow = taosGetTimestampMs();
|
int64_t timeNow = taosGetTimestampMs();
|
||||||
int32_t code = 0;
|
int32_t code = 0;
|
||||||
|
|
||||||
if (pReceiver->startTime != pMsg->startTime) {
|
if (snapshotReceiverSignatureCmp(pReceiver, pMsg) != 0) {
|
||||||
sRError(pReceiver, "snapshot receive failed since startTime:%" PRId64 " not equal to msg startTime:%" PRId64,
|
|
||||||
pReceiver->startTime, pMsg->startTime);
|
|
||||||
terrno = TSDB_CODE_SYN_MISMATCHED_SIGNATURE;
|
terrno = TSDB_CODE_SYN_MISMATCHED_SIGNATURE;
|
||||||
|
sRError(pReceiver, "snapshot receive failed since %s.", terrstr());
|
||||||
code = terrno;
|
code = terrno;
|
||||||
goto _SEND_REPLY;
|
goto _SEND_REPLY;
|
||||||
}
|
}
|
||||||
|
@ -811,7 +822,7 @@ static int32_t syncNodeOnSnapshotEnd(SSyncNode *pSyncNode, SyncSnapshotSend *pMs
|
||||||
int64_t timeNow = taosGetTimestampMs();
|
int64_t timeNow = taosGetTimestampMs();
|
||||||
int32_t code = 0;
|
int32_t code = 0;
|
||||||
|
|
||||||
if (pReceiver->startTime != pMsg->startTime) {
|
if (snapshotReceiverSignatureCmp(pReceiver, pMsg) != 0) {
|
||||||
sRError(pReceiver, "snapshot end failed since startTime:%" PRId64 " not equal to msg startTime:%" PRId64,
|
sRError(pReceiver, "snapshot end failed since startTime:%" PRId64 " not equal to msg startTime:%" PRId64,
|
||||||
pReceiver->startTime, pMsg->startTime);
|
pReceiver->startTime, pMsg->startTime);
|
||||||
terrno = TSDB_CODE_SYN_MISMATCHED_SIGNATURE;
|
terrno = TSDB_CODE_SYN_MISMATCHED_SIGNATURE;
|
||||||
|
@ -880,13 +891,13 @@ int32_t syncNodeOnSnapshot(SSyncNode *pSyncNode, const SRpcMsg *pRpcMsg) {
|
||||||
// if already drop replica, do not process
|
// if already drop replica, do not process
|
||||||
if (!syncNodeInRaftGroup(pSyncNode, &pMsg->srcId)) {
|
if (!syncNodeInRaftGroup(pSyncNode, &pMsg->srcId)) {
|
||||||
syncLogRecvSyncSnapshotSend(pSyncNode, pMsg, "not in my config");
|
syncLogRecvSyncSnapshotSend(pSyncNode, pMsg, "not in my config");
|
||||||
terrno = TSDB_CODE_SYN_INTERNAL_ERROR;
|
terrno = TSDB_CODE_SYN_MISMATCHED_SIGNATURE;
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (pMsg->term < raftStoreGetTerm(pSyncNode)) {
|
if (pMsg->term < raftStoreGetTerm(pSyncNode)) {
|
||||||
syncLogRecvSyncSnapshotSend(pSyncNode, pMsg, "reject since small term");
|
syncLogRecvSyncSnapshotSend(pSyncNode, pMsg, "reject since small term");
|
||||||
terrno = TSDB_CODE_SYN_INTERNAL_ERROR;
|
terrno = TSDB_CODE_SYN_MISMATCHED_SIGNATURE;
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1015,6 +1026,14 @@ static int32_t syncNodeOnSnapshotPrepRsp(SSyncNode *pSyncNode, SSyncSnapshotSend
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static int32_t snapshotSenderSignatureCmp(SSyncSnapshotSender *pSender, SyncSnapshotRsp *pMsg) {
|
||||||
|
if (pSender->term < pMsg->term) return -1;
|
||||||
|
if (pSender->term > pMsg->term) return 1;
|
||||||
|
if (pSender->startTime < pMsg->startTime) return -1;
|
||||||
|
if (pSender->startTime > pMsg->startTime) return 1;
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
// sender on message
|
// sender on message
|
||||||
//
|
//
|
||||||
// condition 1 sender receives SYNC_SNAPSHOT_SEQ_END, close sender
|
// condition 1 sender receives SYNC_SNAPSHOT_SEQ_END, close sender
|
||||||
|
@ -1045,19 +1064,21 @@ int32_t syncNodeOnSnapshotRsp(SSyncNode *pSyncNode, const SRpcMsg *pRpcMsg) {
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (pMsg->startTime < pSender->startTime) {
|
// check signature
|
||||||
sSError(pSender, "ignore stale rsp received. sender startTime:%" PRId64 ", msg startTime:%" PRId64,
|
int32_t order = 0;
|
||||||
pSender->startTime, pMsg->startTime);
|
if ((order = snapshotSenderSignatureCmp(pSender, pMsg)) > 0) {
|
||||||
terrno = pMsg->code;
|
sSError(pSender,
|
||||||
|
"received a stale snapshot rsp. ignore it"
|
||||||
|
"sender signature: (%" PRId64 ", %" PRId64 "), msg signature:(%" PRId64 ", %" PRId64 ")",
|
||||||
|
pSender->term, pSender->startTime, pMsg->term, pMsg->startTime);
|
||||||
|
terrno = TSDB_CODE_SYN_MISMATCHED_SIGNATURE;
|
||||||
return -1;
|
return -1;
|
||||||
} else if (pMsg->startTime > pSender->startTime) {
|
} else if (order < 0) {
|
||||||
sSError(pSender, "unexpected start time in msg. sender startTime:%" PRId64 ", msg startTime:%" PRId64,
|
sSError(pSender, "snapshot sender is stale. stop");
|
||||||
pSender->startTime, pMsg->startTime);
|
terrno = TSDB_CODE_SYN_MISMATCHED_SIGNATURE;
|
||||||
goto _ERROR;
|
goto _ERROR;
|
||||||
}
|
}
|
||||||
|
|
||||||
ASSERT(pMsg->startTime == pSender->startTime);
|
|
||||||
|
|
||||||
// state, term, seq/ack
|
// state, term, seq/ack
|
||||||
if (pSyncNode->state != TAOS_SYNC_STATE_LEADER) {
|
if (pSyncNode->state != TAOS_SYNC_STATE_LEADER) {
|
||||||
syncLogRecvSyncSnapshotRsp(pSyncNode, pMsg, "snapshot sender not leader");
|
syncLogRecvSyncSnapshotRsp(pSyncNode, pMsg, "snapshot sender not leader");
|
||||||
|
|
Loading…
Reference in New Issue