From 05372238d0c78b3e03850b5601872273fb0df679 Mon Sep 17 00:00:00 2001 From: Liu Jicong Date: Tue, 11 Jan 2022 19:55:00 +0800 Subject: [PATCH 01/20] refactor walRepairMeta --- source/libs/wal/src/walMeta.c | 71 +++++++++++++++-------------------- 1 file changed, 30 insertions(+), 41 deletions(-) diff --git a/source/libs/wal/src/walMeta.c b/source/libs/wal/src/walMeta.c index cac80c0a5f..dea178eb54 100644 --- a/source/libs/wal/src/walMeta.c +++ b/source/libs/wal/src/walMeta.c @@ -36,7 +36,7 @@ void* tmemmem(char* haystack, int hlen, char* needle, int nlen) { char* limit; if (nlen == 0 || hlen < nlen) { - return false; + return NULL; } limit = haystack + hlen - nlen + 1; @@ -54,10 +54,12 @@ static inline int64_t walScanLogGetLastVer(SWal* pWal) { ASSERT(pWal->fileInfoSet != NULL); int sz = taosArrayGetSize(pWal->fileInfoSet); ASSERT(sz > 0); +#if 0 for (int i = 0; i < sz; i++) { SWalFileInfo* pFileInfo = taosArrayGet(pWal->fileInfoSet, i); - } +#endif + SWalFileInfo *pLastFileInfo = taosArrayGet(pWal->fileInfoSet, sz-1); char fnameStr[WAL_FILE_LEN]; walBuildLogName(pWal, pLastFileInfo->firstVer, fnameStr); @@ -143,8 +145,6 @@ int walCheckAndRepairMeta(SWal* pWal) { SWalFileInfo fileInfo; memset(&fileInfo, -1, sizeof(SWalFileInfo)); sscanf(name, "%" PRId64 ".log", &fileInfo.firstVer); - //get lastVer - //get size taosArrayPush(pLogInfoArray, &fileInfo); } } @@ -158,54 +158,43 @@ int walCheckAndRepairMeta(SWal* pWal) { oldSz = taosArrayGetSize(pWal->fileInfoSet); } int newSz = taosArrayGetSize(pLogInfoArray); - // case 1. meta file not exist / cannot be parsed - if (oldSz < newSz) { + + if (oldSz > newSz) { + taosArrayPopFrontBatch(pWal->fileInfoSet, oldSz - newSz); + } else if (oldSz < newSz) { for (int i = oldSz; i < newSz; i++) { SWalFileInfo *pFileInfo = taosArrayGet(pLogInfoArray, i); taosArrayPush(pWal->fileInfoSet, pFileInfo); } - - pWal->writeCur = newSz - 1; - pWal->vers.firstVer = ((SWalFileInfo*)taosArrayGet(pLogInfoArray, 0))->firstVer; - pWal->vers.lastVer = walScanLogGetLastVer(pWal); - ((SWalFileInfo*)taosArrayGetLast(pWal->fileInfoSet))->lastVer = pWal->vers.lastVer; - ASSERT(pWal->vers.lastVer != -1); - - int code = walSaveMeta(pWal); - if (code < 0) { - taosArrayDestroy(pLogInfoArray); - return -1; - } } - - // case 2. versions in meta not match log - // or some log not included in meta - // (e.g. program killed) - // - // case 3. 
other corrupt cases
-  //
-#if 0
-  int sz = taosArrayGetSize(pLogInfoArray);
-  for (int i = 0; i < sz; i++) {
-    SWalFileInfo* pFileInfo = taosArrayGet(pLogInfoArray, i);
-    if (i == 0 && pFileInfo->firstVer != walGetFirstVer(pWal)) {
-      //repair
-    }
+  taosArrayDestroy(pLogInfoArray);
 
-    if (i > 0) {
-      SWalFileInfo* pLastFileInfo = taosArrayGet(pLogInfoArray, i-1);
-      if (pLastFileInfo->lastVer != pFileInfo->firstVer) {
+  pWal->writeCur = newSz - 1;
+  if (newSz > 0) {
+    pWal->vers.firstVer = ((SWalFileInfo*)taosArrayGet(pWal->fileInfoSet, 0))->firstVer;
+    SWalFileInfo *pLastFileInfo = taosArrayGet(pWal->fileInfoSet, newSz-1);
+    char fnameStr[WAL_FILE_LEN];
+    walBuildLogName(pWal, pLastFileInfo->firstVer, fnameStr);
+    struct stat statbuf;
+    stat(fnameStr, &statbuf);
+
+    if (oldSz != newSz || pLastFileInfo->fileSize != statbuf.st_size) {
+      pLastFileInfo->fileSize = statbuf.st_size;
+      pWal->vers.lastVer = walScanLogGetLastVer(pWal);
+      ((SWalFileInfo*)taosArrayGetLast(pWal->fileInfoSet))->lastVer = pWal->vers.lastVer;
+      ASSERT(pWal->vers.lastVer != -1);
+
+      int code = walSaveMeta(pWal);
+      if (code < 0) {
+        taosArrayDestroy(pLogInfoArray);
+        return -1;
       }
     }
   }
-#endif
-
-  // get last version of this file
-  //
-  // rebuild meta
-  taosArrayDestroy(pLogInfoArray);
+  //TODO: set fileSize and lastVer if necessary
+
   return 0;
 }
 
From 876942b4f2a44de024a0afb5585467d3edb39f3d Mon Sep 17 00:00:00 2001
From: dapan1121 
Date: Wed, 12 Jan 2022 09:22:48 +0800
Subject: [PATCH 02/20] feature/qnode

---
 source/libs/scheduler/inc/schedulerInt.h      | 28 ++++++++--
 source/libs/scheduler/src/scheduler.c         | 55 ++++++++++---------
 source/libs/scheduler/test/schedulerTests.cpp | 21 +++++++
 3 files changed, 75 insertions(+), 29 deletions(-)

diff --git a/source/libs/scheduler/inc/schedulerInt.h b/source/libs/scheduler/inc/schedulerInt.h
index c83eba4232..661beee5d5 100644
--- a/source/libs/scheduler/inc/schedulerInt.h
+++ b/source/libs/scheduler/inc/schedulerInt.h
@@ -36,11 +36,31 @@ enum {
   SCH_WRITE,
 };
 
+typedef struct SSchApiStat {
+
+} SSchApiStat;
+
+typedef struct SSchRuntimeStat {
+
+} SSchRuntimeStat;
+
+typedef struct SSchJobStat {
+
+} SSchJobStat;
+
+typedef struct SSchedulerStat {
+  SSchApiStat      api;
+  SSchRuntimeStat  runtime;
+  SSchJobStat      job;
+} SSchedulerStat;
+
+
 typedef struct SSchedulerMgmt {
-  uint64_t        taskId; // sequential taksId
-  uint64_t        sId;    // schedulerId
-  SSchedulerCfg   cfg;
-  SHashObj       *jobs;   // key: queryId, value: SQueryJob*
+  uint64_t        taskId; // sequential taskId
+  uint64_t        sId;    // schedulerId
+  SSchedulerCfg   cfg;
+  SHashObj       *jobs;   // key: queryId, value: SQueryJob*
+  SSchedulerStat  stat;
 } SSchedulerMgmt;
 
 typedef struct SSchCallbackParam {
diff --git a/source/libs/scheduler/src/scheduler.c b/source/libs/scheduler/src/scheduler.c
index ed12408844..aeec1ff5a0 100644
--- a/source/libs/scheduler/src/scheduler.c
+++ b/source/libs/scheduler/src/scheduler.c
@@ -1462,35 +1462,38 @@ void scheduleFreeJob(void *job) {
   }
 
   SSchJob *pJob = job;
+  uint64_t queryId = pJob->queryId;
 
-  if (0 != taosHashRemove(schMgmt.jobs, &pJob->queryId, sizeof(pJob->queryId))) {
-    SCH_JOB_ELOG("taosHashRemove job from list failed, may already freed, pJob:%p", pJob);
-    return;
-  }
-
-  schCheckAndUpdateJobStatus(pJob, JOB_TASK_STATUS_DROPPING);
-
-  SCH_JOB_DLOG("job removed from list, no further ref, ref:%d", atomic_load_32(&pJob->ref));
-
-  while (true) {
-    int32_t ref = atomic_load_32(&pJob->ref);
-    if (0 == ref) {
-      break;
-    } else if (ref > 0) {
-      usleep(1);
-    } else {
-      assert(0);
+  if (SCH_GET_JOB_STATUS(pJob) > 0) {
+    if (0 != 
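/* Illustrative sketch, not part of this patch: the block below drains the job's
   reference count before teardown -- the job is removed from the hash, marked
   DROPPING, and the freeing thread then spins until every in-flight user has
   released its reference. The same idiom with C11 atomics (all names here are
   hypothetical):

       #include <stdatomic.h>
       #include <sched.h>

       typedef struct { atomic_int ref; } SRefObj;

       static void refWaitIdle(SRefObj *obj) {
         while (atomic_load(&obj->ref) > 0) {
           sched_yield();  // let releasing threads run
         }
       }

   scheduleFreeJob() additionally asserts that the count never goes negative,
   which would indicate an unbalanced release. */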
taosHashRemove(schMgmt.jobs, &pJob->queryId, sizeof(pJob->queryId))) {
+      SCH_JOB_ELOG("taosHashRemove job from list failed, may already freed, pJob:%p", pJob);
+      return;
+    }
+
+    schCheckAndUpdateJobStatus(pJob, JOB_TASK_STATUS_DROPPING);
+
+    SCH_JOB_DLOG("job removed from list, no further ref, ref:%d", atomic_load_32(&pJob->ref));
+
+    while (true) {
+      int32_t ref = atomic_load_32(&pJob->ref);
+      if (0 == ref) {
+        break;
+      } else if (ref > 0) {
+        usleep(1);
+      } else {
+        assert(0);
+      }
     }
+
+    SCH_JOB_DLOG("job no ref now, status:%d", SCH_GET_JOB_STATUS(pJob));
+
+    if (pJob->status == JOB_TASK_STATUS_EXECUTING) {
+      schCancelJob(pJob);
+    }
+
+    schDropJobAllTasks(pJob);
+  }
 
-  SCH_JOB_DLOG("job no ref now, status:%d", SCH_GET_JOB_STATUS(pJob));
-
-  if (pJob->status == JOB_TASK_STATUS_EXECUTING) {
-    schCancelJob(pJob);
-  }
-
-  schDropJobAllTasks(pJob);
-
   pJob->subPlans = NULL;  // it is a reference to pDag->pSubplans
 
   int32_t numOfLevels = taosArrayGetSize(pJob->levels);
@@ -1515,6 +1518,8 @@ void scheduleFreeJob(void *job) {
 
   tfree(pJob->res);
   tfree(pJob);
+
+  qDebug("QID:%"PRIx64" job freed", queryId);
 }
 
 void schedulerDestroy(void) {
diff --git a/source/libs/scheduler/test/schedulerTests.cpp b/source/libs/scheduler/test/schedulerTests.cpp
index b438521234..114b3c02b5 100644
--- a/source/libs/scheduler/test/schedulerTests.cpp
+++ b/source/libs/scheduler/test/schedulerTests.cpp
@@ -79,6 +79,7 @@ void schtBuildQueryDag(SQueryDag *dag) {
   scanPlan->level = 1;
   scanPlan->pParents = taosArrayInit(1, POINTER_BYTES);
   scanPlan->pNode = (SPhyNode*)calloc(1, sizeof(SPhyNode));
+  scanPlan->msgType = TDMT_VND_QUERY;
 
   mergePlan->id.queryId = qId;
   mergePlan->id.templateId = 0x4444444444;
@@ -89,6 +90,7 @@ void schtBuildQueryDag(SQueryDag *dag) {
   mergePlan->pChildren = taosArrayInit(1, POINTER_BYTES);
   mergePlan->pParents = NULL;
   mergePlan->pNode = (SPhyNode*)calloc(1, sizeof(SPhyNode));
+  mergePlan->msgType = TDMT_VND_QUERY;
 
   SSubplan *mergePointer = (SSubplan *)taosArrayPush(merge, &mergePlan);
   SSubplan *scanPointer = (SSubplan *)taosArrayPush(scan, &scanPlan);
@@ -163,6 +165,11 @@ void schtExecNode(SSubplan* subplan, uint64_t templateId, SQueryNodeAddr* ep) {
 
 }
 
+void schtRpcSendRequest(void *shandle, const SEpSet *pEpSet, SRpcMsg *pMsg, int64_t *pRid) {
+
+}
+
+
 void schtSetPlanToString() {
   static Stub stub;
@@ -190,6 +197,20 @@ void schtSetExecNode() {
   }
 }
 
+void schtSetRpcSendRequest() {
+  static Stub stub;
+  stub.set(rpcSendRequest, schtRpcSendRequest);
+  {
+    AddrAny any("libtransport.so");
+    std::map<std::string, uint64_t> result;
+    any.get_global_func_addr_dynsym("^rpcSendRequest$", result);
+    for (const auto& f : result) {
+      stub.set(f.second, schtRpcSendRequest);
+    }
+  }
+}
+
+
 void *schtSendRsp(void *param) {
   SSchJob *job = NULL;
   int32_t code = 0;
 
From 5cf4edacbb6da2a5b554396f5fc7ccee6b80cd47 Mon Sep 17 00:00:00 2001
From: dapan1121 
Date: Wed, 12 Jan 2022 18:40:22 +0800
Subject: [PATCH 03/20] feature/qnode

---
 include/common/tmsg.h                     |   7 +
 include/common/tmsgdef.h                  |   2 +
 include/libs/qworker/qworker.h            |  19 +-
 source/dnode/vnode/impl/inc/vnodeQuery.h  |   1 +
 source/dnode/vnode/impl/src/vnodeQuery.c  |  17 +-
 source/libs/executor/src/executorMain.c   |   6 +-
 source/libs/qworker/inc/qworkerInt.h      |  76 +++--
 source/libs/qworker/src/qworker.c         | 393 ++++++++++++++++------
 source/libs/qworker/test/qworkerTests.cpp |  13 +-
 9 files changed, 389 insertions(+), 145 deletions(-)

diff --git a/include/common/tmsg.h b/include/common/tmsg.h
index 2aaa2168cc..339db43374 100644
--- a/include/common/tmsg.h
+++ b/include/common/tmsg.h
@@ -993,6 +993,13 @@ typedef 
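/* Illustrative sketch, not part of this patch: schtSetRpcSendRequest() above
   reroutes rpcSendRequest to a no-op stub twice -- once via the Stub helper for
   the statically linked symbol, and once per address found by
   get_global_func_addr_dynsym() in libtransport.so -- so scheduler tests never
   touch the network. The same idea reduced to a plain function pointer
   (helper names are hypothetical):

       typedef void (*SendReqFp)(void *shandle, const SEpSet *pEpSet,
                                 SRpcMsg *pMsg, int64_t *pRid);
       static SendReqFp tsSendReqFp = rpcSendRequest;  // production default

       void schtInstallSendStub(SendReqFp fp) { tsSendReqFp = fp; }

   A test fixture would call schtInstallSendStub(schtRpcSendRequest) before the
   scheduler issues any request. */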
struct {
   uint64_t taskId;
 } SSinkDataReq;
 
+typedef struct {
+  SMsgHead header;
+  uint64_t sId;
+  uint64_t queryId;
+  uint64_t taskId;
+} SQueryContinueReq;
+
 typedef struct {
   SMsgHead header;
 
diff --git a/include/common/tmsgdef.h b/include/common/tmsgdef.h
index 592672b32b..2f4bae2fa0 100644
--- a/include/common/tmsgdef.h
+++ b/include/common/tmsgdef.h
@@ -170,6 +170,8 @@ enum {
   TD_DEF_MSG_TYPE(TDMT_VND_DROP_TOPIC, "vnode-drop-topic", NULL, NULL)
   TD_DEF_MSG_TYPE(TDMT_VND_SHOW_TABLES, "vnode-show-tables", SVShowTablesReq, SVShowTablesRsp)
   TD_DEF_MSG_TYPE(TDMT_VND_SHOW_TABLES_FETCH, "vnode-show-tables-fetch", SVShowTablesFetchReq, SVShowTablesFetchRsp)
+  TD_DEF_MSG_TYPE(TDMT_VND_QUERY_CONTINUE, "vnode-query-continue", NULL, NULL)
+  TD_DEF_MSG_TYPE(TDMT_VND_SCHEDULE_DATA_SINK, "vnode-schedule-data-sink", NULL, NULL)
 
   TD_DEF_MSG_TYPE(TDMT_VND_SUBSCRIBE, "vnode-subscribe", SMVSubscribeReq, SMVSubscribeRsp)
 
diff --git a/include/libs/qworker/qworker.h b/include/libs/qworker/qworker.h
index 9897467230..08b5fb98e7 100644
--- a/include/libs/qworker/qworker.h
+++ b/include/libs/qworker/qworker.h
@@ -22,9 +22,18 @@ extern "C" {
 
 #include "trpc.h"
 
+
+enum {
+  NODE_TYPE_VNODE = 1,
+  NODE_TYPE_QNODE,
+  NODE_TYPE_SNODE,
+};
+
+
+
 typedef struct SQWorkerCfg {
   uint32_t maxSchedulerNum;
-  uint32_t maxResCacheNum;
+  uint32_t maxTaskNum;
   uint32_t maxSchTaskNum;
 } SQWorkerCfg;
 
@@ -39,11 +48,17 @@ typedef struct {
   uint64_t numOfErrors;
 } SQWorkerStat;
 
+typedef int32_t (*putReqToQueryQFp)(void *, struct SRpcMsg *);
 
-int32_t qWorkerInit(SQWorkerCfg *cfg, void **qWorkerMgmt);
+
+int32_t qWorkerInit(int8_t nodeType, int32_t nodeId, SQWorkerCfg *cfg, void **qWorkerMgmt, void *nodeObj, putReqToQueryQFp fp);
 
 int32_t qWorkerProcessQueryMsg(void *node, void *qWorkerMgmt, SRpcMsg *pMsg);
 
+int32_t qWorkerProcessQueryContinueMsg(void *node, void *qWorkerMgmt, SRpcMsg *pMsg);
+
+int32_t qWorkerProcessDataSinkMsg(void *node, void *qWorkerMgmt, SRpcMsg *pMsg);
+
 int32_t qWorkerProcessReadyMsg(void *node, void *qWorkerMgmt, SRpcMsg *pMsg);
 
 int32_t qWorkerProcessStatusMsg(void *node, void *qWorkerMgmt, SRpcMsg *pMsg);
 
diff --git a/source/dnode/vnode/impl/inc/vnodeQuery.h b/source/dnode/vnode/impl/inc/vnodeQuery.h
index d43f5b1cf1..b51aafb313 100644
--- a/source/dnode/vnode/impl/inc/vnodeQuery.h
+++ b/source/dnode/vnode/impl/inc/vnodeQuery.h
@@ -22,6 +22,7 @@ extern "C" {
 #include "vnodeInt.h"
 #include "qworker.h"
 
+
 typedef struct SQWorkerMgmt SQHandle;
 
diff --git a/source/dnode/vnode/impl/src/vnodeQuery.c b/source/dnode/vnode/impl/src/vnodeQuery.c
index 909b233efb..52b20f8411 100644
--- a/source/dnode/vnode/impl/src/vnodeQuery.c
+++ b/source/dnode/vnode/impl/src/vnodeQuery.c
@@ -19,11 +19,22 @@
 static int32_t vnodeGetTableList(SVnode *pVnode, SRpcMsg *pMsg);
 static int     vnodeGetTableMeta(SVnode *pVnode, SRpcMsg *pMsg, SRpcMsg **pRsp);
 
-int vnodeQueryOpen(SVnode *pVnode) { return qWorkerInit(NULL, &pVnode->pQuery); }
+int vnodeQueryOpen(SVnode *pVnode) { return qWorkerInit(NODE_TYPE_VNODE, pVnode->vgId, NULL, &pVnode->pQuery, pVnode, vnodePutReqToVQueryQ); }
 
 int vnodeProcessQueryReq(SVnode *pVnode, SRpcMsg *pMsg, SRpcMsg **pRsp) {
-  vTrace("query message is processed");
-  return qWorkerProcessQueryMsg(pVnode->pTsdb, pVnode->pQuery, pMsg);
+  vTrace("query message is being processed");
+
+  switch (pMsg->msgType) {
+    case TDMT_VND_QUERY:
+      return qWorkerProcessQueryMsg(pVnode->pTsdb, pVnode->pQuery, pMsg);
+    case TDMT_VND_QUERY_CONTINUE:
+      return qWorkerProcessQueryContinueMsg(pVnode->pTsdb, pVnode->pQuery, pMsg);
+    case 
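/* Illustrative note, not part of this patch: vnodeProcessQueryReq() fans out on
   msgType here -- fresh TDMT_VND_QUERY tasks, TDMT_VND_QUERY_CONTINUE
   resumptions, and TDMT_VND_SCHEDULE_DATA_SINK wake-ups all arrive on the same
   query queue. SQueryContinueReq (declared above) carries only the identity
   triple (sId, queryId, taskId) plus the SMsgHead routing header; because it is
   re-enqueued locally through putReqToQueryQFp rather than sent over the
   network, its ids stay in host byte order. Builder sketch (helper name is
   hypothetical):

       static SQueryContinueReq *buildContinueReq(int32_t vgId, uint64_t sId,
                                                  uint64_t qId, uint64_t tId) {
         SQueryContinueReq *req = rpcMallocCont(sizeof(*req));
         if (NULL == req) return NULL;
         req->header.vgId = vgId;  // routing only; no byte swap for a local queue
         req->sId = sId;
         req->queryId = qId;
         req->taskId = tId;
         return req;
       }
*/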
TDMT_VND_SCHEDULE_DATA_SINK: + return qWorkerProcessDataSinkMsg(pVnode->pTsdb, pVnode->pQuery, pMsg); + default: + vError("unknown msg type:%d in query queue", pMsg->msgType); + return TSDB_CODE_VND_APP_ERROR; + } } int vnodeProcessFetchReq(SVnode *pVnode, SRpcMsg *pMsg, SRpcMsg **pRsp) { diff --git a/source/libs/executor/src/executorMain.c b/source/libs/executor/src/executorMain.c index 50f69fb567..daeefba253 100644 --- a/source/libs/executor/src/executorMain.c +++ b/source/libs/executor/src/executorMain.c @@ -178,8 +178,10 @@ int32_t qExecTask(qTaskInfo_t tinfo, DataSinkHandle* handle) { publishOperatorProfEvent(pTaskInfo->pRoot, QUERY_PROF_BEFORE_OPERATOR_EXEC); int64_t st = 0; - *handle = pTaskInfo->dsHandle; - + if (handle) { + *handle = pTaskInfo->dsHandle; + } + while(1) { st = taosGetTimestampUs(); SSDataBlock* pRes = pTaskInfo->pRoot->exec(pTaskInfo->pRoot, &newgroup); diff --git a/source/libs/qworker/inc/qworkerInt.h b/source/libs/qworker/inc/qworkerInt.h index 7883079fbe..b5ec50cc04 100644 --- a/source/libs/qworker/inc/qworkerInt.h +++ b/source/libs/qworker/inc/qworkerInt.h @@ -23,7 +23,7 @@ extern "C" { #include "tlockfree.h" #define QWORKER_DEFAULT_SCHEDULER_NUMBER 10000 -#define QWORKER_DEFAULT_RES_CACHE_NUMBER 10000 +#define QWORKER_DEFAULT_TASK_NUMBER 10000 #define QWORKER_DEFAULT_SCH_TASK_NUMBER 10000 enum { @@ -57,7 +57,6 @@ enum { QW_ADD_ACQUIRE, }; - typedef struct SQWTaskStatus { SRWLatch lock; int32_t code; @@ -67,12 +66,14 @@ typedef struct SQWTaskStatus { bool drop; } SQWTaskStatus; -typedef struct SQWorkerTaskHandlesCache { +typedef struct SQWTaskCtx { SRWLatch lock; + int8_t sinkScheduled; + int8_t queryScheduled; bool needRsp; qTaskInfo_t taskHandle; DataSinkHandle sinkHandle; -} SQWorkerTaskHandlesCache; +} SQWTaskCtx; typedef struct SQWSchStatus { int32_t lastAccessTs; // timestamp in second @@ -82,11 +83,15 @@ typedef struct SQWSchStatus { // Qnode/Vnode level task management typedef struct SQWorkerMgmt { - SQWorkerCfg cfg; - SRWLatch schLock; - SRWLatch resLock; - SHashObj *schHash; //key: schedulerId, value: SQWSchStatus - SHashObj *resHash; //key: queryId+taskId, value: SQWorkerResCache + SQWorkerCfg cfg; + int8_t nodeType; + int32_t nodeId; + SRWLatch schLock; + SRWLatch ctxLock; + SHashObj *schHash; //key: schedulerId, value: SQWSchStatus + SHashObj *ctxHash; //key: queryId+taskId, value: SQWTaskCtx + void *nodeObj; + putReqToQueryQFp putToQueueFp; } SQWorkerMgmt; #define QW_GOT_RES_DATA(data) (true) @@ -95,40 +100,63 @@ typedef struct SQWorkerMgmt { #define QW_TASK_NOT_EXIST(code) (TSDB_CODE_QRY_SCH_NOT_EXIST == (code) || TSDB_CODE_QRY_TASK_NOT_EXIST == (code)) #define QW_TASK_ALREADY_EXIST(code) (TSDB_CODE_QRY_TASK_ALREADY_EXIST == (code)) #define QW_TASK_READY_RESP(status) (status == JOB_TASK_STATUS_SUCCEED || status == JOB_TASK_STATUS_FAILED || status == JOB_TASK_STATUS_CANCELLED || status == JOB_TASK_STATUS_PARTIAL_SUCCEED) -#define QW_SET_QTID(id, qid, tid) do { *(uint64_t *)(id) = (qid); *(uint64_t *)((char *)(id) + sizeof(qid)) = (tid); } while (0) -#define QW_GET_QTID(id, qid, tid) do { (qid) = *(uint64_t *)(id); (tid) = *(uint64_t *)((char *)(id) + sizeof(qid)); } while (0) - +#define QW_SET_QTID(id, qId, tId) do { *(uint64_t *)(id) = (qId); *(uint64_t *)((char *)(id) + sizeof(qId)) = (tId); } while (0) +#define QW_GET_QTID(id, qId, tId) do { (qId) = *(uint64_t *)(id); (tId) = *(uint64_t *)((char *)(id) + sizeof(qId)); } while (0) +#define QW_IDS() sId, qId, tId #define QW_ERR_RET(c) do { int32_t _code = c; if (_code != TSDB_CODE_SUCCESS) { 
terrno = _code; return _code; } } while (0) #define QW_RET(c) do { int32_t _code = c; if (_code != TSDB_CODE_SUCCESS) { terrno = _code; } return _code; } while (0) -#define QW_ERR_LRET(c,...) do { int32_t _code = c; if (_code != TSDB_CODE_SUCCESS) { qError(__VA_ARGS__); terrno = _code; return _code; } } while (0) #define QW_ERR_JRET(c) do { code = c; if (code != TSDB_CODE_SUCCESS) { terrno = code; goto _return; } } while (0) +#define QW_ELOG(param, ...) qError("QW:%p " param, mgmt, __VA_ARGS__) +#define QW_DLOG(param, ...) qDebug("QW:%p " param, mgmt, __VA_ARGS__) + +#define QW_SCH_ELOG(param, ...) qError("QW:%p SID:%"PRIx64 param, mgmt, sId, __VA_ARGS__) +#define QW_SCH_DLOG(param, ...) qDebug("QW:%p SID:%"PRIx64 param, mgmt, sId, __VA_ARGS__) + +#define QW_TASK_ELOG(param, ...) qError("QW:%p SID:%"PRIx64",QID:%"PRIx64",TID:%"PRIx64 param, mgmt, sId, qId, tId, __VA_ARGS__) +#define QW_TASK_WLOG(param, ...) qWarn("QW:%p SID:%"PRIx64",QID:%"PRIx64",TID:%"PRIx64 param, mgmt, sId, qId, tId, __VA_ARGS__) +#define QW_TASK_DLOG(param, ...) qDebug("QW:%p SID:%"PRIx64",QID:%"PRIx64",TID:%"PRIx64 param, mgmt, sId, qId, tId, __VA_ARGS__) + + +#define TD_RWLATCH_WRITE_FLAG_COPY 0x40000000 + #define QW_LOCK(type, _lock) do { \ if (QW_READ == (type)) { \ - if ((*(_lock)) < 0) assert(0); \ - taosRLockLatch(_lock); \ - qDebug("QW RLOCK%p, %s:%d", (_lock), __FILE__, __LINE__); \ + assert(atomic_load_32((_lock)) >= 0); \ + qDebug("QW RLOCK%p:%d, %s:%d B", (_lock), atomic_load_32(_lock), __FILE__, __LINE__); \ + taosRLockLatch(_lock); \ + qDebug("QW RLOCK%p:%d, %s:%d E", (_lock), atomic_load_32(_lock), __FILE__, __LINE__); \ + assert(atomic_load_32((_lock)) > 0); \ } else { \ - if ((*(_lock)) < 0) assert(0); \ + assert(atomic_load_32((_lock)) >= 0); \ + qDebug("QW WLOCK%p:%d, %s:%d B", (_lock), atomic_load_32(_lock), __FILE__, __LINE__); \ taosWLockLatch(_lock); \ - qDebug("QW WLOCK%p, %s:%d", (_lock), __FILE__, __LINE__); \ + qDebug("QW WLOCK%p:%d, %s:%d E", (_lock), atomic_load_32(_lock), __FILE__, __LINE__); \ + assert(atomic_load_32((_lock)) == TD_RWLATCH_WRITE_FLAG_COPY); \ } \ } while (0) - + #define QW_UNLOCK(type, _lock) do { \ if (QW_READ == (type)) { \ - if ((*(_lock)) <= 0) assert(0); \ + assert(atomic_load_32((_lock)) > 0); \ + qDebug("QW RULOCK%p:%d, %s:%d B", (_lock), atomic_load_32(_lock), __FILE__, __LINE__); \ taosRUnLockLatch(_lock); \ - qDebug("QW RULOCK%p, %s:%d", (_lock), __FILE__, __LINE__); \ + qDebug("QW RULOCK%p:%d, %s:%d E", (_lock), atomic_load_32(_lock), __FILE__, __LINE__); \ + assert(atomic_load_32((_lock)) >= 0); \ } else { \ - if ((*(_lock)) <= 0) assert(0); \ + assert(atomic_load_32((_lock)) == TD_RWLATCH_WRITE_FLAG_COPY); \ + qDebug("QW WULOCK%p:%d, %s:%d B", (_lock), atomic_load_32(_lock), __FILE__, __LINE__); \ taosWUnLockLatch(_lock); \ - qDebug("QW WULOCK%p, %s:%d", (_lock), __FILE__, __LINE__); \ + qDebug("QW WULOCK%p:%d, %s:%d E", (_lock), atomic_load_32(_lock), __FILE__, __LINE__); \ + assert(atomic_load_32((_lock)) >= 0); \ } \ } while (0) -static int32_t qwAcquireScheduler(int32_t rwType, SQWorkerMgmt *mgmt, uint64_t sId, SQWSchStatus **sch, int32_t nOpt); + + +static int32_t qwAcquireScheduler(int32_t rwType, SQWorkerMgmt *mgmt, uint64_t sId, SQWSchStatus **sch); +static int32_t qwAddAcquireScheduler(int32_t rwType, SQWorkerMgmt *mgmt, uint64_t sId, SQWSchStatus **sch); #ifdef __cplusplus diff --git a/source/libs/qworker/src/qworker.c b/source/libs/qworker/src/qworker.c index 2a395fcfe1..23d74ae91d 100644 --- a/source/libs/qworker/src/qworker.c +++ 
b/source/libs/qworker/src/qworker.c @@ -8,7 +8,7 @@ #include "tname.h" #include "dataSinkMgt.h" -int32_t qwValidateStatus(int8_t oriStatus, int8_t newStatus) { +int32_t qwValidateStatus(SQWorkerMgmt *mgmt, int8_t oriStatus, int8_t newStatus, uint64_t sId, uint64_t qId, uint64_t tId) { int32_t code = 0; if (oriStatus == newStatus) { @@ -62,7 +62,7 @@ int32_t qwValidateStatus(int8_t oriStatus, int8_t newStatus) { break; default: - qError("invalid task status:%d", oriStatus); + QW_TASK_ELOG("invalid task status:%d", oriStatus); return TSDB_CODE_QRY_APP_ERROR; } @@ -70,22 +70,27 @@ int32_t qwValidateStatus(int8_t oriStatus, int8_t newStatus) { _return: - qError("invalid task status, from %d to %d", oriStatus, newStatus); - QW_ERR_RET(code); + QW_TASK_ELOG("invalid task status update from %d to %d", oriStatus, newStatus); + QW_RET(code); } -int32_t qwUpdateTaskInfo(SQWTaskStatus *task, int8_t type, void *data) { +int32_t qwUpdateTaskInfo(SQWorkerMgmt *mgmt, SQWTaskStatus *task, int8_t type, void *data, uint64_t sId, uint64_t qId, uint64_t tId) { int32_t code = 0; + int8_t origStatus = 0; switch (type) { case QW_TASK_INFO_STATUS: { int8_t newStatus = *(int8_t *)data; - QW_ERR_RET(qwValidateStatus(task->status, newStatus)); + QW_ERR_RET(qwValidateStatus(mgmt, task->status, newStatus, QW_IDS())); + + origStatus = task->status; task->status = newStatus; + + QW_TASK_DLOG("task status updated from %d to %d", origStatus, newStatus); break; } default: - qError("uknown task info type:%d", type); + QW_TASK_ELOG("unknown task info, type:%d", type); return TSDB_CODE_QRY_APP_ERROR; } @@ -96,18 +101,18 @@ int32_t qwAddTaskHandlesToCache(SQWorkerMgmt *mgmt, uint64_t qId, uint64_t tId, char id[sizeof(qId) + sizeof(tId)] = {0}; QW_SET_QTID(id, qId, tId); - SQWorkerTaskHandlesCache resCache = {0}; + SQWTaskCtx resCache = {0}; resCache.taskHandle = taskHandle; resCache.sinkHandle = sinkHandle; - QW_LOCK(QW_WRITE, &mgmt->resLock); - if (0 != taosHashPut(mgmt->resHash, id, sizeof(id), &resCache, sizeof(SQWorkerTaskHandlesCache))) { - QW_UNLOCK(QW_WRITE, &mgmt->resLock); + QW_LOCK(QW_WRITE, &mgmt->ctxLock); + if (0 != taosHashPut(mgmt->ctxHash, id, sizeof(id), &resCache, sizeof(SQWTaskCtx))) { + QW_UNLOCK(QW_WRITE, &mgmt->ctxLock); qError("taosHashPut queryId[%"PRIx64"] taskId[%"PRIx64"] to resHash failed", qId, tId); return TSDB_CODE_QRY_APP_ERROR; } - QW_UNLOCK(QW_WRITE, &mgmt->resLock); + QW_UNLOCK(QW_WRITE, &mgmt->ctxLock); return TSDB_CODE_SUCCESS; } @@ -116,7 +121,7 @@ static int32_t qwAddScheduler(int32_t rwType, SQWorkerMgmt *mgmt, uint64_t sId, SQWSchStatus newSch = {0}; newSch.tasksHash = taosHashInit(mgmt->cfg.maxSchTaskNum, taosGetDefaultHashFunction(TSDB_DATA_TYPE_BINARY), false, HASH_NO_LOCK); if (NULL == newSch.tasksHash) { - qError("taosHashInit %d failed", mgmt->cfg.maxSchTaskNum); + QW_SCH_DLOG("taosHashInit %d failed", mgmt->cfg.maxSchTaskNum); return TSDB_CODE_QRY_OUT_OF_MEMORY; } @@ -126,14 +131,18 @@ static int32_t qwAddScheduler(int32_t rwType, SQWorkerMgmt *mgmt, uint64_t sId, if (0 != code) { if (!HASH_NODE_EXIST(code)) { QW_UNLOCK(QW_WRITE, &mgmt->schLock); - qError("taosHashPut sId[%"PRIx64"] to scheduleHash failed", sId); + QW_SCH_ELOG("taosHashPut new sch to scheduleHash failed, errno:%d", errno); taosHashCleanup(newSch.tasksHash); return TSDB_CODE_QRY_APP_ERROR; } } QW_UNLOCK(QW_WRITE, &mgmt->schLock); - if (TSDB_CODE_SUCCESS == qwAcquireScheduler(rwType, mgmt, sId, sch, QW_NOT_EXIST_ADD)) { + if (TSDB_CODE_SUCCESS == qwAcquireScheduler(rwType, mgmt, sId, sch)) { + if (code) { + 
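/* Illustrative note, not part of this patch: qwAddScheduler() tolerates two
   threads racing to create the same scheduler entry. taosHashPut() copies the
   value, so the loser must destroy its local tasksHash before falling back to
   acquiring the winner's entry -- that is what the cleanup just below does.
   The idiom in general form (names hypothetical):

       if (HASH_NODE_EXIST(hashPut(h, key, &mine))) {
         destroyLocalCopy(&mine);         // discard the losing copy
         return hashAcquire(h, key, out); // use whichever copy won the race
       }
*/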
taosHashCleanup(newSch.tasksHash); + } + return TSDB_CODE_SUCCESS; } } @@ -141,7 +150,7 @@ static int32_t qwAddScheduler(int32_t rwType, SQWorkerMgmt *mgmt, uint64_t sId, return TSDB_CODE_SUCCESS; } -static int32_t qwAcquireScheduler(int32_t rwType, SQWorkerMgmt *mgmt, uint64_t sId, SQWSchStatus **sch, int32_t nOpt) { +static int32_t qwAcquireSchedulerImpl(int32_t rwType, SQWorkerMgmt *mgmt, uint64_t sId, SQWSchStatus **sch, int32_t nOpt) { QW_LOCK(rwType, &mgmt->schLock); *sch = taosHashGet(mgmt->schHash, &sId, sizeof(sId)); if (NULL == (*sch)) { @@ -159,6 +168,14 @@ static int32_t qwAcquireScheduler(int32_t rwType, SQWorkerMgmt *mgmt, uint64_t s return TSDB_CODE_SUCCESS; } +static int32_t qwAddAcquireScheduler(int32_t rwType, SQWorkerMgmt *mgmt, uint64_t sId, SQWSchStatus **sch) { + return qwAcquireSchedulerImpl(rwType, mgmt, sId, sch, QW_NOT_EXIST_ADD); +} + +static int32_t qwAcquireScheduler(int32_t rwType, SQWorkerMgmt *mgmt, uint64_t sId, SQWSchStatus **sch) { + return qwAcquireSchedulerImpl(rwType, mgmt, sId, sch, QW_NOT_EXIST_RET_ERR); +} + static FORCE_INLINE void qwReleaseScheduler(int32_t rwType, SQWorkerMgmt *mgmt) { QW_UNLOCK(rwType, &mgmt->schLock); } @@ -234,7 +251,7 @@ int32_t qwAddTaskToSch(int32_t rwType, SQWSchStatus *sch, uint64_t qId, uint64_t static int32_t qwAddTask(SQWorkerMgmt *mgmt, uint64_t sId, uint64_t qId, uint64_t tId, int32_t status, int32_t eOpt, SQWSchStatus **sch, SQWTaskStatus **task) { SQWSchStatus *tsch = NULL; - QW_ERR_RET(qwAcquireScheduler(QW_READ, mgmt, sId, &tsch, QW_NOT_EXIST_ADD)); + QW_ERR_RET(qwAddAcquireScheduler(QW_READ, mgmt, sId, &tsch)); int32_t code = qwAddTaskToSch(QW_READ, tsch, qId, tId, status, eOpt, task); if (code) { @@ -250,14 +267,14 @@ static int32_t qwAddTask(SQWorkerMgmt *mgmt, uint64_t sId, uint64_t qId, uint64_ QW_RET(code); } -static FORCE_INLINE int32_t qwAcquireTaskHandles(int32_t rwType, SQWorkerMgmt *mgmt, uint64_t queryId, uint64_t taskId, SQWorkerTaskHandlesCache **handles) { +static FORCE_INLINE int32_t qwAcquireTaskCtx(int32_t rwType, SQWorkerMgmt *mgmt, uint64_t queryId, uint64_t taskId, SQWTaskCtx **handles) { char id[sizeof(queryId) + sizeof(taskId)] = {0}; QW_SET_QTID(id, queryId, taskId); - QW_LOCK(rwType, &mgmt->resLock); - *handles = taosHashGet(mgmt->resHash, id, sizeof(id)); + QW_LOCK(rwType, &mgmt->ctxLock); + *handles = taosHashGet(mgmt->ctxHash, id, sizeof(id)); if (NULL == (*handles)) { - QW_UNLOCK(rwType, &mgmt->resLock); + QW_UNLOCK(rwType, &mgmt->ctxLock); return TSDB_CODE_QRY_RES_CACHE_NOT_EXIST; } @@ -265,7 +282,7 @@ static FORCE_INLINE int32_t qwAcquireTaskHandles(int32_t rwType, SQWorkerMgmt *m } static FORCE_INLINE void qwReleaseTaskResCache(int32_t rwType, SQWorkerMgmt *mgmt) { - QW_UNLOCK(rwType, &mgmt->resLock); + QW_UNLOCK(rwType, &mgmt->ctxLock); } @@ -273,7 +290,7 @@ int32_t qwGetSchTasksStatus(SQWorkerMgmt *mgmt, uint64_t sId, SSchedulerStatusRs SQWSchStatus *sch = NULL; int32_t taskNum = 0; - QW_ERR_RET(qwAcquireScheduler(QW_READ, mgmt, sId, &sch, QW_NOT_EXIST_RET_ERR)); + QW_ERR_RET(qwAcquireScheduler(QW_READ, mgmt, sId, &sch)); sch->lastAccessTs = taosGetTimestampSec(); @@ -319,7 +336,7 @@ int32_t qwGetSchTasksStatus(SQWorkerMgmt *mgmt, uint64_t sId, SSchedulerStatusRs int32_t qwUpdateSchLastAccess(SQWorkerMgmt *mgmt, uint64_t sId) { SQWSchStatus *sch = NULL; - QW_ERR_RET(qwAcquireScheduler(QW_READ, mgmt, sId, &sch, QW_NOT_EXIST_RET_ERR)); + QW_ERR_RET(qwAcquireScheduler(QW_READ, mgmt, sId, &sch)); sch->lastAccessTs = taosGetTimestampSec(); @@ -333,12 +350,12 @@ int32_t 
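/* Illustrative note, not part of this patch: the qwAcquire*() helpers above
   return with the requested read/write latch still held on a hit and released
   on a miss, so every successful acquire must be paired with the matching
   qwRelease*() call -- task before scheduler when both are held. A conforming
   caller looks like this:

       SQWSchStatus *sch = NULL;
       if (qwAcquireScheduler(QW_READ, mgmt, sId, &sch)) {
         return TSDB_CODE_QRY_SCH_NOT_EXIST;  // miss: nothing left locked
       }
       // ... sch stays valid only while the latch is held ...
       qwReleaseScheduler(QW_READ, mgmt);     // always undo the acquire
*/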
qwUpdateTaskStatus(SQWorkerMgmt *mgmt, uint64_t sId, uint64_t qId, uint6 SQWTaskStatus *task = NULL; int32_t code = 0; - QW_ERR_RET(qwAcquireScheduler(QW_READ, mgmt, sId, &sch, QW_NOT_EXIST_RET_ERR)); + QW_ERR_RET(qwAcquireScheduler(QW_READ, mgmt, sId, &sch)); QW_ERR_JRET(qwAcquireTask(QW_READ, sch, qId, tId, &task)); QW_LOCK(QW_WRITE, &task->lock); - qwUpdateTaskInfo(task, QW_TASK_INFO_STATUS, &status); + qwUpdateTaskInfo(mgmt, task, QW_TASK_INFO_STATUS, &status, QW_IDS()); QW_UNLOCK(QW_WRITE, &task->lock); _return: @@ -355,7 +372,7 @@ int32_t qwGetTaskStatus(SQWorkerMgmt *mgmt, uint64_t sId, uint64_t queryId, uint SQWTaskStatus *task = NULL; int32_t code = 0; - if (qwAcquireScheduler(QW_READ, mgmt, sId, &sch, QW_NOT_EXIST_RET_ERR)) { + if (qwAcquireScheduler(QW_READ, mgmt, sId, &sch)) { *taskStatus = JOB_TASK_STATUS_NULL; return TSDB_CODE_SUCCESS; } @@ -376,17 +393,17 @@ int32_t qwGetTaskStatus(SQWorkerMgmt *mgmt, uint64_t sId, uint64_t queryId, uint } -int32_t qwCancelTask(SQWorkerMgmt *mgmt, uint64_t sId, uint64_t queryId, uint64_t taskId) { +int32_t qwCancelTask(SQWorkerMgmt *mgmt, uint64_t sId, uint64_t qId, uint64_t tId) { SQWSchStatus *sch = NULL; SQWTaskStatus *task = NULL; int32_t code = 0; - QW_ERR_RET(qwAcquireScheduler(QW_READ, mgmt, sId, &sch, QW_NOT_EXIST_ADD)); + QW_ERR_RET(qwAddAcquireScheduler(QW_READ, mgmt, sId, &sch)); - if (qwAcquireTask(QW_READ, sch, queryId, taskId, &task)) { + if (qwAcquireTask(QW_READ, sch, qId, tId, &task)) { qwReleaseScheduler(QW_READ, mgmt); - code = qwAddTask(mgmt, sId, queryId, taskId, JOB_TASK_STATUS_NOT_START, QW_EXIST_ACQUIRE, &sch, &task); + code = qwAddTask(mgmt, sId, qId, tId, JOB_TASK_STATUS_NOT_START, QW_EXIST_ACQUIRE, &sch, &task); if (code) { qwReleaseScheduler(QW_READ, mgmt); QW_ERR_RET(code); @@ -409,10 +426,10 @@ int32_t qwCancelTask(SQWorkerMgmt *mgmt, uint64_t sId, uint64_t queryId, uint64_ return TSDB_CODE_SUCCESS; } else if (task->status == JOB_TASK_STATUS_FAILED || task->status == JOB_TASK_STATUS_SUCCEED || task->status == JOB_TASK_STATUS_PARTIAL_SUCCEED) { newStatus = JOB_TASK_STATUS_CANCELLED; - QW_ERR_JRET(qwUpdateTaskInfo(task, QW_TASK_INFO_STATUS, &newStatus)); + QW_ERR_JRET(qwUpdateTaskInfo(mgmt, task, QW_TASK_INFO_STATUS, &newStatus, QW_IDS())); } else { newStatus = JOB_TASK_STATUS_CANCELLING; - QW_ERR_JRET(qwUpdateTaskInfo(task, QW_TASK_INFO_STATUS, &newStatus)); + QW_ERR_JRET(qwUpdateTaskInfo(mgmt, task, QW_TASK_INFO_STATUS, &newStatus, QW_IDS())); } QW_UNLOCK(QW_WRITE, &task->lock); @@ -441,50 +458,60 @@ _return: QW_RET(code); } -int32_t qwDropTask(SQWorkerMgmt *mgmt, uint64_t sId, uint64_t queryId, uint64_t taskId) { +int32_t qwDropTask(SQWorkerMgmt *mgmt, uint64_t sId, uint64_t qId, uint64_t tId) { SQWSchStatus *sch = NULL; SQWTaskStatus *task = NULL; int32_t code = 0; - char id[sizeof(queryId) + sizeof(taskId)] = {0}; - QW_SET_QTID(id, queryId, taskId); - - QW_LOCK(QW_WRITE, &mgmt->resLock); - if (mgmt->resHash) { - taosHashRemove(mgmt->resHash, id, sizeof(id)); - } - QW_UNLOCK(QW_WRITE, &mgmt->resLock); - if (TSDB_CODE_SUCCESS != qwAcquireScheduler(QW_WRITE, mgmt, sId, &sch, QW_NOT_EXIST_RET_ERR)) { - qWarn("scheduler %"PRIx64" doesn't exist", sId); + char id[sizeof(qId) + sizeof(tId)] = {0}; + QW_SET_QTID(id, qId, tId); + + QW_LOCK(QW_WRITE, &mgmt->ctxLock); + if (mgmt->ctxHash) { + if (taosHashRemove(mgmt->ctxHash, id, sizeof(id))) { + QW_TASK_WLOG("taosHashRemove from ctx hash failed, id:%s", id); + } + } + QW_UNLOCK(QW_WRITE, &mgmt->ctxLock); + + if (qwAcquireScheduler(QW_WRITE, mgmt, sId, &sch)) { + 
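/* Illustrative note, not part of this patch: qwDropTask() tears state down in a
   fixed order -- the mgmt->ctxHash entry (executor and sink handles) goes
   first, then the scheduler's tasksHash record -- so a concurrent fetch cannot
   pick up handles for a task whose status record is about to vanish:

       taosHashRemove(mgmt->ctxHash, id, sizeof(id));   // 1. execution context
       taosHashRemove(sch->tasksHash, id, sizeof(id));  // 2. status record

   Missing entries are tolerated at both levels, since a drop may race with
   normal completion; hence the warnings below rather than hard errors. */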
QW_TASK_WLOG("scheduler does not exist, sch:%p", sch); return TSDB_CODE_SUCCESS; } - if (qwAcquireTask(QW_WRITE, sch, queryId, taskId, &task)) { + if (qwAcquireTask(QW_WRITE, sch, qId, tId, &task)) { qwReleaseScheduler(QW_WRITE, mgmt); - qWarn("scheduler %"PRIx64" queryId %"PRIx64" taskId:%"PRIx64" doesn't exist", sId, queryId, taskId); + QW_TASK_WLOG("task does not exist, task:%p", task); return TSDB_CODE_SUCCESS; } - taosHashRemove(sch->tasksHash, id, sizeof(id)); + QW_TASK_DLOG("drop task, status:%d, code:%x, ready:%d, cancel:%d, drop:%d", task->status, task->code, task->ready, task->cancel, task->drop); + + if (taosHashRemove(sch->tasksHash, id, sizeof(id))) { + QW_TASK_ELOG("taosHashRemove task from hash failed, task:%p", task); + QW_ERR_JRET(TSDB_CODE_QRY_APP_ERROR); + } + +_return: qwReleaseTask(QW_WRITE, sch); qwReleaseScheduler(QW_WRITE, mgmt); - return TSDB_CODE_SUCCESS; + QW_RET(code); } -int32_t qwCancelDropTask(SQWorkerMgmt *mgmt, uint64_t sId, uint64_t queryId, uint64_t taskId) { +int32_t qwCancelDropTask(SQWorkerMgmt *mgmt, uint64_t sId, uint64_t qId, uint64_t tId) { SQWSchStatus *sch = NULL; SQWTaskStatus *task = NULL; int32_t code = 0; - QW_ERR_RET(qwAcquireScheduler(QW_READ, mgmt, sId, &sch, QW_NOT_EXIST_ADD)); + QW_ERR_RET(qwAddAcquireScheduler(QW_READ, mgmt, sId, &sch)); - if (qwAcquireTask(QW_READ, sch, queryId, taskId, &task)) { + if (qwAcquireTask(QW_READ, sch, qId, tId, &task)) { qwReleaseScheduler(QW_READ, mgmt); - code = qwAddTask(mgmt, sId, queryId, taskId, JOB_TASK_STATUS_NOT_START, QW_EXIST_ACQUIRE, &sch, &task); + code = qwAddTask(mgmt, sId, qId, tId, JOB_TASK_STATUS_NOT_START, QW_EXIST_ACQUIRE, &sch, &task); if (code) { qwReleaseScheduler(QW_READ, mgmt); QW_ERR_RET(code); @@ -500,7 +527,7 @@ int32_t qwCancelDropTask(SQWorkerMgmt *mgmt, uint64_t sId, uint64_t queryId, uin if (task->status == JOB_TASK_STATUS_EXECUTING) { newStatus = JOB_TASK_STATUS_DROPPING; - QW_ERR_JRET(qwUpdateTaskInfo(task, QW_TASK_INFO_STATUS, &newStatus)); + QW_ERR_JRET(qwUpdateTaskInfo(mgmt, task, QW_TASK_INFO_STATUS, &newStatus, QW_IDS())); } else if (task->status == JOB_TASK_STATUS_CANCELLING || task->status == JOB_TASK_STATUS_DROPPING || task->status == JOB_TASK_STATUS_NOT_START) { QW_UNLOCK(QW_WRITE, &task->lock); qwReleaseTask(QW_READ, sch); @@ -512,7 +539,7 @@ int32_t qwCancelDropTask(SQWorkerMgmt *mgmt, uint64_t sId, uint64_t queryId, uin qwReleaseTask(QW_READ, sch); qwReleaseScheduler(QW_READ, mgmt); - QW_ERR_RET(qwDropTask(mgmt, sId, queryId, taskId)); + QW_ERR_RET(qwDropTask(mgmt, sId, qId, tId)); return TSDB_CODE_SUCCESS; } @@ -743,7 +770,7 @@ int32_t qwCheckAndSendReadyRsp(SQWorkerMgmt *mgmt, uint64_t sId, uint64_t queryI SQWTaskStatus *task = NULL; int32_t code = 0; - QW_ERR_RET(qwAcquireScheduler(QW_READ, mgmt, sId, &sch, QW_NOT_EXIST_RET_ERR)); + QW_ERR_RET(qwAcquireScheduler(QW_READ, mgmt, sId, &sch)); QW_ERR_JRET(qwAcquireTask(QW_READ, sch, queryId, taskId, &task)); @@ -785,7 +812,7 @@ int32_t qwSetAndSendReadyRsp(SQWorkerMgmt *mgmt, uint64_t sId, uint64_t queryId, SQWTaskStatus *task = NULL; int32_t code = 0; - QW_ERR_RET(qwAcquireScheduler(QW_READ, mgmt, sId, &sch, QW_NOT_EXIST_RET_ERR)); + QW_ERR_RET(qwAcquireScheduler(QW_READ, mgmt, sId, &sch)); QW_ERR_JRET(qwAcquireTask(QW_READ, sch, queryId, taskId, &task)); @@ -816,7 +843,7 @@ _return: QW_RET(code); } -int32_t qwCheckTaskCancelDrop( SQWorkerMgmt *mgmt, uint64_t sId, uint64_t queryId, uint64_t taskId, bool *needStop) { +int32_t qwCheckTaskCancelDrop(SQWorkerMgmt *mgmt, uint64_t sId, uint64_t qId, uint64_t tId, bool 
*needStop) { SQWSchStatus *sch = NULL; SQWTaskStatus *task = NULL; int32_t code = 0; @@ -824,11 +851,11 @@ int32_t qwCheckTaskCancelDrop( SQWorkerMgmt *mgmt, uint64_t sId, uint64_t queryI *needStop = false; - if (qwAcquireScheduler(QW_READ, mgmt, sId, &sch, QW_NOT_EXIST_RET_ERR)) { + if (qwAcquireScheduler(QW_READ, mgmt, sId, &sch)) { return TSDB_CODE_SUCCESS; } - if (qwAcquireTask(QW_READ, sch, queryId, taskId, &task)) { + if (qwAcquireTask(QW_READ, sch, qId, tId, &task)) { qwReleaseScheduler(QW_READ, mgmt); return TSDB_CODE_SUCCESS; } @@ -836,9 +863,10 @@ int32_t qwCheckTaskCancelDrop( SQWorkerMgmt *mgmt, uint64_t sId, uint64_t queryI QW_LOCK(QW_READ, &task->lock); if ((!task->cancel) && (!task->drop)) { - qError("no cancel or drop, but task:%"PRIx64" exists", taskId); + QW_TASK_ELOG("no cancel or drop but task exists, status:%d", task->status); QW_UNLOCK(QW_READ, &task->lock); + qwReleaseTask(QW_READ, sch); qwReleaseScheduler(QW_READ, mgmt); @@ -851,17 +879,21 @@ int32_t qwCheckTaskCancelDrop( SQWorkerMgmt *mgmt, uint64_t sId, uint64_t queryI if (task->cancel) { QW_LOCK(QW_WRITE, &task->lock); - qwUpdateTaskInfo(task, QW_TASK_INFO_STATUS, &status); + code = qwUpdateTaskInfo(mgmt, task, QW_TASK_INFO_STATUS, &status, QW_IDS()); QW_UNLOCK(QW_WRITE, &task->lock); + + QW_ERR_JRET(code); } if (task->drop) { qwReleaseTask(QW_READ, sch); qwReleaseScheduler(QW_READ, mgmt); - return qwDropTask(mgmt, sId, queryId, taskId); + QW_RET(qwDropTask(mgmt, sId, qId, tId)); } +_return: + qwReleaseTask(QW_READ, sch); qwReleaseScheduler(QW_READ, mgmt); @@ -875,7 +907,7 @@ int32_t qwQueryPostProcess(SQWorkerMgmt *mgmt, uint64_t sId, uint64_t qId, uint6 int32_t code = 0; int8_t newStatus = JOB_TASK_STATUS_CANCELLED; - code = qwAcquireScheduler(QW_READ, mgmt, sId, &sch, QW_NOT_EXIST_ADD); + code = qwAddAcquireScheduler(QW_READ, mgmt, sId, &sch); if (code) { qError("sId:%"PRIx64" not in cache", sId); QW_ERR_RET(code); @@ -895,7 +927,7 @@ int32_t qwQueryPostProcess(SQWorkerMgmt *mgmt, uint64_t sId, uint64_t qId, uint6 if (task->cancel) { QW_LOCK(QW_WRITE, &task->lock); - qwUpdateTaskInfo(task, QW_TASK_INFO_STATUS, &newStatus); + qwUpdateTaskInfo(mgmt, task, QW_TASK_INFO_STATUS, &newStatus, QW_IDS()); QW_UNLOCK(QW_WRITE, &task->lock); } @@ -910,7 +942,7 @@ int32_t qwQueryPostProcess(SQWorkerMgmt *mgmt, uint64_t sId, uint64_t qId, uint6 if (!(task->cancel || task->drop)) { QW_LOCK(QW_WRITE, &task->lock); - qwUpdateTaskInfo(task, QW_TASK_INFO_STATUS, &status); + qwUpdateTaskInfo(mgmt, task, QW_TASK_INFO_STATUS, &status, QW_IDS()); task->code = errCode; QW_UNLOCK(QW_WRITE, &task->lock); } @@ -921,6 +953,86 @@ int32_t qwQueryPostProcess(SQWorkerMgmt *mgmt, uint64_t sId, uint64_t qId, uint6 return TSDB_CODE_SUCCESS; } +int32_t qwScheduleDataSink(SQWTaskCtx *handles, SQWorkerMgmt *mgmt, uint64_t sId, uint64_t queryId, uint64_t taskId, SRpcMsg *pMsg) { + if (atomic_load_8(&handles->sinkScheduled)) { + qDebug("data sink already scheduled"); + return TSDB_CODE_SUCCESS; + } + + SSinkDataReq * req = (SSinkDataReq *)rpcMallocCont(sizeof(SSinkDataReq)); + if (NULL == req) { + qError("rpcMallocCont %d failed", (int32_t)sizeof(SSinkDataReq)); + QW_ERR_RET(TSDB_CODE_QRY_OUT_OF_MEMORY); + } + + req->header.vgId = mgmt->nodeId; + req->sId = sId; + req->queryId = queryId; + req->taskId = taskId; + + SRpcMsg pNewMsg = { + .handle = pMsg->handle, + .ahandle = pMsg->ahandle, + .msgType = TDMT_VND_SCHEDULE_DATA_SINK, + .pCont = req, + .contLen = sizeof(SSinkDataReq), + .code = 0, + }; + + int32_t code = 
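/* Illustrative sketch, not part of this patch: the call completed below hands
   the freshly built message back to the owning node through the
   putReqToQueryQFp callback registered at qWorkerInit() time; for a vnode that
   callback is vnodePutReqToVQueryQ. A conforming implementation might look
   like this (the body is hypothetical -- the real queue API may differ):

       int32_t vnodePutReqToVQueryQ(void *node, struct SRpcMsg *pMsg) {
         SVnode *pVnode = (SVnode *)node;
         return vnodeEnqueueQueryMsg(pVnode, pMsg);  // hypothetical enqueue call
       }

   Hiding the queue behind a function pointer keeps the qworker reusable from
   vnode, qnode, or snode without linking against their queue code. */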
(*mgmt->putToQueueFp)(mgmt->nodeObj, &pNewMsg);
+  if (TSDB_CODE_SUCCESS != code) {
+    qError("put data sink schedule msg to queue failed, code:%x", code);
+    rpcFreeCont(req);
+    QW_ERR_RET(code);
+  }
+
+  qDebug("put data sink schedule msg to query queue");
+
+  return TSDB_CODE_SUCCESS;
+}
+
+int32_t qwScheduleQuery(SQWTaskCtx *handles, SQWorkerMgmt *mgmt, uint64_t sId, uint64_t queryId, uint64_t taskId, SRpcMsg *pMsg) {
+  if (atomic_load_8(&handles->queryScheduled)) {
+    qDebug("query already scheduled");
+    return TSDB_CODE_SUCCESS;
+  }
+
+  QW_ERR_RET(qwUpdateTaskStatus(mgmt, sId, queryId, taskId, JOB_TASK_STATUS_EXECUTING));
+
+  SQueryContinueReq * req = (SQueryContinueReq *)rpcMallocCont(sizeof(SQueryContinueReq));
+  if (NULL == req) {
+    qError("rpcMallocCont %d failed", (int32_t)sizeof(SQueryContinueReq));
+    QW_ERR_RET(TSDB_CODE_QRY_OUT_OF_MEMORY);
+  }
+
+  req->header.vgId = mgmt->nodeId;
+  req->sId = sId;
+  req->queryId = queryId;
+  req->taskId = taskId;
+
+  SRpcMsg pNewMsg = {
+      .handle = pMsg->handle,
+      .ahandle = pMsg->ahandle,
+      .msgType = TDMT_VND_QUERY_CONTINUE,
+      .pCont = req,
+      .contLen = sizeof(SQueryContinueReq),
+      .code = 0,
+  };
+
+  int32_t code = (*mgmt->putToQueueFp)(mgmt->nodeObj, &pNewMsg);
+  if (TSDB_CODE_SUCCESS != code) {
+    qError("put query continue msg to queue failed, code:%x", code);
+    rpcFreeCont(req);
+    QW_ERR_RET(code);
+  }
+
+
+  qDebug("put query continue msg to query queue");
+
+  return TSDB_CODE_SUCCESS;
+}
+
+
 int32_t qwHandleFetch(SQWorkerMgmt *mgmt, uint64_t sId, uint64_t queryId, uint64
 
   SQWSchStatus *sch = NULL;
@@ -932,11 +1044,15 @@ int32_t qwHandleFetch(SQWorkerMgmt *mgmt, uint64_t sId, uint64_t queryId, uint64
   int32_t dataLength = 0;
   SRetrieveTableRsp *rsp = NULL;
   bool queryEnd = false;
-  SQWorkerTaskHandlesCache *handles = NULL;
+  SQWTaskCtx *handles = NULL;
 
-  QW_ERR_JRET(qwAcquireTaskHandles(QW_READ, mgmt, queryId, taskId, &handles));
+  QW_ERR_JRET(qwAcquireTaskCtx(QW_READ, mgmt, queryId, taskId, &handles));
+  if (atomic_load_8(&handles->needRsp)) {
+    qError("last fetch not responded yet");
+    QW_ERR_JRET(TSDB_CODE_QRY_APP_ERROR);
+  }
 
-  QW_ERR_JRET(qwAcquireScheduler(QW_READ, mgmt, sId, &sch, QW_NOT_EXIST_RET_ERR));
+  QW_ERR_JRET(qwAcquireScheduler(QW_READ, mgmt, sId, &sch));
   QW_ERR_JRET(qwAcquireTask(QW_READ, sch, queryId, taskId, &task));
 
   QW_LOCK(QW_READ, &task->lock);
@@ -974,15 +1090,15 @@ int32_t qwHandleFetch(SQWorkerMgmt *mgmt, uint64_t sId, uint64_t queryId, uint64
 
     if (DS_BUF_EMPTY == output.bufStatus && output.queryEnd) {
       rsp->completed = 1;
+
+      QW_ERR_JRET(qwUpdateTaskStatus(mgmt, sId, queryId, taskId, JOB_TASK_STATUS_SUCCEED));
     }
 
+    // Note: schedule the data sink first; the query is scheduled only after the sink is done
     if (output.needSchedule) {
-      //TODO
-    }
-
-    if ((!output.queryEnd) && DS_BUF_LOW == output.bufStatus) {
-      //TODO
-      //UPDATE STATUS TO EXECUTING
+      QW_ERR_JRET(qwScheduleDataSink(handles, mgmt, sId, queryId, taskId, pMsg));
+    } else if ((!output.queryEnd) && (DS_BUF_LOW == output.bufStatus || DS_BUF_EMPTY == output.bufStatus)) {
+      QW_ERR_JRET(qwScheduleQuery(handles, mgmt, sId, queryId, taskId, pMsg));
     }
   } else {
     if (dataLength < 0) {
@@ -991,12 +1107,11 @@ int32_t qwHandleFetch(SQWorkerMgmt *mgmt, uint64_t sId, uint64_t queryId, uint64
     }
 
     if (queryEnd) {
-      QW_ERR_JRET(qwQueryPostProcess(mgmt, sId, queryId, taskId, JOB_TASK_STATUS_SUCCEED, code));
+      QW_ERR_JRET(qwUpdateTaskStatus(mgmt, sId, queryId, taskId, JOB_TASK_STATUS_SUCCEED));
     } else {
-      if (task->status != JOB_TASK_STATUS_EXECUTING) {
-        qError("invalid status 
%d for fetch without res", task->status); - QW_ERR_JRET(TSDB_CODE_QRY_APP_ERROR); - } + assert(0 == handles->needRsp); + + qDebug("no res data in sink, need response later"); QW_LOCK(QW_WRITE, &handles->lock); handles->needRsp = true; @@ -1028,7 +1143,12 @@ _return: QW_RET(code); } -int32_t qWorkerInit(SQWorkerCfg *cfg, void **qWorkerMgmt) { +int32_t qWorkerInit(int8_t nodeType, int32_t nodeId, SQWorkerCfg *cfg, void **qWorkerMgmt, void *nodeObj, putReqToQueryQFp fp) { + if (NULL == qWorkerMgmt || NULL == nodeObj || NULL == fp) { + qError("invalid param to init qworker"); + QW_RET(TSDB_CODE_QRY_INVALID_INPUT); + } + SQWorkerMgmt *mgmt = calloc(1, sizeof(SQWorkerMgmt)); if (NULL == mgmt) { qError("calloc %d failed", (int32_t)sizeof(SQWorkerMgmt)); @@ -1037,29 +1157,46 @@ int32_t qWorkerInit(SQWorkerCfg *cfg, void **qWorkerMgmt) { if (cfg) { mgmt->cfg = *cfg; + if (0 == mgmt->cfg.maxSchedulerNum) { + mgmt->cfg.maxSchedulerNum = QWORKER_DEFAULT_SCHEDULER_NUMBER; + } + if (0 == mgmt->cfg.maxTaskNum) { + mgmt->cfg.maxTaskNum = QWORKER_DEFAULT_TASK_NUMBER; + } + if (0 == mgmt->cfg.maxSchTaskNum) { + mgmt->cfg.maxSchTaskNum = QWORKER_DEFAULT_SCH_TASK_NUMBER; + } } else { mgmt->cfg.maxSchedulerNum = QWORKER_DEFAULT_SCHEDULER_NUMBER; - mgmt->cfg.maxResCacheNum = QWORKER_DEFAULT_RES_CACHE_NUMBER; + mgmt->cfg.maxTaskNum = QWORKER_DEFAULT_TASK_NUMBER; mgmt->cfg.maxSchTaskNum = QWORKER_DEFAULT_SCH_TASK_NUMBER; } mgmt->schHash = taosHashInit(mgmt->cfg.maxSchedulerNum, taosGetDefaultHashFunction(TSDB_DATA_TYPE_UBIGINT), false, HASH_NO_LOCK); if (NULL == mgmt->schHash) { tfree(mgmt); - QW_ERR_LRET(TSDB_CODE_QRY_OUT_OF_MEMORY, "init %d schduler hash failed", mgmt->cfg.maxSchedulerNum); + qError("init %d scheduler hash failed", mgmt->cfg.maxSchedulerNum); + QW_ERR_RET(TSDB_CODE_QRY_OUT_OF_MEMORY); } - mgmt->resHash = taosHashInit(mgmt->cfg.maxResCacheNum, taosGetDefaultHashFunction(TSDB_DATA_TYPE_BINARY), false, HASH_NO_LOCK); - if (NULL == mgmt->resHash) { + mgmt->ctxHash = taosHashInit(mgmt->cfg.maxTaskNum, taosGetDefaultHashFunction(TSDB_DATA_TYPE_BINARY), false, HASH_NO_LOCK); + if (NULL == mgmt->ctxHash) { taosHashCleanup(mgmt->schHash); mgmt->schHash = NULL; tfree(mgmt); - - QW_ERR_LRET(TSDB_CODE_QRY_OUT_OF_MEMORY, "init %d res cache hash failed", mgmt->cfg.maxResCacheNum); + qError("init %d task ctx hash failed", mgmt->cfg.maxTaskNum); + QW_ERR_RET(TSDB_CODE_QRY_OUT_OF_MEMORY); } + mgmt->nodeType = nodeType; + mgmt->nodeId = nodeId; + mgmt->nodeObj = nodeObj; + mgmt->putToQueueFp = fp; + *qWorkerMgmt = mgmt; + qDebug("qworker initialized for node, type:%d, id:%d, handle:%p", mgmt->nodeType, mgmt->nodeId, mgmt); + return TSDB_CODE_SUCCESS; } @@ -1069,25 +1206,31 @@ int32_t qWorkerProcessQueryMsg(void *node, void *qWorkerMgmt, SRpcMsg *pMsg) { } int32_t code = 0; - SSubQueryMsg *msg = pMsg->pCont; - if (NULL == msg || pMsg->contLen <= sizeof(*msg)) { - qError("invalid query msg"); - QW_ERR_JRET(TSDB_CODE_QRY_INVALID_INPUT); - } - - msg->sId = htobe64(msg->sId); - msg->queryId = htobe64(msg->queryId); - msg->taskId = htobe64(msg->taskId); - msg->contentLen = ntohl(msg->contentLen); - bool queryRsped = false; bool needStop = false; struct SSubplan *plan = NULL; + SSubQueryMsg *msg = pMsg->pCont; + SQWorkerMgmt *mgmt = (SQWorkerMgmt *)qWorkerMgmt; + + if (NULL == msg || pMsg->contLen <= sizeof(*msg)) { + QW_ELOG("invalid query msg, contLen:%d", pMsg->contLen); + QW_ERR_JRET(TSDB_CODE_QRY_INVALID_INPUT); + } + + msg->sId = be64toh(msg->sId); + msg->queryId = be64toh(msg->queryId); + msg->taskId = 
be64toh(msg->taskId);
+  msg->contentLen = ntohl(msg->contentLen);
+
+  uint64_t sId = msg->sId;
+  uint64_t qId = msg->queryId;
+  uint64_t tId = msg->taskId;
 
   QW_ERR_JRET(qwCheckTaskCancelDrop(qWorkerMgmt, msg->sId, msg->queryId, msg->taskId, &needStop));
   if (needStop) {
     qWarn("task needs to stop");
-    QW_ERR_JRET(TSDB_CODE_QRY_TASK_CANCELLED);
+    qwBuildAndSendQueryRsp(pMsg, TSDB_CODE_QRY_TASK_CANCELLED);
+    QW_ERR_RET(TSDB_CODE_QRY_TASK_CANCELLED);
   }
 
   code = qStringToSubplan(msg->msg, &plan);
@@ -1144,30 +1287,59 @@ int32_t qWorkerProcessQueryContinueMsg(void *node, void *qWorkerMgmt, SRpcMsg *p
   int32_t code = 0;
   int8_t status = 0;
   bool queryDone = false;
-  uint64_t sId, qId, tId;
+  SQueryContinueReq *req = (SQueryContinueReq *)pMsg->pCont;
+  bool needStop = false;
+  SQWTaskCtx *handles = NULL;
 
-  //TODO call executer to continue execute subquery
-  code = 0;
-  void *data = NULL;
-  queryDone = false;
-  //TODO call executer to continue execute subquery
+  QW_ERR_JRET(qwAcquireTaskCtx(QW_READ, qWorkerMgmt, req->queryId, req->taskId, &handles));
+
+  qTaskInfo_t taskHandle = handles->taskHandle;
+  DataSinkHandle sinkHandle = handles->sinkHandle;
+  bool needRsp = handles->needRsp;
+
+  qwReleaseTaskResCache(QW_READ, qWorkerMgmt);
+
+  QW_ERR_JRET(qwCheckTaskCancelDrop(qWorkerMgmt, req->sId, req->queryId, req->taskId, &needStop));
+  if (needStop) {
+    qWarn("task needs to stop");
+    if (needRsp) {
+      qwBuildAndSendQueryRsp(pMsg, TSDB_CODE_QRY_TASK_CANCELLED);
+    }
+    QW_ERR_RET(TSDB_CODE_QRY_TASK_CANCELLED);
+  }
+
+  DataSinkHandle newHandle = NULL;
+  code = qExecTask(taskHandle, &newHandle);
+  if (code) {
+    qError("qExecTask failed, code:%x", code);
+    QW_ERR_JRET(code);
+  }
+
+  if (sinkHandle != newHandle) {
+    qError("data sink mismatch");
+    QW_ERR_JRET(TSDB_CODE_QRY_APP_ERROR);
+  }
+
+_return:
+
+  if (needRsp) {
+    code = qwBuildAndSendQueryRsp(pMsg, code);
+  }
 
   if (TSDB_CODE_SUCCESS != code) {
     status = JOB_TASK_STATUS_FAILED;
-  } else if (queryDone) {
-    status = JOB_TASK_STATUS_SUCCEED;
   } else {
     status = JOB_TASK_STATUS_PARTIAL_SUCCEED;
   }
 
-  code = qwQueryPostProcess(qWorkerMgmt, sId, qId, tId, status, code);
+  code = qwQueryPostProcess(qWorkerMgmt, req->sId, req->queryId, req->taskId, status, code);
 
   QW_RET(code);
 }
 
-int32_t qWorkerProcessSinkDataMsg(void *node, void *qWorkerMgmt, SRpcMsg *pMsg){
+int32_t qWorkerProcessDataSinkMsg(void *node, void *qWorkerMgmt, SRpcMsg *pMsg){
   if (NULL == node || NULL == qWorkerMgmt || NULL == pMsg) {
     return TSDB_CODE_QRY_INVALID_INPUT;
   }
@@ -1176,8 +1348,9 @@ int32_t qWorkerProcessSinkDataMsg(void *node, void *qWorkerMgmt, SRpcMsg *pMsg){
   SSinkDataReq *msg = pMsg->pCont;
   if (NULL == msg || pMsg->contLen < sizeof(*msg)) {
     qError("invalid sink data msg");
     QW_ERR_RET(TSDB_CODE_QRY_INVALID_INPUT);
-  }
+  }
+
+  //dsScheduleProcess(); //TODO
 
   return TSDB_CODE_SUCCESS;
 
diff --git a/source/libs/qworker/test/qworkerTests.cpp b/source/libs/qworker/test/qworkerTests.cpp
index eaa79fd39a..4962eab460 100644
--- a/source/libs/qworker/test/qworkerTests.cpp
+++ b/source/libs/qworker/test/qworkerTests.cpp
@@ -42,6 +42,11 @@ int32_t qwtStringToPlan(const char* str, SSubplan** subplan) {
   return 0;
 }
 
+int32_t qwtPutReqToQueue(void *node, struct SRpcMsg *pMsg) {
+  return 0;
+}
+
+
 void qwtRpcSendResponse(const SRpcMsg *pRsp) {
   if (TDMT_VND_TASKS_STATUS_RSP == pRsp->msgType) {
     SSchedulerStatusRsp *rsp = (SSchedulerStatusRsp *)pRsp->pCont;
@@ -258,7 +263,7 @@ TEST(seqTest, normalCase) {
   stubSetStringToPlan();
   stubSetRpcSendResponse();
 
-  code = qWorkerInit(NULL, &mgmt);
+  code = qWorkerInit(NODE_TYPE_VNODE, 1, NULL, &mgmt, mockPointer, 
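/* Illustrative note, not part of this patch: with the widened signature the
   tests build a qworker bound to a fake node -- mockPointer stands in for the
   node object and qwtPutReqToQueue (defined above, always returns 0) absorbs
   anything the worker re-enqueues. Passing NULL for SQWorkerCfg falls back to
   the QWORKER_DEFAULT_* limits. Minimal usage sketch:

       void *mgmt = NULL;
       int32_t code = qWorkerInit(NODE_TYPE_VNODE, 1, NULL, &mgmt,
                                  mockPointer, qwtPutReqToQueue);
       assert(0 == code && NULL != mgmt);
*/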
qwtPutReqToQueue); ASSERT_EQ(code, 0); statusMsg.sId = htobe64(1); @@ -328,7 +333,7 @@ TEST(seqTest, cancelFirst) { stubSetStringToPlan(); stubSetRpcSendResponse(); - code = qWorkerInit(NULL, &mgmt); + code = qWorkerInit(NODE_TYPE_VNODE, 1, NULL, &mgmt, mockPointer, qwtPutReqToQueue); ASSERT_EQ(code, 0); statusMsg.sId = htobe64(1); @@ -402,7 +407,7 @@ TEST(seqTest, randCase) { srand(time(NULL)); - code = qWorkerInit(NULL, &mgmt); + code = qWorkerInit(NODE_TYPE_VNODE, 1, NULL, &mgmt, mockPointer, qwtPutReqToQueue); ASSERT_EQ(code, 0); int32_t t = 0; @@ -446,7 +451,7 @@ TEST(seqTest, multithreadRand) { srand(time(NULL)); - code = qWorkerInit(NULL, &mgmt); + code = qWorkerInit(NODE_TYPE_VNODE, 1, NULL, &mgmt, mockPointer, qwtPutReqToQueue); ASSERT_EQ(code, 0); pthread_attr_t thattr; From 5c6924e9e66f2f62ace4e57c731c77047974f0f1 Mon Sep 17 00:00:00 2001 From: yihaoDeng Date: Wed, 12 Jan 2022 23:36:15 +0800 Subject: [PATCH 04/20] add libuv --- source/libs/transport/src/rpcMain.c | 91 +++++++++++++++++++---------- 1 file changed, 60 insertions(+), 31 deletions(-) diff --git a/source/libs/transport/src/rpcMain.c b/source/libs/transport/src/rpcMain.c index 3095ddb9d2..542bde37b9 100644 --- a/source/libs/transport/src/rpcMain.c +++ b/source/libs/transport/src/rpcMain.c @@ -13,9 +13,7 @@ * along with this program. If not, see . */ -#ifdef USE_UV #include -#endif #include "lz4.h" #include "os.h" #include "rpcCache.h" @@ -78,12 +76,15 @@ typedef struct SThreadObj { } SThreadObj; typedef struct SServerObj { + pthread_t thread; uv_tcp_t server; uv_loop_t* loop; int workerIdx; int numOfThread; SThreadObj** pThreadObj; uv_pipe_t** pipe; + uint32_t ip; + uint32_t port; } SServerObj; typedef struct SConnCtx { @@ -93,33 +94,31 @@ typedef struct SConnCtx { int ref; } SConnCtx; -static void allocBuffer(uv_handle_t* handle, size_t suggested_size, uv_buf_t* buf); -static void onTimeout(uv_timer_t* handle); -static void onRead(uv_stream_t* cli, ssize_t nread, const uv_buf_t* buf); -static void onWrite(uv_write_t* req, int status); -static void onAccept(uv_stream_t* stream, int status); -void onConnection(uv_stream_t* q, ssize_t nread, const uv_buf_t* buf); -static void workerAsyncCB(uv_async_t* handle); +static void allocBuffer(uv_handle_t* handle, size_t suggested_size, uv_buf_t* buf); +static void onTimeout(uv_timer_t* handle); +static void onRead(uv_stream_t* cli, ssize_t nread, const uv_buf_t* buf); +static void onWrite(uv_write_t* req, int status); +static void onAccept(uv_stream_t* stream, int status); +static void onConnection(uv_stream_t* q, ssize_t nread, const uv_buf_t* buf); +static void workerAsyncCB(uv_async_t* handle); + static void* workerThread(void* arg); +static void* acceptThread(void* arg); + +void* taosInitServer(uint32_t ip, uint32_t port, char* label, int numOfThreads, void* fp, void* shandle); int32_t rpcInit() { return -1; } void rpcCleanup() { return; }; -void* rpcOpen(const SRpcInit* pInit) { - SRpcInfo* pRpc = calloc(1, sizeof(SRpcInfo)); - if (pRpc == NULL) { - return NULL; - } - if (pInit->label) { - tstrncpy(pRpc->label, pInit->label, sizeof(pRpc->label)); - } - pRpc->numOfThreads = pInit->numOfThreads > TSDB_MAX_RPC_THREADS ? 
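/* Illustrative note, not part of this patch: the refactor below splits the old
   rpcOpen() into a thin wrapper plus taosInitServer(), and moves uv_listen()
   onto a dedicated accept thread. Accepted TCP handles are shipped to the
   worker loops over uv_pipe_t channels using libuv's handle-passing slot in
   uv_write2(); the 1-byte buffer is just a payload that rides along with the
   handle (libuv requires a non-empty buffer on some platforms):

       uv_write_t *wr = malloc(sizeof(uv_write_t));
       uv_buf_t tag = uv_buf_init("a", 1);          // dummy payload
       uv_write2(wr, (uv_stream_t *)pipeToWorker,   // pipe created with ipc = 1
                 &tag, 1, (uv_stream_t *)acceptedCli, onWrite);

   The worker side recovers the handle in its read callback via
   uv_pipe_pending_count(), uv_pipe_pending_type(), and uv_accept(). */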
TSDB_MAX_RPC_THREADS : pInit->numOfThreads;
+void* taosInitServer(uint32_t ip, uint32_t port, char* label, int numOfThreads, void* fp, void* shandle) {
   SServerObj* srv = calloc(1, sizeof(SServerObj));
   srv->loop = (uv_loop_t*)malloc(sizeof(uv_loop_t));
-  srv->numOfThread = pRpc->numOfThreads;
+  srv->numOfThread = numOfThreads;
   srv->workerIdx = 0;
   srv->pThreadObj = (SThreadObj**)calloc(srv->numOfThread, sizeof(SThreadObj*));
   srv->pipe = (uv_pipe_t**)calloc(srv->numOfThread, sizeof(uv_pipe_t*));
+  srv->ip = ip;
+  srv->port = port;
   uv_loop_init(srv->loop);
 
   for (int i = 0; i < srv->numOfThread; i++) {
@@ -136,24 +135,34 @@ void* rpcOpen(const SRpcInit* pInit) {
     srv->pThreadObj[i]->pipe = &(srv->pipe[i][1]);  // init read
     int err = pthread_create(&(srv->pThreadObj[i]->thread), NULL, workerThread, (void*)(srv->pThreadObj[i]));
     if (err == 0) {
-      tError("sucess to create worker thread %d", i);
+      tDebug("success to create worker thread %d", i);
       // printf("thread %d create\n", i);
     } else {
+      // clear all resources later
      tError("failed to create worker thread %d", i);
-      return NULL;
     }
   }
-  uv_tcp_init(srv->loop, &srv->server);
-  struct sockaddr_in bind_addr;
-  uv_ip4_addr("0.0.0.0", pInit->localPort, &bind_addr);
-  uv_tcp_bind(&srv->server, (const struct sockaddr*)&bind_addr, 0);
-  int err = 0;
-  if ((err = uv_listen((uv_stream_t*)&srv->server, 128, onAccept)) != 0) {
-    tError("Listen error %s\n", uv_err_name(err));
+
+  int err = pthread_create(&srv->thread, NULL, acceptThread, (void*)srv);
+  if (err == 0) {
+    tDebug("success to create accept thread");
+  } else {
+    // clear all resources later
+  }
+
+  return srv;
+}
+void* rpcOpen(const SRpcInit* pInit) {
+  SRpcInfo* pRpc = calloc(1, sizeof(SRpcInfo));
+  if (pRpc == NULL) {
     return NULL;
   }
-  uv_run(srv->loop, UV_RUN_DEFAULT);
+  if (pInit->label) {
+    tstrncpy(pRpc->label, pInit->label, sizeof(pRpc->label));
+  }
+  pRpc->numOfThreads = pInit->numOfThreads > TSDB_MAX_RPC_THREADS ? 
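/* Illustrative note, not part of this patch: rpcOpen() clamps the requested
   thread count to TSDB_MAX_RPC_THREADS and delegates the rest to
   taosInitServer(). Connections are then spread across the workers
   round-robin in onAccept():

       pObj->workerIdx = (pObj->workerIdx + 1) % pObj->numOfThread;
       // with 4 workers the dispatch order is 1, 2, 3, 0, 1, 2, ...

   Round-robin pins each connection to exactly one loop thread, so per-
   connection state never needs locking between workers. */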
TSDB_MAX_RPC_THREADS : pInit->numOfThreads;
+  pRpc->tcphandle = taosInitServer(0, pInit->localPort, pRpc->label, pRpc->numOfThreads, NULL, pRpc);
   return pRpc;
 }
 void rpcClose(void* arg) { return; }
@@ -186,8 +195,11 @@ void onRead(uv_stream_t* cli, ssize_t nread, const uv_buf_t* buf) {
 }
 
 void onWrite(uv_write_t* req, int status) {
+  if (status == 0) {
+    tDebug("data was already written on the stream");
+  }
+
   // opt
-  if (req) tDebug("data already was written on stream");
 }
 
 void workerAsyncCB(uv_async_t* handle) {
@@ -207,7 +219,7 @@ void onAccept(uv_stream_t* stream, int status) {
     uv_write_t* wr = (uv_write_t*)malloc(sizeof(uv_write_t));
     uv_buf_t buf = uv_buf_init("a", 1);
 
-    // despatch to worker thread
+
     pObj->workerIdx = (pObj->workerIdx + 1) % pObj->numOfThread;
     uv_write2(wr, (uv_stream_t*)&(pObj->pipe[pObj->workerIdx][0]), &buf, 1, (uv_stream_t*)cli, onWrite);
   } else {
@@ -257,6 +269,23 @@ void onConnection(uv_stream_t* q, ssize_t nread, const uv_buf_t* buf) {
   }
 }
 
+void* acceptThread(void* arg) {
+  // opt
+  SServerObj* srv = (SServerObj*)arg;
+  uv_tcp_init(srv->loop, &srv->server);
+
+  struct sockaddr_in bind_addr;
+
+  uv_ip4_addr("0.0.0.0", srv->port, &bind_addr);
+  uv_tcp_bind(&srv->server, (const struct sockaddr*)&bind_addr, 0);
+  int err = 0;
+  if ((err = uv_listen((uv_stream_t*)&srv->server, 128, onAccept)) != 0) {
+    tError("Listen error %s\n", uv_err_name(err));
+    return NULL;
+  }
+  uv_run(srv->loop, UV_RUN_DEFAULT);
+  return NULL;
+}
 void* workerThread(void* arg) {
   SThreadObj* pObj = (SThreadObj*)arg;
   int fd = pObj->fd;
 
From 4c5a6ccf946df9efaf661223babdc45cafc2e847 Mon Sep 17 00:00:00 2001
From: dapan1121 
Date: Thu, 13 Jan 2022 11:36:58 +0800
Subject: [PATCH 05/20] feature/qnode

---
 source/libs/qworker/inc/qworkerInt.h |  15 +-
 source/libs/qworker/src/qworker.c    | 286 ++++++++++++++-------------
 2 files changed, 160 insertions(+), 141 deletions(-)

diff --git a/source/libs/qworker/inc/qworkerInt.h b/source/libs/qworker/inc/qworkerInt.h
index b5ec50cc04..2f47c79065 100644
--- a/source/libs/qworker/inc/qworkerInt.h
+++ b/source/libs/qworker/inc/qworkerInt.h
@@ -114,9 +114,13 @@ typedef struct SQWorkerMgmt {
 #define QW_SCH_ELOG(param, ...) qError("QW:%p SID:%"PRIx64 param, mgmt, sId, __VA_ARGS__)
 #define QW_SCH_DLOG(param, ...) qDebug("QW:%p SID:%"PRIx64 param, mgmt, sId, __VA_ARGS__)
 
-#define QW_TASK_ELOG(param, ...) qError("QW:%p SID:%"PRIx64",QID:%"PRIx64",TID:%"PRIx64 param, mgmt, sId, qId, tId, __VA_ARGS__)
-#define QW_TASK_WLOG(param, ...) qWarn("QW:%p SID:%"PRIx64",QID:%"PRIx64",TID:%"PRIx64 param, mgmt, sId, qId, tId, __VA_ARGS__)
-#define QW_TASK_DLOG(param, ...) qDebug("QW:%p SID:%"PRIx64",QID:%"PRIx64",TID:%"PRIx64 param, mgmt, sId, qId, tId, __VA_ARGS__)
+#define QW_TASK_ELOG(param, ...) qError("QW:%p QID:%"PRIx64",TID:%"PRIx64 param, mgmt, qId, tId, __VA_ARGS__)
+#define QW_TASK_WLOG(param, ...) qWarn("QW:%p QID:%"PRIx64",TID:%"PRIx64 param, mgmt, qId, tId, __VA_ARGS__)
+#define QW_TASK_DLOG(param, ...) qDebug("QW:%p QID:%"PRIx64",TID:%"PRIx64 param, mgmt, qId, tId, __VA_ARGS__)
+
+#define QW_SCH_TASK_ELOG(param, ...) qError("QW:%p SID:%"PRIx64",QID:%"PRIx64",TID:%"PRIx64 param, mgmt, sId, qId, tId, __VA_ARGS__)
+#define QW_SCH_TASK_WLOG(param, ...) qWarn("QW:%p SID:%"PRIx64",QID:%"PRIx64",TID:%"PRIx64 param, mgmt, sId, qId, tId, __VA_ARGS__)
+#define QW_SCH_TASK_DLOG(param, ...) 
qDebug("QW:%p SID:%"PRIx64",QID:%"PRIx64",TID:%"PRIx64 param, mgmt, sId, qId, tId, __VA_ARGS__) #define TD_RWLATCH_WRITE_FLAG_COPY 0x40000000 @@ -155,8 +159,9 @@ typedef struct SQWorkerMgmt { -static int32_t qwAcquireScheduler(int32_t rwType, SQWorkerMgmt *mgmt, uint64_t sId, SQWSchStatus **sch); -static int32_t qwAddAcquireScheduler(int32_t rwType, SQWorkerMgmt *mgmt, uint64_t sId, SQWSchStatus **sch); +int32_t qwAcquireScheduler(int32_t rwType, SQWorkerMgmt *mgmt, uint64_t sId, SQWSchStatus **sch); +int32_t qwAcquireAddScheduler(int32_t rwType, SQWorkerMgmt *mgmt, uint64_t sId, SQWSchStatus **sch); +int32_t qwAcquireTask(SQWorkerMgmt *mgmt, int32_t rwType, SQWSchStatus *sch, uint64_t qId, uint64_t tId, SQWTaskStatus **task); #ifdef __cplusplus diff --git a/source/libs/qworker/src/qworker.c b/source/libs/qworker/src/qworker.c index 23d74ae91d..921298995f 100644 --- a/source/libs/qworker/src/qworker.c +++ b/source/libs/qworker/src/qworker.c @@ -108,7 +108,7 @@ int32_t qwAddTaskHandlesToCache(SQWorkerMgmt *mgmt, uint64_t qId, uint64_t tId, QW_LOCK(QW_WRITE, &mgmt->ctxLock); if (0 != taosHashPut(mgmt->ctxHash, id, sizeof(id), &resCache, sizeof(SQWTaskCtx))) { QW_UNLOCK(QW_WRITE, &mgmt->ctxLock); - qError("taosHashPut queryId[%"PRIx64"] taskId[%"PRIx64"] to resHash failed", qId, tId); + QW_TASK_ELOG("taosHashPut task ctx to ctxHash failed, taskHandle:%p, sinkHandle:%p", taskHandle, sinkHandle); return TSDB_CODE_QRY_APP_ERROR; } @@ -117,7 +117,7 @@ int32_t qwAddTaskHandlesToCache(SQWorkerMgmt *mgmt, uint64_t qId, uint64_t tId, return TSDB_CODE_SUCCESS; } -static int32_t qwAddScheduler(int32_t rwType, SQWorkerMgmt *mgmt, uint64_t sId, SQWSchStatus **sch) { +int32_t qwAddScheduler(int32_t rwType, SQWorkerMgmt *mgmt, uint64_t sId, SQWSchStatus **sch) { SQWSchStatus newSch = {0}; newSch.tasksHash = taosHashInit(mgmt->cfg.maxSchTaskNum, taosGetDefaultHashFunction(TSDB_DATA_TYPE_BINARY), false, HASH_NO_LOCK); if (NULL == newSch.tasksHash) { @@ -150,7 +150,7 @@ static int32_t qwAddScheduler(int32_t rwType, SQWorkerMgmt *mgmt, uint64_t sId, return TSDB_CODE_SUCCESS; } -static int32_t qwAcquireSchedulerImpl(int32_t rwType, SQWorkerMgmt *mgmt, uint64_t sId, SQWSchStatus **sch, int32_t nOpt) { +int32_t qwAcquireSchedulerImpl(int32_t rwType, SQWorkerMgmt *mgmt, uint64_t sId, SQWSchStatus **sch, int32_t nOpt) { QW_LOCK(rwType, &mgmt->schLock); *sch = taosHashGet(mgmt->schHash, &sId, sizeof(sId)); if (NULL == (*sch)) { @@ -168,42 +168,19 @@ static int32_t qwAcquireSchedulerImpl(int32_t rwType, SQWorkerMgmt *mgmt, uint64 return TSDB_CODE_SUCCESS; } -static int32_t qwAddAcquireScheduler(int32_t rwType, SQWorkerMgmt *mgmt, uint64_t sId, SQWSchStatus **sch) { +int32_t qwAcquireAddScheduler(int32_t rwType, SQWorkerMgmt *mgmt, uint64_t sId, SQWSchStatus **sch) { return qwAcquireSchedulerImpl(rwType, mgmt, sId, sch, QW_NOT_EXIST_ADD); } -static int32_t qwAcquireScheduler(int32_t rwType, SQWorkerMgmt *mgmt, uint64_t sId, SQWSchStatus **sch) { +int32_t qwAcquireScheduler(int32_t rwType, SQWorkerMgmt *mgmt, uint64_t sId, SQWSchStatus **sch) { return qwAcquireSchedulerImpl(rwType, mgmt, sId, sch, QW_NOT_EXIST_RET_ERR); } -static FORCE_INLINE void qwReleaseScheduler(int32_t rwType, SQWorkerMgmt *mgmt) { +void qwReleaseScheduler(int32_t rwType, SQWorkerMgmt *mgmt) { QW_UNLOCK(rwType, &mgmt->schLock); } -static int32_t qwAcquireTaskImpl(int32_t rwType, SQWSchStatus *sch, uint64_t qId, uint64_t tId, SQWTaskStatus **task) { - char id[sizeof(qId) + sizeof(tId)] = {0}; - QW_SET_QTID(id, qId, tId); - - QW_LOCK(rwType, 
&sch->tasksLock); - *task = taosHashGet(sch->tasksHash, id, sizeof(id)); - if (NULL == (*task)) { - QW_UNLOCK(rwType, &sch->tasksLock); - - return TSDB_CODE_QRY_TASK_NOT_EXIST; - } - - return TSDB_CODE_SUCCESS; -} - -static int32_t qwAcquireTask(int32_t rwType, SQWSchStatus *sch, uint64_t qId, uint64_t tId, SQWTaskStatus **task) { - return qwAcquireTaskImpl(rwType, sch, qId, tId, task); -} - -static FORCE_INLINE void qwReleaseTask(int32_t rwType, SQWSchStatus *sch) { - QW_UNLOCK(rwType, &sch->tasksLock); -} - -int32_t qwAddTaskToSch(int32_t rwType, SQWSchStatus *sch, uint64_t qId, uint64_t tId, int8_t status, int32_t eOpt, SQWTaskStatus **task) { +int32_t qwAddTaskImpl(SQWorkerMgmt *mgmt, SQWSchStatus *sch, int32_t rwType, uint64_t qId, uint64_t tId, int32_t status, int32_t eOpt, SQWTaskStatus **task) { int32_t code = 0; char id[sizeof(qId) + sizeof(tId)] = {0}; @@ -212,62 +189,83 @@ int32_t qwAddTaskToSch(int32_t rwType, SQWSchStatus *sch, uint64_t qId, uint64_t SQWTaskStatus ntask = {0}; ntask.status = status; - while (true) { - QW_LOCK(QW_WRITE, &sch->tasksLock); - int32_t code = taosHashPut(sch->tasksHash, id, sizeof(id), &ntask, sizeof(ntask)); - if (0 != code) { - QW_UNLOCK(QW_WRITE, &sch->tasksLock); - if (HASH_NODE_EXIST(code)) { - if (QW_EXIST_ACQUIRE == eOpt && rwType && task) { - if (qwAcquireTask(rwType, sch, qId, tId, task)) { - continue; - } - } else if (QW_EXIST_RET_ERR == eOpt) { - return TSDB_CODE_QRY_TASK_ALREADY_EXIST; - } else { - assert(0); - } - - break; - } else { - qError("taosHashPut queryId[%"PRIx64"] taskId[%"PRIx64"] to scheduleHash failed", qId, tId); - return TSDB_CODE_QRY_APP_ERROR; - } - } - + QW_LOCK(QW_WRITE, &sch->tasksLock); + code = taosHashPut(sch->tasksHash, id, sizeof(id), &ntask, sizeof(ntask)); + if (0 != code) { QW_UNLOCK(QW_WRITE, &sch->tasksLock); - - if (rwType && task) { - if (TSDB_CODE_SUCCESS == qwAcquireTask(rwType, sch, qId, tId, task)) { - return TSDB_CODE_SUCCESS; + if (HASH_NODE_EXIST(code)) { + if (QW_EXIST_ACQUIRE == eOpt && rwType && task) { + QW_ERR_RET(qwAcquireTask(mgmt, rwType, sch, qId, tId, task)); + } else if (QW_EXIST_RET_ERR == eOpt) { + return TSDB_CODE_QRY_TASK_ALREADY_EXIST; + } else { + assert(0); } } else { - break; + qError("taosHashPut queryId[%"PRIx64"] taskId[%"PRIx64"] to scheduleHash failed", qId, tId); + return TSDB_CODE_QRY_APP_ERROR; } - } + } + + QW_UNLOCK(QW_WRITE, &sch->tasksLock); + + if (QW_EXIST_ACQUIRE == eOpt && rwType && task) { + QW_ERR_RET(qwAcquireTask(mgmt, rwType, sch, qId, tId, task)); + } return TSDB_CODE_SUCCESS; } -static int32_t qwAddTask(SQWorkerMgmt *mgmt, uint64_t sId, uint64_t qId, uint64_t tId, int32_t status, int32_t eOpt, SQWSchStatus **sch, SQWTaskStatus **task) { +int32_t qwAddTask(SQWorkerMgmt *mgmt, uint64_t sId, uint64_t qId, uint64_t tId, int32_t status) { SQWSchStatus *tsch = NULL; - QW_ERR_RET(qwAddAcquireScheduler(QW_READ, mgmt, sId, &tsch)); + int32_t code = 0; + QW_ERR_RET(qwAcquireAddScheduler(QW_READ, mgmt, sId, &tsch)); - int32_t code = qwAddTaskToSch(QW_READ, tsch, qId, tId, status, eOpt, task); - if (code) { - qwReleaseScheduler(QW_WRITE, mgmt); - } + QW_ERR_JRET(qwAddTaskImpl(mgmt, tsch, 0, qId, tId, JOB_TASK_STATUS_NOT_START, QW_EXIST_RET_ERR, NULL)); - if (NULL == task) { - qwReleaseScheduler(QW_READ, mgmt); - } else if (sch) { - *sch = tsch; - } +_return: - QW_RET(code); + qwReleaseScheduler(QW_READ, mgmt); + QW_ERR_RET(code); } -static FORCE_INLINE int32_t qwAcquireTaskCtx(int32_t rwType, SQWorkerMgmt *mgmt, uint64_t queryId, uint64_t taskId, SQWTaskCtx **handles) 
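Editor's note: every per-task lookup here keys its hash on the same 16-byte buffer, qId in the first eight bytes and tId in the next eight (QW_SET_QTID / QW_GET_QTID in qworkerInt.h). A standalone round trip of that layout, using memcpy where the macros use direct uint64_t stores (the macros therefore assume the buffer is suitably aligned):

#include <assert.h>
#include <stdint.h>
#include <string.h>

int main(void) {
  uint64_t qId = 0x1111, tId = 0x2222;

  char id[sizeof(qId) + sizeof(tId)] = {0};
  memcpy(id, &qId, sizeof(qId));                /* bytes 0..7  */
  memcpy(id + sizeof(qId), &tId, sizeof(tId));  /* bytes 8..15 */

  /* taosHashPut/taosHashGet treat the key as raw bytes, so both ids
   * must always be packed in the same order */
  uint64_t q2, t2;
  memcpy(&q2, id, sizeof(q2));
  memcpy(&t2, id + sizeof(q2), sizeof(t2));
  assert(q2 == qId && t2 == tId);
  return 0;
}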
{ + +int32_t qwAcquireTaskImpl(SQWorkerMgmt *mgmt, int32_t rwType, SQWSchStatus *sch, uint64_t qId, uint64_t tId, int32_t status, int32_t nOpt, SQWTaskStatus **task) { + char id[sizeof(qId) + sizeof(tId)] = {0}; + QW_SET_QTID(id, qId, tId); + + QW_LOCK(rwType, &sch->tasksLock); + *task = taosHashGet(sch->tasksHash, id, sizeof(id)); + if (NULL == (*task)) { + QW_UNLOCK(rwType, &sch->tasksLock); + + if (QW_NOT_EXIST_ADD == nOpt) { + QW_ERR_RET(qwAddTaskImpl(mgmt, sch, rwType, qId, tId, status, QW_EXIST_ACQUIRE, task)); + } else if (QW_NOT_EXIST_RET_ERR == nOpt) { + return TSDB_CODE_QRY_TASK_NOT_EXIST; + } else { + assert(0); + } + } + + return TSDB_CODE_SUCCESS; +} + +int32_t qwAcquireTask(SQWorkerMgmt *mgmt, int32_t rwType, SQWSchStatus *sch, uint64_t qId, uint64_t tId, SQWTaskStatus **task) { + return qwAcquireTaskImpl(mgmt, rwType, sch, qId, tId, 0, QW_NOT_EXIST_RET_ERR, task); +} + +int32_t qwAcquireAddTask(SQWorkerMgmt *mgmt, int32_t rwType, SQWSchStatus *sch, uint64_t qId, uint64_t tId, int32_t status, SQWTaskStatus **task) { + return qwAcquireTaskImpl(mgmt, rwType, sch, qId, tId, status, QW_NOT_EXIST_ADD, task); +} + + +void qwReleaseTask(int32_t rwType, SQWSchStatus *sch) { + QW_UNLOCK(rwType, &sch->tasksLock); +} + + +int32_t qwAcquireTaskCtx(int32_t rwType, SQWorkerMgmt *mgmt, uint64_t queryId, uint64_t taskId, SQWTaskCtx **handles) { char id[sizeof(queryId) + sizeof(taskId)] = {0}; QW_SET_QTID(id, queryId, taskId); @@ -281,7 +279,7 @@ static FORCE_INLINE int32_t qwAcquireTaskCtx(int32_t rwType, SQWorkerMgmt *mgmt, return TSDB_CODE_SUCCESS; } -static FORCE_INLINE void qwReleaseTaskResCache(int32_t rwType, SQWorkerMgmt *mgmt) { +void qwReleaseTaskResCache(int32_t rwType, SQWorkerMgmt *mgmt) { QW_UNLOCK(rwType, &mgmt->ctxLock); } @@ -352,7 +350,7 @@ int32_t qwUpdateTaskStatus(SQWorkerMgmt *mgmt, uint64_t sId, uint64_t qId, uint6 QW_ERR_RET(qwAcquireScheduler(QW_READ, mgmt, sId, &sch)); - QW_ERR_JRET(qwAcquireTask(QW_READ, sch, qId, tId, &task)); + QW_ERR_JRET(qwAcquireTask(mgmt, QW_READ, sch, qId, tId, &task)); QW_LOCK(QW_WRITE, &task->lock); qwUpdateTaskInfo(mgmt, task, QW_TASK_INFO_STATUS, &status, QW_IDS()); @@ -377,7 +375,7 @@ int32_t qwGetTaskStatus(SQWorkerMgmt *mgmt, uint64_t sId, uint64_t queryId, uint return TSDB_CODE_SUCCESS; } - if (qwAcquireTask(QW_READ, sch, queryId, taskId, &task)) { + if (qwAcquireTask(mgmt, QW_READ, sch, queryId, taskId, &task)) { qwReleaseScheduler(QW_READ, mgmt); *taskStatus = JOB_TASK_STATUS_NULL; @@ -398,17 +396,10 @@ int32_t qwCancelTask(SQWorkerMgmt *mgmt, uint64_t sId, uint64_t qId, uint64_t tI SQWTaskStatus *task = NULL; int32_t code = 0; - QW_ERR_RET(qwAddAcquireScheduler(QW_READ, mgmt, sId, &sch)); + QW_ERR_RET(qwAcquireAddScheduler(QW_READ, mgmt, sId, &sch)); + + QW_ERR_JRET(qwAcquireAddTask(mgmt, QW_READ, sch, qId, tId, JOB_TASK_STATUS_NOT_START, &task)); - if (qwAcquireTask(QW_READ, sch, qId, tId, &task)) { - qwReleaseScheduler(QW_READ, mgmt); - - code = qwAddTask(mgmt, sId, qId, tId, JOB_TASK_STATUS_NOT_START, QW_EXIST_ACQUIRE, &sch, &task); - if (code) { - qwReleaseScheduler(QW_READ, mgmt); - QW_ERR_RET(code); - } - } QW_LOCK(QW_WRITE, &task->lock); @@ -458,6 +449,42 @@ _return: QW_RET(code); } + +// caller should make sure task is not running +int32_t qwDropTaskCtx(SQWorkerMgmt *mgmt, uint64_t sId, uint64_t qId, uint64_t tId) { + char id[sizeof(qId) + sizeof(tId)] = {0}; + QW_SET_QTID(id, qId, tId); + + QW_LOCK(QW_WRITE, &mgmt->ctxLock); + SQWTaskCtx *ctx = taosHashGet(mgmt->ctxHash, id, sizeof(id)); + if (NULL == ctx) { + 
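Editor's note: qwAcquireAddScheduler and the reworked qwAcquireTaskImpl both implement acquire-or-add: look up under the read lock, and on a miss insert under the write lock, tolerating HASH_NODE_EXIST because another thread can win the race between the two locks. The shape of that protocol, reduced to a toy fixed-size registry with a pthread rwlock standing in for SRWLatch (acquireAdd and SEntry are illustrative, not TDengine APIs):

#include <pthread.h>
#include <stdbool.h>
#include <stdint.h>

#define MAX_ENTRIES 64

typedef struct { uint64_t id; int status; bool used; } SEntry;

static SEntry           entries[MAX_ENTRIES];
static pthread_rwlock_t lock = PTHREAD_RWLOCK_INITIALIZER;

static SEntry *findLocked(uint64_t id) {
  for (int i = 0; i < MAX_ENTRIES; i++)
    if (entries[i].used && entries[i].id == id) return &entries[i];
  return NULL;
}

static SEntry *insertLocked(uint64_t id, int status) {
  for (int i = 0; i < MAX_ENTRIES; i++)
    if (!entries[i].used) {
      entries[i] = (SEntry){.id = id, .status = status, .used = true};
      return &entries[i];
    }
  return NULL;
}

/* returns with the read lock held on success; caller must release() */
SEntry *acquireAdd(uint64_t id, int initStatus) {
  pthread_rwlock_rdlock(&lock);
  SEntry *e = findLocked(id);
  if (e != NULL) return e;               /* fast path: entry exists   */
  pthread_rwlock_unlock(&lock);

  pthread_rwlock_wrlock(&lock);          /* miss: insert under write  */
  e = findLocked(id);                    /* re-check: lost the race?  */
  if (e == NULL) e = insertLocked(id, initStatus);
  pthread_rwlock_unlock(&lock);
  if (e == NULL) return NULL;            /* registry full             */

  pthread_rwlock_rdlock(&lock);          /* re-acquire for the caller */
  return findLocked(id);                 /* NULL if dropped meanwhile */
}

void release(void) { pthread_rwlock_unlock(&lock); }

As in the patch, the unlock/relock between insert and re-acquire leaves a window in which a concurrent drop can remove the entry again, which is why the real code re-checks after every acquisition instead of trusting the earlier result.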
QW_UNLOCK(QW_WRITE, &mgmt->ctxLock); + return TSDB_CODE_QRY_RES_CACHE_NOT_EXIST; + } + + if (ctx->taskHandle) { + qDestroyTask(ctx->taskHandle); + ctx->taskHandle = NULL; + } + + if (ctx->sinkHandle) { + dsDestroyDataSinker(ctx->sinkHandle); + ctx->sinkHandle = NULL; + } + + if (taosHashRemove(mgmt->ctxHash, id, sizeof(id))) { + QW_TASK_ELOG("taosHashRemove from ctx hash failed, id:%s", id); + + QW_UNLOCK(QW_WRITE, &mgmt->ctxLock); + return TSDB_CODE_QRY_RES_CACHE_NOT_EXIST; + } + + QW_UNLOCK(QW_WRITE, &mgmt->ctxLock); + + return TSDB_CODE_SUCCESS; +} + + int32_t qwDropTask(SQWorkerMgmt *mgmt, uint64_t sId, uint64_t qId, uint64_t tId) { SQWSchStatus *sch = NULL; SQWTaskStatus *task = NULL; @@ -466,20 +493,14 @@ int32_t qwDropTask(SQWorkerMgmt *mgmt, uint64_t sId, uint64_t qId, uint64_t tId) char id[sizeof(qId) + sizeof(tId)] = {0}; QW_SET_QTID(id, qId, tId); - QW_LOCK(QW_WRITE, &mgmt->ctxLock); - if (mgmt->ctxHash) { - if (taosHashRemove(mgmt->ctxHash, id, sizeof(id))) { - QW_TASK_WLOG("taosHashRemove from ctx hash failed, id:%s", id); - } - } - QW_UNLOCK(QW_WRITE, &mgmt->ctxLock); + qwDropTaskCtx(mgmt, sId, qId, tId); if (qwAcquireScheduler(QW_WRITE, mgmt, sId, &sch)) { QW_TASK_WLOG("scheduler does not exist, sch:%p", sch); return TSDB_CODE_SUCCESS; } - if (qwAcquireTask(QW_WRITE, sch, qId, tId, &task)) { + if (qwAcquireTask(mgmt, QW_WRITE, sch, qId, tId, &task)) { qwReleaseScheduler(QW_WRITE, mgmt); QW_TASK_WLOG("task does not exist, task:%p", task); @@ -506,17 +527,9 @@ int32_t qwCancelDropTask(SQWorkerMgmt *mgmt, uint64_t sId, uint64_t qId, uint64_ SQWTaskStatus *task = NULL; int32_t code = 0; - QW_ERR_RET(qwAddAcquireScheduler(QW_READ, mgmt, sId, &sch)); + QW_ERR_RET(qwAcquireAddScheduler(QW_READ, mgmt, sId, &sch)); - if (qwAcquireTask(QW_READ, sch, qId, tId, &task)) { - qwReleaseScheduler(QW_READ, mgmt); - - code = qwAddTask(mgmt, sId, qId, tId, JOB_TASK_STATUS_NOT_START, QW_EXIST_ACQUIRE, &sch, &task); - if (code) { - qwReleaseScheduler(QW_READ, mgmt); - QW_ERR_RET(code); - } - } + QW_ERR_JRET(qwAcquireAddTask(mgmt, QW_READ, sch, qId, tId, JOB_TASK_STATUS_NOT_START, &task)); QW_LOCK(QW_WRITE, &task->lock); @@ -772,7 +785,7 @@ int32_t qwCheckAndSendReadyRsp(SQWorkerMgmt *mgmt, uint64_t sId, uint64_t queryI QW_ERR_RET(qwAcquireScheduler(QW_READ, mgmt, sId, &sch)); - QW_ERR_JRET(qwAcquireTask(QW_READ, sch, queryId, taskId, &task)); + QW_ERR_JRET(qwAcquireTask(mgmt, QW_READ, sch, queryId, taskId, &task)); QW_LOCK(QW_WRITE, &task->lock); @@ -814,7 +827,7 @@ int32_t qwSetAndSendReadyRsp(SQWorkerMgmt *mgmt, uint64_t sId, uint64_t queryId, QW_ERR_RET(qwAcquireScheduler(QW_READ, mgmt, sId, &sch)); - QW_ERR_JRET(qwAcquireTask(QW_READ, sch, queryId, taskId, &task)); + QW_ERR_JRET(qwAcquireTask(mgmt, QW_READ, sch, queryId, taskId, &task)); QW_LOCK(QW_WRITE, &task->lock); if (QW_TASK_READY_RESP(task->status)) { @@ -843,7 +856,7 @@ _return: QW_RET(code); } -int32_t qwCheckTaskCancelDrop(SQWorkerMgmt *mgmt, uint64_t sId, uint64_t qId, uint64_t tId, bool *needStop) { +int32_t qwCheckAndProcessTaskDrop(SQWorkerMgmt *mgmt, uint64_t sId, uint64_t qId, uint64_t tId, bool *needStop) { SQWSchStatus *sch = NULL; SQWTaskStatus *task = NULL; int32_t code = 0; @@ -855,7 +868,7 @@ int32_t qwCheckTaskCancelDrop(SQWorkerMgmt *mgmt, uint64_t sId, uint64_t qId, ui return TSDB_CODE_SUCCESS; } - if (qwAcquireTask(QW_READ, sch, qId, tId, &task)) { + if (qwAcquireTask(mgmt, QW_READ, sch, qId, tId, &task)) { qwReleaseScheduler(QW_READ, mgmt); return TSDB_CODE_SUCCESS; } @@ -867,10 +880,7 @@ int32_t 
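Editor's note: qwDropTaskCtx holds the ctx write lock across the whole teardown, so a concurrent acquirer can never observe a ctx whose executor or sinker has already been destroyed; only after both handles are released is the entry removed from the hash. The same ordering in a self-contained toy, where table and destroyRes are stand-ins rather than the real ctxHash / qDestroyTask / dsDestroyDataSinker:

#include <pthread.h>
#include <stdlib.h>

typedef struct { void *taskHandle; void *sinkHandle; } SCtx;

static pthread_rwlock_t ctxLock = PTHREAD_RWLOCK_INITIALIZER;
static SCtx *table[16];                      /* toy "hash": index is the id */

static void destroyRes(void **h) { free(*h); *h = NULL; }

int dropCtx(int id) {                        /* caller ensures 0 <= id < 16 */
  pthread_rwlock_wrlock(&ctxLock);
  SCtx *ctx = table[id];
  if (ctx == NULL) {
    pthread_rwlock_unlock(&ctxLock);
    return -1;                               /* nothing cached              */
  }
  destroyRes(&ctx->taskHandle);              /* release the executor first  */
  destroyRes(&ctx->sinkHandle);              /* ... then the data sinker    */
  table[id] = NULL;                          /* only now unpublish the ctx  */
  free(ctx);
  pthread_rwlock_unlock(&ctxLock);
  return 0;
}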
qwCheckTaskCancelDrop(SQWorkerMgmt *mgmt, uint64_t sId, uint64_t qId, ui QW_UNLOCK(QW_READ, &task->lock); - qwReleaseTask(QW_READ, sch); - qwReleaseScheduler(QW_READ, mgmt); - - QW_RET(TSDB_CODE_QRY_APP_ERROR); + QW_ERR_JRET(TSDB_CODE_QRY_APP_ERROR); } QW_UNLOCK(QW_READ, &task->lock); @@ -907,22 +917,16 @@ int32_t qwQueryPostProcess(SQWorkerMgmt *mgmt, uint64_t sId, uint64_t qId, uint6 int32_t code = 0; int8_t newStatus = JOB_TASK_STATUS_CANCELLED; - code = qwAddAcquireScheduler(QW_READ, mgmt, sId, &sch); + code = qwAcquireAddScheduler(QW_READ, mgmt, sId, &sch); if (code) { - qError("sId:%"PRIx64" not in cache", sId); + QW_TASK_ELOG("sId:%"PRIx64" not in cache", sId); QW_ERR_RET(code); } - code = qwAcquireTask(QW_READ, sch, qId, tId, &task); + code = qwAcquireTask(mgmt, QW_READ, sch, qId, tId, &task); if (code) { - qwReleaseScheduler(QW_READ, mgmt); - - if (JOB_TASK_STATUS_PARTIAL_SUCCEED == status || JOB_TASK_STATUS_SUCCEED == status) { - qError("sId:%"PRIx64" queryId:%"PRIx64" taskId:%"PRIx64" not in cache", sId, qId, tId); - QW_ERR_RET(code); - } - - QW_ERR_RET(qwAddTask(mgmt, sId, qId, tId, status, QW_EXIST_ACQUIRE, &sch, &task)); + QW_TASK_ELOG("sId:%"PRIx64" queryId:%"PRIx64" taskId:%"PRIx64" not in cache", sId, qId, tId); + QW_ERR_RET(code); } if (task->cancel) { @@ -940,7 +944,7 @@ int32_t qwQueryPostProcess(SQWorkerMgmt *mgmt, uint64_t sId, uint64_t qId, uint6 return TSDB_CODE_SUCCESS; } - if (!(task->cancel || task->drop)) { + if ((!(task->cancel || task->drop)) && status > 0) { QW_LOCK(QW_WRITE, &task->lock); qwUpdateTaskInfo(mgmt, task, QW_TASK_INFO_STATUS, &status, QW_IDS()); task->code = errCode; @@ -1053,7 +1057,7 @@ int32_t qwHandleFetch(SQWorkerMgmt *mgmt, uint64_t sId, uint64_t queryId, uint64 } QW_ERR_JRET(qwAcquireScheduler(QW_READ, mgmt, sId, &sch)); - QW_ERR_JRET(qwAcquireTask(QW_READ, sch, queryId, taskId, &task)); + QW_ERR_JRET(qwAcquireTask(mgmt, QW_READ, sch, queryId, taskId, &task)); QW_LOCK(QW_READ, &task->lock); @@ -1208,6 +1212,7 @@ int32_t qWorkerProcessQueryMsg(void *node, void *qWorkerMgmt, SRpcMsg *pMsg) { int32_t code = 0; bool queryRsped = false; bool needStop = false; + bool taskAdded = false; struct SSubplan *plan = NULL; SSubQueryMsg *msg = pMsg->pCont; SQWorkerMgmt *mgmt = (SQWorkerMgmt *)qWorkerMgmt; @@ -1226,27 +1231,30 @@ int32_t qWorkerProcessQueryMsg(void *node, void *qWorkerMgmt, SRpcMsg *pMsg) { uint64_t qId = msg->queryId; uint64_t tId = msg->taskId; - QW_ERR_JRET(qwCheckTaskCancelDrop(qWorkerMgmt, msg->sId, msg->queryId, msg->taskId, &needStop)); + QW_ERR_JRET(qwCheckAndProcessTaskDrop(qWorkerMgmt, msg->sId, msg->queryId, msg->taskId, &needStop)); if (needStop) { - qWarn("task need stop"); + QW_TASK_DLOG("task need stop, msgLen:%d", msg->contentLen); qwBuildAndSendQueryRsp(pMsg, TSDB_CODE_QRY_TASK_CANCELLED); QW_ERR_RET(TSDB_CODE_QRY_TASK_CANCELLED); } code = qStringToSubplan(msg->msg, &plan); if (TSDB_CODE_SUCCESS != code) { - qError("schId:%"PRIx64",qId:%"PRIx64",taskId:%"PRIx64" string to subplan failed, code:%d", msg->sId, msg->queryId, msg->taskId, code); + QW_TASK_ELOG("string to subplan failed, code:%d", code); QW_ERR_JRET(code); } qTaskInfo_t pTaskInfo = NULL; code = qCreateExecTask(node, 0, (struct SSubplan *)plan, &pTaskInfo); if (code) { - qError("qCreateExecTask failed, code:%x", code); + QW_TASK_ELOG("qCreateExecTask failed, code:%x", code); + QW_ERR_JRET(qwAddTask(qWorkerMgmt, sId, qId, tId, JOB_TASK_STATUS_FAILED)); QW_ERR_JRET(code); - } else { - QW_ERR_JRET(qwAddTask(qWorkerMgmt, msg->sId, msg->queryId, msg->taskId, 
JOB_TASK_STATUS_EXECUTING, QW_EXIST_RET_ERR, NULL, NULL)); } + + QW_ERR_JRET(qwAddTask(qWorkerMgmt, sId, qId, tId, JOB_TASK_STATUS_EXECUTING)); + + taskAdded = true; QW_ERR_JRET(qwBuildAndSendQueryRsp(pMsg, TSDB_CODE_SUCCESS)); @@ -1256,12 +1264,12 @@ int32_t qWorkerProcessQueryMsg(void *node, void *qWorkerMgmt, SRpcMsg *pMsg) { code = qExecTask(pTaskInfo, &sinkHandle); if (code) { - qError("qExecTask failed, code:%x", code); + QW_TASK_ELOG("qExecTask failed, code:%x", code); QW_ERR_JRET(code); - } else { - QW_ERR_JRET(qwAddTaskHandlesToCache(qWorkerMgmt, msg->queryId, msg->taskId, pTaskInfo, sinkHandle)); - QW_ERR_JRET(qwUpdateTaskStatus(qWorkerMgmt, msg->sId, msg->queryId, msg->taskId, JOB_TASK_STATUS_PARTIAL_SUCCEED)); - } + } + + QW_ERR_JRET(qwAddTaskHandlesToCache(qWorkerMgmt, msg->queryId, msg->taskId, pTaskInfo, sinkHandle)); + QW_ERR_JRET(qwUpdateTaskStatus(qWorkerMgmt, msg->sId, msg->queryId, msg->taskId, JOB_TASK_STATUS_PARTIAL_SUCCEED)); _return: @@ -1278,6 +1286,12 @@ _return: status = JOB_TASK_STATUS_PARTIAL_SUCCEED; } + if (!taskAdded) { + qwAddTask(qWorkerMgmt, sId, qId, tId, status); + + status = -1; + } + qwQueryPostProcess(qWorkerMgmt, msg->sId, msg->queryId, msg->taskId, status, code); QW_RET(code); @@ -1299,7 +1313,7 @@ int32_t qWorkerProcessQueryContinueMsg(void *node, void *qWorkerMgmt, SRpcMsg *p qwReleaseTaskResCache(QW_READ, qWorkerMgmt); - QW_ERR_JRET(qwCheckTaskCancelDrop(qWorkerMgmt, req->sId, req->queryId, req->taskId, &needStop)); + QW_ERR_JRET(qwCheckAndProcessTaskDrop(qWorkerMgmt, req->sId, req->queryId, req->taskId, &needStop)); if (needStop) { qWarn("task need stop"); if (needRsp) { From b1980e8f44f71d51c7c4184693878ad06f3ab2a9 Mon Sep 17 00:00:00 2001 From: dapan1121 Date: Thu, 13 Jan 2022 17:30:37 +0800 Subject: [PATCH 06/20] feature/qnode --- source/libs/qworker/inc/qworkerInt.h | 19 +-- source/libs/qworker/src/qworker.c | 203 ++++++++++++++++----------- 2 files changed, 130 insertions(+), 92 deletions(-) diff --git a/source/libs/qworker/inc/qworkerInt.h b/source/libs/qworker/inc/qworkerInt.h index 2f47c79065..4030ad82ad 100644 --- a/source/libs/qworker/inc/qworkerInt.h +++ b/source/libs/qworker/inc/qworkerInt.h @@ -70,6 +70,7 @@ typedef struct SQWTaskCtx { SRWLatch lock; int8_t sinkScheduled; int8_t queryScheduled; + bool needRsp; qTaskInfo_t taskHandle; DataSinkHandle sinkHandle; @@ -99,7 +100,7 @@ typedef struct SQWorkerMgmt { #define QW_TASK_NOT_EXIST(code) (TSDB_CODE_QRY_SCH_NOT_EXIST == (code) || TSDB_CODE_QRY_TASK_NOT_EXIST == (code)) #define QW_TASK_ALREADY_EXIST(code) (TSDB_CODE_QRY_TASK_ALREADY_EXIST == (code)) -#define QW_TASK_READY_RESP(status) (status == JOB_TASK_STATUS_SUCCEED || status == JOB_TASK_STATUS_FAILED || status == JOB_TASK_STATUS_CANCELLED || status == JOB_TASK_STATUS_PARTIAL_SUCCEED) +#define QW_TASK_READY(status) (status == JOB_TASK_STATUS_SUCCEED || status == JOB_TASK_STATUS_FAILED || status == JOB_TASK_STATUS_CANCELLED || status == JOB_TASK_STATUS_PARTIAL_SUCCEED) #define QW_SET_QTID(id, qId, tId) do { *(uint64_t *)(id) = (qId); *(uint64_t *)((char *)(id) + sizeof(qId)) = (tId); } while (0) #define QW_GET_QTID(id, qId, tId) do { (qId) = *(uint64_t *)(id); (tId) = *(uint64_t *)((char *)(id) + sizeof(qId)); } while (0) #define QW_IDS() sId, qId, tId @@ -111,16 +112,16 @@ typedef struct SQWorkerMgmt { #define QW_ELOG(param, ...) qError("QW:%p " param, mgmt, __VA_ARGS__) #define QW_DLOG(param, ...) qDebug("QW:%p " param, mgmt, __VA_ARGS__) -#define QW_SCH_ELOG(param, ...) 
qError("QW:%p SID:%"PRIx64 param, mgmt, sId, __VA_ARGS__) -#define QW_SCH_DLOG(param, ...) qDebug("QW:%p SID:%"PRIx64 param, mgmt, sId, __VA_ARGS__) +#define QW_SCH_ELOG(param, ...) qError("QW:%p SID:%"PRIx64" " param, mgmt, sId, __VA_ARGS__) +#define QW_SCH_DLOG(param, ...) qDebug("QW:%p SID:%"PRIx64" " param, mgmt, sId, __VA_ARGS__) -#define QW_TASK_ELOG(param, ...) qError("QW:%p QID:%"PRIx64",TID:%"PRIx64 param, mgmt, qId, tId, __VA_ARGS__) -#define QW_TASK_WLOG(param, ...) qWarn("QW:%p QID:%"PRIx64",TID:%"PRIx64 param, mgmt, qId, tId, __VA_ARGS__) -#define QW_TASK_DLOG(param, ...) qDebug("QW:%p QID:%"PRIx64",TID:%"PRIx64 param, mgmt, qId, tId, __VA_ARGS__) +#define QW_TASK_ELOG(param, ...) qError("QW:%p QID:%"PRIx64",TID:%"PRIx64" " param, mgmt, qId, tId, __VA_ARGS__) +#define QW_TASK_WLOG(param, ...) qWarn("QW:%p QID:%"PRIx64",TID:%"PRIx64" " param, mgmt, qId, tId, __VA_ARGS__) +#define QW_TASK_DLOG(param, ...) qDebug("QW:%p QID:%"PRIx64",TID:%"PRIx64" " param, mgmt, qId, tId, __VA_ARGS__) -#define QW_SCH_TASK_ELOG(param, ...) qError("QW:%p SID:%"PRIx64",QID:%"PRIx64",TID:%"PRIx64 param, mgmt, sId, qId, tId, __VA_ARGS__) -#define QW_SCH_TASK_WLOG(param, ...) qWarn("QW:%p SID:%"PRIx64",QID:%"PRIx64",TID:%"PRIx64 param, mgmt, sId, qId, tId, __VA_ARGS__) -#define QW_SCH_TASK_DLOG(param, ...) qDebug("QW:%p SID:%"PRIx64",QID:%"PRIx64",TID:%"PRIx64 param, mgmt, sId, qId, tId, __VA_ARGS__) +#define QW_SCH_TASK_ELOG(param, ...) qError("QW:%p SID:%"PRIx64",QID:%"PRIx64",TID:%"PRIx64" " param, mgmt, sId, qId, tId, __VA_ARGS__) +#define QW_SCH_TASK_WLOG(param, ...) qWarn("QW:%p SID:%"PRIx64",QID:%"PRIx64",TID:%"PRIx64" " param, mgmt, sId, qId, tId, __VA_ARGS__) +#define QW_SCH_TASK_DLOG(param, ...) qDebug("QW:%p SID:%"PRIx64",QID:%"PRIx64",TID:%"PRIx64" " param, mgmt, sId, qId, tId, __VA_ARGS__) #define TD_RWLATCH_WRITE_FLAG_COPY 0x40000000 diff --git a/source/libs/qworker/src/qworker.c b/source/libs/qworker/src/qworker.c index 921298995f..1be190065a 100644 --- a/source/libs/qworker/src/qworker.c +++ b/source/libs/qworker/src/qworker.c @@ -221,7 +221,7 @@ int32_t qwAddTask(SQWorkerMgmt *mgmt, uint64_t sId, uint64_t qId, uint64_t tId, int32_t code = 0; QW_ERR_RET(qwAcquireAddScheduler(QW_READ, mgmt, sId, &tsch)); - QW_ERR_JRET(qwAddTaskImpl(mgmt, tsch, 0, qId, tId, JOB_TASK_STATUS_NOT_START, QW_EXIST_RET_ERR, NULL)); + QW_ERR_JRET(qwAddTaskImpl(mgmt, tsch, 0, qId, tId, status, QW_EXIST_RET_ERR, NULL)); _return: @@ -557,6 +557,7 @@ int32_t qwCancelDropTask(SQWorkerMgmt *mgmt, uint64_t sId, uint64_t qId, uint64_ } QW_UNLOCK(QW_WRITE, &task->lock); + qwReleaseTask(QW_READ, sch); qwReleaseScheduler(QW_READ, mgmt); @@ -778,30 +779,35 @@ int32_t qwBuildAndSendShowFetchRsp(SRpcMsg *pMsg, SVShowTablesFetchReq* pFetchRe return TSDB_CODE_SUCCESS; } -int32_t qwCheckAndSendReadyRsp(SQWorkerMgmt *mgmt, uint64_t sId, uint64_t queryId, uint64_t taskId, SRpcMsg *pMsg, int32_t rspCode) { +int32_t qwCheckAndSendReadyRsp(SQWorkerMgmt *mgmt, uint64_t sId, uint64_t qId, uint64_t tId, SRpcMsg *pMsg) { SQWSchStatus *sch = NULL; SQWTaskStatus *task = NULL; int32_t code = 0; QW_ERR_RET(qwAcquireScheduler(QW_READ, mgmt, sId, &sch)); - QW_ERR_JRET(qwAcquireTask(mgmt, QW_READ, sch, queryId, taskId, &task)); + QW_ERR_JRET(qwAcquireTask(mgmt, QW_READ, sch, qId, tId, &task)); QW_LOCK(QW_WRITE, &task->lock); if (QW_READY_NOT_RECEIVED == task->ready) { + QW_SCH_TASK_DLOG("ready not received, ready:%d", task->ready); + goto _return; + } else if (QW_READY_RECEIVED == task->ready) { + task->ready = QW_READY_RESPONSED; + int32_t 
rspCode = task->code; + QW_UNLOCK(QW_WRITE, &task->lock); - qwReleaseTask(QW_READ, sch); qwReleaseScheduler(QW_READ, mgmt); - return TSDB_CODE_SUCCESS; - } else if (QW_READY_RECEIVED == task->ready) { - QW_ERR_JRET(qwBuildAndSendReadyRsp(pMsg, rspCode)); + QW_ERR_RET(qwBuildAndSendReadyRsp(pMsg, rspCode)); + + QW_SCH_TASK_DLOG("ready response sent, ready:%d", task->ready); - task->ready = QW_READY_RESPONSED; + return TSDB_CODE_SUCCESS; } else if (QW_READY_RESPONSED == task->ready) { - qError("query response already send"); + QW_SCH_TASK_ELOG("ready response already send, ready:%d", task->ready); QW_ERR_JRET(TSDB_CODE_QRY_APP_ERROR); } else { assert(0); @@ -812,7 +818,6 @@ _return: if (task) { QW_UNLOCK(QW_WRITE, &task->lock); qwReleaseTask(QW_READ, sch); - } qwReleaseScheduler(QW_READ, mgmt); @@ -820,34 +825,39 @@ _return: QW_RET(code); } -int32_t qwSetAndSendReadyRsp(SQWorkerMgmt *mgmt, uint64_t sId, uint64_t queryId, uint64_t taskId, SRpcMsg *pMsg) { +int32_t qwSetAndSendReadyRsp(SQWorkerMgmt *mgmt, uint64_t sId, uint64_t qId, uint64_t tId, SRpcMsg *pMsg) { SQWSchStatus *sch = NULL; SQWTaskStatus *task = NULL; int32_t code = 0; QW_ERR_RET(qwAcquireScheduler(QW_READ, mgmt, sId, &sch)); - QW_ERR_JRET(qwAcquireTask(mgmt, QW_READ, sch, queryId, taskId, &task)); + QW_ERR_JRET(qwAcquireTask(mgmt, QW_READ, sch, qId, tId, &task)); QW_LOCK(QW_WRITE, &task->lock); - if (QW_TASK_READY_RESP(task->status)) { - QW_ERR_JRET(qwBuildAndSendReadyRsp(pMsg, task->code)); + int8_t status = task->status; + int32_t errCode = task->code; + + if (QW_TASK_READY(status)) { task->ready = QW_READY_RESPONSED; + + QW_UNLOCK(QW_WRITE, &task->lock); + + QW_ERR_JRET(qwBuildAndSendReadyRsp(pMsg, errCode)); + + QW_SCH_TASK_DLOG("task ready responsed, status:%d", status); } else { task->ready = QW_READY_RECEIVED; + QW_UNLOCK(QW_WRITE, &task->lock); - qwReleaseTask(QW_READ, sch); - qwReleaseScheduler(QW_READ, mgmt); - - return TSDB_CODE_SUCCESS; + QW_SCH_TASK_DLOG("task ready NOT responsed, status:%d", status); } _return: if (task) { - QW_UNLOCK(QW_WRITE, &task->lock); qwReleaseTask(QW_READ, sch); } @@ -872,22 +882,15 @@ int32_t qwCheckAndProcessTaskDrop(SQWorkerMgmt *mgmt, uint64_t sId, uint64_t qId qwReleaseScheduler(QW_READ, mgmt); return TSDB_CODE_SUCCESS; } - - QW_LOCK(QW_READ, &task->lock); - if ((!task->cancel) && (!task->drop)) { - QW_TASK_ELOG("no cancel or drop but task exists, status:%d", task->status); - - QW_UNLOCK(QW_READ, &task->lock); - + if ((!atomic_load_8(&task->cancel)) && (!atomic_load_8(&task->drop))) { + QW_TASK_ELOG("no cancel or drop but task exists, status:%d", atomic_load_8(&task->status)); QW_ERR_JRET(TSDB_CODE_QRY_APP_ERROR); } - QW_UNLOCK(QW_READ, &task->lock); - *needStop = true; - if (task->cancel) { + if (atomic_load_8(&task->cancel)) { QW_LOCK(QW_WRITE, &task->lock); code = qwUpdateTaskInfo(mgmt, task, QW_TASK_INFO_STATUS, &status, QW_IDS()); QW_UNLOCK(QW_WRITE, &task->lock); @@ -929,13 +932,15 @@ int32_t qwQueryPostProcess(SQWorkerMgmt *mgmt, uint64_t sId, uint64_t qId, uint6 QW_ERR_RET(code); } + QW_LOCK(QW_WRITE, &task->lock); + if (task->cancel) { - QW_LOCK(QW_WRITE, &task->lock); qwUpdateTaskInfo(mgmt, task, QW_TASK_INFO_STATUS, &newStatus, QW_IDS()); - QW_UNLOCK(QW_WRITE, &task->lock); } if (task->drop) { + QW_UNLOCK(QW_WRITE, &task->lock); + qwReleaseTask(QW_READ, sch); qwReleaseScheduler(QW_READ, mgmt); @@ -944,12 +949,12 @@ int32_t qwQueryPostProcess(SQWorkerMgmt *mgmt, uint64_t sId, uint64_t qId, uint6 return TSDB_CODE_SUCCESS; } - if ((!(task->cancel || task->drop)) && status > 0) 
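Editor's note: the ready handshake has two producers for one response. qwSetAndSendReadyRsp fires when the scheduler's READY message arrives, qwCheckAndSendReadyRsp fires when execution reaches a reportable status, and whichever runs second actually answers. Its three-state core, with the task lock and RPC plumbing stripped out (sendReadyRsp is a stub; the real transitions happen under task->lock):

#include <stdio.h>

typedef enum { READY_NOT_RECEIVED, READY_RECEIVED, READY_RESPONSED } EReady;

typedef struct { EReady ready; int taskDone; } STask;

static void sendReadyRsp(void) { printf("ready rsp sent\n"); }

/* the scheduler's READY message arrives */
void onReadyMsg(STask *t) {
  if (t->taskDone) {                /* query already finished: answer now */
    t->ready = READY_RESPONSED;
    sendReadyRsp();
  } else {
    t->ready = READY_RECEIVED;      /* remember it; answer at completion  */
  }
}

/* query execution reaches a reportable status */
void onTaskDone(STask *t) {
  t->taskDone = 1;
  if (t->ready == READY_RECEIVED) { /* request came first: answer now     */
    t->ready = READY_RESPONSED;
    sendReadyRsp();
  }                                 /* NOT_RECEIVED: nothing to answer    */
}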
{ - QW_LOCK(QW_WRITE, &task->lock); + if (!(task->cancel || task->drop)) { qwUpdateTaskInfo(mgmt, task, QW_TASK_INFO_STATUS, &status, QW_IDS()); task->code = errCode; - QW_UNLOCK(QW_WRITE, &task->lock); } + + QW_UNLOCK(QW_WRITE, &task->lock); qwReleaseTask(QW_READ, sch); qwReleaseScheduler(QW_READ, mgmt); @@ -995,24 +1000,24 @@ int32_t qwScheduleDataSink(SQWTaskCtx *handles, SQWorkerMgmt *mgmt, uint64_t sId return TSDB_CODE_SUCCESS; } -int32_t qwScheduleQuery(SQWTaskCtx *handles, SQWorkerMgmt *mgmt, uint64_t sId, uint64_t queryId, uint64_t taskId, SRpcMsg *pMsg) { +int32_t qwScheduleQuery(SQWTaskCtx *handles, SQWorkerMgmt *mgmt, uint64_t sId, uint64_t qId, uint64_t tId, SRpcMsg *pMsg) { if (atomic_load_8(&handles->queryScheduled)) { - qDebug("query already scheduled"); + QW_SCH_TASK_ELOG("query already scheduled, queryScheduled:%d", handles->queryScheduled); return TSDB_CODE_SUCCESS; } - QW_ERR_RET(qwUpdateTaskStatus(mgmt, sId, queryId, taskId, JOB_TASK_STATUS_EXECUTING)); + QW_ERR_RET(qwUpdateTaskStatus(mgmt, sId, qId, tId, JOB_TASK_STATUS_EXECUTING)); SQueryContinueReq * req = (SQueryContinueReq *)rpcMallocCont(sizeof(SQueryContinueReq)); if (NULL == req) { - qError("rpcMallocCont %d failed", (int32_t)sizeof(SQueryContinueReq)); + QW_SCH_TASK_ELOG("rpcMallocCont %d failed", (int32_t)sizeof(SQueryContinueReq)); QW_ERR_RET(TSDB_CODE_QRY_OUT_OF_MEMORY); } req->header.vgId = mgmt->nodeId; req->sId = sId; - req->queryId = queryId; - req->taskId = taskId; + req->queryId = qId; + req->taskId = tId; SRpcMsg pNewMsg = { .handle = pMsg->handle, @@ -1025,20 +1030,21 @@ int32_t qwScheduleQuery(SQWTaskCtx *handles, SQWorkerMgmt *mgmt, uint64_t sId, u int32_t code = (*mgmt->putToQueueFp)(mgmt->nodeObj, &pNewMsg); if (TSDB_CODE_SUCCESS != code) { - qError("put query continue msg to queue failed, code:%x", code); + QW_SCH_TASK_ELOG("put query continue msg to queue failed, code:%x", code); rpcFreeCont(req); QW_ERR_RET(code); } + handles->queryScheduled = true; - qDebug("put query continue msg to query queue"); + QW_SCH_TASK_DLOG("put query continue msg to query queue, vgId:%d", mgmt->nodeId); return TSDB_CODE_SUCCESS; } -int32_t qwHandleFetch(SQWorkerMgmt *mgmt, uint64_t sId, uint64_t queryId, uint64_t taskId, SRpcMsg *pMsg) { +int32_t qwHandleFetch(SQWorkerMgmt *mgmt, uint64_t sId, uint64_t qId, uint64_t tId, SRpcMsg *pMsg) { SQWSchStatus *sch = NULL; SQWTaskStatus *task = NULL; int32_t code = 0; @@ -1049,25 +1055,29 @@ int32_t qwHandleFetch(SQWorkerMgmt *mgmt, uint64_t sId, uint64_t queryId, uint64 SRetrieveTableRsp *rsp = NULL; bool queryEnd = false; SQWTaskCtx *handles = NULL; + int8_t status = 0; - QW_ERR_JRET(qwAcquireTaskCtx(QW_READ, mgmt, queryId, taskId, &handles)); - if (atomic_load_8(&handles->needRsp)) { - qError("last fetch not responsed"); + QW_ERR_JRET(qwAcquireTaskCtx(QW_READ, mgmt, qId, tId, &handles)); + QW_LOCK(QW_WRITE, &handles->lock); + + if (handles->needRsp) { + QW_UNLOCK(QW_WRITE, &handles->lock); + QW_SCH_TASK_ELOG("last fetch not responsed, needRsp:%d", handles->needRsp); QW_ERR_JRET(TSDB_CODE_QRY_APP_ERROR); } - QW_ERR_JRET(qwAcquireScheduler(QW_READ, mgmt, sId, &sch)); - QW_ERR_JRET(qwAcquireTask(mgmt, QW_READ, sch, queryId, taskId, &task)); + QW_UNLOCK(QW_WRITE, &handles->lock); - QW_LOCK(QW_READ, &task->lock); + QW_ERR_JRET(qwAcquireScheduler(QW_READ, mgmt, sId, &sch)); + QW_ERR_JRET(qwAcquireTask(mgmt, QW_READ, sch, qId, tId, &task)); if (task->cancel || task->drop) { - qError("task is already cancelled or dropped"); + QW_SCH_TASK_ELOG("task is already cancelled or 
dropped, cancel:%d, drop:%d", task->cancel, task->drop); QW_ERR_JRET(TSDB_CODE_QRY_APP_ERROR); } if (task->status != JOB_TASK_STATUS_EXECUTING && task->status != JOB_TASK_STATUS_PARTIAL_SUCCEED) { - qError("invalid status %d for fetch", task->status); + QW_SCH_TASK_ELOG("invalid status %d for fetch", task->status); QW_ERR_JRET(TSDB_CODE_QRY_APP_ERROR); } @@ -1075,6 +1085,9 @@ int32_t qwHandleFetch(SQWorkerMgmt *mgmt, uint64_t sId, uint64_t queryId, uint64 if (dataLength > 0) { SOutputData output = {0}; + + QW_SCH_TASK_DLOG("task got data in sink, dataLength:%d", dataLength); + QW_ERR_JRET(qwInitFetchRsp(dataLength, &rsp)); output.pData = rsp->data; @@ -1095,27 +1108,38 @@ int32_t qwHandleFetch(SQWorkerMgmt *mgmt, uint64_t sId, uint64_t queryId, uint64 if (DS_BUF_EMPTY == output.bufStatus && output.queryEnd) { rsp->completed = 1; - QW_ERR_JRET(qwUpdateTaskStatus(mgmt, sId, queryId, taskId, JOB_TASK_STATUS_SUCCEED)); + status = JOB_TASK_STATUS_SUCCEED; + + QW_SCH_TASK_DLOG("task all fetched, status:%d", status); + QW_ERR_JRET(qwUpdateTaskInfo(mgmt, task, QW_TASK_INFO_STATUS, &status, QW_IDS())); } // Note: schedule data sink firstly and will schedule query after it's done if (output.needSchedule) { - QW_ERR_JRET(qwScheduleDataSink(handles, mgmt, sId, queryId, taskId, pMsg)); - } else if ((!output.queryEnd) && (DS_BUF_LOW == output.bufStatus || DS_BUF_EMPTY == output.bufStatus)) { - QW_ERR_JRET(qwScheduleQuery(handles, mgmt, sId, queryId, taskId, pMsg)); + QW_SCH_TASK_DLOG("sink need schedule, queryEnd:%d", output.queryEnd); + QW_ERR_JRET(qwScheduleDataSink(handles, mgmt, sId, qId, tId, pMsg)); + } else if ((!output.queryEnd) && (DS_BUF_LOW == output.bufStatus || DS_BUF_EMPTY == output.bufStatus)) { + QW_SCH_TASK_DLOG("task not end, need to continue, bufStatus:%d", output.bufStatus); + QW_ERR_JRET(qwScheduleQuery(handles, mgmt, sId, qId, tId, pMsg)); } } else { if (dataLength < 0) { - qError("invalid length from dsGetDataLength, length:%d", dataLength); + QW_SCH_TASK_ELOG("invalid length from dsGetDataLength, length:%d", dataLength); QW_ERR_JRET(TSDB_CODE_QRY_INVALID_INPUT); } if (queryEnd) { - QW_ERR_JRET(qwUpdateTaskStatus(mgmt, sId, queryId, taskId, JOB_TASK_STATUS_SUCCEED)); + status = JOB_TASK_STATUS_SUCCEED; + + QW_SCH_TASK_DLOG("no data in sink and query end, dataLength:%d", dataLength); + + QW_ERR_JRET(qwUpdateTaskInfo(mgmt, task, QW_TASK_INFO_STATUS, &status, QW_IDS())); } else { assert(0 == handles->needRsp); + + // MUST IN SCHEDULE OR IN SINK SCHEDULE - qDebug("no res data in sink, need response later"); + QW_SCH_TASK_DLOG("no res data in sink, need response later, queryEnd:%d", queryEnd); QW_LOCK(QW_WRITE, &handles->lock); handles->needRsp = true; @@ -1128,7 +1152,6 @@ int32_t qwHandleFetch(SQWorkerMgmt *mgmt, uint64_t sId, uint64_t queryId, uint64 _return: if (task) { - QW_UNLOCK(QW_READ, &task->lock); qwReleaseTask(QW_READ, sch); } @@ -1212,10 +1235,10 @@ int32_t qWorkerProcessQueryMsg(void *node, void *qWorkerMgmt, SRpcMsg *pMsg) { int32_t code = 0; bool queryRsped = false; bool needStop = false; - bool taskAdded = false; struct SSubplan *plan = NULL; SSubQueryMsg *msg = pMsg->pCont; SQWorkerMgmt *mgmt = (SQWorkerMgmt *)qWorkerMgmt; + int32_t rspCode = 0; if (NULL == msg || pMsg->contLen <= sizeof(*msg)) { QW_ELOG("invalid query msg, contLen:%d", pMsg->contLen); @@ -1237,6 +1260,8 @@ int32_t qWorkerProcessQueryMsg(void *node, void *qWorkerMgmt, SRpcMsg *pMsg) { qwBuildAndSendQueryRsp(pMsg, TSDB_CODE_QRY_TASK_CANCELLED); QW_ERR_RET(TSDB_CODE_QRY_TASK_CANCELLED); } + + 
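Editor's note: when the sink is empty but the query has not ended, qwHandleFetch parks the fetch by setting needRsp and lets the continue-task path answer once data or an error shows up; the "last fetch not responsed" check rejects a second fetch while one is parked. The hand-off in isolation (SFetchCtx and both functions are illustrative reductions of the handle/lock pair in the patch):

#include <pthread.h>
#include <stdbool.h>

typedef struct {
  pthread_mutex_t lock;
  bool            needRsp;    /* a fetch is parked, waiting for data */
  void           *rpcHandle;  /* where the parked response must go   */
} SFetchCtx;

/* fetch arrives, sink is empty, query still running: park it */
int parkFetch(SFetchCtx *ctx, void *rpcHandle) {
  pthread_mutex_lock(&ctx->lock);
  if (ctx->needRsp) {               /* a previous fetch is still parked */
    pthread_mutex_unlock(&ctx->lock);
    return -1;
  }
  ctx->needRsp   = true;
  ctx->rpcHandle = rpcHandle;
  pthread_mutex_unlock(&ctx->lock);
  return 0;
}

/* continue path produced data (or failed): answer the parked fetch */
void answerParkedFetch(SFetchCtx *ctx, void (*rsp)(void *)) {
  pthread_mutex_lock(&ctx->lock);
  if (ctx->needRsp) {
    rsp(ctx->rpcHandle);
    ctx->needRsp = false;
  }
  pthread_mutex_unlock(&ctx->lock);
}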
QW_ERR_JRET(qwAddTask(qWorkerMgmt, sId, qId, tId, JOB_TASK_STATUS_EXECUTING)); code = qStringToSubplan(msg->msg, &plan); if (TSDB_CODE_SUCCESS != code) { @@ -1248,53 +1273,49 @@ int32_t qWorkerProcessQueryMsg(void *node, void *qWorkerMgmt, SRpcMsg *pMsg) { code = qCreateExecTask(node, 0, (struct SSubplan *)plan, &pTaskInfo); if (code) { QW_TASK_ELOG("qCreateExecTask failed, code:%x", code); - QW_ERR_JRET(qwAddTask(qWorkerMgmt, sId, qId, tId, JOB_TASK_STATUS_FAILED)); QW_ERR_JRET(code); } - QW_ERR_JRET(qwAddTask(qWorkerMgmt, sId, qId, tId, JOB_TASK_STATUS_EXECUTING)); - - taskAdded = true; - QW_ERR_JRET(qwBuildAndSendQueryRsp(pMsg, TSDB_CODE_SUCCESS)); queryRsped = true; DataSinkHandle sinkHandle = NULL; code = qExecTask(pTaskInfo, &sinkHandle); - if (code) { QW_TASK_ELOG("qExecTask failed, code:%x", code); QW_ERR_JRET(code); } QW_ERR_JRET(qwAddTaskHandlesToCache(qWorkerMgmt, msg->queryId, msg->taskId, pTaskInfo, sinkHandle)); - QW_ERR_JRET(qwUpdateTaskStatus(qWorkerMgmt, msg->sId, msg->queryId, msg->taskId, JOB_TASK_STATUS_PARTIAL_SUCCEED)); _return: - if (queryRsped) { - code = qwCheckAndSendReadyRsp(qWorkerMgmt, msg->sId, msg->queryId, msg->taskId, pMsg, code); - } else { - code = qwBuildAndSendQueryRsp(pMsg, code); + if (code) { + rspCode = code; + } + + if (!queryRsped) { + code = qwBuildAndSendQueryRsp(pMsg, rspCode); + if (TSDB_CODE_SUCCESS == rspCode && code) { + rspCode = code; + } } int8_t status = 0; - if (TSDB_CODE_SUCCESS != code) { + if (TSDB_CODE_SUCCESS != rspCode) { status = JOB_TASK_STATUS_FAILED; } else { status = JOB_TASK_STATUS_PARTIAL_SUCCEED; } - if (!taskAdded) { - qwAddTask(qWorkerMgmt, sId, qId, tId, status); - - status = -1; - } + qwQueryPostProcess(qWorkerMgmt, msg->sId, msg->queryId, msg->taskId, status, rspCode); - qwQueryPostProcess(qWorkerMgmt, msg->sId, msg->queryId, msg->taskId, status, code); + if (queryRsped) { + qwCheckAndSendReadyRsp(qWorkerMgmt, msg->sId, msg->queryId, msg->taskId, pMsg); + } - QW_RET(code); + QW_RET(rspCode); } int32_t qWorkerProcessQueryContinueMsg(void *node, void *qWorkerMgmt, SRpcMsg *pMsg) { @@ -1306,19 +1327,27 @@ int32_t qWorkerProcessQueryContinueMsg(void *node, void *qWorkerMgmt, SRpcMsg *p SQWTaskCtx *handles = NULL; QW_ERR_JRET(qwAcquireTaskCtx(QW_READ, qWorkerMgmt, req->queryId, req->taskId, &handles)); + QW_LOCK(QW_WRITE, &handles->lock); qTaskInfo_t taskHandle = handles->taskHandle; DataSinkHandle sinkHandle = handles->sinkHandle; - bool needRsp = handles->needRsp; + QW_UNLOCK(QW_WRITE, &handles->lock); qwReleaseTaskResCache(QW_READ, qWorkerMgmt); QW_ERR_JRET(qwCheckAndProcessTaskDrop(qWorkerMgmt, req->sId, req->queryId, req->taskId, &needStop)); if (needStop) { qWarn("task need stop"); - if (needRsp) { + + QW_ERR_JRET(qwAcquireTaskCtx(QW_READ, qWorkerMgmt, req->queryId, req->taskId, &handles)); + QW_LOCK(QW_WRITE, &handles->lock); + if (handles->needRsp) { qwBuildAndSendQueryRsp(pMsg, TSDB_CODE_QRY_TASK_CANCELLED); + handles->needRsp = false; } + QW_UNLOCK(QW_WRITE, &handles->lock); + qwReleaseTaskResCache(QW_READ, qWorkerMgmt); + QW_ERR_RET(TSDB_CODE_QRY_TASK_CANCELLED); } @@ -1336,10 +1365,18 @@ int32_t qWorkerProcessQueryContinueMsg(void *node, void *qWorkerMgmt, SRpcMsg *p _return: - if (needRsp) { + QW_ERR_JRET(qwAcquireTaskCtx(QW_READ, qWorkerMgmt, req->queryId, req->taskId, &handles)); + QW_LOCK(QW_WRITE, &handles->lock); + + if (handles->needRsp) { code = qwBuildAndSendQueryRsp(pMsg, code); + handles->needRsp = false; } - + handles->queryScheduled = false; + + QW_UNLOCK(QW_WRITE, &handles->lock); + 
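Editor's note: the rewritten _return block separates code (the latest call's result) from rspCode (the first failure seen), so a later response or cleanup error never masks the root cause, and the final task status is derived from rspCode alone. The pattern on its own, with stubs simulating the calls:

#include <stdio.h>

static int  doQuery(void)       { return -1; }   /* simulate a failure     */
static int  sendRsp(int code)   { printf("rsp code:%d\n", code); return 0; }
static void postProcess(int c)  { printf("final code:%d\n", c); }

int main(void) {
  int rspCode = 0;
  int code = doQuery();
  if (code) rspCode = code;                 /* record the primary failure  */

  code = sendRsp(rspCode);
  if (rspCode == 0 && code) rspCode = code; /* never mask the first error  */

  postProcess(rspCode);                     /* status derived from rspCode */
  return 0;
}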
qwReleaseTaskResCache(QW_READ, qWorkerMgmt); + if (TSDB_CODE_SUCCESS != code) { status = JOB_TASK_STATUS_FAILED; } else { From a00a8dd90d1cb19453b4b0ee7b06deed682d8407 Mon Sep 17 00:00:00 2001 From: yihaoDeng Date: Thu, 13 Jan 2022 20:59:41 +0800 Subject: [PATCH 07/20] add libuv test --- source/libs/transport/CMakeLists.txt | 7 ++ source/libs/transport/inc/transportInt.h | 53 ++++++++++- source/libs/transport/src/rpcMain.c | 88 +++++++++++++------ source/libs/transport/test/CMakeLists.txt | 21 +++++ source/libs/transport/test/transportTests.cc | 35 ++++++++ source/libs/transport/test/transportTests.cpp | 0 6 files changed, 174 insertions(+), 30 deletions(-) create mode 100644 source/libs/transport/test/CMakeLists.txt create mode 100644 source/libs/transport/test/transportTests.cc delete mode 100644 source/libs/transport/test/transportTests.cpp diff --git a/source/libs/transport/CMakeLists.txt b/source/libs/transport/CMakeLists.txt index c4eeef5df2..61d781210c 100644 --- a/source/libs/transport/CMakeLists.txt +++ b/source/libs/transport/CMakeLists.txt @@ -27,4 +27,11 @@ if (${BUILD_WITH_UV}) add_definitions(-DUSE_UV) endif(${BUILD_WITH_UV}) +if (${BUILD_TEST}) + add_subdirectory(test) +endif(${BUILD_TEST}) + + + + diff --git a/source/libs/transport/inc/transportInt.h b/source/libs/transport/inc/transportInt.h index 9809f7ee1a..067b371b84 100644 --- a/source/libs/transport/inc/transportInt.h +++ b/source/libs/transport/inc/transportInt.h @@ -22,9 +22,58 @@ extern "C" { #ifdef USE_UV -#else +#include +typedef void *queue[2]; + +/* Private macros. */ +#define QUEUE_NEXT(q) (*(queue **)&((*(q))[0])) +#define QUEUE_PREV(q) (*(queue **)&((*(q))[1])) + +#define QUEUE_PREV_NEXT(q) (QUEUE_NEXT(QUEUE_PREV(q))) +#define QUEUE_NEXT_PREV(q) (QUEUE_PREV(QUEUE_NEXT(q))) + +/* Initialize an empty queue. */ +#define QUEUE_INIT(q) \ + { \ + QUEUE_NEXT(q) = (q); \ + QUEUE_PREV(q) = (q); \ + } + +/* Return true if the queue has no element. */ +#define QUEUE_IS_EMPTY(q) ((const queue *)(q) == (const queue *)QUEUE_NEXT(q)) + +/* Insert an element at the back of a queue. */ +#define QUEUE_PUSH(q, e) \ + { \ + QUEUE_NEXT(e) = (q); \ + QUEUE_PREV(e) = QUEUE_PREV(q); \ + QUEUE_PREV_NEXT(e) = (e); \ + QUEUE_PREV(q) = (e); \ + } + +/* Remove the given element from the queue. Any element can be removed at any * + * time. */ +#define QUEUE_REMOVE(e) \ + { \ + QUEUE_PREV_NEXT(e) = QUEUE_NEXT(e); \ + QUEUE_NEXT_PREV(e) = QUEUE_PREV(e); \ + } + +/* Return the element at the front of the queue. */ +#define QUEUE_HEAD(q) (QUEUE_NEXT(q)) + +/* Return the element at the back of the queue. */ +#define QUEUE_TAIL(q) (QUEUE_PREV(q)) + +/* Iterate over the element of a queue. * Mutating the queue while iterating + * results in undefined behavior. */ +#define QUEUE_FOREACH(q, e) for ((q) = QUEUE_NEXT(e); (q) != (e); (q) = QUEUE_NEXT(q)) + +/* Return the structure holding the given element. 
*/ +#define QUEUE_DATA(e, type, field) ((type *)((void *)((char *)(e)-offsetof(type, field)))) + +#endif // USE_LIBUV -#endif #ifdef __cplusplus } #endif diff --git a/source/libs/transport/src/rpcMain.c b/source/libs/transport/src/rpcMain.c index 542bde37b9..818d129032 100644 --- a/source/libs/transport/src/rpcMain.c +++ b/source/libs/transport/src/rpcMain.c @@ -28,6 +28,7 @@ #include "tmd5.h" #include "tmempool.h" #include "tmsg.h" +#include "transportInt.h" #include "tref.h" #include "trpc.h" #include "ttimer.h" @@ -68,11 +69,13 @@ typedef struct { #define container_of(ptr, type, member) ((type*)((char*)(ptr)-offsetof(type, member))) typedef struct SThreadObj { - pthread_t thread; - uv_pipe_t* pipe; - uv_loop_t* loop; - uv_async_t* workerAsync; // - int fd; + pthread_t thread; + uv_pipe_t* pipe; + uv_loop_t* loop; + uv_async_t* workerAsync; // + int fd; + queue conn; + pthread_mutex_t connMtx; } SThreadObj; typedef struct SServerObj { @@ -88,10 +91,12 @@ typedef struct SServerObj { } SServerObj; typedef struct SConnCtx { - uv_tcp_t* pClient; + uv_tcp_t* pTcp; uv_timer_t* pTimer; uv_async_t* pWorkerAsync; + queue queue; int ref; + int persist; // persist connection or not } SConnCtx; static void allocBuffer(uv_handle_t* handle, size_t suggested_size, uv_buf_t* buf); @@ -110,6 +115,9 @@ void* taosInitServer(uint32_t ip, uint32_t port, char* label, int numOfThreads, int32_t rpcInit() { return -1; } void rpcCleanup() { return; }; +void* taosInitClient(uint32_t ip, uint32_t port, char* label, int numOfThreads, void* fp, void* shandle) { + // opte +} void* taosInitServer(uint32_t ip, uint32_t port, char* label, int numOfThreads, void* fp, void* shandle) { SServerObj* srv = calloc(1, sizeof(SServerObj)); srv->loop = (uv_loop_t*)malloc(sizeof(uv_loop_t)); @@ -122,30 +130,32 @@ void* taosInitServer(uint32_t ip, uint32_t port, char* label, int numOfThreads, uv_loop_init(srv->loop); for (int i = 0; i < srv->numOfThread; i++) { - srv->pThreadObj[i] = (SThreadObj*)calloc(1, sizeof(SThreadObj)); - srv->pipe[i] = (uv_pipe_t*)calloc(2, sizeof(uv_pipe_t)); + SThreadObj* thrd = (SThreadObj*)calloc(1, sizeof(SThreadObj)); + int fds[2]; if (uv_socketpair(AF_UNIX, SOCK_STREAM, fds, UV_NONBLOCK_PIPE, UV_NONBLOCK_PIPE) != 0) { return NULL; } + srv->pipe[i] = (uv_pipe_t*)calloc(2, sizeof(uv_pipe_t)); uv_pipe_init(srv->loop, &(srv->pipe[i][0]), 1); uv_pipe_open(&(srv->pipe[i][0]), fds[1]); // init write - srv->pThreadObj[i]->fd = fds[0]; - srv->pThreadObj[i]->pipe = &(srv->pipe[i][1]); // init read - int err = pthread_create(&(srv->pThreadObj[i]->thread), NULL, workerThread, (void*)(srv->pThreadObj[i])); + thrd->fd = fds[0]; + thrd->pipe = &(srv->pipe[i][1]); // init read + int err = pthread_create(&(thrd->thread), NULL, workerThread, (void*)(thrd)); if (err == 0) { - tDebug("sucess to create worker thread %d", i); + tDebug("sucess to create worker-thread %d", i); // printf("thread %d create\n", i); } else { // clear all resource later - tError("failed to create worker thread %d", i); + tError("failed to create worker-thread %d", i); } + srv->pThreadObj[i] = thrd; } int err = pthread_create(&srv->thread, NULL, acceptThread, (void*)srv); if (err == 0) { - tDebug("success to create accept thread"); + tDebug("success to create accept-thread"); } else { // clear all resource later } @@ -158,7 +168,7 @@ void* rpcOpen(const SRpcInit* pInit) { return NULL; } if (pInit->label) { - tstrncpy(pRpc->label, pInit->label, sizeof(pRpc->label)); + tstrncpy(pRpc->label, pInit->label, strlen(pInit->label)); } pRpc->numOfThreads = 
pInit->numOfThreads > TSDB_MAX_RPC_THREADS ? TSDB_MAX_RPC_THREADS : pInit->numOfThreads; @@ -198,29 +208,45 @@ void onWrite(uv_write_t* req, int status) { if (status == 0) { tDebug("data already was written on stream"); } + free(req); // opt } void workerAsyncCB(uv_async_t* handle) { - // opt SThreadObj* pObj = container_of(handle, SThreadObj, workerAsync); + SConnCtx* conn = NULL; + + // opt later + pthread_mutex_lock(&pObj->connMtx); + if (!QUEUE_IS_EMPTY(&pObj->conn)) { + queue* head = QUEUE_HEAD(&pObj->conn); + conn = QUEUE_DATA(head, SConnCtx, queue); + QUEUE_REMOVE(&conn->queue); + } + pthread_mutex_unlock(&pObj->connMtx); + if (conn == NULL) { + tError("except occurred, do nothing"); + return; + } } + void onAccept(uv_stream_t* stream, int status) { if (status == -1) { return; } SServerObj* pObj = container_of(stream, SServerObj, server); - tDebug("new conntion accepted by main server, dispatch to one worker thread"); uv_tcp_t* cli = (uv_tcp_t*)malloc(sizeof(uv_tcp_t)); uv_tcp_init(pObj->loop, cli); + if (uv_accept(stream, (uv_stream_t*)cli) == 0) { uv_write_t* wr = (uv_write_t*)malloc(sizeof(uv_write_t)); uv_buf_t buf = uv_buf_init("a", 1); pObj->workerIdx = (pObj->workerIdx + 1) % pObj->numOfThread; + tDebug("new conntion accepted by main server, dispatch to %dth worker-thread", pObj->workerIdx); uv_write2(wr, (uv_stream_t*)&(pObj->pipe[pObj->workerIdx][0]), &buf, 1, (uv_stream_t*)cli, onWrite); } else { uv_close((uv_handle_t*)cli, NULL); @@ -250,21 +276,21 @@ void onConnection(uv_stream_t* q, ssize_t nread, const uv_buf_t* buf) { pConn->pTimer = malloc(sizeof(uv_timer_t)); uv_timer_init(pObj->loop, pConn->pTimer); - pConn->pClient = (uv_tcp_t*)malloc(sizeof(uv_tcp_t)); + pConn->pTcp = (uv_tcp_t*)malloc(sizeof(uv_tcp_t)); pConn->pWorkerAsync = pObj->workerAsync; // thread safty - uv_tcp_init(pObj->loop, pConn->pClient); + uv_tcp_init(pObj->loop, pConn->pTcp); - if (uv_accept(q, (uv_stream_t*)(pConn->pClient)) == 0) { + if (uv_accept(q, (uv_stream_t*)(pConn->pTcp)) == 0) { uv_os_fd_t fd; - uv_fileno((const uv_handle_t*)pConn->pClient, &fd); + uv_fileno((const uv_handle_t*)pConn->pTcp, &fd); tDebug("new connection created: %d", fd); uv_timer_start(pConn->pTimer, onTimeout, 10, 0); - uv_read_start((uv_stream_t*)(pConn->pClient), allocBuffer, onRead); + uv_read_start((uv_stream_t*)(pConn->pTcp), allocBuffer, onRead); } else { uv_timer_stop(pConn->pTimer); free(pConn->pTimer); - uv_close((uv_handle_t*)pConn->pClient, NULL); - free(pConn->pClient); + uv_close((uv_handle_t*)pConn->pTcp, NULL); + free(pConn->pTcp); free(pConn); } } @@ -276,7 +302,6 @@ void* acceptThread(void* arg) { struct sockaddr_in bind_addr; - int port = 6030; uv_ip4_addr("0.0.0.0", srv->port, &bind_addr); uv_tcp_bind(&srv->server, (const struct sockaddr*)&bind_addr, 0); int err = 0; @@ -288,16 +313,22 @@ void* acceptThread(void* arg) { } void* workerThread(void* arg) { SThreadObj* pObj = (SThreadObj*)arg; - int fd = pObj->fd; + pObj->loop = (uv_loop_t*)malloc(sizeof(uv_loop_t)); uv_loop_init(pObj->loop); uv_pipe_init(pObj->loop, pObj->pipe, 1); - uv_pipe_open(pObj->pipe, fd); + uv_pipe_open(pObj->pipe, pObj->fd); + + QUEUE_INIT(&pObj->conn); pObj->workerAsync = malloc(sizeof(uv_async_t)); uv_async_init(pObj->loop, pObj->workerAsync, workerAsyncCB); + + // pObj->workerAsync->data = (void*)pObj; + uv_read_start((uv_stream_t*)pObj->pipe, allocBuffer, onConnection); + uv_run(pObj->loop, UV_RUN_DEFAULT); } #else @@ -471,7 +502,8 @@ void *rpcOpen(const SRpcInit *pInit) { pRpc = (SRpcInfo *)calloc(1, sizeof(SRpcInfo)); if 
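Editor's note: workerAsyncCB drains the per-thread conn list as a classic intrusive-queue consumer: the producer pushes under connMtx and wakes the loop (via uv_async_send on the registered workerAsync handle), and the callback pops the head under the same mutex. A sketch written against the QUEUE_* macros from transportInt.h above, assuming that header is on the include path and the build defines USE_UV; the toy main stands in for the uv_async wakeup:

#include <pthread.h>
#include <stdio.h>
#include "transportInt.h"   /* queue, QUEUE_INIT, QUEUE_PUSH, QUEUE_DATA */

typedef struct SConnCtx {
  int   fd;
  queue q;                  /* intrusive link, like SConnCtx.queue       */
} SConnCtx;

static queue           connHead;
static pthread_mutex_t connMtx = PTHREAD_MUTEX_INITIALIZER;

void enqueueConn(SConnCtx *conn) {
  pthread_mutex_lock(&connMtx);
  QUEUE_PUSH(&connHead, &conn->q);
  pthread_mutex_unlock(&connMtx);
  /* real code would now uv_async_send(pObj->workerAsync) */
}

/* the async callback pops one conn per wakeup, as in the patch */
SConnCtx *dequeueConn(void) {
  SConnCtx *conn = NULL;
  pthread_mutex_lock(&connMtx);
  if (!QUEUE_IS_EMPTY(&connHead)) {
    queue *head = QUEUE_HEAD(&connHead);
    conn = QUEUE_DATA(head, SConnCtx, q);
    QUEUE_REMOVE(&conn->q);
  }
  pthread_mutex_unlock(&connMtx);
  return conn;
}

int main(void) {
  QUEUE_INIT(&connHead);
  SConnCtx a = {.fd = 3};
  enqueueConn(&a);
  SConnCtx *c = dequeueConn();
  printf("fd:%d\n", c ? c->fd : -1);   /* -> fd:3 */
  return 0;
}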
(pRpc == NULL) return NULL; - if (pInit->label) tstrncpy(pRpc->label, pInit->label, sizeof(pRpc->label)); + if (pInit->label) tstrncpy(pRpc->label, pInit->label, strlen(pInit->label)); + pRpc->connType = pInit->connType; if (pRpc->connType == TAOS_CONN_CLIENT) { pRpc->numOfThreads = pInit->numOfThreads; diff --git a/source/libs/transport/test/CMakeLists.txt b/source/libs/transport/test/CMakeLists.txt new file mode 100644 index 0000000000..9e58bf08cd --- /dev/null +++ b/source/libs/transport/test/CMakeLists.txt @@ -0,0 +1,21 @@ +add_executable(transportTest "") +target_sources(transportTest + PRIVATE + "transportTests.cc" +) + +target_include_directories(transportTest + PUBLIC + "${CMAKE_SOURCE_DIR}/include/libs/transport" + "${CMAKE_CURRENT_SOURCE_DIR}/../inc" +) + +target_link_libraries (transportTest + os + util + common + gtest_main + transport +) + + diff --git a/source/libs/transport/test/transportTests.cc b/source/libs/transport/test/transportTests.cc new file mode 100644 index 0000000000..468aeba8a9 --- /dev/null +++ b/source/libs/transport/test/transportTests.cc @@ -0,0 +1,35 @@ +/* + * Copyright (c) 2019 TAOS Data, Inc. + * + * This program is free software: you can use, redistribute, and/or modify + * it under the terms of the GNU Affero General Public License, version 3 * or later ("AGPL"), as published by the Free + * Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see . + */ + +#include +#include +#include +#include +#include + +#include "transportInt.h" +#include "trpc.h" + +using namespace std; + +int main() { + SRpcInit init = {.localPort = 6030, .label = "rpc", .numOfThreads = 5}; + void* p = rpcOpen(&init); + + while (1) { + std::cout << "cron task" << std::endl; + std::this_thread::sleep_for(std::chrono::milliseconds(10 * 1000)); + } +} diff --git a/source/libs/transport/test/transportTests.cpp b/source/libs/transport/test/transportTests.cpp deleted file mode 100644 index e69de29bb2..0000000000 From 3b69bde0cec2dcddd9a4e216007a5abc66934c1c Mon Sep 17 00:00:00 2001 From: Liu Jicong Date: Fri, 14 Jan 2022 10:48:05 +0800 Subject: [PATCH 08/20] add mq defination --- include/client/taos.h | 3 +- include/common/tmsg.h | 34 +++++++ include/common/tmsgdef.h | 2 +- source/client/inc/clientHb.h | 78 --------------- source/client/inc/clientInt.h | 74 +++++++++++++-- source/client/src/clientHb.c | 43 ++++++--- source/client/src/clientImpl.c | 115 +++++++++++++++++++++++ source/client/src/clientMsgHandler.c | 5 +- source/dnode/mnode/impl/src/mndProfile.c | 18 ++++ source/libs/wal/src/walMeta.c | 6 +- 10 files changed, 276 insertions(+), 102 deletions(-) delete mode 100644 source/client/inc/clientHb.h diff --git a/include/client/taos.h b/include/client/taos.h index 4669ca51f7..db33dbf8a2 100644 --- a/include/client/taos.h +++ b/include/client/taos.h @@ -193,8 +193,7 @@ DLL_EXPORT void taos_close_stream(TAOS_STREAM *tstr); DLL_EXPORT int taos_load_table_info(TAOS *taos, const char* tableNameList); DLL_EXPORT TAOS_RES* taos_schemaless_insert(TAOS* taos, char* lines[], int numLines, int protocol, int precision); - -DLL_EXPORT TAOS_RES *tmq_create_topic(TAOS* taos, const char* name, const char* sql, int sqlLen); +DLL_EXPORT TAOS_RES* tmq_create_topic(TAOS* taos, const char* name, const char* sql, int 
sqlLen); #ifdef __cplusplus } diff --git a/include/common/tmsg.h b/include/common/tmsg.h index fdf64b7af2..9f6d268ab2 100644 --- a/include/common/tmsg.h +++ b/include/common/tmsg.h @@ -68,6 +68,14 @@ typedef uint16_t tmsg_t; #define TSDB_IE_TYPE_DNODE_EXT 6 #define TSDB_IE_TYPE_DNODE_STATE 7 +typedef enum { + HEARTBEAT_TYPE_MQ = 0, + HEARTBEAT_TYPE_QUERY = 1, + // types can be added here + // + HEARTBEAT_TYPE_MAX +} EHbType; + typedef enum _mgmt_table { TSDB_MGMT_TABLE_START, TSDB_MGMT_TABLE_ACCT, @@ -220,6 +228,7 @@ static FORCE_INLINE void* taosDecodeSClientHbKey(void* buf, SClientHbKey* pKey) return buf; } + typedef struct { int32_t vgId; char* dbName; @@ -359,6 +368,31 @@ static FORCE_INLINE void* taosDecodeSEpSet(void* buf, SEpSet* pEp) { return buf; } +typedef struct SMqHbRsp { + int8_t status; //idle or not + int8_t vnodeChanged; + int8_t epChanged; // should use new epset + int8_t reserved; + SEpSet epSet; +} SMqHbRsp; + +static FORCE_INLINE int taosEncodeSMqHbRsp(void** buf, const SMqHbRsp* pRsp) { + int tlen = 0; + tlen += taosEncodeFixedI8(buf, pRsp->status); + tlen += taosEncodeFixedI8(buf, pRsp->vnodeChanged); + tlen += taosEncodeFixedI8(buf, pRsp->epChanged); + tlen += taosEncodeSEpSet(buf, &pRsp->epSet); + return tlen; +} + +static FORCE_INLINE void* taosDecodeSMqHbRsp(void* buf, SMqHbRsp* pRsp) { + buf = taosDecodeFixedI8(buf, &pRsp->status); + buf = taosDecodeFixedI8(buf, &pRsp->vnodeChanged); + buf = taosDecodeFixedI8(buf, &pRsp->epChanged); + buf = taosDecodeSEpSet(buf, &pRsp->epSet); + return buf; +} + typedef struct { int32_t acctId; int64_t clusterId; diff --git a/include/common/tmsgdef.h b/include/common/tmsgdef.h index 592672b32b..654174966d 100644 --- a/include/common/tmsgdef.h +++ b/include/common/tmsgdef.h @@ -129,7 +129,7 @@ enum { TD_DEF_MSG_TYPE(TDMT_MND_VGROUP_LIST, "mnode-vgroup-list", NULL, NULL) TD_DEF_MSG_TYPE(TDMT_MND_KILL_QUERY, "mnode-kill-query", NULL, NULL) TD_DEF_MSG_TYPE(TDMT_MND_KILL_CONN, "mnode-kill-conn", NULL, NULL) - TD_DEF_MSG_TYPE(TDMT_MND_HEARTBEAT, "mnode-heartbeat", NULL, NULL) + TD_DEF_MSG_TYPE(TDMT_MND_HEARTBEAT, "mnode-heartbeat", SClientHbBatchReq, SClientHbBatchRsp) TD_DEF_MSG_TYPE(TDMT_MND_SHOW, "mnode-show", NULL, NULL) TD_DEF_MSG_TYPE(TDMT_MND_SHOW_RETRIEVE, "mnode-retrieve", NULL, NULL) TD_DEF_MSG_TYPE(TDMT_MND_STATUS, "mnode-status", NULL, NULL) diff --git a/source/client/inc/clientHb.h b/source/client/inc/clientHb.h deleted file mode 100644 index 7bc4311b29..0000000000 --- a/source/client/inc/clientHb.h +++ /dev/null @@ -1,78 +0,0 @@ -/* - * Copyright (c) 2019 TAOS Data, Inc. - * - * This program is free software: you can use, redistribute, and/or modify - * it under the terms of the GNU Affero General Public License, version 3 - * or later ("AGPL"), as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. - * - * You should have received a copy of the GNU Affero General Public License - * along with this program. If not, see . 
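Editor's note: taosEncodeSMqHbRsp/taosDecodeSMqHbRsp follow the codebase's symmetric encoder convention: encoders take void **buf, advance it, and return the byte count, which (given the helpers' usual behavior of writing only when buf is non-NULL) enables the two-pass serialization seen later in hbThreadFunc, i.e. size with a NULL buffer, allocate, then encode again. A toy pair implementing the same convention; encodeI8 and SToyRsp are re-implementations for illustration, not the TDengine helpers themselves:

#include <assert.h>
#include <stdint.h>
#include <stdlib.h>
#include <string.h>

static int encodeI8(void **buf, int8_t v) {
  if (buf != NULL) {                    /* NULL means "just measure"   */
    memcpy(*buf, &v, sizeof(v));
    *buf = (char *)*buf + sizeof(v);
  }
  return (int)sizeof(v);
}

static void *decodeI8(void *buf, int8_t *v) {
  memcpy(v, buf, sizeof(*v));
  return (char *)buf + sizeof(*v);
}

typedef struct { int8_t status; int8_t epChanged; } SToyRsp;

static int encodeToyRsp(void **buf, const SToyRsp *r) {
  int tlen = 0;                         /* field order must match decode */
  tlen += encodeI8(buf, r->status);
  tlen += encodeI8(buf, r->epChanged);
  return tlen;
}

static void *decodeToyRsp(void *buf, SToyRsp *r) {
  buf = decodeI8(buf, &r->status);
  buf = decodeI8(buf, &r->epChanged);
  return buf;
}

int main(void) {
  SToyRsp in = {.status = 1, .epChanged = 0}, out = {0};
  int   tlen = encodeToyRsp(NULL, &in);   /* pass 1: size only      */
  void *msg  = malloc(tlen);
  void *p    = msg;
  encodeToyRsp(&p, &in);                  /* pass 2: actually write */
  decodeToyRsp(msg, &out);
  assert(out.status == 1 && out.epChanged == 0);
  free(msg);
  return 0;
}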
- */ - -#include "os.h" -#include "tarray.h" -#include "thash.h" -#include "tmsg.h" - -#define HEARTBEAT_INTERVAL 1500 // ms - -typedef enum { - HEARTBEAT_TYPE_MQ = 0, - // types can be added here - // - HEARTBEAT_TYPE_MAX -} EHbType; - -typedef int32_t (*FHbRspHandle)(SClientHbRsp* pReq); - -typedef struct SAppHbMgr { - // statistics - int32_t reportCnt; - int32_t connKeyCnt; - int64_t reportBytes; // not implemented - int64_t startTime; - // ctl - SRWLatch lock; // lock is used in serialization - // connection - void* transporter; - SEpSet epSet; - // info - SHashObj* activeInfo; // hash - SHashObj* getInfoFuncs; // hash -} SAppHbMgr; - -typedef struct SClientHbMgr { - int8_t inited; - // ctl - int8_t threadStop; - pthread_t thread; - pthread_mutex_t lock; // used when app init and cleanup - SArray* appHbMgrs; // SArray one for each cluster - FHbRspHandle handle[HEARTBEAT_TYPE_MAX]; -} SClientHbMgr; - -// TODO: embed param into function -// return type: SArray -typedef SArray* (*FGetConnInfo)(SClientHbKey connKey, void* param); - -// global, called by mgmt -int hbMgrInit(); -void hbMgrCleanUp(); -int hbHandleRsp(SClientHbBatchRsp* hbRsp); - -// cluster level -SAppHbMgr* appHbMgrInit(void* transporter, SEpSet epSet); -void appHbMgrCleanup(SAppHbMgr* pAppHbMgr); - -// conn level -int hbRegisterConn(SAppHbMgr* pAppHbMgr, SClientHbKey connKey, FGetConnInfo func); -void hbDeregisterConn(SAppHbMgr* pAppHbMgr, SClientHbKey connKey); - -int hbAddConnInfo(SAppHbMgr* pAppHbMgr, SClientHbKey connKey, void* key, void* value, int32_t keyLen, int32_t valueLen); - -// mq -void hbMgrInitMqHbRspHandle(); diff --git a/source/client/inc/clientInt.h b/source/client/inc/clientInt.h index 26afe237c9..71c441419d 100644 --- a/source/client/inc/clientInt.h +++ b/source/client/inc/clientInt.h @@ -31,6 +31,41 @@ extern "C" { #include "trpc.h" #include "query.h" +#define HEARTBEAT_INTERVAL 1500 // ms + +typedef struct SAppInstInfo SAppInstInfo; + +typedef int32_t (*FHbRspHandle)(SClientHbRsp* pReq); + +typedef struct SAppHbMgr { + // statistics + int32_t reportCnt; + int32_t connKeyCnt; + int64_t reportBytes; // not implemented + int64_t startTime; + // ctl + SRWLatch lock; // lock is used in serialization + // connection + SAppInstInfo* pAppInstInfo; + // info + SHashObj* activeInfo; // hash + SHashObj* getInfoFuncs; // hash +} SAppHbMgr; + +typedef struct SClientHbMgr { + int8_t inited; + // ctl + int8_t threadStop; + pthread_t thread; + pthread_mutex_t lock; // used when app init and cleanup + SArray* appHbMgrs; // SArray one for each cluster + FHbRspHandle handle[HEARTBEAT_TYPE_MAX]; +} SClientHbMgr; + +// TODO: embed param into function +// return type: SArray +typedef SArray* (*FGetConnInfo)(SClientHbKey connKey, void* param); + typedef struct SQueryExecMetric { int64_t start; // start timestamp int64_t parsed; // start to parse @@ -55,15 +90,15 @@ typedef struct SHeartBeatInfo { void *pTimer; // timer, used to send request msg to mnode } SHeartBeatInfo; -typedef struct SAppInstInfo { - int64_t numOfConns; - SCorEpSet mgmtEp; - SInstanceSummary summary; +struct SAppInstInfo { + int64_t numOfConns; + SCorEpSet mgmtEp; + SInstanceSummary summary; SList *pConnList; // STscObj linked list - int64_t clusterId; + int64_t clusterId; void *pTransporter; - SHeartBeatInfo hb; -} SAppInstInfo; + struct SAppHbMgr *pAppHbMgr; +}; typedef struct SAppInfo { int64_t startTime; @@ -81,6 +116,7 @@ typedef struct STscObj { char db[TSDB_DB_FNAME_LEN]; int32_t acctId; uint32_t connId; + int32_t connType; uint64_t id; // ref ID 
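Editor's note: SClientHbMgr.handle[] is a per-type response dispatch table. hbMgrInitMqHbRspHandle installs hbMqHbRspHandle at HEARTBEAT_TYPE_MQ, and the batch-response path can then index straight into the array by EHbType. The mechanism in miniature, with illustrative stand-in names and the payload reduced to void*:

#include <stdio.h>

typedef enum { HB_TYPE_MQ = 0, HB_TYPE_QUERY = 1, HB_TYPE_MAX } EHbType;
typedef int (*FRspHandle)(void *rsp);

static FRspHandle handle[HB_TYPE_MAX];

static int mqRspHandle(void *rsp)    { (void)rsp; printf("mq hb rsp\n");    return 0; }
static int queryRspHandle(void *rsp) { (void)rsp; printf("query hb rsp\n"); return 0; }

/* mirrors hbMgrInitMqHbRspHandle: each module installs its own handler */
static void initMqHandle(void)    { handle[HB_TYPE_MQ]    = mqRspHandle; }
static void initQueryHandle(void) { handle[HB_TYPE_QUERY] = queryRspHandle; }

static int dispatchRsp(EHbType type, void *rsp) {
  if (type >= HB_TYPE_MAX || handle[type] == NULL) return -1;
  return handle[type](rsp);     /* e.g. the MQ handler for HB_TYPE_MQ */
}

int main(void) {
  initMqHandle();
  initQueryHandle();
  return dispatchRsp(HB_TYPE_MQ, NULL);   /* -> "mq hb rsp" */
}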
returned by taosAddRef void *pTransporter; pthread_mutex_t mutex; // used to protect the operation on db @@ -88,6 +124,10 @@ typedef struct STscObj { SAppInstInfo *pAppInfo; } STscObj; +typedef struct SMqConsumer { + STscObj* pTscObj; +} SMqConsumer; + typedef struct SReqResultInfo { const char *pRspMsg; const char *pData; @@ -169,6 +209,26 @@ TAOS_RES *taos_query_l(TAOS *taos, const char *sql, int sqlLen); void *doFetchRow(SRequestObj* pRequest); void setResultDataPtr(SReqResultInfo* pResultInfo, TAOS_FIELD* pFields, int32_t numOfCols, int32_t numOfRows); +// --- heartbeat +// global, called by mgmt +int hbMgrInit(); +void hbMgrCleanUp(); +int hbHandleRsp(SClientHbBatchRsp* hbRsp); + +// cluster level +SAppHbMgr* appHbMgrInit(SAppInstInfo* pAppInstInfo); +void appHbMgrCleanup(SAppHbMgr* pAppHbMgr); + +// conn level +int hbRegisterConn(SAppHbMgr* pAppHbMgr, SClientHbKey connKey, FGetConnInfo func); +void hbDeregisterConn(SAppHbMgr* pAppHbMgr, SClientHbKey connKey); + +int hbAddConnInfo(SAppHbMgr* pAppHbMgr, SClientHbKey connKey, void* key, void* value, int32_t keyLen, int32_t valueLen); + +// --- mq +void hbMgrInitMqHbRspHandle(); + + #ifdef __cplusplus } #endif diff --git a/source/client/src/clientHb.c b/source/client/src/clientHb.c index 9bbd62c1d9..0d343b1b77 100644 --- a/source/client/src/clientHb.c +++ b/source/client/src/clientHb.c @@ -13,6 +13,7 @@ * along with this program. If not, see . */ +#include "clientInt.h" #include "clientHb.h" #include "trpc.h" @@ -21,10 +22,18 @@ static SClientHbMgr clientHbMgr = {0}; static int32_t hbCreateThread(); static void hbStopThread(); -static int32_t hbMqHbRspHandle(SClientHbRsp* pReq) { +static int32_t hbMqHbRspHandle(SClientHbRsp* pRsp) { return 0; } +static int32_t hbMqAsyncCallBack(void* param, const SDataBuf* pMsg, int32_t code) { + if (code != 0) { + return -1; + } + SClientHbRsp* pRsp = (SClientHbRsp*) pMsg->pData; + return hbMqHbRspHandle(pRsp); +} + void hbMgrInitMqHbRspHandle() { clientHbMgr.handle[HEARTBEAT_TYPE_MQ] = hbMqHbRspHandle; } @@ -77,18 +86,31 @@ static void* hbThreadFunc(void* param) { for(int i = 0; i < sz; i++) { SAppHbMgr* pAppHbMgr = taosArrayGet(clientHbMgr.appHbMgrs, i); SClientHbBatchReq* pReq = hbGatherAllInfo(pAppHbMgr); - void* reqStr = NULL; - int tlen = tSerializeSClientHbBatchReq(&reqStr, pReq); + int tlen = tSerializeSClientHbBatchReq(NULL, pReq); + void *buf = malloc(tlen); + if (buf == NULL) { + //TODO: error handling + break; + } + tSerializeSClientHbBatchReq(buf, pReq); SMsgSendInfo info; - /*info.fp = hbHandleRsp;*/ + info.fp = hbMqAsyncCallBack; + info.msgInfo.pData = buf; + info.msgInfo.len = tlen; + info.msgType = TDMT_MND_HEARTBEAT; + info.param = NULL; + info.requestId = generateRequestId(); + info.requestObjRefId = -1; + SAppInstInfo *pAppInstInfo = pAppHbMgr->pAppInstInfo; int64_t transporterId = 0; - asyncSendMsgToServer(pAppHbMgr->transporter, &pAppHbMgr->epSet, &transporterId, &info); + SEpSet epSet = getEpSet_s(&pAppInstInfo->mgmtEp); + asyncSendMsgToServer(pAppInstInfo->pTransporter, &epSet, &transporterId, &info); tFreeClientHbBatchReq(pReq); atomic_add_fetch_32(&pAppHbMgr->reportCnt, 1); - taosMsleep(HEARTBEAT_INTERVAL); } + taosMsleep(HEARTBEAT_INTERVAL); } return NULL; } @@ -110,7 +132,8 @@ static void hbStopThread() { atomic_store_8(&clientHbMgr.threadStop, 1); } -SAppHbMgr* appHbMgrInit(void* transporter, SEpSet epSet) { +SAppHbMgr* appHbMgrInit(SAppInstInfo* pAppInstInfo) { + hbMgrInit(); SAppHbMgr* pAppHbMgr = malloc(sizeof(SAppHbMgr)); if (pAppHbMgr == NULL) { terrno = 
TSDB_CODE_OUT_OF_MEMORY; @@ -119,9 +142,8 @@ SAppHbMgr* appHbMgrInit(void* transporter, SEpSet epSet) { // init stat pAppHbMgr->startTime = taosGetTimestampMs(); - // init connection info - pAppHbMgr->transporter = transporter; - pAppHbMgr->epSet = epSet; + // init app info + pAppHbMgr->pAppInstInfo = pAppInstInfo; // init hash info pAppHbMgr->activeInfo = taosHashInit(64, hbKeyHashFunc, 1, HASH_ENTRY_LOCK); @@ -171,7 +193,6 @@ void hbMgrCleanUp() { if (old == 0) return; taosArrayDestroy(clientHbMgr.appHbMgrs); - } int hbHandleRsp(SClientHbBatchRsp* hbRsp) { diff --git a/source/client/src/clientImpl.c b/source/client/src/clientImpl.c index d18142cebf..5ee71fc95a 100644 --- a/source/client/src/clientImpl.c +++ b/source/client/src/clientImpl.c @@ -113,6 +113,7 @@ TAOS *taos_connect_internal(const char *ip, const char *user, const char *pass, SAppInstInfo* p = calloc(1, sizeof(struct SAppInstInfo)); p->mgmtEp = epSet; p->pTransporter = openTransporter(user, secretEncrypt, tsNumOfCores); + p->pAppHbMgr = appHbMgrInit(p); taosHashPut(appInfo.pInstMap, key, strlen(key), &p, POINTER_BYTES); pInst = &p; @@ -220,6 +221,101 @@ int32_t scheduleQuery(SRequestObj* pRequest, SQueryDag* pDag, void** pJob) { return scheduleAsyncExecJob(pRequest->pTscObj->pTransporter, NULL /*todo appInfo.xxx*/, pDag, pJob); } +typedef struct tmq_t tmq_t; + +typedef struct SMqClientTopic { + // subscribe info + int32_t sqlLen; + char* sql; + char* topicName; + int64_t topicId; + // statistics + int64_t consumeCnt; + // offset + int64_t committedOffset; + int64_t currentOffset; + //connection info + int32_t vgId; + SEpSet epSet; +} SMqClientTopic; + +typedef struct tmq_resp_err_t { + int32_t code; +} tmq_resp_err_t; + +typedef struct tmq_topic_vgroup_list_t { + char* topicName; + int32_t vgId; + int64_t committedOffset; +} tmq_topic_vgroup_list_t; + +typedef void (tmq_commit_cb(tmq_t*, tmq_resp_err_t, tmq_topic_vgroup_list_t*, void* param)); + +typedef struct tmq_conf_t{ + char* clientId; + char* groupId; + char* ip; + uint16_t port; + tmq_commit_cb* commit_cb; +} tmq_conf_t; + +struct tmq_t { + char groupId[256]; + char clientId[256]; + STscObj* pTscObj; + tmq_commit_cb* commit_cb; + SArray* clientTopics; // SArray +}; + +void tmq_conf_set_offset_commit_cb(tmq_conf_t* conf, tmq_commit_cb* cb) { + conf->commit_cb = cb; +} + +SArray* tmqGetConnInfo(SClientHbKey connKey, void* param) { + tmq_t* pTmq = (void*)param; + SArray* pArray = taosArrayInit(0, sizeof(SKv)); + if (pArray == NULL) { + return NULL; + } + SKv kv = {0}; + kv.key = malloc(256); + if (kv.key == NULL) { + taosArrayDestroy(pArray); + return NULL; + } + strcpy(kv.key, "groupId"); + kv.keyLen = strlen("groupId") + 1; + kv.value = malloc(256); + if (kv.value == NULL) { + free(kv.key); + taosArrayDestroy(pArray); + return NULL; + } + strcpy(kv.value, pTmq->groupId); + kv.valueLen = strlen(pTmq->groupId) + 1; + + taosArrayPush(pArray, &kv); + strcpy(kv.key, "clientUid"); + kv.keyLen = strlen("clientUid") + 1; + *(uint32_t*)kv.value = pTmq->pTscObj->connId; + kv.valueLen = sizeof(uint32_t); + + return NULL; +} + +tmq_t* tmqCreateConsumerImpl(TAOS* conn, tmq_conf_t* conf) { + tmq_t* pTmq = malloc(sizeof(tmq_t)); + if (pTmq == NULL) { + return NULL; + } + strcpy(pTmq->groupId, conf->groupId); + strcpy(pTmq->clientId, conf->clientId); + pTmq->pTscObj = (STscObj*)conn; + pTmq->pTscObj->connType = HEARTBEAT_TYPE_MQ; + + return pTmq; +} + TAOS_RES *tmq_create_topic(TAOS* taos, const char* name, const char* sql, int sqlLen) { STscObj* pTscObj = (STscObj*)taos; SRequestObj* 
pRequest = NULL; @@ -281,6 +377,25 @@ _return: return pRequest; } +typedef struct tmq_message_t { + int32_t numOfRows; + char* topicName; + TAOS_ROW row[]; +} tmq_message_t; + +tmq_message_t* tmq_consume_poll(tmq_t* mq, int64_t blocking_time) { + return NULL; +} + +tmq_resp_err_t* tmq_commit(tmq_t* mq, void* callback, int32_t async) { + return NULL; +} + +void tmq_message_destroy(tmq_message_t* mq_message) { + +} + + TAOS_RES *taos_query_l(TAOS *taos, const char *sql, int sqlLen) { STscObj *pTscObj = (STscObj *)taos; if (sqlLen > (size_t) tsMaxSQLStringLen) { diff --git a/source/client/src/clientMsgHandler.c b/source/client/src/clientMsgHandler.c index 85f3fb06a7..94c5c230f7 100644 --- a/source/client/src/clientMsgHandler.c +++ b/source/client/src/clientMsgHandler.c @@ -71,6 +71,9 @@ int processConnectRsp(void* param, const SDataBuf* pMsg, int32_t code) { pTscObj->pAppInfo->clusterId = pConnect->clusterId; atomic_add_fetch_64(&pTscObj->pAppInfo->numOfConns, 1); + SClientHbKey connKey = {.connId = pConnect->connId, .hbType = HEARTBEAT_TYPE_QUERY}; + hbRegisterConn(pTscObj->pAppInfo->pAppHbMgr, connKey, NULL); + // pRequest->body.resInfo.pRspMsg = pMsg->pData; tscDebug("0x%" PRIx64 " clusterId:%" PRId64 ", totalConn:%" PRId64, pRequest->requestId, pConnect->clusterId, pTscObj->pAppInfo->numOfConns); @@ -382,4 +385,4 @@ void initMsgHandleFp() { handleRequestRspFp[TMSG_INDEX(TDMT_VND_SHOW_TABLES)] = processShowRsp; handleRequestRspFp[TMSG_INDEX(TDMT_VND_SHOW_TABLES_FETCH)] = processRetrieveVndRsp; -} \ No newline at end of file +} diff --git a/source/dnode/mnode/impl/src/mndProfile.c b/source/dnode/mnode/impl/src/mndProfile.c index fced3facbe..d856d3a473 100644 --- a/source/dnode/mnode/impl/src/mndProfile.c +++ b/source/dnode/mnode/impl/src/mndProfile.c @@ -258,6 +258,23 @@ static int32_t mndSaveQueryStreamList(SConnObj *pConn, SHeartBeatReq *pReq) { } static int32_t mndProcessHeartBeatReq(SMnodeMsg *pReq) { + SMnode *pMnode = pReq->pMnode; + char *batchReqStr = pReq->rpcMsg.pCont; + SClientHbBatchReq batchReq = {0}; + tDeserializeClientHbBatchReq(batchReqStr, &batchReq); + SArray *pArray = batchReq.reqs; + int sz = taosArrayGetSize(pArray); + for (int i = 0; i < sz; i++) { + SClientHbReq* pReq = taosArrayGet(pArray, i); + if (pReq->connKey.hbType == HEARTBEAT_TYPE_QUERY) { + + } else if (pReq->connKey.hbType == HEARTBEAT_TYPE_MQ) { + + } + } + return 0; + +#if 0 SMnode *pMnode = pReq->pMnode; SProfileMgmt *pMgmt = &pMnode->profileMgmt; @@ -327,6 +344,7 @@ static int32_t mndProcessHeartBeatReq(SMnodeMsg *pReq) { pReq->contLen = sizeof(SConnectRsp); pReq->pCont = pRsp; return 0; +#endif } static int32_t mndProcessKillQueryReq(SMnodeMsg *pReq) { diff --git a/source/libs/wal/src/walMeta.c b/source/libs/wal/src/walMeta.c index dea178eb54..d630080086 100644 --- a/source/libs/wal/src/walMeta.c +++ b/source/libs/wal/src/walMeta.c @@ -199,8 +199,10 @@ int walCheckAndRepairMeta(SWal* pWal) { } int walCheckAndRepairIdx(SWal* pWal) { - // iterate all idx files - // check first and last entry of each idx file valid + // TODO: iterate all log files + // if idx not found, scan log and write idx + // if found, check complete by first and last entry of each idx file + // if idx incomplete, binary search last valid entry, and then build other part return 0; } From ae3c70a6ec1c2ab750e302576c4986d66f9be060 Mon Sep 17 00:00:00 2001 From: Liu Jicong Date: Fri, 14 Jan 2022 10:53:16 +0800 Subject: [PATCH 09/20] fix compile error --- source/client/src/clientHb.c | 1 - 1 file changed, 1 deletion(-) diff --git 
a/source/client/src/clientHb.c b/source/client/src/clientHb.c index 0d343b1b77..9ed688f101 100644 --- a/source/client/src/clientHb.c +++ b/source/client/src/clientHb.c @@ -14,7 +14,6 @@ */ #include "clientInt.h" -#include "clientHb.h" #include "trpc.h" static SClientHbMgr clientHbMgr = {0}; From 8cd25b8e48e1a782cc2dbca2ea9e6590acb9ca9f Mon Sep 17 00:00:00 2001 From: Liu Jicong Date: Fri, 14 Jan 2022 13:22:46 +0800 Subject: [PATCH 10/20] fix --- source/client/src/clientImpl.c | 4 ++-- source/client/src/clientMsgHandler.c | 2 +- source/dnode/mnode/impl/src/mndProfile.c | 3 ++- 3 files changed, 5 insertions(+), 4 deletions(-) diff --git a/source/client/src/clientImpl.c b/source/client/src/clientImpl.c index 84bb404764..b53c455cbd 100644 --- a/source/client/src/clientImpl.c +++ b/source/client/src/clientImpl.c @@ -116,7 +116,7 @@ TAOS *taos_connect_internal(const char *ip, const char *user, const char *pass, SAppInstInfo* p = calloc(1, sizeof(struct SAppInstInfo)); p->mgmtEp = epSet; p->pTransporter = openTransporter(user, secretEncrypt, tsNumOfCores); - p->pAppHbMgr = appHbMgrInit(p); + /*p->pAppHbMgr = appHbMgrInit(p);*/ taosHashPut(appInfo.pInstMap, key, strlen(key), &p, POINTER_BYTES); pInst = &p; @@ -788,4 +788,4 @@ void setQueryResultByRsp(SReqResultInfo* pResultInfo, const SRetrieveTableRsp* p pResultInfo->completed = (pRsp->completed == 1); setResultDataPtr(pResultInfo, pResultInfo->fields, pResultInfo->numOfCols, pResultInfo->numOfRows); -} \ No newline at end of file +} diff --git a/source/client/src/clientMsgHandler.c b/source/client/src/clientMsgHandler.c index 497ef1ac95..ba29b81132 100644 --- a/source/client/src/clientMsgHandler.c +++ b/source/client/src/clientMsgHandler.c @@ -72,7 +72,7 @@ int processConnectRsp(void* param, const SDataBuf* pMsg, int32_t code) { atomic_add_fetch_64(&pTscObj->pAppInfo->numOfConns, 1); SClientHbKey connKey = {.connId = pConnect->connId, .hbType = HEARTBEAT_TYPE_QUERY}; - hbRegisterConn(pTscObj->pAppInfo->pAppHbMgr, connKey, NULL); + /*hbRegisterConn(pTscObj->pAppInfo->pAppHbMgr, connKey, NULL);*/ // pRequest->body.resInfo.pRspMsg = pMsg->pData; tscDebug("0x%" PRIx64 " clusterId:%" PRId64 ", totalConn:%" PRId64, pRequest->requestId, pConnect->clusterId, diff --git a/source/dnode/mnode/impl/src/mndProfile.c b/source/dnode/mnode/impl/src/mndProfile.c index d856d3a473..71b61730c3 100644 --- a/source/dnode/mnode/impl/src/mndProfile.c +++ b/source/dnode/mnode/impl/src/mndProfile.c @@ -258,6 +258,7 @@ static int32_t mndSaveQueryStreamList(SConnObj *pConn, SHeartBeatReq *pReq) { } static int32_t mndProcessHeartBeatReq(SMnodeMsg *pReq) { +#if 0 SMnode *pMnode = pReq->pMnode; char *batchReqStr = pReq->rpcMsg.pCont; SClientHbBatchReq batchReq = {0}; @@ -273,8 +274,8 @@ static int32_t mndProcessHeartBeatReq(SMnodeMsg *pReq) { } } return 0; +#else -#if 0 SMnode *pMnode = pReq->pMnode; SProfileMgmt *pMgmt = &pMnode->profileMgmt; From aa9a5b6f67168269da569d3d7f8c10e3eed85a16 Mon Sep 17 00:00:00 2001 From: Liu Jicong Date: Fri, 14 Jan 2022 15:54:07 +0800 Subject: [PATCH 11/20] add test for new heartbeat --- include/common/tmsg.h | 12 ++- source/client/src/clientHb.c | 10 +-- source/client/test/clientTests.cpp | 76 +++++++++---------- source/common/src/tmsg.c | 74 ++++++++++++++++-- source/dnode/mnode/impl/src/mndProfile.c | 29 +++++-- .../dnode/mnode/impl/test/profile/profile.cpp | 39 ++++++++++ 6 files changed, 179 insertions(+), 61 deletions(-) diff --git a/include/common/tmsg.h b/include/common/tmsg.h index 7bce917701..e98446fbdf 100644 --- 
a/include/common/tmsg.h +++ b/include/common/tmsg.h @@ -155,7 +155,7 @@ typedef struct { typedef struct { SClientHbKey connKey; - SHashObj* info; // hash + SHashObj* info; // hash } SClientHbReq; typedef struct { @@ -181,7 +181,10 @@ static FORCE_INLINE uint32_t hbKeyHashFunc(const char* key, uint32_t keyLen) { } int tSerializeSClientHbReq(void** buf, const SClientHbReq* pReq); -void* tDeserializeClientHbReq(void* buf, SClientHbReq* pReq); +void* tDeserializeSClientHbReq(void* buf, SClientHbReq* pReq); + +int tSerializeSClientHbRsp(void** buf, const SClientHbRsp* pRsp); +void* tDeserializeSClientHbRsp(void* buf, SClientHbRsp* pRsp); static FORCE_INLINE void tFreeClientHbReq(void *pReq) { SClientHbReq* req = (SClientHbReq*)pReq; @@ -190,7 +193,7 @@ static FORCE_INLINE void tFreeClientHbReq(void *pReq) { } int tSerializeSClientHbBatchReq(void** buf, const SClientHbBatchReq* pReq); -void* tDeserializeClientHbBatchReq(void* buf, SClientHbBatchReq* pReq); +void* tDeserializeSClientHbBatchReq(void* buf, SClientHbBatchReq* pReq); static FORCE_INLINE void tFreeClientHbBatchReq(void* pReq) { SClientHbBatchReq *req = (SClientHbBatchReq*)pReq; @@ -198,6 +201,9 @@ static FORCE_INLINE void tFreeClientHbBatchReq(void* pReq) { free(pReq); } +int tSerializeSClientHbBatchRsp(void** buf, const SClientHbBatchRsp* pBatchRsp); +void* tDeserializeSClientHbBatchRsp(void* buf, SClientHbBatchRsp* pBatchRsp); + static FORCE_INLINE int taosEncodeSKv(void** buf, const SKv* pKv) { int tlen = 0; tlen += taosEncodeFixedI32(buf, pKv->keyLen); diff --git a/source/client/src/clientHb.c b/source/client/src/clientHb.c index 9ed688f101..bd94bc05ec 100644 --- a/source/client/src/clientHb.c +++ b/source/client/src/clientHb.c @@ -43,17 +43,17 @@ static FORCE_INLINE void hbMgrInitHandle() { } SClientHbBatchReq* hbGatherAllInfo(SAppHbMgr *pAppHbMgr) { - SClientHbBatchReq* pReq = malloc(sizeof(SClientHbBatchReq)); - if (pReq == NULL) { + SClientHbBatchReq* pBatchReq = malloc(sizeof(SClientHbBatchReq)); + if (pBatchReq == NULL) { terrno = TSDB_CODE_TSC_OUT_OF_MEMORY; return NULL; } int32_t connKeyCnt = atomic_load_32(&pAppHbMgr->connKeyCnt); - pReq->reqs = taosArrayInit(connKeyCnt, sizeof(SClientHbReq)); + pBatchReq->reqs = taosArrayInit(connKeyCnt, sizeof(SClientHbReq)); void *pIter = taosHashIterate(pAppHbMgr->activeInfo, NULL); while (pIter != NULL) { - taosArrayPush(pReq->reqs, pIter); + taosArrayPush(pBatchReq->reqs, pIter); SClientHbReq* pOneReq = pIter; taosHashClear(pOneReq->info); @@ -70,7 +70,7 @@ SClientHbBatchReq* hbGatherAllInfo(SAppHbMgr *pAppHbMgr) { pIter = taosHashIterate(pAppHbMgr->activeInfo, pIter); } - return pReq; + return pBatchReq; } static void* hbThreadFunc(void* param) { diff --git a/source/client/test/clientTests.cpp b/source/client/test/clientTests.cpp index d1093bb1a6..108f126e15 100644 --- a/source/client/test/clientTests.cpp +++ b/source/client/test/clientTests.cpp @@ -147,29 +147,29 @@ TEST(testCase, connect_Test) { // taos_close(pConn); //} // -//TEST(testCase, create_db_Test) { -// TAOS* pConn = taos_connect("localhost", "root", "taosdata", NULL, 0); -// assert(pConn != NULL); -// -// TAOS_RES* pRes = taos_query(pConn, "create database abc1 vgroups 2"); -// if (taos_errno(pRes) != 0) { -// printf("error in create db, reason:%s\n", taos_errstr(pRes)); -// } -// -// TAOS_FIELD* pFields = taos_fetch_fields(pRes); -// ASSERT_TRUE(pFields == NULL); -// -// int32_t numOfFields = taos_num_fields(pRes); -// ASSERT_EQ(numOfFields, 0); -// -// taos_free_result(pRes); -// -// pRes = taos_query(pConn, "create 
database abc1 vgroups 4"); -// if (taos_errno(pRes) != 0) { -// printf("error in create db, reason:%s\n", taos_errstr(pRes)); -// } -// taos_close(pConn); -//} +TEST(testCase, create_db_Test) { + TAOS* pConn = taos_connect("localhost", "root", "taosdata", NULL, 0); + assert(pConn != NULL); + + TAOS_RES* pRes = taos_query(pConn, "create database abc1 vgroups 2"); + if (taos_errno(pRes) != 0) { + printf("error in create db, reason:%s\n", taos_errstr(pRes)); + } + + TAOS_FIELD* pFields = taos_fetch_fields(pRes); + ASSERT_TRUE(pFields == NULL); + + int32_t numOfFields = taos_num_fields(pRes); + ASSERT_EQ(numOfFields, 0); + + taos_free_result(pRes); + + pRes = taos_query(pConn, "create database abc1 vgroups 4"); + if (taos_errno(pRes) != 0) { + printf("error in create db, reason:%s\n", taos_errstr(pRes)); + } + taos_close(pConn); +} // //TEST(testCase, create_dnode_Test) { // TAOS* pConn = taos_connect("localhost", "root", "taosdata", NULL, 0); @@ -293,24 +293,24 @@ TEST(testCase, connect_Test) { // taos_close(pConn); //} -TEST(testCase, create_ctable_Test) { - TAOS* pConn = taos_connect("localhost", "root", "taosdata", NULL, 0); - assert(pConn != NULL); +//TEST(testCase, create_ctable_Test) { + //TAOS* pConn = taos_connect("localhost", "root", "taosdata", NULL, 0); + //assert(pConn != NULL); - TAOS_RES* pRes = taos_query(pConn, "use abc1"); - if (taos_errno(pRes) != 0) { - printf("failed to use db, reason:%s\n", taos_errstr(pRes)); - } - taos_free_result(pRes); + //TAOS_RES* pRes = taos_query(pConn, "use abc1"); + //if (taos_errno(pRes) != 0) { + //printf("failed to use db, reason:%s\n", taos_errstr(pRes)); + //} + //taos_free_result(pRes); - pRes = taos_query(pConn, "create table tm0 using st1 tags(1)"); - if (taos_errno(pRes) != 0) { - printf("failed to create child table tm0, reason:%s\n", taos_errstr(pRes)); - } + //pRes = taos_query(pConn, "create table tm0 using st1 tags(1)"); + //if (taos_errno(pRes) != 0) { + //printf("failed to create child table tm0, reason:%s\n", taos_errstr(pRes)); + //} - taos_free_result(pRes); - taos_close(pConn); -} + //taos_free_result(pRes); + //taos_close(pConn); +//} //TEST(testCase, show_stable_Test) { // TAOS* pConn = taos_connect("localhost", "root", "taosdata", NULL, 0); diff --git a/source/common/src/tmsg.c b/source/common/src/tmsg.c index 53f59c7d57..4d1a07be21 100644 --- a/source/common/src/tmsg.c +++ b/source/common/src/tmsg.c @@ -89,7 +89,7 @@ int tSerializeSClientHbReq(void **buf, const SClientHbReq *pReq) { int tlen = 0; tlen += taosEncodeSClientHbKey(buf, &pReq->connKey); - int kvNum = taosHashGetSize(pReq->info); + int32_t kvNum = taosHashGetSize(pReq->info); tlen += taosEncodeFixedI32(buf, kvNum); SKv kv; void* pIter = taosHashIterate(pReq->info, pIter); @@ -104,14 +104,15 @@ int tSerializeSClientHbReq(void **buf, const SClientHbReq *pReq) { return tlen; } -void *tDeserializeClientHbReq(void *buf, SClientHbReq *pReq) { - ASSERT(pReq->info != NULL); +void *tDeserializeSClientHbReq(void *buf, SClientHbReq *pReq) { buf = taosDecodeSClientHbKey(buf, &pReq->connKey); // TODO: error handling - int kvNum; - taosDecodeFixedI32(buf, &kvNum); - pReq->info = taosHashInit(kvNum, taosGetDefaultHashFunction(TSDB_DATA_TYPE_BINARY), true, HASH_NO_LOCK); + int32_t kvNum; + buf = taosDecodeFixedI32(buf, &kvNum); + if (pReq->info == NULL) { + pReq->info = taosHashInit(kvNum, taosGetDefaultHashFunction(TSDB_DATA_TYPE_BINARY), true, HASH_NO_LOCK); + } for(int i = 0; i < kvNum; i++) { SKv kv; buf = taosDecodeSKv(buf, &kv); @@ -121,12 +122,69 @@ void 
*tDeserializeClientHbReq(void *buf, SClientHbReq *pReq) { return buf; } -int tSerializeSClientHbBatchReq(void** buf, const SClientHbBatchReq* pReq) { +int tSerializeSClientHbRsp(void** buf, const SClientHbRsp* pRsp) { int tlen = 0; + tlen += taosEncodeSClientHbKey(buf, &pRsp->connKey); + tlen += taosEncodeFixedI32(buf, pRsp->status); + tlen += taosEncodeFixedI32(buf, pRsp->bodyLen); + tlen += taosEncodeBinary(buf, pRsp->body, pRsp->bodyLen); + return tlen; +} +void* tDeserializeSClientHbRsp(void* buf, SClientHbRsp* pRsp) { + buf = taosDecodeSClientHbKey(buf, &pRsp->connKey); + buf = taosDecodeFixedI32(buf, &pRsp->status); + buf = taosDecodeFixedI32(buf, &pRsp->bodyLen); + buf = taosDecodeBinary(buf, &pRsp->body, pRsp->bodyLen); + return buf; +} + +int tSerializeSClientHbBatchReq(void** buf, const SClientHbBatchReq* pBatchReq) { + int tlen = 0; + tlen += taosEncodeFixedI64(buf, pBatchReq->reqId); + int32_t reqNum = taosArrayGetSize(pBatchReq->reqs); + tlen += taosEncodeFixedI32(buf, reqNum); + for (int i = 0; i < reqNum; i++) { + SClientHbReq* pReq = taosArrayGet(pBatchReq->reqs, i); + tlen += tSerializeSClientHbReq(buf, pReq); + } return tlen; } -void* tDeserializeClientHbBatchReq(void* buf, SClientHbBatchReq* pReq) { +void* tDeserializeSClientHbBatchReq(void* buf, SClientHbBatchReq* pBatchReq) { + buf = taosDecodeFixedI64(buf, &pBatchReq->reqId); + if (pBatchReq->reqs == NULL) { + pBatchReq->reqs = taosArrayInit(0, sizeof(SClientHbReq)); + } + int32_t reqNum; + buf = taosDecodeFixedI32(buf, &reqNum); + for (int i = 0; i < reqNum; i++) { + SClientHbReq req = {0}; + buf = tDeserializeSClientHbReq(buf, &req); + taosArrayPush(pBatchReq->reqs, &req); + } + return buf; +} + +int tSerializeSClientHbBatchRsp(void** buf, const SClientHbBatchRsp* pBatchRsp) { + int tlen = 0; + int32_t sz = taosArrayGetSize(pBatchRsp->rsps); + tlen += taosEncodeFixedI32(buf, sz); + for (int i = 0; i < sz; i++) { + SClientHbRsp* pRsp = taosArrayGet(pBatchRsp->rsps, i); + tlen += tSerializeSClientHbRsp(buf, pRsp); + } + return tlen; +} + +void* tDeserializeSClientHbBatchRsp(void* buf, SClientHbBatchRsp* pBatchRsp) { + int32_t sz; + buf = taosDecodeFixedI32(buf, &sz); + pBatchRsp->rsps = taosArrayInit(sz, sizeof(SClientHbRsp)); + for (int i = 0; i < sz; i++) { + SClientHbRsp rsp = {0}; + buf = tDeserializeSClientHbRsp(buf, &rsp); + taosArrayPush(pBatchRsp->rsps, &rsp); + } return buf; } diff --git a/source/dnode/mnode/impl/src/mndProfile.c b/source/dnode/mnode/impl/src/mndProfile.c index 71b61730c3..79e5d9eae5 100644 --- a/source/dnode/mnode/impl/src/mndProfile.c +++ b/source/dnode/mnode/impl/src/mndProfile.c @@ -258,24 +258,39 @@ static int32_t mndSaveQueryStreamList(SConnObj *pConn, SHeartBeatReq *pReq) { } static int32_t mndProcessHeartBeatReq(SMnodeMsg *pReq) { -#if 0 SMnode *pMnode = pReq->pMnode; char *batchReqStr = pReq->rpcMsg.pCont; SClientHbBatchReq batchReq = {0}; - tDeserializeClientHbBatchReq(batchReqStr, &batchReq); + tDeserializeSClientHbBatchReq(batchReqStr, &batchReq); SArray *pArray = batchReq.reqs; int sz = taosArrayGetSize(pArray); + + SClientHbBatchRsp batchRsp = {0}; + batchRsp.rsps = taosArrayInit(0, sizeof(SClientHbRsp)); + for (int i = 0; i < sz; i++) { - SClientHbReq* pReq = taosArrayGet(pArray, i); - if (pReq->connKey.hbType == HEARTBEAT_TYPE_QUERY) { - - } else if (pReq->connKey.hbType == HEARTBEAT_TYPE_MQ) { + SClientHbReq* pHbReq = taosArrayGet(pArray, i); + if (pHbReq->connKey.hbType == HEARTBEAT_TYPE_QUERY) { + } else if (pHbReq->connKey.hbType == HEARTBEAT_TYPE_MQ) { + SClientHbRsp rsp 
= { + .status = 0, + .connKey = pHbReq->connKey, + .bodyLen = 0, + .body = NULL + }; + taosArrayPush(batchRsp.rsps, &rsp); } } + int32_t tlen = tSerializeSClientHbBatchRsp(NULL, &batchRsp); + void* buf = rpcMallocCont(tlen); + void* bufCopy = buf; + tSerializeSClientHbBatchRsp(&bufCopy, &batchRsp); + pReq->contLen = tlen; + pReq->pCont = buf; return 0; -#else +#if 0 SMnode *pMnode = pReq->pMnode; SProfileMgmt *pMgmt = &pMnode->profileMgmt; diff --git a/source/dnode/mnode/impl/test/profile/profile.cpp b/source/dnode/mnode/impl/test/profile/profile.cpp index bf047517d3..a74b1c01f5 100644 --- a/source/dnode/mnode/impl/test/profile/profile.cpp +++ b/source/dnode/mnode/impl/test/profile/profile.cpp @@ -96,6 +96,38 @@ TEST_F(MndTestProfile, 03_ConnectMsg_Show) { } TEST_F(MndTestProfile, 04_HeartBeatMsg) { + + SClientHbBatchReq batchReq; + batchReq.reqs = taosArrayInit(0, sizeof(SClientHbReq)); + SClientHbReq req = {0}; + req.connKey = {.connId = 123, .hbType = HEARTBEAT_TYPE_MQ}; + req.info = taosHashInit(64, hbKeyHashFunc, 1, HASH_ENTRY_LOCK); + SKv kv; + kv.key = (void*)"abc"; + kv.keyLen = 4; + kv.value = (void*)"bcd"; + kv.valueLen = 4; + taosHashPut(req.info, kv.key, kv.keyLen, kv.value, kv.valueLen); + taosArrayPush(batchReq.reqs, &req); + + int32_t tlen = tSerializeSClientHbBatchReq(NULL, &batchReq); + + void* buf = (SClientHbBatchReq*)rpcMallocCont(tlen); + void* bufCopy = buf; + tSerializeSClientHbBatchReq(&bufCopy, &batchReq); + SRpcMsg* pMsg = test.SendReq(TDMT_MND_HEARTBEAT, buf, tlen); + ASSERT_NE(pMsg, nullptr); + ASSERT_EQ(pMsg->code, 0); + char* pRspChar = (char*)pMsg->pCont; + SClientHbBatchRsp rsp = {0}; + tDeserializeSClientHbBatchRsp(pRspChar, &rsp); + int sz = taosArrayGetSize(rsp.rsps); + ASSERT_EQ(sz, 1); + SClientHbRsp* pRsp = (SClientHbRsp*) taosArrayGet(rsp.rsps, 0); + EXPECT_EQ(pRsp->connKey.connId, 123); + EXPECT_EQ(pRsp->connKey.hbType, HEARTBEAT_TYPE_MQ); + EXPECT_EQ(pRsp->status, 0); +#if 0 int32_t contLen = sizeof(SHeartBeatReq); SHeartBeatReq* pReq = (SHeartBeatReq*)rpcMallocCont(contLen); @@ -129,9 +161,12 @@ TEST_F(MndTestProfile, 04_HeartBeatMsg) { EXPECT_EQ(pRsp->epSet.numOfEps, 1); EXPECT_EQ(pRsp->epSet.port[0], 9031); EXPECT_STREQ(pRsp->epSet.fqdn[0], "localhost"); +#endif } TEST_F(MndTestProfile, 05_KillConnMsg) { + // temporary remove since kill will use new heartbeat msg +#if 0 { int32_t contLen = sizeof(SKillConnReq); @@ -190,6 +225,7 @@ TEST_F(MndTestProfile, 05_KillConnMsg) { connId = pRsp->connId; } +#endif } TEST_F(MndTestProfile, 06_KillConnMsg_InvalidConn) { @@ -204,6 +240,8 @@ TEST_F(MndTestProfile, 06_KillConnMsg_InvalidConn) { } TEST_F(MndTestProfile, 07_KillQueryMsg) { + // temporary remove since kill will use new heartbeat msg +#if 0 { int32_t contLen = sizeof(SKillQueryReq); @@ -252,6 +290,7 @@ TEST_F(MndTestProfile, 07_KillQueryMsg) { EXPECT_EQ(pRsp->epSet.port[0], 9031); EXPECT_STREQ(pRsp->epSet.fqdn[0], "localhost"); } +#endif } TEST_F(MndTestProfile, 08_KillQueryMsg_InvalidConn) { From 2e40293b4d0579e957999ca52bd68a2dc707fea3 Mon Sep 17 00:00:00 2001 From: Liu Jicong Date: Fri, 14 Jan 2022 16:45:15 +0800 Subject: [PATCH 12/20] add test for new heartbeat --- source/client/src/clientHb.c | 25 ++++++++++++- source/client/src/clientImpl.c | 2 +- source/client/src/clientMsgHandler.c | 2 +- source/client/test/clientTests.cpp | 36 +++++++++---------- .../dnode/mnode/impl/test/profile/profile.cpp | 2 +- 5 files changed, 45 insertions(+), 22 deletions(-) diff --git a/source/client/src/clientHb.c b/source/client/src/clientHb.c index 
bd94bc05ec..7bdde457d5 100644 --- a/source/client/src/clientHb.c +++ b/source/client/src/clientHb.c @@ -51,10 +51,14 @@ SClientHbBatchReq* hbGatherAllInfo(SAppHbMgr *pAppHbMgr) { int32_t connKeyCnt = atomic_load_32(&pAppHbMgr->connKeyCnt); pBatchReq->reqs = taosArrayInit(connKeyCnt, sizeof(SClientHbReq)); + if (pAppHbMgr->activeInfo == NULL) { + return NULL; + } + void *pIter = taosHashIterate(pAppHbMgr->activeInfo, NULL); while (pIter != NULL) { - taosArrayPush(pBatchReq->reqs, pIter); SClientHbReq* pOneReq = pIter; + taosArrayPush(pBatchReq->reqs, pOneReq); taosHashClear(pOneReq->info); pIter = taosHashIterate(pAppHbMgr->activeInfo, pIter); @@ -84,7 +88,14 @@ static void* hbThreadFunc(void* param) { int sz = taosArrayGetSize(clientHbMgr.appHbMgrs); for(int i = 0; i < sz; i++) { SAppHbMgr* pAppHbMgr = taosArrayGet(clientHbMgr.appHbMgrs, i); + int32_t connCnt = atomic_load_32(&pAppHbMgr->connKeyCnt); + if (connCnt == 0) { + continue; + } SClientHbBatchReq* pReq = hbGatherAllInfo(pAppHbMgr); + if (pReq == NULL) { + continue; + } int tlen = tSerializeSClientHbBatchReq(NULL, pReq); void *buf = malloc(tlen); if (buf == NULL) { @@ -146,10 +157,22 @@ SAppHbMgr* appHbMgrInit(SAppInstInfo* pAppInstInfo) { // init hash info pAppHbMgr->activeInfo = taosHashInit(64, hbKeyHashFunc, 1, HASH_ENTRY_LOCK); + + if (pAppHbMgr->activeInfo == NULL) { + terrno = TSDB_CODE_OUT_OF_MEMORY; + free(pAppHbMgr); + return NULL; + } pAppHbMgr->activeInfo->freeFp = tFreeClientHbReq; // init getInfoFunc pAppHbMgr->getInfoFuncs = taosHashInit(64, hbKeyHashFunc, 1, HASH_ENTRY_LOCK); + if (pAppHbMgr->getInfoFuncs == NULL) { + terrno = TSDB_CODE_OUT_OF_MEMORY; + free(pAppHbMgr); + return NULL; + } + taosArrayPush(clientHbMgr.appHbMgrs, &pAppHbMgr); return pAppHbMgr; } diff --git a/source/client/src/clientImpl.c b/source/client/src/clientImpl.c index b53c455cbd..1a25cdc20c 100644 --- a/source/client/src/clientImpl.c +++ b/source/client/src/clientImpl.c @@ -116,7 +116,7 @@ TAOS *taos_connect_internal(const char *ip, const char *user, const char *pass, SAppInstInfo* p = calloc(1, sizeof(struct SAppInstInfo)); p->mgmtEp = epSet; p->pTransporter = openTransporter(user, secretEncrypt, tsNumOfCores); - /*p->pAppHbMgr = appHbMgrInit(p);*/ + p->pAppHbMgr = appHbMgrInit(p); taosHashPut(appInfo.pInstMap, key, strlen(key), &p, POINTER_BYTES); pInst = &p; diff --git a/source/client/src/clientMsgHandler.c b/source/client/src/clientMsgHandler.c index ba29b81132..497ef1ac95 100644 --- a/source/client/src/clientMsgHandler.c +++ b/source/client/src/clientMsgHandler.c @@ -72,7 +72,7 @@ int processConnectRsp(void* param, const SDataBuf* pMsg, int32_t code) { atomic_add_fetch_64(&pTscObj->pAppInfo->numOfConns, 1); SClientHbKey connKey = {.connId = pConnect->connId, .hbType = HEARTBEAT_TYPE_QUERY}; - /*hbRegisterConn(pTscObj->pAppInfo->pAppHbMgr, connKey, NULL);*/ + hbRegisterConn(pTscObj->pAppInfo->pAppHbMgr, connKey, NULL); // pRequest->body.resInfo.pRspMsg = pMsg->pData; tscDebug("0x%" PRIx64 " clusterId:%" PRId64 ", totalConn:%" PRId64, pRequest->requestId, pConnect->clusterId, diff --git a/source/client/test/clientTests.cpp b/source/client/test/clientTests.cpp index 108f126e15..9d4e7c8152 100644 --- a/source/client/test/clientTests.cpp +++ b/source/client/test/clientTests.cpp @@ -147,29 +147,29 @@ TEST(testCase, connect_Test) { // taos_close(pConn); //} // -TEST(testCase, create_db_Test) { - TAOS* pConn = taos_connect("localhost", "root", "taosdata", NULL, 0); - assert(pConn != NULL); +//TEST(testCase, create_db_Test) { + //TAOS* pConn = 
taos_connect("localhost", "root", "taosdata", NULL, 0); + //assert(pConn != NULL); - TAOS_RES* pRes = taos_query(pConn, "create database abc1 vgroups 2"); - if (taos_errno(pRes) != 0) { - printf("error in create db, reason:%s\n", taos_errstr(pRes)); - } + //TAOS_RES* pRes = taos_query(pConn, "create database abc1 vgroups 2"); + //if (taos_errno(pRes) != 0) { + //printf("error in create db, reason:%s\n", taos_errstr(pRes)); + //} - TAOS_FIELD* pFields = taos_fetch_fields(pRes); - ASSERT_TRUE(pFields == NULL); + //TAOS_FIELD* pFields = taos_fetch_fields(pRes); + //ASSERT_TRUE(pFields == NULL); - int32_t numOfFields = taos_num_fields(pRes); - ASSERT_EQ(numOfFields, 0); + //int32_t numOfFields = taos_num_fields(pRes); + //ASSERT_EQ(numOfFields, 0); - taos_free_result(pRes); + //taos_free_result(pRes); - pRes = taos_query(pConn, "create database abc1 vgroups 4"); - if (taos_errno(pRes) != 0) { - printf("error in create db, reason:%s\n", taos_errstr(pRes)); - } - taos_close(pConn); -} + //pRes = taos_query(pConn, "create database abc1 vgroups 4"); + //if (taos_errno(pRes) != 0) { + //printf("error in create db, reason:%s\n", taos_errstr(pRes)); + //} + //taos_close(pConn); +//} // //TEST(testCase, create_dnode_Test) { // TAOS* pConn = taos_connect("localhost", "root", "taosdata", NULL, 0); diff --git a/source/dnode/mnode/impl/test/profile/profile.cpp b/source/dnode/mnode/impl/test/profile/profile.cpp index a74b1c01f5..4b329886eb 100644 --- a/source/dnode/mnode/impl/test/profile/profile.cpp +++ b/source/dnode/mnode/impl/test/profile/profile.cpp @@ -96,7 +96,6 @@ TEST_F(MndTestProfile, 03_ConnectMsg_Show) { } TEST_F(MndTestProfile, 04_HeartBeatMsg) { - SClientHbBatchReq batchReq; batchReq.reqs = taosArrayInit(0, sizeof(SClientHbReq)); SClientHbReq req = {0}; @@ -127,6 +126,7 @@ TEST_F(MndTestProfile, 04_HeartBeatMsg) { EXPECT_EQ(pRsp->connKey.connId, 123); EXPECT_EQ(pRsp->connKey.hbType, HEARTBEAT_TYPE_MQ); EXPECT_EQ(pRsp->status, 0); + #if 0 int32_t contLen = sizeof(SHeartBeatReq); From c5ece5715cfe9c57615e58c23d9024f41a0d3ecf Mon Sep 17 00:00:00 2001 From: Liu Jicong Date: Fri, 14 Jan 2022 17:30:57 +0800 Subject: [PATCH 13/20] set refObjId to 0 --- include/common/tmsg.h | 2 +- source/client/src/clientHb.c | 38 +++++++++++++++++------------- source/client/test/clientTests.cpp | 1 + 3 files changed, 23 insertions(+), 18 deletions(-) diff --git a/include/common/tmsg.h b/include/common/tmsg.h index e98446fbdf..9cf3068314 100644 --- a/include/common/tmsg.h +++ b/include/common/tmsg.h @@ -197,7 +197,7 @@ void* tDeserializeSClientHbBatchReq(void* buf, SClientHbBatchReq* pReq); static FORCE_INLINE void tFreeClientHbBatchReq(void* pReq) { SClientHbBatchReq *req = (SClientHbBatchReq*)pReq; - taosArrayDestroyEx(req->reqs, tFreeClientHbReq); + //taosArrayDestroyEx(req->reqs, tFreeClientHbReq); free(pReq); } diff --git a/source/client/src/clientHb.c b/source/client/src/clientHb.c index 7bdde457d5..6d7fc9f81a 100644 --- a/source/client/src/clientHb.c +++ b/source/client/src/clientHb.c @@ -51,10 +51,6 @@ SClientHbBatchReq* hbGatherAllInfo(SAppHbMgr *pAppHbMgr) { int32_t connKeyCnt = atomic_load_32(&pAppHbMgr->connKeyCnt); pBatchReq->reqs = taosArrayInit(connKeyCnt, sizeof(SClientHbReq)); - if (pAppHbMgr->activeInfo == NULL) { - return NULL; - } - void *pIter = taosHashIterate(pAppHbMgr->activeInfo, NULL); while (pIter != NULL) { SClientHbReq* pOneReq = pIter; @@ -71,7 +67,7 @@ SClientHbBatchReq* hbGatherAllInfo(SAppHbMgr *pAppHbMgr) { taosHashCopyKey(pIter, &connKey); getConnInfoFp(connKey, NULL); - pIter = 
taosHashIterate(pAppHbMgr->activeInfo, pIter); + pIter = taosHashIterate(pAppHbMgr->getInfoFuncs, pIter); } return pBatchReq; @@ -87,7 +83,8 @@ static void* hbThreadFunc(void* param) { int sz = taosArrayGetSize(clientHbMgr.appHbMgrs); for(int i = 0; i < sz; i++) { - SAppHbMgr* pAppHbMgr = taosArrayGet(clientHbMgr.appHbMgrs, i); + SAppHbMgr* pAppHbMgr = taosArrayGetP(clientHbMgr.appHbMgrs, i); + int32_t connCnt = atomic_load_32(&pAppHbMgr->connKeyCnt); if (connCnt == 0) { continue; @@ -102,20 +99,27 @@ static void* hbThreadFunc(void* param) { //TODO: error handling break; } - tSerializeSClientHbBatchReq(buf, pReq); - SMsgSendInfo info; - info.fp = hbMqAsyncCallBack; - info.msgInfo.pData = buf; - info.msgInfo.len = tlen; - info.msgType = TDMT_MND_HEARTBEAT; - info.param = NULL; - info.requestId = generateRequestId(); - info.requestObjRefId = -1; + void *bufCopy = buf; + tSerializeSClientHbBatchReq(&bufCopy, pReq); + SMsgSendInfo *pInfo = malloc(sizeof(SMsgSendInfo)); + if (pInfo == NULL) { + terrno = TSDB_CODE_TSC_OUT_OF_MEMORY; + tFreeClientHbBatchReq(pReq); + free(buf); + break; + } + pInfo->fp = hbMqAsyncCallBack; + pInfo->msgInfo.pData = buf; + pInfo->msgInfo.len = tlen; + pInfo->msgType = TDMT_MND_HEARTBEAT; + pInfo->param = NULL; + pInfo->requestId = generateRequestId(); + pInfo->requestObjRefId = 0; SAppInstInfo *pAppInstInfo = pAppHbMgr->pAppInstInfo; int64_t transporterId = 0; SEpSet epSet = getEpSet_s(&pAppInstInfo->mgmtEp); - asyncSendMsgToServer(pAppInstInfo->pTransporter, &epSet, &transporterId, &info); + asyncSendMsgToServer(pAppInstInfo->pTransporter, &epSet, &transporterId, pInfo); tFreeClientHbBatchReq(pReq); atomic_add_fetch_32(&pAppHbMgr->reportCnt, 1); @@ -182,7 +186,7 @@ void appHbMgrCleanup(SAppHbMgr* pAppHbMgr) { int sz = taosArrayGetSize(clientHbMgr.appHbMgrs); for (int i = 0; i < sz; i++) { - SAppHbMgr* pTarget = taosArrayGet(clientHbMgr.appHbMgrs, i); + SAppHbMgr* pTarget = taosArrayGetP(clientHbMgr.appHbMgrs, i); if (pAppHbMgr == pTarget) { taosHashCleanup(pTarget->activeInfo); taosHashCleanup(pTarget->getInfoFuncs); diff --git a/source/client/test/clientTests.cpp b/source/client/test/clientTests.cpp index 9d4e7c8152..d5b149284e 100644 --- a/source/client/test/clientTests.cpp +++ b/source/client/test/clientTests.cpp @@ -53,6 +53,7 @@ TEST(testCase, connect_Test) { if (pConn == NULL) { printf("failed to connect to server, reason:%s\n", taos_errstr(NULL)); } + sleep(3); taos_close(pConn); } From 552c151f494468827c03548342c46f6a934392d5 Mon Sep 17 00:00:00 2001 From: Xiaoyu Wang Date: Fri, 14 Jan 2022 05:05:54 -0500 Subject: [PATCH 14/20] TD-12678 dsGetDataBlock allowed to call again after the query is completed to return statistics --- source/libs/executor/src/dataDispatcher.c | 6 ++++++ source/libs/planner/src/planner.c | 6 +++--- 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/source/libs/executor/src/dataDispatcher.c b/source/libs/executor/src/dataDispatcher.c index a69084f3db..a6af9ff388 100644 --- a/source/libs/executor/src/dataDispatcher.c +++ b/source/libs/executor/src/dataDispatcher.c @@ -182,6 +182,12 @@ static void getDataLength(SDataSinkHandle* pHandle, int32_t* pLen, bool* pQueryE static int32_t getDataBlock(SDataSinkHandle* pHandle, SOutputData* pOutput) { SDataDispatchHandle* pDispatcher = (SDataDispatchHandle*)pHandle; + if (NULL == pDispatcher->nextOutput.pData) { + assert(pDispatcher->queryEnd); + pOutput->useconds = pDispatcher->useconds; + pOutput->precision = pDispatcher->schema.precision; + return TSDB_CODE_SUCCESS; + } 
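The guard added above is the behavior change patch 14 describes: once the last block has been handed out, nextOutput.pData is NULL, and one further call to getDataBlock returns only the execution summary (useconds and precision) with no row payload. A caller-side sketch of that protocol, assuming exported dsGetDataLength/dsGetDataBlock wrappers with the same signatures as the static functions above, and hypothetical consumeBlock/reportElapsed sinks (not from this patch):

    SOutputData out = {0};
    bool        queryEnd = false;
    int32_t     len = 0;
    while (1) {
      dsGetDataLength(handle, &len, &queryEnd);      // poll the data sink
      if (len > 0) {
        out.pData = malloc(len);                     // sized from the length query
        dsGetDataBlock(handle, &out);                // copies one block of rows
        consumeBlock(&out);                          // hypothetical row consumer
        free(out.pData);
      } else if (queryEnd) {
        dsGetDataBlock(handle, &out);                // final call: statistics only
        reportElapsed(out.useconds, out.precision);  // hypothetical stats sink
        break;
      }                                              // else: a real caller waits for the ready callback
    }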
 SDataCacheEntry* pEntry = (SDataCacheEntry*)(pDispatcher->nextOutput.pData);
   memcpy(pOutput->pData, pEntry->data, pEntry->dataLen);
   pOutput->numOfRows = pEntry->numOfRows;
diff --git a/source/libs/planner/src/planner.c b/source/libs/planner/src/planner.c
index 21f57d95d5..bf815b26b2 100644
--- a/source/libs/planner/src/planner.c
+++ b/source/libs/planner/src/planner.c
@@ -65,9 +65,9 @@ int32_t qCreateQueryDag(const struct SQueryNode* pNode, struct SQueryDag** pDag,
   }

   if (pLogicPlan->info.type != QNODE_MODIFY) {
-//    char* str = NULL;
-//    queryPlanToString(pLogicPlan, &str);
-//    printf("%s\n", str);
+    char* str = NULL;
+    queryPlanToString(pLogicPlan, &str);
+    printf("%s\n", str);
   }

   code = optimizeQueryPlan(pLogicPlan);

From 8ac88a625ea0885819e75dbf616748de7aefb1bc Mon Sep 17 00:00:00 2001
From: lihui
Date: Fri, 14 Jan 2022 18:40:37 +0800
Subject: [PATCH 15/20] [modify bug]

---
 tests/test/c/create_table.c | 20 +++++++++++++-------
 1 file changed, 13 insertions(+), 7 deletions(-)

diff --git a/tests/test/c/create_table.c b/tests/test/c/create_table.c
index aae4dc7074..0376ab70ad 100644
--- a/tests/test/c/create_table.c
+++ b/tests/test/c/create_table.c
@@ -187,13 +187,19 @@ void *threadFunc(void *param) {
   int64_t curMs = 0;
   int64_t beginMs = taosGetTimestampMs();
   pInfo->startMs = beginMs;
-  for (int64_t t = pInfo->tableBeginIndex; t < pInfo->tableEndIndex; ++t) {
-    int64_t batch = (pInfo->tableEndIndex - t);
-    batch = MIN(batch, batchNum);
+  int64_t t = pInfo->tableBeginIndex;
+  for (; t <= pInfo->tableEndIndex;) {
+    //int64_t batch = (pInfo->tableEndIndex - t);
+    //batch = MIN(batch, batchNum);

     int32_t len = sprintf(qstr, "create table");
-    for (int32_t i = 0; i < batch; ++i) {
-      len += sprintf(qstr + len, " t%" PRId64 " using %s tags(%" PRId64 ")", t + i, stbName, t + i);
+    for (int32_t i = 0; i < batchNum;) {
+      len += sprintf(qstr + len, " %s_t%" PRId64 " using %s tags(%" PRId64 ")", stbName, t, stbName, t);
+      t++;
+      i++;
+      if (t > pInfo->tableEndIndex) {
+        break;
+      }
     }

     int64_t startTs = taosGetTimestampUs();
@@ -212,11 +218,11 @@
     curMs = taosGetTimestampMs();
     if (curMs - beginMs > 10000) {
       beginMs = curMs;
+      //printf("==== tableBeginIndex: %"PRId64", t: %"PRId64"\n", pInfo->tableBeginIndex, t);
       printCreateProgress(pInfo, t);
     }
-    t += (batch - 1);
   }
-  printCreateProgress(pInfo, pInfo->tableEndIndex);
+  printCreateProgress(pInfo, t);
 }

 if (insertData) {

From 41f600698c955dedcbc9dcd26acaf7b2ca044b1d Mon Sep 17 00:00:00 2001
From: yihaoDeng
Date: Fri, 14 Jan 2022 22:49:05 +0800
Subject: [PATCH 16/20] add libuv

---
 source/libs/transport/inc/rpcHead.h | 21 ++++
 source/libs/transport/src/rpcMain.c | 182 +++++++++++++++++++++++-----
 2 files changed, 175 insertions(+), 28 deletions(-)

diff --git a/source/libs/transport/inc/rpcHead.h b/source/libs/transport/inc/rpcHead.h
index 7317d84af1..66821db133 100644
--- a/source/libs/transport/inc/rpcHead.h
+++ b/source/libs/transport/inc/rpcHead.h
@@ -22,6 +22,27 @@ extern "C" {
 #endif

 #ifdef USE_UV
+typedef struct {
+  char version : 4;  // RPC version
+  char comp : 4;     // compression algorithm, 0:no compression 1:lz4
+  char resflag : 2;  // reserved bits
+  char spi : 3;      // security parameter index
+  char encrypt : 3;  // encrypt algorithm, 0: no encryption
+  uint16_t tranId;   // transaction ID
+  uint32_t linkUid;  // for unique connection ID assigned by client
+  uint64_t ahandle;  // ahandle assigned by client
+  uint32_t sourceId;  // source ID, an index for connection list
+  uint32_t destId;  // destination ID, an index for connection list
+  uint32_t destIp;  // destination IP address, for NAT scenario
+  char user[TSDB_UNI_LEN];  // user ID
+  uint16_t port;    // for UDP only, port may be changed
+  char empty[1];    // reserved
+  uint16_t msgType;  // message type
+  int32_t msgLen;    // message length including the header itself
+  uint32_t msgVer;
+  int32_t code;       // code in response message
+  uint8_t content[0];  // message body starts from here
+} SRpcHead;

 #else

diff --git a/source/libs/transport/src/rpcMain.c b/source/libs/transport/src/rpcMain.c
index 818d129032..a1c0c05fc3 100644
--- a/source/libs/transport/src/rpcMain.c
+++ b/source/libs/transport/src/rpcMain.c
@@ -13,7 +13,9 @@
  * along with this program. If not, see <http://www.gnu.org/licenses/>.
  */

+#ifdef USE_UV
 #include <uv.h>
+#endif
 #include "lz4.h"
 #include "os.h"
 #include "rpcCache.h"
@@ -68,6 +70,8 @@ typedef struct {

 #define container_of(ptr, type, member) ((type*)((char*)(ptr)-offsetof(type, member)))

+static const char* notify = "a";
+
 typedef struct SThreadObj {
   pthread_t thread;
   uv_pipe_t* pipe;
@@ -90,23 +94,39 @@ typedef struct SServerObj {
   uint32_t port;
 } SServerObj;

+typedef struct SContent {
+  char* buf;
+  int len;
+  int cap;
+  int toRead;
+} SContent;
+
 typedef struct SConnCtx {
   uv_tcp_t* pTcp;
+  uv_write_t* pWriter;
   uv_timer_t* pTimer;
+  uv_async_t* pWorkerAsync;
   queue queue;
   int ref;
   int persist;  // persist connection or not
+  SContent pCont;
+  int count;
 } SConnCtx;

-static void allocBuffer(uv_handle_t* handle, size_t suggested_size, uv_buf_t* buf);
-static void onTimeout(uv_timer_t* handle);
+static void allocReadBuffer(uv_handle_t* handle, size_t suggested_size, uv_buf_t* buf);
 static void onRead(uv_stream_t* cli, ssize_t nread, const uv_buf_t* buf);
+static void allocConnBuffer(uv_handle_t* handle, size_t suggested_size, uv_buf_t* buf);
+static void onTimeout(uv_timer_t* handle);
 static void onWrite(uv_write_t* req, int status);
 static void onAccept(uv_stream_t* stream, int status);
 static void onConnection(uv_stream_t* q, ssize_t nread, const uv_buf_t* buf);
 static void workerAsyncCB(uv_async_t* handle);

+static SConnCtx* connCtxCreate();
+static void connCtxDestroy(SConnCtx* ctx);
+static void uvConnCtxDestroy(uv_handle_t* handle);
+
 static void* workerThread(void* arg);
 static void* acceptThread(void* arg);

@@ -131,12 +151,11 @@ void* taosInitServer(uint32_t ip, uint32_t port, char* label, int numOfThreads,

   for (int i = 0; i < srv->numOfThread; i++) {
     SThreadObj* thrd = (SThreadObj*)calloc(1, sizeof(SThreadObj));
-
+    srv->pipe[i] = (uv_pipe_t*)calloc(2, sizeof(uv_pipe_t));
     int fds[2];
     if (uv_socketpair(AF_UNIX, SOCK_STREAM, fds, UV_NONBLOCK_PIPE, UV_NONBLOCK_PIPE) != 0) {
       return NULL;
     }
-    srv->pipe[i] = (uv_pipe_t*)calloc(2, sizeof(uv_pipe_t));
     uv_pipe_init(srv->loop, &(srv->pipe[i][0]), 1);
     uv_pipe_open(&(srv->pipe[i][0]), fds[1]);  // init write

@@ -147,7 +166,7 @@
       tDebug("success to create worker-thread %d", i);
       // printf("thread %d create\n", i);
     } else {
-      // clear all resource later
+      // TODO: clear all other resource later
       tError("failed to create worker-thread %d", i);
     }
     srv->pThreadObj[i] = thrd;
@@ -171,7 +190,6 @@ void* rpcOpen(const SRpcInit* pInit) {
     tstrncpy(pRpc->label, pInit->label, strlen(pInit->label));
   }
   pRpc->numOfThreads = pInit->numOfThreads > TSDB_MAX_RPC_THREADS ?
TSDB_MAX_RPC_THREADS : pInit->numOfThreads; - pRpc->tcphandle = taosInitServer(0, pInit->localPort, pRpc->label, pRpc->numOfThreads, NULL, pRpc); return pRpc; } @@ -190,26 +208,106 @@ void rpcSendRecv(void* shandle, SEpSet* pEpSet, SRpcMsg* pReq, SRpcMsg* pRsp) { int rpcReportProgress(void* pConn, char* pCont, int contLen) { return -1; } void rpcCancelRequest(int64_t rid) { return; } -void allocBuffer(uv_handle_t* handle, size_t suggested_size, uv_buf_t* buf) { - buf->base = malloc(suggested_size); - buf->len = suggested_size; +void allocReadBuffer(uv_handle_t* handle, size_t suggested_size, uv_buf_t* buf) { + static const int CAPACITY = 1024; + tDebug("pre alloc buffer for read "); + SConnCtx* ctx = handle->data; + SContent* pCont = &ctx->pCont; + if (pCont->cap == 0) { + pCont->buf = (char*)calloc(CAPACITY, sizeof(char)); + pCont->len = 0; + pCont->cap = CAPACITY; + pCont->toRead = -1; + + buf->base = pCont->buf; + buf->len = CAPACITY; + } else { + if (pCont->len >= pCont->cap) { + if (pCont->toRead == -1) { + pCont->cap *= 2; + pCont->buf = realloc(pCont->buf, pCont->cap); + } else if (pCont->len + pCont->toRead > pCont->cap) { + pCont->cap = pCont->len + pCont->toRead; + pCont->buf = realloc(pCont->buf, pCont->len + pCont->toRead); + } + } + buf->base = pCont->buf + pCont->len; + buf->len = pCont->cap - pCont->len; + } + + // if (ctx->pCont.cap == 0) { + // ctx->pCont.buf = (char*)calloc(64, sizeof(char)); + // ctx->pCont.len = 0; + // ctx->pCont.cap = 64; + // // + // buf->base = ctx->pCont.buf; + // buf->len = sz; + //} else { + // if (ctx->pCont.len + sz > ctx->pCont.cap) { + // ctx->pCont.cap *= 2; + // ctx->pCont.buf = realloc(ctx->pCont.buf, ctx->pCont.cap); + // } + // buf->base = ctx->pCont.buf + ctx->pCont.len; + // buf->len = sz; + //} +} +// change later +static bool handleUserData(SContent* data) { + SRpcHead rpcHead; + + bool finish = false; + int32_t msgLen, leftLen, retLen; + int32_t headLen = sizeof(rpcHead); + if (data->len >= headLen) { + memcpy((char*)&rpcHead, data->buf, headLen); + msgLen = (int32_t)htonl((uint32_t)rpcHead.msgLen); + if (msgLen + headLen <= data->len) { + return true; + } else { + return false; + } + } else { + return false; + } +} + +void onRead(uv_stream_t* cli, ssize_t nread, const uv_buf_t* buf) { + // opt + SConnCtx* ctx = cli->data; + SContent* pCont = &ctx->pCont; + if (nread > 0) { + pCont->len += nread; + bool finish = handleUserData(pCont); + if (finish == false) { + tDebug("continue read"); + } else { + tDebug("read completely"); + } + return; + } + + if (nread != UV_EOF) { + tDebug("Read error %s\n", uv_err_name(nread)); + } + uv_close((uv_handle_t*)cli, uvConnCtxDestroy); +} +void allocConnBuffer(uv_handle_t* handle, size_t suggested_size, uv_buf_t* buf) { + buf->base = malloc(sizeof(char)); + buf->len = 2; } void onTimeout(uv_timer_t* handle) { // opt tDebug("time out"); } -void onRead(uv_stream_t* cli, ssize_t nread, const uv_buf_t* buf) { - // opt - tDebug("data already was read on a stream"); -} void onWrite(uv_write_t* req, int status) { + SConnCtx* ctx = req->data; if (status == 0) { tDebug("data already was written on stream"); + } else { + connCtxDestroy(ctx); } - free(req); - // opt } @@ -243,7 +341,7 @@ void onAccept(uv_stream_t* stream, int status) { if (uv_accept(stream, (uv_stream_t*)cli) == 0) { uv_write_t* wr = (uv_write_t*)malloc(sizeof(uv_write_t)); - uv_buf_t buf = uv_buf_init("a", 1); + uv_buf_t buf = uv_buf_init((char*)notify, strlen(notify)); pObj->workerIdx = (pObj->workerIdx + 1) % pObj->numOfThread; tDebug("new 
connection accepted by main server, dispatch to %dth worker-thread", pObj->workerIdx);

     uv_write2(wr, (uv_stream_t*)&(pObj->pipe[pObj->workerIdx][0]), &buf, 1, (uv_stream_t*)cli, onWrite);
   } else {
     uv_close((uv_handle_t*)cli, NULL);
   }
 }
 void onConnection(uv_stream_t* q, ssize_t nread, const uv_buf_t* buf) {
+  tDebug("connection coming");
   if (nread < 0) {
     if (nread != UV_EOF) {
       tError("read error %s", uv_err_name(nread));
     }
     // TODO(log other failure reason)
     uv_close((uv_handle_t*)q, NULL);
     return;
   }
+  // free memory allocated by allocConnBuffer
+  assert(nread == strlen(notify));
+  assert(buf->base[0] == notify[0]);
+  free(buf->base);
+
   SThreadObj* pObj = (SThreadObj*)container_of(q, struct SThreadObj, pipe);
   uv_pipe_t* pipe = (uv_pipe_t*)q;
   if (!uv_pipe_pending_count(pipe)) {
     tError("No pending count");
     return;
   }
+
   uv_handle_type pending = uv_pipe_pending_type(pipe);
   assert(pending == UV_TCP);

-  SConnCtx* pConn = malloc(sizeof(SConnCtx));
+  SConnCtx* pConn = connCtxCreate();

   /* init conn timer*/
   pConn->pTimer = malloc(sizeof(uv_timer_t));
   uv_timer_init(pObj->loop, pConn->pTimer);

-  pConn->pTcp = (uv_tcp_t*)malloc(sizeof(uv_tcp_t));
   pConn->pWorkerAsync = pObj->workerAsync;  // thread safety
+
+  // init client handle
+  pConn->pTcp = (uv_tcp_t*)malloc(sizeof(uv_tcp_t));
   uv_tcp_init(pObj->loop, pConn->pTcp);
+  pConn->pTcp->data = pConn;
+
+  // init write request, just
+  pConn->pWriter = calloc(1, sizeof(uv_write_t));
+  pConn->pWriter->data = pConn;

   if (uv_accept(q, (uv_stream_t*)(pConn->pTcp)) == 0) {
     uv_os_fd_t fd;
     uv_fileno((const uv_handle_t*)pConn->pTcp, &fd);
     tDebug("new connection created: %d", fd);
-    uv_timer_start(pConn->pTimer, onTimeout, 10, 0);
-    uv_read_start((uv_stream_t*)(pConn->pTcp), allocBuffer, onRead);
+    uv_read_start((uv_stream_t*)(pConn->pTcp), allocReadBuffer, onRead);
   } else {
-    uv_timer_stop(pConn->pTimer);
-    free(pConn->pTimer);
-    uv_close((uv_handle_t*)pConn->pTcp, NULL);
-    free(pConn->pTcp);
-    free(pConn);
+    connCtxDestroy(pConn);
   }
 }

 void* workerThread(void* arg) {
   pObj->workerAsync = malloc(sizeof(uv_async_t));
   uv_async_init(pObj->loop, pObj->workerAsync, workerAsyncCB);
-  // pObj->workerAsync->data = (void*)pObj;
-
-  uv_read_start((uv_stream_t*)pObj->pipe, allocBuffer, onConnection);
+  uv_read_start((uv_stream_t*)pObj->pipe, allocConnBuffer, onConnection);
   uv_run(pObj->loop, UV_RUN_DEFAULT);
 }
+static SConnCtx* connCtxCreate() {
+  SConnCtx* pConn = (SConnCtx*)calloc(1, sizeof(SConnCtx));
+  return pConn;
+}
+static void connCtxDestroy(SConnCtx* ctx) {
+  if (ctx == NULL) {
+    return;
+  }
+  uv_timer_stop(ctx->pTimer);
+  free(ctx->pTimer);
+  uv_close((uv_handle_t*)ctx->pTcp, NULL);
+  free(ctx->pTcp);
+  free(ctx->pWriter);
+  free(ctx);
+  // handle
+}
+static void uvConnCtxDestroy(uv_handle_t* handle) {
+  SConnCtx* ctx = handle->data;
+  connCtxDestroy(ctx);
+}
+
 #else

 #define RPC_MSG_OVERHEAD (sizeof(SRpcReqContext) + sizeof(SRpcHead) + sizeof(SRpcDigest))

From fdb79077c4c2d1c945a234a66d123a93794329df Mon Sep 17 00:00:00 2001
From: yihaoDeng
Date: Sat, 15 Jan 2022 18:20:53 +0800
Subject: [PATCH 17/20] add libuv

---
 source/libs/transport/src/rpcMain.c | 208 +++++++++++++---------------
 1 file changed, 98 insertions(+), 110 deletions(-)

diff --git a/source/libs/transport/src/rpcMain.c b/source/libs/transport/src/rpcMain.c
index a1c0c05fc3..37ef10ba5b 100644
--- a/source/libs/transport/src/rpcMain.c
+++ b/source/libs/transport/src/rpcMain.c
@@ -66,10 +66,31 @@ typedef struct {
   struct SRpcConn* connList;  // connection list
 } SRpcInfo;
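Both libuv patches hand accepted sockets from the accept loop to a worker loop the same way: the accept side writes the one-byte notify string over an IPC-enabled uv_pipe_t with uv_write2, attaching the client TCP handle as the pipe's pending handle, and the worker's pipe-read callback pulls that handle off with uv_accept. A condensed sketch of the two halves, assuming the pipes were opened with uv_pipe_init(loop, pipe, 1) as in taosInitServer above; dispatchConn, dispatchDone, allocCb and readCb are illustrative names, not from the patch:

    // accept loop: forward the client socket to worker k over the ipc pipe
    static void dispatchConn(SServerObj* srv, uv_tcp_t* cli, int k) {
      uv_write_t* wr = malloc(sizeof(uv_write_t));
      uv_buf_t note = uv_buf_init((char*)notify, strlen(notify));  // 1-byte payload
      // the 5th argument piggybacks the tcp handle on the pipe
      uv_write2(wr, (uv_stream_t*)&srv->pipe[k][0], &note, 1, (uv_stream_t*)cli, dispatchDone);
    }

    // worker loop: take the pending tcp handle off the pipe and start reading
    static void onPipeRead(uv_stream_t* q, ssize_t nread, const uv_buf_t* buf) {
      uv_pipe_t* pipe = (uv_pipe_t*)q;
      if (nread <= 0 || uv_pipe_pending_count(pipe) == 0) return;
      assert(uv_pipe_pending_type(pipe) == UV_TCP);
      uv_tcp_t* conn = malloc(sizeof(uv_tcp_t));
      uv_tcp_init(q->loop, conn);  // must be bound to the worker's own loop
      if (uv_accept(q, (uv_stream_t*)conn) == 0) {
        uv_read_start((uv_stream_t*)conn, allocCb, readCb);
      } else {
        uv_close((uv_handle_t*)conn, NULL);  // hand-off failed; drop the handle
      }
    }

The design point is that each uv handle may only be touched from the loop that owns it, so the socket has to be re-initialized on the worker's loop rather than shared across threads.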
+typedef struct { + SRpcInfo* pRpc; // associated SRpcInfo + SEpSet epSet; // ip list provided by app + void* ahandle; // handle provided by app + struct SRpcConn* pConn; // pConn allocated + tmsg_t msgType; // message type + uint8_t* pCont; // content provided by app + int32_t contLen; // content length + int32_t code; // error code + int16_t numOfTry; // number of try for different servers + int8_t oldInUse; // server EP inUse passed by app + int8_t redirect; // flag to indicate redirect + int8_t connType; // connection type + int64_t rid; // refId returned by taosAddRef + SRpcMsg* pRsp; // for synchronous API + tsem_t* pSem; // for synchronous API + SEpSet* pSet; // for synchronous API + char msg[0]; // RpcHead starts from here +} SRpcReqContext; + #ifdef USE_UV #define container_of(ptr, type, member) ((type*)((char*)(ptr)-offsetof(type, member))) +#define RPC_RESERVE_SIZE (sizeof(SRpcReqContext)) static const char* notify = "a"; typedef struct SThreadObj { @@ -94,12 +115,12 @@ typedef struct SServerObj { uint32_t port; } SServerObj; -typedef struct SContent { +typedef struct SConnBuffer { char* buf; int len; int cap; - int toRead; -} SContent; + int left; +} SConnBuffer; typedef struct SConnCtx { uv_tcp_t* pTcp; @@ -110,18 +131,18 @@ typedef struct SConnCtx { queue queue; int ref; int persist; // persist connection or not - SContent pCont; + SConnBuffer connBuf; int count; } SConnCtx; -static void allocReadBuffer(uv_handle_t* handle, size_t suggested_size, uv_buf_t* buf); -static void onRead(uv_stream_t* cli, ssize_t nread, const uv_buf_t* buf); -static void allocConnBuffer(uv_handle_t* handle, size_t suggested_size, uv_buf_t* buf); -static void onTimeout(uv_timer_t* handle); -static void onWrite(uv_write_t* req, int status); -static void onAccept(uv_stream_t* stream, int status); -static void onConnection(uv_stream_t* q, ssize_t nread, const uv_buf_t* buf); -static void workerAsyncCB(uv_async_t* handle); +static void uvAllocConnBufferCb(uv_handle_t* handle, size_t suggested_size, uv_buf_t* buf); +static void uvAllocReadBufferCb(uv_handle_t* handle, size_t suggested_size, uv_buf_t* buf); +static void uvOnReadCb(uv_stream_t* cli, ssize_t nread, const uv_buf_t* buf); +static void uvOnTimeoutCb(uv_timer_t* handle); +static void uvOnWriteCb(uv_write_t* req, int status); +static void uvOnAcceptCb(uv_stream_t* stream, int status); +static void uvOnConnectionCb(uv_stream_t* q, ssize_t nread, const uv_buf_t* buf); +static void uvWorkerAsyncCb(uv_async_t* handle); static SConnCtx* connCtxCreate(); static void connCtxDestroy(SConnCtx* ctx); @@ -193,95 +214,68 @@ void* rpcOpen(const SRpcInit* pInit) { pRpc->tcphandle = taosInitServer(0, pInit->localPort, pRpc->label, pRpc->numOfThreads, NULL, pRpc); return pRpc; } -void rpcClose(void* arg) { return; } -void* rpcMallocCont(int contLen) { return NULL; } -void rpcFreeCont(void* cont) { return; } -void* rpcReallocCont(void* ptr, int contLen) { return NULL; } -void rpcSendRequest(void* thandle, const SEpSet* pEpSet, SRpcMsg* pMsg, int64_t* rid) { return; } - -void rpcSendResponse(const SRpcMsg* pMsg) {} - -void rpcSendRedirectRsp(void* pConn, const SEpSet* pEpSet) {} -int rpcGetConnInfo(void* thandle, SRpcConnInfo* pInfo) { return -1; } -void rpcSendRecv(void* shandle, SEpSet* pEpSet, SRpcMsg* pReq, SRpcMsg* pRsp) { return; } -int rpcReportProgress(void* pConn, char* pCont, int contLen) { return -1; } -void rpcCancelRequest(int64_t rid) { return; } - -void allocReadBuffer(uv_handle_t* handle, size_t suggested_size, uv_buf_t* buf) { +void 
uvAllocReadBufferCb(uv_handle_t* handle, size_t suggested_size, uv_buf_t* buf) {
  static const int CAPACITY = 1024;
  /*
   * format of data buffer:
   * |<-------SRpcReqContext------->|<------------data read from socket----------->|
   */
  SConnCtx* ctx = handle->data;
  SConnBuffer* pBuf = &ctx->connBuf;
  if (pBuf->cap == 0) {
    pBuf->buf = (char*)calloc(CAPACITY + RPC_RESERVE_SIZE, sizeof(char));
    pBuf->len = 0;
    pBuf->cap = CAPACITY;
    pBuf->left = -1;

    buf->base = pBuf->buf + RPC_RESERVE_SIZE;
    buf->len = CAPACITY;
  } else {
    if (pBuf->len >= pBuf->cap) {
      if (pBuf->left == -1) {
        pBuf->cap *= 2;
        pBuf->buf = realloc(pBuf->buf, pBuf->cap + RPC_RESERVE_SIZE);
      } else if (pBuf->len + pBuf->left > pBuf->cap) {
        pBuf->cap = pBuf->len + pBuf->left;
        pBuf->buf = realloc(pBuf->buf, pBuf->len + pBuf->left + RPC_RESERVE_SIZE);
      }
    }
    buf->base = pBuf->buf + pBuf->len + RPC_RESERVE_SIZE;
    buf->len = pBuf->cap - pBuf->len;
  }
}
// check data read from socket completely or not
//
static bool isReadAll(SConnBuffer* data) {
  // TODO(yihao): handle pipeline later
  SRpcHead rpcHead;
  int32_t headLen = sizeof(rpcHead);
  if (data->len >= headLen) {
    memcpy((char*)&rpcHead, data->buf, headLen);
    int32_t msgLen = (int32_t)htonl((uint32_t)rpcHead.msgLen);
    if (msgLen > data->len) {
      data->left = msgLen - data->len;
      return false;
    } else {
      return true;
    }
  } else {
    return false;
  }
}

void uvOnReadCb(uv_stream_t* cli, ssize_t nread, const uv_buf_t* buf) {
  // opt
  SConnCtx* ctx = cli->data;
  SConnBuffer* pBuf = &ctx->connBuf;
  if (nread > 0) {
    pBuf->len += nread;
    if (isReadAll(pBuf)) {
      tDebug("already read complete packet");
    } else {
      tDebug("read half packet, continue to read");
    }
    return;
  }

  if (nread != UV_EOF) {
    tDebug("Read error %s\n", uv_err_name(nread));
  }
  uv_close((uv_handle_t*)cli, uvConnCtxDestroy);
}
void uvAllocConnBufferCb(uv_handle_t* handle, size_t
suggested_size, uv_buf_t* buf) { +void uvAllocConnBufferCb(uv_handle_t* handle, size_t suggested_size, uv_buf_t* buf) { buf->base = malloc(sizeof(char)); buf->len = 2; } -void onTimeout(uv_timer_t* handle) { +void uvOnTimeoutCb(uv_timer_t* handle) { // opt tDebug("time out"); } -void onWrite(uv_write_t* req, int status) { +void uvOnWriteCb(uv_write_t* req, int status) { SConnCtx* ctx = req->data; if (status == 0) { tDebug("data already was written on stream"); @@ -311,7 +305,7 @@ void onWrite(uv_write_t* req, int status) { // opt } -void workerAsyncCB(uv_async_t* handle) { +void uvWorkerAsyncCb(uv_async_t* handle) { SThreadObj* pObj = container_of(handle, SThreadObj, workerAsync); SConnCtx* conn = NULL; @@ -329,7 +323,7 @@ void workerAsyncCB(uv_async_t* handle) { } } -void onAccept(uv_stream_t* stream, int status) { +void uvOnAcceptCb(uv_stream_t* stream, int status) { if (status == -1) { return; } @@ -345,12 +339,12 @@ void onAccept(uv_stream_t* stream, int status) { pObj->workerIdx = (pObj->workerIdx + 1) % pObj->numOfThread; tDebug("new conntion accepted by main server, dispatch to %dth worker-thread", pObj->workerIdx); - uv_write2(wr, (uv_stream_t*)&(pObj->pipe[pObj->workerIdx][0]), &buf, 1, (uv_stream_t*)cli, onWrite); + uv_write2(wr, (uv_stream_t*)&(pObj->pipe[pObj->workerIdx][0]), &buf, 1, (uv_stream_t*)cli, uvOnWriteCb); } else { uv_close((uv_handle_t*)cli, NULL); } } -void onConnection(uv_stream_t* q, ssize_t nread, const uv_buf_t* buf) { +void uvOnConnectionCb(uv_stream_t* q, ssize_t nread, const uv_buf_t* buf) { tDebug("connection coming"); if (nread < 0) { if (nread != UV_EOF) { @@ -396,7 +390,7 @@ void onConnection(uv_stream_t* q, ssize_t nread, const uv_buf_t* buf) { uv_os_fd_t fd; uv_fileno((const uv_handle_t*)pConn->pTcp, &fd); tDebug("new connection created: %d", fd); - uv_read_start((uv_stream_t*)(pConn->pTcp), allocReadBuffer, onRead); + uv_read_start((uv_stream_t*)(pConn->pTcp), uvAllocReadBufferCb, uvOnReadCb); } else { connCtxDestroy(pConn); } @@ -412,7 +406,7 @@ void* acceptThread(void* arg) { uv_ip4_addr("0.0.0.0", srv->port, &bind_addr); uv_tcp_bind(&srv->server, (const struct sockaddr*)&bind_addr, 0); int err = 0; - if ((err = uv_listen((uv_stream_t*)&srv->server, 128, onAccept)) != 0) { + if ((err = uv_listen((uv_stream_t*)&srv->server, 128, uvOnAcceptCb)) != 0) { tError("Listen error %s\n", uv_err_name(err)); return NULL; } @@ -430,9 +424,9 @@ void* workerThread(void* arg) { QUEUE_INIT(&pObj->conn); pObj->workerAsync = malloc(sizeof(uv_async_t)); - uv_async_init(pObj->loop, pObj->workerAsync, workerAsyncCB); + uv_async_init(pObj->loop, pObj->workerAsync, uvWorkerAsyncCb); - uv_read_start((uv_stream_t*)pObj->pipe, allocConnBuffer, onConnection); + uv_read_start((uv_stream_t*)pObj->pipe, uvAllocConnBufferCb, uvOnConnectionCb); uv_run(pObj->loop, UV_RUN_DEFAULT); } static SConnCtx* connCtxCreate() { @@ -455,6 +449,20 @@ static void uvConnCtxDestroy(uv_handle_t* handle) { SConnCtx* ctx = handle->data; connCtxDestroy(ctx); } +void rpcClose(void* arg) { return; } +void* rpcMallocCont(int contLen) { return NULL; } +void rpcFreeCont(void* cont) { return; } +void* rpcReallocCont(void* ptr, int contLen) { return NULL; } + +void rpcSendRequest(void* thandle, const SEpSet* pEpSet, SRpcMsg* pMsg, int64_t* rid) { return; } + +void rpcSendResponse(const SRpcMsg* pMsg) {} + +void rpcSendRedirectRsp(void* pConn, const SEpSet* pEpSet) {} +int rpcGetConnInfo(void* thandle, SRpcConnInfo* pInfo) { return -1; } +void rpcSendRecv(void* shandle, SEpSet* pEpSet, SRpcMsg* pReq, SRpcMsg* 
pRsp) { return; } +int rpcReportProgress(void* pConn, char* pCont, int contLen) { return -1; } +void rpcCancelRequest(int64_t rid) { return; } #else @@ -465,26 +473,6 @@ static void uvConnCtxDestroy(uv_handle_t* handle) { #define rpcContLenFromMsg(msgLen) (msgLen - sizeof(SRpcHead)) #define rpcIsReq(type) (type & 1U) -typedef struct { - SRpcInfo * pRpc; // associated SRpcInfo - SEpSet epSet; // ip list provided by app - void * ahandle; // handle provided by app - struct SRpcConn *pConn; // pConn allocated - tmsg_t msgType; // message type - uint8_t * pCont; // content provided by app - int32_t contLen; // content length - int32_t code; // error code - int16_t numOfTry; // number of try for different servers - int8_t oldInUse; // server EP inUse passed by app - int8_t redirect; // flag to indicate redirect - int8_t connType; // connection type - int64_t rid; // refId returned by taosAddRef - SRpcMsg * pRsp; // for synchronous API - tsem_t * pSem; // for synchronous API - SEpSet * pSet; // for synchronous API - char msg[0]; // RpcHead starts from here -} SRpcReqContext; - typedef struct SRpcConn { char info[48]; // debug info: label + pConn + ahandle int sid; // session ID From 60b48e3ee0b06a7e6591ff5e8d88c584c6ae68d4 Mon Sep 17 00:00:00 2001 From: yihaoDeng Date: Sun, 16 Jan 2022 11:58:45 +0800 Subject: [PATCH 18/20] refactor code --- source/libs/transport/inc/rpcHead.h | 26 - source/libs/transport/inc/rpcTcp.h | 4 - source/libs/transport/inc/transportInt.h | 1 + source/libs/transport/src/rpcCache.c | 4 - source/libs/transport/src/rpcMain.c | 439 +------------- source/libs/transport/src/rpcTcp.c | 8 - source/libs/transport/src/rpcUdp.c | 4 - source/libs/transport/src/transport.c | 697 +++++++++++++++++++++++ 8 files changed, 727 insertions(+), 456 deletions(-) diff --git a/source/libs/transport/inc/rpcHead.h b/source/libs/transport/inc/rpcHead.h index 66821db133..5ddf1a83c9 100644 --- a/source/libs/transport/inc/rpcHead.h +++ b/source/libs/transport/inc/rpcHead.h @@ -21,31 +21,6 @@ extern "C" { #endif -#ifdef USE_UV -typedef struct { - char version : 4; // RPC version - char comp : 4; // compression algorithm, 0:no compression 1:lz4 - char resflag : 2; // reserved bits - char spi : 3; // security parameter index - char encrypt : 3; // encrypt algorithm, 0: no encryption - uint16_t tranId; // transcation ID - uint32_t linkUid; // for unique connection ID assigned by client - uint64_t ahandle; // ahandle assigned by client - uint32_t sourceId; // source ID, an index for connection list - uint32_t destId; // destination ID, an index for connection list - uint32_t destIp; // destination IP address, for NAT scenario - char user[TSDB_UNI_LEN]; // user ID - uint16_t port; // for UDP only, port may be changed - char empty[1]; // reserved - uint16_t msgType; // message type - int32_t msgLen; // message length including the header iteslf - uint32_t msgVer; - int32_t code; // code in response message - uint8_t content[0]; // message body starts from here -} SRpcHead; - -#else - #define RPC_CONN_TCP 2 extern int tsRpcOverhead; @@ -96,7 +71,6 @@ typedef struct { } SRpcDigest; #pragma pack(pop) -#endif #ifdef __cplusplus } diff --git a/source/libs/transport/inc/rpcTcp.h b/source/libs/transport/inc/rpcTcp.h index 5e5c43a1db..ad42307516 100644 --- a/source/libs/transport/inc/rpcTcp.h +++ b/source/libs/transport/inc/rpcTcp.h @@ -21,8 +21,6 @@ extern "C" { #endif -#ifdef USE_UV -#else void *taosInitTcpServer(uint32_t ip, uint16_t port, char *label, int numOfThreads, void *fp, void *shandle); void 
taosStopTcpServer(void *param); void taosCleanUpTcpServer(void *param); @@ -35,8 +33,6 @@ void *taosOpenTcpClientConnection(void *shandle, void *thandle, uint32_t ip, uin void taosCloseTcpConnection(void *chandle); int taosSendTcpData(uint32_t ip, uint16_t port, void *data, int len, void *chandle); -#endif - #ifdef __cplusplus } #endif diff --git a/source/libs/transport/inc/transportInt.h b/source/libs/transport/inc/transportInt.h index 067b371b84..f93753cfe9 100644 --- a/source/libs/transport/inc/transportInt.h +++ b/source/libs/transport/inc/transportInt.h @@ -16,6 +16,7 @@ #ifndef _TD_TRANSPORT_INT_H_ #define _TD_TRANSPORT_INT_H_ +#include "rpcHead.h" #ifdef __cplusplus extern "C" { #endif diff --git a/source/libs/transport/src/rpcCache.c b/source/libs/transport/src/rpcCache.c index 40767d2ba5..1db2808126 100644 --- a/source/libs/transport/src/rpcCache.c +++ b/source/libs/transport/src/rpcCache.c @@ -22,9 +22,6 @@ #include "ttimer.h" #include "tutil.h" -#ifdef USE_UV - -#else typedef struct SConnHash { char fqdn[TSDB_FQDN_LEN]; uint16_t port; @@ -295,4 +292,3 @@ static void rpcUnlockCache(int64_t *lockedBy) { assert(false); } } -#endif diff --git a/source/libs/transport/src/rpcMain.c b/source/libs/transport/src/rpcMain.c index 37ef10ba5b..f381768a34 100644 --- a/source/libs/transport/src/rpcMain.c +++ b/source/libs/transport/src/rpcMain.c @@ -13,9 +13,6 @@ * along with this program. If not, see . */ -#ifdef USE_UV -#include -#endif #include "lz4.h" #include "os.h" #include "rpcCache.h" @@ -36,6 +33,17 @@ #include "ttimer.h" #include "tutil.h" +static pthread_once_t tsRpcInitOnce = PTHREAD_ONCE_INIT; + +int tsRpcMaxUdpSize = 15000; // bytes +int tsProgressTimer = 100; +// not configurable +int tsRpcMaxRetry; +int tsRpcHeadSize; +int tsRpcOverhead; + +#ifndef USE_UV + typedef struct { int sessions; // number of sessions allowed int numOfThreads; // number of threads to process incoming messages @@ -51,28 +59,28 @@ typedef struct { char secret[TSDB_PASSWORD_LEN]; // secret for the link char ckey[TSDB_PASSWORD_LEN]; // ciphering key - void (*cfp)(void* parent, SRpcMsg*, SEpSet*); - int (*afp)(void* parent, char* user, char* spi, char* encrypt, char* secret, char* ckey); + void (*cfp)(void *parent, SRpcMsg *, SEpSet *); + int (*afp)(void *parent, char *user, char *spi, char *encrypt, char *secret, char *ckey); int32_t refCount; - void* parent; - void* idPool; // handle to ID pool - void* tmrCtrl; // handle to timer - SHashObj* hash; // handle returned by hash utility - void* tcphandle; // returned handle from TCP initialization - void* udphandle; // returned handle from UDP initialization - void* pCache; // connection cache + void * parent; + void * idPool; // handle to ID pool + void * tmrCtrl; // handle to timer + SHashObj * hash; // handle returned by hash utility + void * tcphandle; // returned handle from TCP initialization + void * udphandle; // returned handle from UDP initialization + void * pCache; // connection cache pthread_mutex_t mutex; - struct SRpcConn* connList; // connection list + struct SRpcConn *connList; // connection list } SRpcInfo; typedef struct { - SRpcInfo* pRpc; // associated SRpcInfo + SRpcInfo * pRpc; // associated SRpcInfo SEpSet epSet; // ip list provided by app - void* ahandle; // handle provided by app - struct SRpcConn* pConn; // pConn allocated + void * ahandle; // handle provided by app + struct SRpcConn *pConn; // pConn allocated tmsg_t msgType; // message type - uint8_t* pCont; // content provided by app + uint8_t * pCont; // content provided by app 
int32_t contLen; // content length int32_t code; // error code int16_t numOfTry; // number of try for different servers @@ -80,394 +88,14 @@ typedef struct { int8_t redirect; // flag to indicate redirect int8_t connType; // connection type int64_t rid; // refId returned by taosAddRef - SRpcMsg* pRsp; // for synchronous API - tsem_t* pSem; // for synchronous API - SEpSet* pSet; // for synchronous API + SRpcMsg * pRsp; // for synchronous API + tsem_t * pSem; // for synchronous API + SEpSet * pSet; // for synchronous API char msg[0]; // RpcHead starts from here } SRpcReqContext; -#ifdef USE_UV - -#define container_of(ptr, type, member) ((type*)((char*)(ptr)-offsetof(type, member))) - -#define RPC_RESERVE_SIZE (sizeof(SRpcReqContext)) -static const char* notify = "a"; - -typedef struct SThreadObj { - pthread_t thread; - uv_pipe_t* pipe; - uv_loop_t* loop; - uv_async_t* workerAsync; // - int fd; - queue conn; - pthread_mutex_t connMtx; -} SThreadObj; - -typedef struct SServerObj { - pthread_t thread; - uv_tcp_t server; - uv_loop_t* loop; - int workerIdx; - int numOfThread; - SThreadObj** pThreadObj; - uv_pipe_t** pipe; - uint32_t ip; - uint32_t port; -} SServerObj; - -typedef struct SConnBuffer { - char* buf; - int len; - int cap; - int left; -} SConnBuffer; - -typedef struct SConnCtx { - uv_tcp_t* pTcp; - uv_write_t* pWriter; - uv_timer_t* pTimer; - - uv_async_t* pWorkerAsync; - queue queue; - int ref; - int persist; // persist connection or not - SConnBuffer connBuf; - int count; -} SConnCtx; - -static void uvAllocConnBufferCb(uv_handle_t* handle, size_t suggested_size, uv_buf_t* buf); -static void uvAllocReadBufferCb(uv_handle_t* handle, size_t suggested_size, uv_buf_t* buf); -static void uvOnReadCb(uv_stream_t* cli, ssize_t nread, const uv_buf_t* buf); -static void uvOnTimeoutCb(uv_timer_t* handle); -static void uvOnWriteCb(uv_write_t* req, int status); -static void uvOnAcceptCb(uv_stream_t* stream, int status); -static void uvOnConnectionCb(uv_stream_t* q, ssize_t nread, const uv_buf_t* buf); -static void uvWorkerAsyncCb(uv_async_t* handle); - -static SConnCtx* connCtxCreate(); -static void connCtxDestroy(SConnCtx* ctx); -static void uvConnCtxDestroy(uv_handle_t* handle); - -static void* workerThread(void* arg); -static void* acceptThread(void* arg); - -void* taosInitServer(uint32_t ip, uint32_t port, char* label, int numOfThreads, void* fp, void* shandle); - -int32_t rpcInit() { return -1; } -void rpcCleanup() { return; }; - -void* taosInitClient(uint32_t ip, uint32_t port, char* label, int numOfThreads, void* fp, void* shandle) { - // opte -} -void* taosInitServer(uint32_t ip, uint32_t port, char* label, int numOfThreads, void* fp, void* shandle) { - SServerObj* srv = calloc(1, sizeof(SServerObj)); - srv->loop = (uv_loop_t*)malloc(sizeof(uv_loop_t)); - srv->numOfThread = numOfThreads; - srv->workerIdx = 0; - srv->pThreadObj = (SThreadObj**)calloc(srv->numOfThread, sizeof(SThreadObj*)); - srv->pipe = (uv_pipe_t**)calloc(srv->numOfThread, sizeof(uv_pipe_t*)); - srv->ip = ip; - srv->port = port; - uv_loop_init(srv->loop); - - for (int i = 0; i < srv->numOfThread; i++) { - SThreadObj* thrd = (SThreadObj*)calloc(1, sizeof(SThreadObj)); - srv->pipe[i] = (uv_pipe_t*)calloc(2, sizeof(uv_pipe_t)); - int fds[2]; - if (uv_socketpair(AF_UNIX, SOCK_STREAM, fds, UV_NONBLOCK_PIPE, UV_NONBLOCK_PIPE) != 0) { - return NULL; - } - uv_pipe_init(srv->loop, &(srv->pipe[i][0]), 1); - uv_pipe_open(&(srv->pipe[i][0]), fds[1]); // init write - - thrd->fd = fds[0]; - thrd->pipe = &(srv->pipe[i][1]); // init read - 
int err = pthread_create(&(thrd->thread), NULL, workerThread, (void*)(thrd)); - if (err == 0) { - tDebug("sucess to create worker-thread %d", i); - // printf("thread %d create\n", i); - } else { - // TODO: clear all other resource later - tError("failed to create worker-thread %d", i); - } - srv->pThreadObj[i] = thrd; - } - - int err = pthread_create(&srv->thread, NULL, acceptThread, (void*)srv); - if (err == 0) { - tDebug("success to create accept-thread"); - } else { - // clear all resource later - } - - return srv; -} -void* rpcOpen(const SRpcInit* pInit) { - SRpcInfo* pRpc = calloc(1, sizeof(SRpcInfo)); - if (pRpc == NULL) { - return NULL; - } - if (pInit->label) { - tstrncpy(pRpc->label, pInit->label, strlen(pInit->label)); - } - pRpc->numOfThreads = pInit->numOfThreads > TSDB_MAX_RPC_THREADS ? TSDB_MAX_RPC_THREADS : pInit->numOfThreads; - pRpc->tcphandle = taosInitServer(0, pInit->localPort, pRpc->label, pRpc->numOfThreads, NULL, pRpc); - return pRpc; -} - -void uvAllocReadBufferCb(uv_handle_t* handle, size_t suggested_size, uv_buf_t* buf) { - static const int CAPACITY = 1024; - /* - * formate of data buffer: - * |<-------SRpcReqContext------->|<------------data read from socket----------->| - */ - - SConnCtx* ctx = handle->data; - SConnBuffer* pBuf = &ctx->connBuf; - if (pBuf->cap == 0) { - pBuf->buf = (char*)calloc(CAPACITY + RPC_RESERVE_SIZE, sizeof(char)); - pBuf->len = 0; - pBuf->cap = CAPACITY; - pBuf->left = -1; - - buf->base = pBuf->buf + RPC_RESERVE_SIZE; - buf->len = CAPACITY; - } else { - if (pBuf->len >= pBuf->cap) { - if (pBuf->left == -1) { - pBuf->cap *= 2; - pBuf->buf = realloc(pBuf->buf, pBuf->cap + RPC_RESERVE_SIZE); - } else if (pBuf->len + pBuf->left > pBuf->cap) { - pBuf->cap = pBuf->len + pBuf->left; - pBuf->buf = realloc(pBuf->buf, pBuf->len + pBuf->left + RPC_RESERVE_SIZE); - } - } - buf->base = pBuf->buf + pBuf->len + RPC_RESERVE_SIZE; - buf->len = pBuf->cap - pBuf->len; - } -} -// check data read from socket completely or not -// -static bool isReadAll(SConnBuffer* data) { - // TODO(yihao): handle pipeline later - SRpcHead rpcHead; - int32_t headLen = sizeof(rpcHead); - if (data->len >= headLen) { - memcpy((char*)&rpcHead, data->buf, headLen); - int32_t msgLen = (int32_t)htonl((uint32_t)rpcHead.msgLen); - if (msgLen > data->len) { - data->left = msgLen - data->len; - return false; - } else { - return true; - } - } else { - return false; - } -} - -void uvOnReadCb(uv_stream_t* cli, ssize_t nread, const uv_buf_t* buf) { - // opt - SConnCtx* ctx = cli->data; - SConnBuffer* pBuf = &ctx->connBuf; - if (nread > 0) { - pBuf->len += nread; - if (isReadAll(pBuf)) { - tDebug("alread read complete packet"); - } else { - tDebug("read half packet, continue to read"); - } - return; - } - - if (nread != UV_EOF) { - tDebug("Read error %s\n", uv_err_name(nread)); - } - uv_close((uv_handle_t*)cli, uvConnCtxDestroy); -} -void uvAllocConnBufferCb(uv_handle_t* handle, size_t suggested_size, uv_buf_t* buf) { - buf->base = malloc(sizeof(char)); - buf->len = 2; -} - -void uvOnTimeoutCb(uv_timer_t* handle) { - // opt - tDebug("time out"); -} - -void uvOnWriteCb(uv_write_t* req, int status) { - SConnCtx* ctx = req->data; - if (status == 0) { - tDebug("data already was written on stream"); - } else { - connCtxDestroy(ctx); - } - // opt -} - -void uvWorkerAsyncCb(uv_async_t* handle) { - SThreadObj* pObj = container_of(handle, SThreadObj, workerAsync); - SConnCtx* conn = NULL; - - // opt later - pthread_mutex_lock(&pObj->connMtx); - if (!QUEUE_IS_EMPTY(&pObj->conn)) { - queue* head = 
QUEUE_HEAD(&pObj->conn); - conn = QUEUE_DATA(head, SConnCtx, queue); - QUEUE_REMOVE(&conn->queue); - } - pthread_mutex_unlock(&pObj->connMtx); - if (conn == NULL) { - tError("except occurred, do nothing"); - return; - } -} - -void uvOnAcceptCb(uv_stream_t* stream, int status) { - if (status == -1) { - return; - } - SServerObj* pObj = container_of(stream, SServerObj, server); - - uv_tcp_t* cli = (uv_tcp_t*)malloc(sizeof(uv_tcp_t)); - uv_tcp_init(pObj->loop, cli); - - if (uv_accept(stream, (uv_stream_t*)cli) == 0) { - uv_write_t* wr = (uv_write_t*)malloc(sizeof(uv_write_t)); - - uv_buf_t buf = uv_buf_init((char*)notify, strlen(notify)); - - pObj->workerIdx = (pObj->workerIdx + 1) % pObj->numOfThread; - tDebug("new conntion accepted by main server, dispatch to %dth worker-thread", pObj->workerIdx); - uv_write2(wr, (uv_stream_t*)&(pObj->pipe[pObj->workerIdx][0]), &buf, 1, (uv_stream_t*)cli, uvOnWriteCb); - } else { - uv_close((uv_handle_t*)cli, NULL); - } -} -void uvOnConnectionCb(uv_stream_t* q, ssize_t nread, const uv_buf_t* buf) { - tDebug("connection coming"); - if (nread < 0) { - if (nread != UV_EOF) { - tError("read error %s", uv_err_name(nread)); - } - // TODO(log other failure reason) - uv_close((uv_handle_t*)q, NULL); - return; - } - // free memory allocated by - assert(nread == strlen(notify)); - assert(buf->base[0] == notify[0]); - free(buf->base); - - SThreadObj* pObj = (SThreadObj*)container_of(q, struct SThreadObj, pipe); - - uv_pipe_t* pipe = (uv_pipe_t*)q; - if (!uv_pipe_pending_count(pipe)) { - tError("No pending count"); - return; - } - - uv_handle_type pending = uv_pipe_pending_type(pipe); - assert(pending == UV_TCP); - - SConnCtx* pConn = connCtxCreate(); - /* init conn timer*/ - pConn->pTimer = malloc(sizeof(uv_timer_t)); - uv_timer_init(pObj->loop, pConn->pTimer); - - pConn->pWorkerAsync = pObj->workerAsync; // thread safty - - // init client handle - pConn->pTcp = (uv_tcp_t*)malloc(sizeof(uv_tcp_t)); - uv_tcp_init(pObj->loop, pConn->pTcp); - pConn->pTcp->data = pConn; - - // init write request, just - pConn->pWriter = calloc(1, sizeof(uv_write_t)); - pConn->pWriter->data = pConn; - - if (uv_accept(q, (uv_stream_t*)(pConn->pTcp)) == 0) { - uv_os_fd_t fd; - uv_fileno((const uv_handle_t*)pConn->pTcp, &fd); - tDebug("new connection created: %d", fd); - uv_read_start((uv_stream_t*)(pConn->pTcp), uvAllocReadBufferCb, uvOnReadCb); - } else { - connCtxDestroy(pConn); - } -} - -void* acceptThread(void* arg) { - // opt - SServerObj* srv = (SServerObj*)arg; - uv_tcp_init(srv->loop, &srv->server); - - struct sockaddr_in bind_addr; - - uv_ip4_addr("0.0.0.0", srv->port, &bind_addr); - uv_tcp_bind(&srv->server, (const struct sockaddr*)&bind_addr, 0); - int err = 0; - if ((err = uv_listen((uv_stream_t*)&srv->server, 128, uvOnAcceptCb)) != 0) { - tError("Listen error %s\n", uv_err_name(err)); - return NULL; - } - uv_run(srv->loop, UV_RUN_DEFAULT); -} -void* workerThread(void* arg) { - SThreadObj* pObj = (SThreadObj*)arg; - - pObj->loop = (uv_loop_t*)malloc(sizeof(uv_loop_t)); - uv_loop_init(pObj->loop); - - uv_pipe_init(pObj->loop, pObj->pipe, 1); - uv_pipe_open(pObj->pipe, pObj->fd); - - QUEUE_INIT(&pObj->conn); - - pObj->workerAsync = malloc(sizeof(uv_async_t)); - uv_async_init(pObj->loop, pObj->workerAsync, uvWorkerAsyncCb); - - uv_read_start((uv_stream_t*)pObj->pipe, uvAllocConnBufferCb, uvOnConnectionCb); - uv_run(pObj->loop, UV_RUN_DEFAULT); -} -static SConnCtx* connCtxCreate() { - SConnCtx* pConn = (SConnCtx*)calloc(1, sizeof(SConnCtx)); - return pConn; -} -static void 
connCtxDestroy(SConnCtx* ctx) { - if (ctx == NULL) { - return; - } - uv_timer_stop(ctx->pTimer); - free(ctx->pTimer); - uv_close((uv_handle_t*)ctx->pTcp, NULL); - free(ctx->pTcp); - free(ctx->pWriter); - free(ctx); - // handle -} -static void uvConnCtxDestroy(uv_handle_t* handle) { - SConnCtx* ctx = handle->data; - connCtxDestroy(ctx); -} -void rpcClose(void* arg) { return; } -void* rpcMallocCont(int contLen) { return NULL; } -void rpcFreeCont(void* cont) { return; } -void* rpcReallocCont(void* ptr, int contLen) { return NULL; } - -void rpcSendRequest(void* thandle, const SEpSet* pEpSet, SRpcMsg* pMsg, int64_t* rid) { return; } - -void rpcSendResponse(const SRpcMsg* pMsg) {} - -void rpcSendRedirectRsp(void* pConn, const SEpSet* pEpSet) {} -int rpcGetConnInfo(void* thandle, SRpcConnInfo* pInfo) { return -1; } -void rpcSendRecv(void* shandle, SEpSet* pEpSet, SRpcMsg* pReq, SRpcMsg* pRsp) { return; } -int rpcReportProgress(void* pConn, char* pCont, int contLen) { return -1; } -void rpcCancelRequest(int64_t rid) { return; } - -#else - #define RPC_MSG_OVERHEAD (sizeof(SRpcReqContext) + sizeof(SRpcHead) + sizeof(SRpcDigest)) -#define rpcHeadFromCont(cont) ((SRpcHead*)((char*)cont - sizeof(SRpcHead))) +#define rpcHeadFromCont(cont) ((SRpcHead *)((char *)cont - sizeof(SRpcHead))) #define rpcContFromHead(msg) (msg + sizeof(SRpcHead)) #define rpcMsgLenFromCont(contLen) (contLen + sizeof(SRpcHead)) #define rpcContLenFromMsg(msgLen) (msgLen - sizeof(SRpcHead)) @@ -510,15 +138,6 @@ typedef struct SRpcConn { SRpcReqContext *pContext; // request context } SRpcConn; -static pthread_once_t tsRpcInitOnce = PTHREAD_ONCE_INIT; - -int tsRpcMaxUdpSize = 15000; // bytes -int tsProgressTimer = 100; -// not configurable -int tsRpcMaxRetry; -int tsRpcHeadSize; -int tsRpcOverhead; - static int tsRpcRefId = -1; static int32_t tsRpcNum = 0; // static pthread_once_t tsRpcInit = PTHREAD_ONCE_INIT; diff --git a/source/libs/transport/src/rpcTcp.c b/source/libs/transport/src/rpcTcp.c index 9fa51a6fdc..81c464a661 100644 --- a/source/libs/transport/src/rpcTcp.c +++ b/source/libs/transport/src/rpcTcp.c @@ -14,9 +14,6 @@ */ #include "rpcTcp.h" -#ifdef USE_UV -#include -#endif #include "os.h" #include "rpcHead.h" #include "rpcLog.h" @@ -24,9 +21,6 @@ #include "taoserror.h" #include "tutil.h" -#ifdef USE_UV - -#else typedef struct SFdObj { void * signature; SOCKET fd; // TCP socket FD @@ -662,5 +656,3 @@ static void taosFreeFdObj(SFdObj *pFdObj) { tfree(pFdObj); } - -#endif diff --git a/source/libs/transport/src/rpcUdp.c b/source/libs/transport/src/rpcUdp.c index 79956cc98d..b57cf57c55 100644 --- a/source/libs/transport/src/rpcUdp.c +++ b/source/libs/transport/src/rpcUdp.c @@ -22,9 +22,6 @@ #include "ttimer.h" #include "tutil.h" -#ifdef USE_UV -// no support upd currently -#else #define RPC_MAX_UDP_CONNS 256 #define RPC_MAX_UDP_PKTS 1000 #define RPC_UDP_BUF_TIME 5 // mseconds @@ -260,4 +257,3 @@ int taosSendUdpData(uint32_t ip, uint16_t port, void *data, int dataLen, void *c return ret; } -#endif diff --git a/source/libs/transport/src/transport.c b/source/libs/transport/src/transport.c index f2f48bbc8a..a6c9cee0b7 100644 --- a/source/libs/transport/src/transport.c +++ b/source/libs/transport/src/transport.c @@ -12,3 +12,700 @@ * You should have received a copy of the GNU Affero General Public License * along with this program. If not, see . 
*/ + +#ifdef USE_UV + +#include +#include "lz4.h" +#include "os.h" +#include "rpcCache.h" +#include "rpcHead.h" +#include "rpcLog.h" +#include "rpcTcp.h" +#include "rpcUdp.h" +#include "taoserror.h" +#include "tglobal.h" +#include "thash.h" +#include "tidpool.h" +#include "tmd5.h" +#include "tmempool.h" +#include "tmsg.h" +#include "transportInt.h" +#include "tref.h" +#include "trpc.h" +#include "ttimer.h" +#include "tutil.h" + +#define container_of(ptr, type, member) ((type*)((char*)(ptr)-offsetof(type, member))) +#define RPC_RESERVE_SIZE (sizeof(SRpcReqContext)) +static const char* notify = "a"; + +typedef struct { + int sessions; // number of sessions allowed + int numOfThreads; // number of threads to process incoming messages + int idleTime; // milliseconds; + uint16_t localPort; + int8_t connType; + int index; // for UDP server only, round robin for multiple threads + char label[TSDB_LABEL_LEN]; + + char user[TSDB_UNI_LEN]; // meter ID + char spi; // security parameter index + char encrypt; // encrypt algorithm + char secret[TSDB_PASSWORD_LEN]; // secret for the link + char ckey[TSDB_PASSWORD_LEN]; // ciphering key + + void (*cfp)(void* parent, SRpcMsg*, SEpSet*); + int (*afp)(void* parent, char* user, char* spi, char* encrypt, char* secret, char* ckey); + + int32_t refCount; + void* parent; + void* idPool; // handle to ID pool + void* tmrCtrl; // handle to timer + SHashObj* hash; // handle returned by hash utility + void* tcphandle; // returned handle from TCP initialization + void* udphandle; // returned handle from UDP initialization + void* pCache; // connection cache + pthread_mutex_t mutex; + struct SRpcConn* connList; // connection list +} SRpcInfo; + +typedef struct { + SRpcInfo* pRpc; // associated SRpcInfo + SEpSet epSet; // ip list provided by app + void* ahandle; // handle provided by app + struct SRpcConn* pConn; // pConn allocated + tmsg_t msgType; // message type + uint8_t* pCont; // content provided by app + int32_t contLen; // content length + int32_t code; // error code + int16_t numOfTry; // number of try for different servers + int8_t oldInUse; // server EP inUse passed by app + int8_t redirect; // flag to indicate redirect + int8_t connType; // connection type + int64_t rid; // refId returned by taosAddRef + SRpcMsg* pRsp; // for synchronous API + tsem_t* pSem; // for synchronous API + SEpSet* pSet; // for synchronous API + char msg[0]; // RpcHead starts from here +} SRpcReqContext; + +typedef struct SThreadObj { + pthread_t thread; + uv_pipe_t* pipe; + int fd; + uv_loop_t* loop; + uv_async_t* workerAsync; // + queue conn; + pthread_mutex_t connMtx; + void* shandle; +} SThreadObj; + +#define RPC_MSG_OVERHEAD (sizeof(SRpcReqContext) + sizeof(SRpcHead) + sizeof(SRpcDigest)) +#define rpcHeadFromCont(cont) ((SRpcHead*)((char*)cont - sizeof(SRpcHead))) +#define rpcContFromHead(msg) (msg + sizeof(SRpcHead)) +#define rpcMsgLenFromCont(contLen) (contLen + sizeof(SRpcHead)) +#define rpcContLenFromMsg(msgLen) (msgLen - sizeof(SRpcHead)) +#define rpcIsReq(type) (type & 1U) + +typedef struct SServerObj { + pthread_t thread; + uv_tcp_t server; + uv_loop_t* loop; + int workerIdx; + int numOfThread; + SThreadObj** pThreadObj; + uv_pipe_t** pipe; + uint32_t ip; + uint32_t port; +} SServerObj; + +typedef struct SConnBuffer { + char* buf; + int len; + int cap; + int left; +} SConnBuffer; + +typedef struct SRpcConn { + uv_tcp_t* pTcp; + uv_write_t* pWriter; + uv_timer_t* pTimer; + + uv_async_t* pWorkerAsync; + queue queue; + int ref; + int persist; // persist connection or not + 
SConnBuffer connBuf;
+  int count;
+  void* shandle;  // rpc init
+  void* ahandle;
+
+  // del later
+  char secured;
+  int spi;
+  char info[64];
+  char user[TSDB_UNI_LEN];  // user ID for the link
+  char secret[TSDB_PASSWORD_LEN];
+  char ckey[TSDB_PASSWORD_LEN];  // ciphering key
+} SRpcConn;
+
+// auth function
+static int rpcAuthenticateMsg(void* pMsg, int msgLen, void* pAuth, void* pKey);
+static void rpcBuildAuthHead(void* pMsg, int msgLen, void* pAuth, void* pKey);
+static int rpcAddAuthPart(SRpcConn* pConn, char* msg, int msgLen);
+// compress data
+static int32_t rpcCompressRpcMsg(char* pCont, int32_t contLen);
+static SRpcHead* rpcDecompressRpcMsg(SRpcHead* pHead);
+
+static void uvAllocConnBufferCb(uv_handle_t* handle, size_t suggested_size, uv_buf_t* buf);
+static void uvAllocReadBufferCb(uv_handle_t* handle, size_t suggested_size, uv_buf_t* buf);
+static void uvOnReadCb(uv_stream_t* cli, ssize_t nread, const uv_buf_t* buf);
+static void uvOnTimeoutCb(uv_timer_t* handle);
+static void uvOnWriteCb(uv_write_t* req, int status);
+static void uvOnAcceptCb(uv_stream_t* stream, int status);
+static void uvOnConnectionCb(uv_stream_t* q, ssize_t nread, const uv_buf_t* buf);
+static void uvWorkerAsyncCb(uv_async_t* handle);
+
+static SRpcConn* connCreate();
+static void connDestroy(SRpcConn* conn);
+static void uvConnDestroy(uv_handle_t* handle);
+
+static void* workerThread(void* arg);
+static void* acceptThread(void* arg);
+
+void* taosInitClient(uint32_t ip, uint32_t port, char* label, int numOfThreads, void* fp, void* shandle);
+void* taosInitServer(uint32_t ip, uint32_t port, char* label, int numOfThreads, void* fp, void* shandle);
+
+void* taosInitServer(uint32_t ip, uint32_t port, char* label, int numOfThreads, void* fp, void* shandle) {
+  SServerObj* srv = calloc(1, sizeof(SServerObj));
+  srv->loop = (uv_loop_t*)malloc(sizeof(uv_loop_t));
+  srv->numOfThread = numOfThreads;
+  srv->workerIdx = 0;
+  srv->pThreadObj = (SThreadObj**)calloc(srv->numOfThread, sizeof(SThreadObj*));
+  srv->pipe = (uv_pipe_t**)calloc(srv->numOfThread, sizeof(uv_pipe_t*));
+  srv->ip = ip;
+  srv->port = port;
+  uv_loop_init(srv->loop);
+
+  for (int i = 0; i < srv->numOfThread; i++) {
+    SThreadObj* thrd = (SThreadObj*)calloc(1, sizeof(SThreadObj));
+    srv->pipe[i] = (uv_pipe_t*)calloc(2, sizeof(uv_pipe_t));
+    int fds[2];
+    if (uv_socketpair(AF_UNIX, SOCK_STREAM, fds, UV_NONBLOCK_PIPE, UV_NONBLOCK_PIPE) != 0) {
+      return NULL;
+    }
+    uv_pipe_init(srv->loop, &(srv->pipe[i][0]), 1);
+    uv_pipe_open(&(srv->pipe[i][0]), fds[1]);  // init write
+
+    thrd->shandle = shandle;
+    thrd->fd = fds[0];
+    thrd->pipe = &(srv->pipe[i][1]);  // init read
+    int err = pthread_create(&(thrd->thread), NULL, workerThread, (void*)(thrd));
+    if (err == 0) {
+      tDebug("success to create worker-thread %d", i);
+      // printf("thread %d create\n", i);
+    } else {
+      // TODO: clear all other resources later
+      tError("failed to create worker-thread %d", i);
+    }
+    srv->pThreadObj[i] = thrd;
+  }
+
+  int err = pthread_create(&srv->thread, NULL, acceptThread, (void*)srv);
+  if (err == 0) {
+    tDebug("success to create accept-thread");
+  } else {
+    // clear all resources later
+  }
+
+  return srv;
+}
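+
+/* Overview of the wiring above (illustrative comment; behavior is unchanged):
+ * taosInitServer builds one acceptor loop plus numOfThreads worker loops,
+ * joined pairwise by uv_pipe handles over a socketpair.
+ *
+ *   acceptThread:  uv_listen -> uvOnAcceptCb -> uv_write2(&pipe[i][0], ..., cli)
+ *   workerThread:  uv_read_start(pipe[i][1]) -> uvOnConnectionCb -> uv_accept
+ *
+ * The acceptor keeps the write end pipe[i][0] (opened on fds[1]) on its own
+ * loop; worker i opens the read end pipe[i][1] on fds[0] inside its own
+ * uv_loop_t.
+ */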
+void uvAllocReadBufferCb(uv_handle_t* handle, size_t suggested_size, uv_buf_t* buf) {
+  /*
+   * format of the data buffer:
+   * |<-------SRpcReqContext------->|<------------data read from socket----------->|
+   */
+  static const int CAPACITY = 1024;
+
+  SRpcConn* ctx = handle->data;
+  SConnBuffer* pBuf = &ctx->connBuf;
+  if (pBuf->cap == 0) {
+    pBuf->buf = (char*)calloc(CAPACITY + RPC_RESERVE_SIZE, sizeof(char));
+    pBuf->len = 0;
+    pBuf->cap = CAPACITY;
+    pBuf->left = -1;
+
+    buf->base = pBuf->buf + RPC_RESERVE_SIZE;
+    buf->len = CAPACITY;
+  } else {
+    if (pBuf->len >= pBuf->cap) {
+      if (pBuf->left == -1) {
+        pBuf->cap *= 2;
+        pBuf->buf = realloc(pBuf->buf, pBuf->cap + RPC_RESERVE_SIZE);
+      } else if (pBuf->len + pBuf->left > pBuf->cap) {
+        pBuf->cap = pBuf->len + pBuf->left;
+        pBuf->buf = realloc(pBuf->buf, pBuf->len + pBuf->left + RPC_RESERVE_SIZE);
+      }
+    }
+    buf->base = pBuf->buf + pBuf->len + RPC_RESERVE_SIZE;
+    buf->len = pBuf->cap - pBuf->len;
+  }
+}
+// check whether a complete packet has been read from the socket
+//
+static bool isReadAll(SConnBuffer* data) {
+  // TODO(yihao): handle pipeline later
+  SRpcHead rpcHead;
+  int32_t headLen = sizeof(rpcHead);
+  if (data->len >= headLen) {
+    memcpy((char*)&rpcHead, data->buf, headLen);
+    int32_t msgLen = (int32_t)htonl((uint32_t)rpcHead.msgLen);
+    if (msgLen > data->len) {
+      data->left = msgLen - data->len;
+      return false;
+    } else {
+      return true;
+    }
+  } else {
+    return false;
+  }
+}
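+
+/* Worked example of the len/cap/left bookkeeping shared by the two functions
+ * above (illustrative numbers): a peer sends one 1500-byte message and the
+ * first read delivers only 1024 bytes.
+ *
+ *   read #1:    len = 1024, cap = 1024; isReadAll sets left = 1500 - 1024 = 476
+ *   next alloc: len >= cap and left != -1, len + left > cap, so cap becomes
+ *               1500 and the buffer is realloc'ed to cap + RPC_RESERVE_SIZE
+ *   read #2:    len = 1500 == msgLen, so isReadAll returns true and the packet
+ *               is handed to uvProcessData
+ */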
+static void uvDoProcess(SRecvInfo* pRecv) {
+  SRpcHead* pHead = (SRpcHead*)pRecv->msg;
+  SRpcInfo* pRpc = (SRpcInfo*)pRecv->shandle;
+  SRpcConn* pConn = pRecv->thandle;
+
+  tDump(pRecv->msg, pRecv->msgLen);
+
+  terrno = 0;
+  SRpcReqContext* pContext;
+
+  // do auth and check
+}
+static int uvAuthData(SRpcConn* pConn, char* msg, int len) {
+  SRpcHead* pHead = (SRpcHead*)msg;
+  int code = 0;
+
+  if ((pConn->secured && pHead->spi == 0) || (pHead->spi == 0 && pConn->spi == 0)) {
+    // secured link, or no authentication
+    pHead->msgLen = (int32_t)htonl((uint32_t)pHead->msgLen);
+    // tTrace("%s, secured link, no auth is required", pConn->info);
+    return 0;
+  }
+
+  if (!rpcIsReq(pHead->msgType)) {
+    // for response, if code is auth failure, it shall bypass the auth process
+    code = htonl(pHead->code);
+    if (code == TSDB_CODE_RPC_INVALID_TIME_STAMP || code == TSDB_CODE_RPC_AUTH_FAILURE ||
+        code == TSDB_CODE_RPC_INVALID_VERSION || code == TSDB_CODE_RPC_AUTH_REQUIRED ||
+        code == TSDB_CODE_MND_USER_NOT_EXIST || code == TSDB_CODE_RPC_NOT_READY) {
+      pHead->msgLen = (int32_t)htonl((uint32_t)pHead->msgLen);
+      // tTrace("%s, dont check authentication since code is:0x%x", pConn->info, code);
+      return 0;
+    }
+  }
+
+  code = 0;
+  if (pHead->spi == pConn->spi) {
+    // authentication
+    SRpcDigest* pDigest = (SRpcDigest*)((char*)pHead + len - sizeof(SRpcDigest));
+
+    int32_t delta;
+    delta = (int32_t)htonl(pDigest->timeStamp);
+    delta -= (int32_t)taosGetTimestampSec();
+    if (abs(delta) > 900) {
+      tWarn("%s, time diff:%d is too big, msg discarded", pConn->info, delta);
+      code = TSDB_CODE_RPC_INVALID_TIME_STAMP;
+    } else {
+      if (rpcAuthenticateMsg(pHead, len - TSDB_AUTH_LEN, pDigest->auth, pConn->secret) < 0) {
+        // tDebug("%s, authentication failed, msg discarded", pConn->info);
+        code = TSDB_CODE_RPC_AUTH_FAILURE;
+      } else {
+        pHead->msgLen = (int32_t)htonl((uint32_t)pHead->msgLen) - sizeof(SRpcDigest);
+        if (!rpcIsReq(pHead->msgType)) pConn->secured = 1;  // link is secured for client
+        // tTrace("%s, message is authenticated", pConn->info);
+      }
+    }
+  } else {
+    tDebug("%s, auth spi:%d not matched with received:%d", pConn->info, pConn->spi, pHead->spi);
+    code = pHead->spi ? TSDB_CODE_RPC_AUTH_FAILURE : TSDB_CODE_RPC_AUTH_REQUIRED;
+  }
+
+  return code;
+}
+static void uvProcessData(SRpcConn* ctx) {
+  SRecvInfo info;
+  SRecvInfo* p = &info;
+  SConnBuffer* pBuf = &ctx->connBuf;
+  p->msg = pBuf->buf + RPC_RESERVE_SIZE;
+  p->msgLen = pBuf->len;
+  p->ip = 0;
+  p->port = 0;
+  p->shandle = ctx->shandle;  //
+  p->thandle = ctx;
+  p->chandle = NULL;
+
+  //
+  SRpcHead* pHead = (SRpcHead*)p->msg;
+  assert(rpcIsReq(pHead->msgType));
+
+  SRpcInfo* pRpc = (SRpcInfo*)p->shandle;
+  SRpcConn* pConn = (SRpcConn*)p->thandle;
+
+  pConn->ahandle = (void*)pHead->ahandle;
+  pHead->code = htonl(pHead->code);
+
+  SRpcMsg rpcMsg;
+
+  pHead = rpcDecompressRpcMsg(pHead);
+  rpcMsg.contLen = rpcContLenFromMsg(pHead->msgLen);
+  rpcMsg.pCont = pHead->content;
+  rpcMsg.msgType = pHead->msgType;
+  rpcMsg.code = pHead->code;
+  rpcMsg.ahandle = pConn->ahandle;
+  rpcMsg.handle = pConn;
+  (*(pRpc->cfp))(pRpc->parent, &rpcMsg, NULL);
+  // auth
+  // validate msg type
+}
+void uvOnReadCb(uv_stream_t* cli, ssize_t nread, const uv_buf_t* buf) {
+  // opt
+  SRpcConn* ctx = cli->data;
+  SConnBuffer* pBuf = &ctx->connBuf;
+  if (nread > 0) {
+    pBuf->len += nread;
+    if (isReadAll(pBuf)) {
+      tDebug("already read complete packet");
+      uvProcessData(ctx);
+    } else {
+      tDebug("read half packet, continue to read");
+    }
+    return;
+  }
+
+  if (nread != UV_EOF) {
+    tDebug("Read error %s\n", uv_err_name(nread));
+  }
+  uv_close((uv_handle_t*)cli, uvConnDestroy);
+}
+void uvAllocConnBufferCb(uv_handle_t* handle, size_t suggested_size, uv_buf_t* buf) {
+  buf->base = malloc(sizeof(char));
+  buf->len = 2;
+}
+
+void uvOnTimeoutCb(uv_timer_t* handle) {
+  // opt
+  tDebug("time out");
+}
+
+void uvOnWriteCb(uv_write_t* req, int status) {
+  SRpcConn* conn = req->data;
+  if (status == 0) {
+    tDebug("data was already written on stream");
+  } else {
+    connDestroy(conn);
+  }
+  // opt
+}
+
+void uvWorkerAsyncCb(uv_async_t* handle) {
+  SThreadObj* pObj = container_of(handle, SThreadObj, workerAsync);
+  SRpcConn* conn = NULL;
+
+  // opt later
+  pthread_mutex_lock(&pObj->connMtx);
+  if (!QUEUE_IS_EMPTY(&pObj->conn)) {
+    queue* head = QUEUE_HEAD(&pObj->conn);
+    conn = QUEUE_DATA(head, SRpcConn, queue);
+    QUEUE_REMOVE(&conn->queue);
+  }
+  pthread_mutex_unlock(&pObj->connMtx);
+  if (conn == NULL) {
+    tError("exception occurred, do nothing");
+    return;
+  }
+}
+
+void uvOnAcceptCb(uv_stream_t* stream, int status) {
+  if (status == -1) {
+    return;
+  }
+  SServerObj* pObj = container_of(stream, SServerObj, server);
+
+  uv_tcp_t* cli = (uv_tcp_t*)malloc(sizeof(uv_tcp_t));
+  uv_tcp_init(pObj->loop, cli);
+
+  if (uv_accept(stream, (uv_stream_t*)cli) == 0) {
+    uv_write_t* wr = (uv_write_t*)malloc(sizeof(uv_write_t));
+
+    uv_buf_t buf = uv_buf_init((char*)notify, strlen(notify));
+
+    pObj->workerIdx = (pObj->workerIdx + 1) % pObj->numOfThread;
+    tDebug("new connection accepted by main server, dispatch to %dth worker-thread", pObj->workerIdx);
+    uv_write2(wr, (uv_stream_t*)&(pObj->pipe[pObj->workerIdx][0]), &buf, 1, (uv_stream_t*)cli, uvOnWriteCb);
+  } else {
+    uv_close((uv_handle_t*)cli, NULL);
+  }
+}
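+
+/* The notify byte written by uvOnAcceptCb is only a wake-up marker: the real
+ * payload is the accepted TCP handle, which libuv passes as the send_handle
+ * argument of uv_write2 and transfers across the pipe as ancillary data.
+ * That is why the worker below checks uv_pipe_pending_count() and
+ * uv_pipe_pending_type() before re-accepting the handle onto its own loop
+ * with uv_accept().
+ */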
+void uvOnConnectionCb(uv_stream_t* q, ssize_t nread, const uv_buf_t* buf) {
+  tDebug("connection coming");
+  if (nread < 0) {
+    if (nread != UV_EOF) {
+      tError("read error %s", uv_err_name(nread));
+    }
+    // TODO(log other failure reason)
+    uv_close((uv_handle_t*)q, NULL);
+    return;
+  }
+  // free memory allocated by uvAllocConnBufferCb
+  assert(nread == strlen(notify));
+  assert(buf->base[0] == notify[0]);
+  free(buf->base);
+
+  SThreadObj* pObj = q->data;
+
+  uv_pipe_t* pipe = (uv_pipe_t*)q;
+  if (!uv_pipe_pending_count(pipe)) {
+    tError("No pending count");
+    return;
+  }
+
+  uv_handle_type pending = uv_pipe_pending_type(pipe);
+  assert(pending == UV_TCP);
+
+  SRpcConn* pConn = connCreate();
+  pConn->shandle = pObj->shandle;
+  /* init conn timer*/
+  pConn->pTimer = malloc(sizeof(uv_timer_t));
+  uv_timer_init(pObj->loop, pConn->pTimer);
+
+  pConn->pWorkerAsync = pObj->workerAsync;  // thread safety
+
+  // init client handle
+  pConn->pTcp = (uv_tcp_t*)malloc(sizeof(uv_tcp_t));
+  uv_tcp_init(pObj->loop, pConn->pTcp);
+  pConn->pTcp->data = pConn;
+
+  // init write request, just
+  pConn->pWriter = calloc(1, sizeof(uv_write_t));
+  pConn->pWriter->data = pConn;
+
+  if (uv_accept(q, (uv_stream_t*)(pConn->pTcp)) == 0) {
+    uv_os_fd_t fd;
+    uv_fileno((const uv_handle_t*)pConn->pTcp, &fd);
+    tDebug("new connection created: %d", fd);
+    uv_read_start((uv_stream_t*)(pConn->pTcp), uvAllocReadBufferCb, uvOnReadCb);
+  } else {
+    connDestroy(pConn);
+  }
+}
+
+void* acceptThread(void* arg) {
+  // opt
+  SServerObj* srv = (SServerObj*)arg;
+  uv_tcp_init(srv->loop, &srv->server);
+
+  struct sockaddr_in bind_addr;
+
+  uv_ip4_addr("0.0.0.0", srv->port, &bind_addr);
+  uv_tcp_bind(&srv->server, (const struct sockaddr*)&bind_addr, 0);
+  int err = 0;
+  if ((err = uv_listen((uv_stream_t*)&srv->server, 128, uvOnAcceptCb)) != 0) {
+    tError("Listen error %s\n", uv_err_name(err));
+    return NULL;
+  }
+  uv_run(srv->loop, UV_RUN_DEFAULT);
+}
+void* workerThread(void* arg) {
+  SThreadObj* pObj = (SThreadObj*)arg;
+
+  pObj->loop = (uv_loop_t*)malloc(sizeof(uv_loop_t));
+  uv_loop_init(pObj->loop);
+
+  uv_pipe_init(pObj->loop, pObj->pipe, 1);
+  uv_pipe_open(pObj->pipe, pObj->fd);
+
+  pObj->pipe->data = pObj;
+
+  QUEUE_INIT(&pObj->conn);
+
+  pObj->workerAsync = malloc(sizeof(uv_async_t));
+  uv_async_init(pObj->loop, pObj->workerAsync, uvWorkerAsyncCb);
+
+  uv_read_start((uv_stream_t*)pObj->pipe, uvAllocConnBufferCb, uvOnConnectionCb);
+  uv_run(pObj->loop, UV_RUN_DEFAULT);
+}
+static SRpcConn* connCreate() {
+  SRpcConn* pConn = (SRpcConn*)calloc(1, sizeof(SRpcConn));
+  return pConn;
+}
+static void connDestroy(SRpcConn* conn) {
+  if (conn == NULL) {
+    return;
+  }
+  uv_timer_stop(conn->pTimer);
+  free(conn->pTimer);
+  uv_close((uv_handle_t*)conn->pTcp, NULL);
+  free(conn->pTcp);
+  free(conn->pWriter);
+  free(conn);
+  // handle
+}
+static void uvConnDestroy(uv_handle_t* handle) {
+  SRpcConn* conn = handle->data;
+  connDestroy(conn);
+}
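+
+/* Illustrative usage of the entry point below; the values are hypothetical
+ * and only the SRpcInit fields consumed in this file (label, numOfThreads,
+ * localPort) are set. The null check inside rpcOpen implies label is a
+ * string pointer here.
+ *
+ *   SRpcInit init = {0};
+ *   init.label = "TRANS-SVR";
+ *   init.numOfThreads = 2;
+ *   init.localPort = 7000;
+ *   void* pRpc = rpcOpen(&init);
+ */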
+void* rpcOpen(const SRpcInit* pInit) {
+  SRpcInfo* pRpc = calloc(1, sizeof(SRpcInfo));
+  if (pRpc == NULL) {
+    return NULL;
+  }
+  if (pInit->label) {
+    tstrncpy(pRpc->label, pInit->label, strlen(pInit->label));
+  }
+  pRpc->numOfThreads = pInit->numOfThreads > TSDB_MAX_RPC_THREADS ? TSDB_MAX_RPC_THREADS : pInit->numOfThreads;
+  pRpc->tcphandle = taosInitServer(0, pInit->localPort, pRpc->label, pRpc->numOfThreads, NULL, pRpc);
+  return pRpc;
+}
+void rpcClose(void* arg) { return; }
+void* rpcMallocCont(int contLen) { return NULL; }
+void rpcFreeCont(void* cont) { return; }
+void* rpcReallocCont(void* ptr, int contLen) { return NULL; }
+
+void rpcSendRequest(void* thandle, const SEpSet* pEpSet, SRpcMsg* pMsg, int64_t* rid) { return; }
+
+void rpcSendResponse(const SRpcMsg* pMsg) {}
+
+void rpcSendRedirectRsp(void* pConn, const SEpSet* pEpSet) {}
+int rpcGetConnInfo(void* thandle, SRpcConnInfo* pInfo) { return -1; }
+void rpcSendRecv(void* shandle, SEpSet* pEpSet, SRpcMsg* pReq, SRpcMsg* pRsp) { return; }
+int rpcReportProgress(void* pConn, char* pCont, int contLen) { return -1; }
+void rpcCancelRequest(int64_t rid) { return; }
+
+static int rpcAuthenticateMsg(void* pMsg, int msgLen, void* pAuth, void* pKey) {
+  T_MD5_CTX context;
+  int ret = -1;
+
+  tMD5Init(&context);
+  tMD5Update(&context, (uint8_t*)pKey, TSDB_PASSWORD_LEN);
+  tMD5Update(&context, (uint8_t*)pMsg, msgLen);
+  tMD5Update(&context, (uint8_t*)pKey, TSDB_PASSWORD_LEN);
+  tMD5Final(&context);
+
+  if (memcmp(context.digest, pAuth, sizeof(context.digest)) == 0) ret = 0;
+
+  return ret;
+}
+static void rpcBuildAuthHead(void* pMsg, int msgLen, void* pAuth, void* pKey) {
+  T_MD5_CTX context;
+
+  tMD5Init(&context);
+  tMD5Update(&context, (uint8_t*)pKey, TSDB_PASSWORD_LEN);
+  tMD5Update(&context, (uint8_t*)pMsg, msgLen);
+  tMD5Update(&context, (uint8_t*)pKey, TSDB_PASSWORD_LEN);
+  tMD5Final(&context);
+
+  memcpy(pAuth, context.digest, sizeof(context.digest));
+}
+
+static int rpcAddAuthPart(SRpcConn* pConn, char* msg, int msgLen) {
+  SRpcHead* pHead = (SRpcHead*)msg;
+
+  if (pConn->spi && pConn->secured == 0) {
+    // add auth part
+    pHead->spi = pConn->spi;
+    SRpcDigest* pDigest = (SRpcDigest*)(msg + msgLen);
+    pDigest->timeStamp = htonl(taosGetTimestampSec());
+    msgLen += sizeof(SRpcDigest);
+    pHead->msgLen = (int32_t)htonl((uint32_t)msgLen);
+    rpcBuildAuthHead(pHead, msgLen - TSDB_AUTH_LEN, pDigest->auth, pConn->secret);
+  } else {
+    pHead->spi = 0;
+    pHead->msgLen = (int32_t)htonl((uint32_t)msgLen);
+  }
+
+  return msgLen;
+}
+
+static int32_t rpcCompressRpcMsg(char* pCont, int32_t contLen) {
+  SRpcHead* pHead = rpcHeadFromCont(pCont);
+  int32_t finalLen = 0;
+  int overhead = sizeof(SRpcComp);
+
+  if (!NEEDTO_COMPRESSS_MSG(contLen)) {
+    return contLen;
+  }
+
+  char* buf = malloc(contLen + overhead + 8);  // 8 extra bytes
+  if (buf == NULL) {
+    tError("failed to allocate memory for rpc msg compression, contLen:%d", contLen);
+    return contLen;
+  }
+
+  int32_t compLen = LZ4_compress_default(pCont, buf, contLen, contLen + overhead);
+  tDebug("compress rpc msg, before:%d, after:%d, overhead:%d", contLen, compLen, overhead);
+
+  /*
+   * The compression is applied only if the compressed size is less than contLen - overhead.
+   * The first four bytes are set to 0; the next four bytes keep the original length of the message.
+   */
+  if (compLen > 0 && compLen < contLen - overhead) {
+    SRpcComp* pComp = (SRpcComp*)pCont;
+    pComp->reserved = 0;
+    pComp->contLen = htonl(contLen);
+    memcpy(pCont + overhead, buf, compLen);
+
+    pHead->comp = 1;
+    tDebug("compress rpc msg, before:%d, after:%d", contLen, compLen);
+    finalLen = compLen + overhead;
+  } else {
+    finalLen = contLen;
+  }
+
+  free(buf);
+  return finalLen;
+}
+
+static SRpcHead* rpcDecompressRpcMsg(SRpcHead* pHead) {
+  int overhead = sizeof(SRpcComp);
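+  /*
+   * Wire layout of a compressed message, mirroring rpcCompressRpcMsg above:
+   * |<--SRpcHead-->|<--SRpcComp: reserved = 0, original contLen-->|<--LZ4 data-->|
+   * When pHead->comp is set, the LZ4 block is expanded into a fresh buffer
+   * that again reserves sizeof(SRpcReqContext) bytes in front of the header.
+   */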
+ SRpcHead* pNewHead = NULL; + uint8_t* pCont = pHead->content; + SRpcComp* pComp = (SRpcComp*)pHead->content; + + if (pHead->comp) { + // decompress the content + assert(pComp->reserved == 0); + int contLen = htonl(pComp->contLen); + + // prepare the temporary buffer to decompress message + char* temp = (char*)malloc(contLen + RPC_MSG_OVERHEAD); + pNewHead = (SRpcHead*)(temp + sizeof(SRpcReqContext)); // reserve SRpcReqContext + + if (pNewHead) { + int compLen = rpcContLenFromMsg(pHead->msgLen) - overhead; + int origLen = LZ4_decompress_safe((char*)(pCont + overhead), (char*)pNewHead->content, compLen, contLen); + assert(origLen == contLen); + + memcpy(pNewHead, pHead, sizeof(SRpcHead)); + pNewHead->msgLen = rpcMsgLenFromCont(origLen); + /// rpcFreeMsg(pHead); // free the compressed message buffer + pHead = pNewHead; + tTrace("decomp malloc mem:%p", temp); + } else { + tError("failed to allocate memory to decompress msg, contLen:%d", contLen); + } + } + + return pHead; +} +int32_t rpcInit(void) { + // impl later + return -1; +} + +void rpcCleanup(void) { + // impl later + return; +} +#endif From 6600ae49f6edfaa5ac0593b80d34f4798342e076 Mon Sep 17 00:00:00 2001 From: yihaoDeng Date: Sun, 16 Jan 2022 20:03:13 +0800 Subject: [PATCH 19/20] refactor code --- source/libs/transport/src/transport.c | 26 ++++++++++++++++++-------- 1 file changed, 18 insertions(+), 8 deletions(-) diff --git a/source/libs/transport/src/transport.c b/source/libs/transport/src/transport.c index a6c9cee0b7..f33d54c4f3 100644 --- a/source/libs/transport/src/transport.c +++ b/source/libs/transport/src/transport.c @@ -151,6 +151,7 @@ typedef struct SRpcConn { } SRpcConn; // auth function +static int uvAuthMsg(SRpcConn* pConn, char* msg, int msgLen); static int rpcAuthenticateMsg(void* pMsg, int msgLen, void* pAuth, void* pKey); static void rpcBuildAuthHead(void* pMsg, int msgLen, void* pAuth, void* pKey); static int rpcAddAuthPart(SRpcConn* pConn, char* msg, int msgLen); @@ -259,7 +260,7 @@ static bool isReadAll(SConnBuffer* data) { SRpcHead rpcHead; int32_t headLen = sizeof(rpcHead); if (data->len >= headLen) { - memcpy((char*)&rpcHead, data->buf, headLen); + memcpy((char*)&rpcHead, data->buf + RPC_RESERVE_SIZE, headLen); int32_t msgLen = (int32_t)htonl((uint32_t)rpcHead.msgLen); if (msgLen > data->len) { data->left = msgLen - data->len; @@ -283,7 +284,7 @@ static void uvDoProcess(SRecvInfo* pRecv) { // do auth and check } -static int uvAuthData(SRpcConn* pConn, char* msg, int len) { +static int uvAuthMsg(SRpcConn* pConn, char* msg, int len) { SRpcHead* pHead = (SRpcHead*)msg; int code = 0; @@ -334,16 +335,16 @@ static int uvAuthData(SRpcConn* pConn, char* msg, int len) { return code; } -static void uvProcessData(SRpcConn* ctx) { +static void uvProcessData(SRpcConn* pConn) { SRecvInfo info; SRecvInfo* p = &info; - SConnBuffer* pBuf = &ctx->connBuf; + SConnBuffer* pBuf = &pConn->connBuf; p->msg = pBuf->buf + RPC_RESERVE_SIZE; p->msgLen = pBuf->len; p->ip = 0; p->port = 0; - p->shandle = ctx->shandle; // - p->thandle = ctx; + p->shandle = pConn->shandle; // + p->thandle = pConn; p->chandle = NULL; // @@ -351,9 +352,14 @@ static void uvProcessData(SRpcConn* ctx) { assert(rpcIsReq(pHead->msgType)); SRpcInfo* pRpc = (SRpcInfo*)p->shandle; - SRpcConn* pConn = (SRpcConn*)p->thandle; - pConn->ahandle = (void*)pHead->ahandle; + // auth here + + int8_t code = uvAuthMsg(pConn, (char*)pHead, p->msgLen); + if (code != 0) { + terrno = code; + } + // rpcCheckAuthentication(pConn, (char*)pHead, pBuf->len); pHead->code = 
htonl(pHead->code);
 
   SRpcMsg rpcMsg;
 
@@ -383,6 +389,9 @@ void uvOnReadCb(uv_stream_t* cli, ssize_t nread, const uv_buf_t* buf) {
     }
     return;
   }
+  if (terrno != 0) {
+    // handle err code
+  }
 
   if (nread != UV_EOF) {
     tDebug("Read error %s\n", uv_err_name(nread));
@@ -547,6 +556,7 @@ static void connDestroy(SRpcConn* conn) {
   uv_timer_stop(conn->pTimer);
   free(conn->pTimer);
   uv_close((uv_handle_t*)conn->pTcp, NULL);
+  free(conn->connBuf.buf);
   free(conn->pTcp);
   free(conn->pWriter);
   free(conn);
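The patch below completes the server-side write path. rpcSendResponse may be
called from any thread, so it never touches a uv loop directly: it parks the
connection on the owning worker's queue and wakes that worker with
uv_async_send(), which libuv documents as safe to call from other threads;
uvWorkerAsyncCb then pops the connection on the loop thread and issues the
actual uv_write() from writeBuf. A minimal sketch of the hand-off (names taken
from the patch itself; locking and error handling as in the patch):

    /* producer -- any thread (rpcSendResponse) */
    pthread_mutex_lock(&pThrd->connMtx);
    QUEUE_PUSH(&pThrd->conn, &pConn->queue);
    pthread_mutex_unlock(&pThrd->connMtx);
    uv_async_send(pConn->pWorkerAsync);

    /* consumer -- worker loop thread (uvWorkerAsyncCb) */
    uv_buf_t wb = uv_buf_init(conn->writeBuf.buf, conn->writeBuf.len);
    uv_write(conn->pWriter, (uv_stream_t*)conn->pTcp, &wb, 1, uvOnWriteCb);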
From 41c3160daaef575c209967ec40517361aa649764 Mon Sep 17 00:00:00 2001
From: yihaoDeng
Date: Sun, 16 Jan 2022 21:57:42 +0800
Subject: [PATCH 20/20] refactor code

---
 source/libs/transport/src/transport.c | 81 ++++++++++++++++++---------
 1 file changed, 53 insertions(+), 28 deletions(-)

diff --git a/source/libs/transport/src/transport.c b/source/libs/transport/src/transport.c
index f33d54c4f3..93bbaf2820 100644
--- a/source/libs/transport/src/transport.c
+++ b/source/libs/transport/src/transport.c
@@ -135,12 +135,13 @@ typedef struct SRpcConn {
   uv_async_t* pWorkerAsync;
   queue queue;
   int ref;
-  int persist;  // persist connection or not
-  SConnBuffer connBuf;
+  int persist;           // persist connection or not
+  SConnBuffer connBuf;   // read buf,
+  SConnBuffer writeBuf;  // write buf
   int count;
   void* shandle;  // rpc init
-  void* ahandle;
-
+  void* ahandle;  //
+  void* hostThread;
   // del later
   char secured;
   int spi;
@@ -335,6 +336,11 @@ static int uvAuthMsg(SRpcConn* pConn, char* msg, int len) {
 
   return code;
 }
+// refers specifically to query or insert timeout
+static void uvHandleActivityTimeout(uv_timer_t* handle) {
+  // impl later
+  SRpcConn* conn = handle->data;
+}
 static void uvProcessData(SRpcConn* pConn) {
   SRecvInfo info;
   SRecvInfo* p = &info;
@@ -358,8 +364,8 @@ static void uvProcessData(SRpcConn* pConn) {
   int8_t code = uvAuthMsg(pConn, (char*)pHead, p->msgLen);
   if (code != 0) {
     terrno = code;
+    return;
   }
-  // rpcCheckAuthentication(pConn, (char*)pHead, pBuf->len);
   pHead->code = htonl(pHead->code);
 
   SRpcMsg rpcMsg;
@@ -371,7 +377,9 @@ static void uvProcessData(SRpcConn* pConn) {
   rpcMsg.code = pHead->code;
   rpcMsg.ahandle = pConn->ahandle;
   rpcMsg.handle = pConn;
+
   (*(pRpc->cfp))(pRpc->parent, &rpcMsg, NULL);
+  uv_timer_start(pConn->pTimer, uvHandleActivityTimeout, pRpc->idleTime, 0);
   // auth
   // validate msg type
 }
@@ -419,21 +427,23 @@ void uvOnWriteCb(uv_write_t* req, int status) {
 }
 
 void uvWorkerAsyncCb(uv_async_t* handle) {
-  SThreadObj* pObj = container_of(handle, SThreadObj, workerAsync);
+  SThreadObj* pThrd = container_of(handle, SThreadObj, workerAsync);
   SRpcConn* conn = NULL;
 
   // opt later
-  pthread_mutex_lock(&pObj->connMtx);
-  if (!QUEUE_IS_EMPTY(&pObj->conn)) {
-    queue* head = QUEUE_HEAD(&pObj->conn);
+  pthread_mutex_lock(&pThrd->connMtx);
+  if (!QUEUE_IS_EMPTY(&pThrd->conn)) {
+    queue* head = QUEUE_HEAD(&pThrd->conn);
     conn = QUEUE_DATA(head, SRpcConn, queue);
     QUEUE_REMOVE(&conn->queue);
   }
-  pthread_mutex_unlock(&pObj->connMtx);
+  pthread_mutex_unlock(&pThrd->connMtx);
   if (conn == NULL) {
     tError("exception occurred, do nothing");
     return;
   }
+  uv_buf_t wb = uv_buf_init(conn->writeBuf.buf, conn->writeBuf.len);
+  uv_write(conn->pWriter, (uv_stream_t*)conn->pTcp, &wb, 1, uvOnWriteCb);
 }
 
 void uvOnAcceptCb(uv_stream_t* stream, int status) {
@@ -472,7 +482,7 @@ void uvOnConnectionCb(uv_stream_t* q, ssize_t nread, const uv_buf_t* buf) {
   assert(buf->base[0] == notify[0]);
   free(buf->base);
 
-  SThreadObj* pObj = q->data;
+  SThreadObj* pThrd = q->data;
 
   uv_pipe_t* pipe = (uv_pipe_t*)q;
   if (!uv_pipe_pending_count(pipe)) {
@@ -484,16 +494,18 @@ void uvOnConnectionCb(uv_stream_t* q, ssize_t nread, const uv_buf_t* buf) {
   assert(pending == UV_TCP);
 
   SRpcConn* pConn = connCreate();
-  pConn->shandle = pObj->shandle;
+  pConn->shandle = pThrd->shandle;
   /* init conn timer*/
   pConn->pTimer = malloc(sizeof(uv_timer_t));
-  uv_timer_init(pObj->loop, pConn->pTimer);
+  uv_timer_init(pThrd->loop, pConn->pTimer);
+  pConn->pTimer->data = pConn;
 
-  pConn->pWorkerAsync = pObj->workerAsync;  // thread safety
+  pConn->hostThread = pThrd;
+  pConn->pWorkerAsync = pThrd->workerAsync;  // thread safety
 
   // init client handle
   pConn->pTcp = (uv_tcp_t*)malloc(sizeof(uv_tcp_t));
-  uv_tcp_init(pObj->loop, pConn->pTcp);
+  uv_tcp_init(pThrd->loop, pConn->pTcp);
   pConn->pTcp->data = pConn;
 
   // init write request, just
@@ -527,23 +539,23 @@ void* acceptThread(void* arg) {
   uv_run(srv->loop, UV_RUN_DEFAULT);
 }
 void* workerThread(void* arg) {
-  SThreadObj* pObj = (SThreadObj*)arg;
+  SThreadObj* pThrd = (SThreadObj*)arg;
 
-  pObj->loop = (uv_loop_t*)malloc(sizeof(uv_loop_t));
-  uv_loop_init(pObj->loop);
+  pThrd->loop = (uv_loop_t*)malloc(sizeof(uv_loop_t));
+  uv_loop_init(pThrd->loop);
 
-  uv_pipe_init(pObj->loop, pObj->pipe, 1);
-  uv_pipe_open(pObj->pipe, pObj->fd);
+  uv_pipe_init(pThrd->loop, pThrd->pipe, 1);
+  uv_pipe_open(pThrd->pipe, pThrd->fd);
 
-  pObj->pipe->data = pObj;
+  pThrd->pipe->data = pThrd;
 
-  QUEUE_INIT(&pObj->conn);
+  QUEUE_INIT(&pThrd->conn);
 
-  pObj->workerAsync = malloc(sizeof(uv_async_t));
-  uv_async_init(pObj->loop, pObj->workerAsync, uvWorkerAsyncCb);
+  pThrd->workerAsync = malloc(sizeof(uv_async_t));
+  uv_async_init(pThrd->loop, pThrd->workerAsync, uvWorkerAsyncCb);
 
-  uv_read_start((uv_stream_t*)pObj->pipe, uvAllocConnBufferCb, uvOnConnectionCb);
-  uv_run(pObj->loop, UV_RUN_DEFAULT);
+  uv_read_start((uv_stream_t*)pThrd->pipe, uvAllocConnBufferCb, uvOnConnectionCb);
+  uv_run(pThrd->loop, UV_RUN_DEFAULT);
 }
 static SRpcConn* connCreate() {
   SRpcConn* pConn = (SRpcConn*)calloc(1, sizeof(SRpcConn));
@@ -583,9 +595,22 @@ void* rpcMallocCont(int contLen) { return NULL; }
 void rpcFreeCont(void* cont) { return; }
 void* rpcReallocCont(void* ptr, int contLen) { return NULL; }
 
-void rpcSendRequest(void* thandle, const SEpSet* pEpSet, SRpcMsg* pMsg, int64_t* rid) { return; }
+void rpcSendRequest(void* thandle, const SEpSet* pEpSet, SRpcMsg* pMsg, int64_t* rid) {
+  // impl later
+  return;
+}
 
-void rpcSendResponse(const SRpcMsg* pMsg) {}
+void rpcSendResponse(const SRpcMsg* pMsg) {
+  SRpcConn* pConn = pMsg->handle;
+  SThreadObj* pThrd = pConn->hostThread;
+
+  // opt later
+  pthread_mutex_lock(&pThrd->connMtx);
+  QUEUE_PUSH(&pThrd->conn, &pConn->queue);
+  pthread_mutex_unlock(&pThrd->connMtx);
+
+  uv_async_send(pConn->pWorkerAsync);
+}
 
 void rpcSendRedirectRsp(void* pConn, const SEpSet* pEpSet) {}
 int rpcGetConnInfo(void* thandle, SRpcConnInfo* pInfo) { return -1; }