From a7f58f94378ffdaa34c9bdd026e70eb73dca26df Mon Sep 17 00:00:00 2001 From: Amos Kong Date: Tue, 25 Aug 2020 07:17:09 +0800 Subject: [PATCH 1/5] install.sh: create empty history file after installation There is an error message on first use of taos, because the history file doesn't exist. This patch creates an empty history file after installation, so users won't see the error message. | [root@045a9307c53a ~]# taos | | Welcome to the TDengine shell from Linux, Client Version:2.0.1.1 | Copyright (c) 2017 by TAOS Data, Inc. All rights reserved. | | Failed to open file /root/.taos_history | taos> Signed-off-by: Amos Kong --- packaging/tools/install.sh | 1 + 1 file changed, 1 insertion(+) diff --git a/packaging/tools/install.sh b/packaging/tools/install.sh index 64de09df6d..ee500a727b 100644 --- a/packaging/tools/install.sh +++ b/packaging/tools/install.sh @@ -713,6 +713,7 @@ function install_TDengine() { echo echo -e "\033[44;32;1mTDengine client is installed successfully!${NC}" fi + touch ~/.taos_history rm -rf $(tar -tf taos.tar.gz) } From 5e17894fbae8932ba8a70c0caad1d2ff0f5847e8 Mon Sep 17 00:00:00 2001 From: Jeff Tao Date: Wed, 26 Aug 2020 14:33:03 +0800 Subject: [PATCH 2/5] Update architecture-ch.md --- documentation20/webdocs/markdowndocs/architecture-ch.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/documentation20/webdocs/markdowndocs/architecture-ch.md b/documentation20/webdocs/markdowndocs/architecture-ch.md index bfe3b55bd2..7ab4b5d096 100644 --- a/documentation20/webdocs/markdowndocs/architecture-ch.md +++ b/documentation20/webdocs/markdowndocs/architecture-ch.md @@ -82,7 +82,7 @@ TDengine 分布式架构的逻辑结构图如下: ### 节点之间的通讯 **通讯方式:**TDengine系统的各个节点之间的通讯是通过TCP/UDP进行的。因为考虑到物联网场景,数据写入的包一般不大,因此TDengine 除采用TCP做传输之外,还采用UDP方式,因为UDP 更加高效,而且不受连接数的限制。TDengine实现了自己的超时、重传、确认等机制,以确保UDP的可靠传输。对于数据量不到15K的数据包,采取UDP的方式进行传输,超过15K的,或者是查询类的操作,自动采取TCP的方式进行传输。同时,TDengine根据配置和数据包,会自动对数据进行压缩/解压缩,数字签名/认证等处理。对于数据节点之间的数据复制,只采用TCP方式进行数据传输。 
-**FQDN配置**:一个数据节点有一个或多个FQDN,可以在系统配置文件taos.cfg通过选项“fqdn"进行指定,如果没有指定,系统将自动获取FQDN。如果节点没有配置FQDN,可以直接使用IP地址作为FQDN,但不建议使用,因为IP地址可变,一旦变化,将让集群无法正常工作。一个数据节点的EP(End Point)由FQDN + Port组成。 +**FQDN配置**:一个数据节点有一个或多个FQDN,可以在系统配置文件taos.cfg通过参数“fqdn"进行指定,如果没有指定,系统将自动获取FQDN。如果节点没有配置FQDN,可以直接将该节点的配置参数fqdn设置为它的IP地址。但不建议使用IP,因为IP地址可变,一旦变化,将让集群无法正常工作。一个数据节点的EP(End Point)由FQDN + Port组成。采用FQDN,需要保证DNS服务正常工作,或者在节点以及应用所在的节点配置好hosts文件。 **端口配置:**一个数据节点对外的端口由TDengine的系统配置参数serverPort决定,对集群内部通讯的端口是serverPort+5。集群内数据节点之间的数据复制操作还占有一个TCP端口,是serverPort+10. 为支持多线程高效的处理UDP数据,每个对内和对外的UDP链接,都需要占用5个连续的端口。因此一个数据节点总的端口范围为serverPort到serverPort + 10,总共11个TCP/UDP端口。使用时,需要确保防火墙将这些端口打开。每个数据节点可以配置不同的serverPort。 From a393cc7524a56a2e16ac6935116d034a426506f7 Mon Sep 17 00:00:00 2001 From: Bomin Zhang Date: Thu, 27 Aug 2020 09:26:31 +0800 Subject: [PATCH 3/5] fix td-1242 --- src/dnode/src/dnodeSystem.c | 4 ++-- src/sync/src/tarbitrator.c | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/dnode/src/dnodeSystem.c b/src/dnode/src/dnodeSystem.c index 6f32bc0f7a..b286c0f219 100644 --- a/src/dnode/src/dnodeSystem.c +++ b/src/dnode/src/dnodeSystem.c @@ -120,8 +120,8 @@ int32_t main(int32_t argc, char *argv[]) { syslog(LOG_INFO, "Started TDengine service successfully."); for (int res = tsem_wait(&exitSem); res != 0; res = tsem_wait(&exitSem)) { - if (res != EINTR) { - syslog(LOG_ERR, "failed to wait exit semphore: %d", res); + if (errno != EINTR) { + syslog(LOG_ERR, "failed to wait exit semphore: %s", strerror(errno)); break; } } diff --git a/src/sync/src/tarbitrator.c b/src/sync/src/tarbitrator.c index 625c0d6838..79ddb073de 100644 --- a/src/sync/src/tarbitrator.c +++ b/src/sync/src/tarbitrator.c @@ -104,7 +104,7 @@ int main(int argc, char *argv[]) { sInfo("TAOS arbitrator: %s:%d is running", tsNodeFqdn, tsArbitratorPort); for (int res = tsem_wait(&tsArbSem); res != 0; res = tsem_wait(&tsArbSem)) { - if (res != EINTR) break; + if (errno != EINTR) break; } taosCloseTcpThreadPool(tsArbTcpPool); From 
829caf39d38be113774ae236ba737c3d902cff52 Mon Sep 17 00:00:00 2001 From: Bomin Zhang Date: Thu, 27 Aug 2020 10:07:58 +0800 Subject: [PATCH 4/5] td-1242: wait again if sem_wait returns EINTR --- src/dnode/src/dnodeSystem.c | 7 ++----- src/os/inc/osSemphone.h | 2 +- src/os/src/detail/osSemphone.c | 12 ++++++++++++ src/sync/src/tarbitrator.c | 4 +--- src/util/src/tsched.c | 13 ++----------- 5 files changed, 18 insertions(+), 20 deletions(-) diff --git a/src/dnode/src/dnodeSystem.c b/src/dnode/src/dnodeSystem.c index b286c0f219..56316e9619 100644 --- a/src/dnode/src/dnodeSystem.c +++ b/src/dnode/src/dnodeSystem.c @@ -119,11 +119,8 @@ int32_t main(int32_t argc, char *argv[]) { syslog(LOG_INFO, "Started TDengine service successfully."); - for (int res = tsem_wait(&exitSem); res != 0; res = tsem_wait(&exitSem)) { - if (errno != EINTR) { - syslog(LOG_ERR, "failed to wait exit semphore: %s", strerror(errno)); - break; - } + if (tsem_wait(&exitSem) != 0) { + syslog(LOG_ERR, "failed to wait exit semphore: %s", strerror(errno)); } dnodeCleanUpSystem(); diff --git a/src/os/inc/osSemphone.h b/src/os/inc/osSemphone.h index fd88d2d798..4280b458a6 100644 --- a/src/os/inc/osSemphone.h +++ b/src/os/inc/osSemphone.h @@ -23,7 +23,7 @@ extern "C" { #ifndef TAOS_OS_FUNC_SEMPHONE #define tsem_t sem_t #define tsem_init sem_init - #define tsem_wait sem_wait + int tsem_wait(tsem_t* sem); #define tsem_post sem_post #define tsem_destroy sem_destroy #endif diff --git a/src/os/src/detail/osSemphone.c b/src/os/src/detail/osSemphone.c index 74f8859029..b91888845e 100644 --- a/src/os/src/detail/osSemphone.c +++ b/src/os/src/detail/osSemphone.c @@ -16,6 +16,18 @@ #define _DEFAULT_SOURCE #include "os.h" +#ifndef TAOS_OS_FUNC_SEMPHONE + +int tsem_wait(tsem_t* sem) { + int ret = 0; + do { + ret = sem_wait(sem); + } while (ret != 0 && errno == EINTR); + return ret; +} + +#endif + #ifndef TAOS_OS_FUNC_SEMPHONE_PTHREAD bool taosCheckPthreadValid(pthread_t thread) { return thread != 0; } diff --git 
a/src/sync/src/tarbitrator.c b/src/sync/src/tarbitrator.c index 79ddb073de..eea46a2495 100644 --- a/src/sync/src/tarbitrator.c +++ b/src/sync/src/tarbitrator.c @@ -103,9 +103,7 @@ int main(int argc, char *argv[]) { sInfo("TAOS arbitrator: %s:%d is running", tsNodeFqdn, tsArbitratorPort); - for (int res = tsem_wait(&tsArbSem); res != 0; res = tsem_wait(&tsArbSem)) { - if (errno != EINTR) break; - } + tsem_wait(&tsArbSem); taosCloseTcpThreadPool(tsArbTcpPool); sInfo("TAOS arbitrator is shut down\n"); diff --git a/src/util/src/tsched.c b/src/util/src/tsched.c index cf7f5c10d4..f014dd0fab 100644 --- a/src/util/src/tsched.c +++ b/src/util/src/tsched.c @@ -123,11 +123,6 @@ void *taosProcessSchedQueue(void *param) { while (1) { if (tsem_wait(&pSched->fullSem) != 0) { - if (errno == EINTR) { - /* sem_wait is interrupted by interrupt, ignore and continue */ - uDebug("wait %s fullSem was interrupted", pSched->label); - continue; - } uError("wait %s fullSem failed(%s)", pSched->label, strerror(errno)); } if (pSched->stop) { @@ -163,12 +158,8 @@ int taosScheduleTask(void *qhandle, SSchedMsg *pMsg) { return 0; } - while (tsem_wait(&pSched->emptySem) != 0) { - if (errno != EINTR) { - uError("wait %s emptySem failed(%s)", pSched->label, strerror(errno)); - break; - } - uDebug("wait %s emptySem was interrupted", pSched->label); + if (tsem_wait(&pSched->emptySem) != 0) { + uError("wait %s emptySem failed(%s)", pSched->label, strerror(errno)); } if (pthread_mutex_lock(&pSched->queueMutex) != 0) From 65f750a087e18b4ffcffdc3379cacb97c0994e5a Mon Sep 17 00:00:00 2001 From: yihaoDeng Date: Thu, 27 Aug 2020 06:47:59 +0000 Subject: [PATCH 5/5] fixbug connection killing led floating point exception --- src/client/src/tscProfile.c | 2 +- src/client/src/tscServer.c | 10 +++++++--- src/inc/taoserror.h | 1 + src/mnode/src/mnodeProfile.c | 2 +- 4 files changed, 10 insertions(+), 5 deletions(-) diff --git a/src/client/src/tscProfile.c b/src/client/src/tscProfile.c index f48e7b7691..6ff97e9d00 
100644 --- a/src/client/src/tscProfile.c +++ b/src/client/src/tscProfile.c @@ -285,9 +285,9 @@ void tscKillConnection(STscObj *pObj) { SSqlObj *pSql = pObj->sqlList; while (pSql) { - //taosStopRpcConn(pSql->thandle); pSql = pSql->next; } + SSqlStream *pStream = pObj->streamList; while (pStream) { diff --git a/src/client/src/tscServer.c b/src/client/src/tscServer.c index 9282fa74fb..4f179adf72 100644 --- a/src/client/src/tscServer.c +++ b/src/client/src/tscServer.c @@ -226,13 +226,17 @@ int tscSendMsgToServer(SSqlObj *pSql) { .handle = &pSql->pRpcCtx, .code = 0 }; - // NOTE: the rpc context should be acquired before sending data to server. // Otherwise, the pSql object may have been released already during the response function, which is // processMsgFromServer function. In the meanwhile, the assignment of the rpc context to sql object will absolutely // cause crash. - rpcSendRequest(pObj->pDnodeConn, &pSql->epSet, &rpcMsg); - return TSDB_CODE_SUCCESS; + if (pObj != NULL && pObj->signature == pObj) { + rpcSendRequest(pObj->pDnodeConn, &pSql->epSet, &rpcMsg); + return TSDB_CODE_SUCCESS; + } else { + //pObj->signature has been reset by other thread, ignore concurrency problem + return TSDB_CODE_TSC_CONN_KILLED; + } } void tscProcessMsgFromServer(SRpcMsg *rpcMsg, SRpcEpSet *pEpSet) { diff --git a/src/inc/taoserror.h b/src/inc/taoserror.h index 57c2b322fa..e2ba7b03c1 100644 --- a/src/inc/taoserror.h +++ b/src/inc/taoserror.h @@ -96,6 +96,7 @@ TAOS_DEFINE_ERROR(TSDB_CODE_TSC_APP_ERROR, 0, 0x0211, "Applicatio TAOS_DEFINE_ERROR(TSDB_CODE_TSC_ACTION_IN_PROGRESS, 0, 0x0212, "Action in progress") TAOS_DEFINE_ERROR(TSDB_CODE_TSC_DISCONNECTED, 0, 0x0213, "Disconnected from service") TAOS_DEFINE_ERROR(TSDB_CODE_TSC_NO_WRITE_AUTH, 0, 0x0214, "No write permission") +TAOS_DEFINE_ERROR(TSDB_CODE_TSC_CONN_KILLED, 0, 0x0215, "Connection killed") // mnode TAOS_DEFINE_ERROR(TSDB_CODE_MND_MSG_NOT_PROCESSED, 0, 0x0300, "Message not processed") diff --git a/src/mnode/src/mnodeProfile.c 
b/src/mnode/src/mnodeProfile.c index 85457d7a26..06f992c26a 100644 --- a/src/mnode/src/mnodeProfile.c +++ b/src/mnode/src/mnodeProfile.c @@ -100,7 +100,7 @@ SConnObj *mnodeCreateConn(char *user, uint32_t ip, uint16_t port) { }; tstrncpy(connObj.user, user, sizeof(connObj.user)); - SConnObj *pConn = taosCachePut(tsMnodeConnCache, &connId, sizeof(int32_t), &connObj, sizeof(connObj), CONN_KEEP_TIME); + SConnObj *pConn = taosCachePut(tsMnodeConnCache, &connId, sizeof(int32_t), &connObj, sizeof(connObj), CONN_KEEP_TIME * 1000); mDebug("connId:%d, is created, user:%s ip:%s:%u", connId, user, taosIpStr(ip), port); return pConn;