enh: limit taosd to at most 3 restarts; allow disabling alerts in the pod

This commit is contained in:
tangfangzhi 2022-11-23 15:50:06 +08:00
parent f7f7b97279
commit 533a21693b
2 changed files with 30 additions and 13 deletions

View File

@ -26,4 +26,4 @@ COPY ./bin/* /usr/bin/
ENTRYPOINT ["/tini", "--", "/usr/bin/entrypoint.sh"] ENTRYPOINT ["/tini", "--", "/usr/bin/entrypoint.sh"]
CMD ["bash", "-c", "/usr/bin/run.sh"] CMD ["bash", "-c", "/usr/bin/run.sh"]
VOLUME [ "/var/lib/taos", "/var/log/taos", "/corefile" ] VOLUME [ "/var/lib/taos", "/var/log/taos" ]

View File

@ -6,6 +6,9 @@ TAOSD_STARTUP_TIMEOUT_SECOND=${TAOSD_STARTUP_TIMEOUT_SECOND:-160}
TAOS_TIMEOUT_SECOND=${TAOS_TIMEOUT_SECOND:-5} TAOS_TIMEOUT_SECOND=${TAOS_TIMEOUT_SECOND:-5}
BACKUP_CORE_FOLDER=/var/log/corefile BACKUP_CORE_FOLDER=/var/log/corefile
ALERT_URL=app/system/alert/add ALERT_URL=app/system/alert/add
ALERT_DISABLE_FILE=/var/log/disable_alert
START_TAOSD_MAX_NUMBER=3
start_taosd_count=0
echo "ADMIN_URL: ${ADMIN_URL}" echo "ADMIN_URL: ${ADMIN_URL}"
echo "TAOS_TIMEOUT_SECOND: ${TAOS_TIMEOUT_SECOND}" echo "TAOS_TIMEOUT_SECOND: ${TAOS_TIMEOUT_SECOND}"
@ -37,12 +40,16 @@ function post_error_msg() {
echo "service_state: ${service_state}" echo "service_state: ${service_state}"
echo "`date` service_msg: ${service_msg}" echo "`date` service_msg: ${service_msg}"
echo "${taos_version}" echo "${taos_version}"
curl --connect-timeout 10 --max-time 20 -X POST -H "Content-Type: application/json" \ if [ -f ${ALERT_DISABLE_FILE} ]; then
-d"{\"appName\":\"${app_name}\",\ echo "alert disabled"
\"alertLevel\":\"${service_state}\",\ else
\"taosVersion\":\"${taos_version}\",\ curl --connect-timeout 10 --max-time 20 -X POST -H "Content-Type: application/json" \
\"alertMsg\":\"${service_msg}\"}" \ -d"{\"appName\":\"${app_name}\",\
${ADMIN_URL}/${ALERT_URL} \"alertLevel\":\"${service_state}\",\
\"taosVersion\":\"${taos_version}\",\
\"alertMsg\":\"${service_msg}\"}" \
${ADMIN_URL}/${ALERT_URL}
fi
fi fi
} }
function check_taosd_exit_type() { function check_taosd_exit_type() {
@ -78,12 +85,16 @@ function post_disk_error_msg() {
echo "disk_state: ${disk_state}" echo "disk_state: ${disk_state}"
echo "`date` disk_msg: ${disk_msg}" echo "`date` disk_msg: ${disk_msg}"
echo "${taos_version}" echo "${taos_version}"
curl --connect-timeout 10 --max-time 20 -X POST -H "Content-Type: application/json" \ if [ -f ${ALERT_DISABLE_FILE} ]; then
-d"{\"appName\":\"${app_name}\",\ echo "alert disabled"
\"alertLevel\":\"${disk_state}\",\ else
\"taosVersion\":\"${taos_version}\",\ curl --connect-timeout 10 --max-time 20 -X POST -H "Content-Type: application/json" \
\"alertMsg\":\"${disk_msg}\"}" \ -d"{\"appName\":\"${app_name}\",\
${ADMIN_URL}/${ALERT_URL} \"alertLevel\":\"${disk_state}\",\
\"taosVersion\":\"${taos_version}\",\
\"alertMsg\":\"${disk_msg}\"}" \
${ADMIN_URL}/${ALERT_URL}
fi
fi fi
} }
function check_disk() { function check_disk() {
@ -154,6 +165,12 @@ do
# echo $status # echo $status
if [ "$status"x = "0"x ] if [ "$status"x = "0"x ]
then then
echo "start taosd count: ${start_taosd_count}"
if [ ${start_taosd_count} -gt ${START_TAOSD_MAX_NUMBER} ]; then
echo "exceed restart max count: ${START_TAOSD_MAX_NUMBER}"
break
fi
start_taosd_count=$(( start_taosd_count + 1 ))
# taosd_start_time=`date +%s` # taosd_start_time=`date +%s`
run_taosd & run_taosd &
fi fi