From 70850697a48445e103badb6519e2642ff059e0f8 Mon Sep 17 00:00:00 2001 From: wangjiaming0909 <604227650@qq.com> Date: Wed, 25 Oct 2023 20:01:22 +0800 Subject: [PATCH] feat: support to_timestamp/to_char fix comments --- docs/en/12-taos-sql/10-function.md | 5 +- docs/zh/12-taos-sql/10-function.md | 9 +- include/common/ttime.h | 13 +- source/common/src/ttime.c | 229 ++++++++++++------ source/common/test/commonTests.cpp | 20 +- source/libs/function/src/builtins.c | 2 +- source/libs/scalar/src/sclfunc.c | 39 ++- .../2-query/func_to_char_timestamp.py | 25 +- 8 files changed, 236 insertions(+), 106 deletions(-) diff --git a/docs/en/12-taos-sql/10-function.md b/docs/en/12-taos-sql/10-function.md index 266cdb4958..c986a98e46 100644 --- a/docs/en/12-taos-sql/10-function.md +++ b/docs/en/12-taos-sql/10-function.md @@ -486,7 +486,7 @@ return_timestamp: { #### TO_CHAR ```sql -TO_CHAR(ts, str_literal) +TO_CHAR(ts, format_str_literal) ``` **Description**: Convert a ts column to string as the format specified @@ -539,11 +539,12 @@ TO_CHAR(ts, str_literal) - When `ms`,`us`,`ns` are used in `to_char`, like `to_char(ts, 'yyyy-mm-dd hh:mi:ss.ms.us.ns')`, The time of `ms`,`us`,`ns` corresponds to the same fraction seconds. When ts is `1697182085123`, the output of `ms` is `123`, `us` is `123000`, `ns` is `123000000`. - If we want to output some characters of format without converting, surround it with double quotes. `to_char(ts, 'yyyy-mm-dd "is formated by yyyy-mm-dd"')`. If want to output double quotes, add a back slash before double quote, like `to_char(ts, '\"yyyy-mm-dd\"')` will output `"2023-10-10"`. - For formats that output digits, the uppercase and lowercase formats are the same. +- The local time zone will be used to convert the timestamp. #### TO_TIMESTAMP ```sql -TO_TIMESTAMP(str_literal, str_literal) +TO_TIMESTAMP(ts_str_literal, format_str_literal) ``` **Description**: Convert a formated timestamp string to a timestamp diff --git a/docs/zh/12-taos-sql/10-function.md b/docs/zh/12-taos-sql/10-function.md index 806ff3c6a8..44ab3d5091 100644 --- a/docs/zh/12-taos-sql/10-function.md +++ b/docs/zh/12-taos-sql/10-function.md @@ -486,7 +486,7 @@ return_timestamp: { #### TO_CHAR ```sql -TO_CHAR(ts, str_literal) +TO_CHAR(ts, format_str_literal) ``` **功能说明**: 将timestamp类型按照指定格式转换为字符串 @@ -504,7 +504,7 @@ TO_CHAR(ts, str_literal) | **格式** | **说明**| **例子** | | --- | --- | --- | |AM,am,PM,pm| 无点分隔的上午下午 | 07:00:00am| -|A.M.,a.m.,P.M.,p.m.| 有点分割的上午下午| 07:00:00a.m.| +|A.M.,a.m.,P.M.,p.m.| 有点分隔的上午下午| 07:00:00a.m.| |YYYY,yyyy|年, 4个及以上数字| 2023-10-10| |YYY,yyy| 年, 最后3位数字| 023-10-10| |YY,yy| 年, 最后2位数字| 23-10-10| @@ -537,13 +537,14 @@ TO_CHAR(ts, str_literal) **使用说明**: - `Month`, `Day`等的输出格式是左对齐的, 右侧添加空格, 如`2023-OCTOBER -01`, `2023-SEPTEMBER-01`, 9月是月份中英文字母数最长的, 因此9月没有空格. 星期类似. - 使用`ms`, `us`, `ns`时, 以上三种格式的输出只在精度上不同, 比如ts为 `1697182085123`, `ms` 的输出为 `123`, `us` 的输出为 `123000`, `ns` 的输出为 `123000000`. -- 如果想要在格式串中指定某些部分不做转换, 可以使用双引号, 如`to_char(ts, 'yyyy-mm-dd "is formated by yyyy-mm-dd"')`. 如果想要输出双引号, 那么在双引号之前加一个反斜杠, 如 `to_char(ts, '\"yyyy-mm-dd\"')` 将会输出 `"2023-10-10"`. +- 时间格式中无法匹配规则的内容会直接输出. 如果想要在格式串中指定某些能够匹配规则的部分不做转换, 可以使用双引号, 如`to_char(ts, 'yyyy-mm-dd "is formated by yyyy-mm-dd"')`. 如果想要输出双引号, 那么在双引号之前加一个反斜杠, 如 `to_char(ts, '\"yyyy-mm-dd\"')` 将会输出 `"2023-10-10"`. - 那些输出是数字的格式, 如`YYYY`, `DD`, 大写与小写意义相同, 即`yyyy` 和 `YYYY` 可以互换. +- 默认输出的时间为本地时区的时间. #### TO_TIMESTAMP ```sql -TO_TIMESTAMP(str_literal, str_literal) +TO_TIMESTAMP(ts_str_literal, format_str_literal) ``` **功能说明**: 将字符串按照指定格式转化为时间戳. diff --git a/include/common/ttime.h b/include/common/ttime.h index 75bbcddd0e..306b5105d0 100644 --- a/include/common/ttime.h +++ b/include/common/ttime.h @@ -100,15 +100,22 @@ int32_t taosTm2Ts(struct STm* tm, int64_t* ts, int32_t precision); /// @brief convert a timestamp to a formatted string /// @param format the timestamp format, must null terminated -void taosTs2Char(const char* format, int64_t ts, int32_t precision, char* out); +/// @param [in,out] formats the formats array pointer generated. Shouldn't be NULL. +/// If (*formats == NULL), [format] will be used and [formats] will be updated to the new generated +/// formats array; If not NULL, [formats] will be used instead of [format] to skip parse formats again. +/// @param out output buffer, should be initialized by memset +/// @notes remember to free the generated formats +void taosTs2Char(const char* format, SArray** formats, int64_t ts, int32_t precision, char* out, int32_t outLen); /// @brief convert a formatted timestamp string to a timestamp /// @param format must null terminated +/// @param [in, out] formats, see taosTs2Char /// @param tsStr must null terminated /// @retval 0 for success, otherwise error occured -int32_t taosChar2Ts(const char* format, const char* tsStr, int64_t* ts, int32_t precision, char* errMsg, +/// @notes remember to free the generated formats even when error occured +int32_t taosChar2Ts(const char* format, SArray** formats, const char* tsStr, int64_t* ts, int32_t precision, char* errMsg, int32_t errMsgLen); -void TEST_ts2char(const char* format, int64_t ts, int32_t precision, char* out); +void TEST_ts2char(const char* format, int64_t ts, int32_t precision, char* out, int32_t outLen); int32_t TEST_char2ts(const char* format, int64_t* ts, int32_t precision, const char* tsStr); #ifdef __cplusplus diff --git a/source/common/src/ttime.c b/source/common/src/ttime.c index 3450e32f4a..aad844da88 100644 --- a/source/common/src/ttime.c +++ b/source/common/src/ttime.c @@ -1008,7 +1008,6 @@ typedef enum { TSFKW_Day, // Sunday, Monday TSFKW_DY, // MON, TUE TSFKW_Dy, // Mon, Tue - TSFKW_dy, // mon, tue TSFKW_D, // 1-7 -> Sunday(1) -> Saturday(7) TSFKW_HH24, TSFKW_HH12, @@ -1021,12 +1020,12 @@ typedef enum { TSFKW_Mon, TSFKW_MS, TSFKW_NS, - TSFKW_OF, + //TSFKW_OF, TSFKW_PM, TSFKW_P_M, TSFKW_SS, - // TSFKW_TZM, TSFKW_TZH, + // TSFKW_TZM, // TSFKW_TZ, TSFKW_US, TSFKW_YYYY, @@ -1039,13 +1038,15 @@ typedef enum { TSFKW_a_m, // TSFKW_b_c, // TSFKW_bc, - TSFKW_d, TSFKW_day, TSFKW_ddd, TSFKW_dd, + TSFKW_dy, // mon, tue + TSFKW_d, TSFKW_hh24, TSFKW_hh12, TSFKW_hh, + TSFKW_mi, TSFKW_mm, TSFKW_month, TSFKW_mon, @@ -1067,17 +1068,17 @@ typedef enum { // clang-format off static const TSFormatKeyWord formatKeyWords[] = { - //{"A.D.", 4, TSFKW_A_D}, - {"A.M.", 4, TSFKW_A_M, false}, //{"AD", 2, TSFKW_AD, false}, + //{"A.D.", 4, TSFKW_A_D}, {"AM", 2, TSFKW_AM, false}, - //{"B.C.", 4, TSFKW_B_C, false}, + {"A.M.", 4, TSFKW_A_M, false}, //{"BC", 2, TSFKW_BC, false}, + //{"B.C.", 4, TSFKW_B_C, false}, {"DAY", 3, TSFKW_DAY, false}, {"DDD", 3, TSFKW_DDD, true}, {"DD", 2, TSFKW_DD, true}, - {"DY", 2, TSFKW_DY, false}, {"Day", 3, TSFKW_Day, false}, + {"DY", 2, TSFKW_DY, false}, {"Dy", 2, TSFKW_Dy, false}, {"D", 1, TSFKW_D, true}, {"HH24", 4, TSFKW_HH24, true}, @@ -1087,13 +1088,13 @@ static const TSFormatKeyWord formatKeyWords[] = { {"MM", 2, TSFKW_MM, true}, {"MONTH", 5, TSFKW_MONTH, false}, {"MON", 3, TSFKW_MON, false}, - {"MS", 2, TSFKW_MS, true}, {"Month", 5, TSFKW_Month, false}, {"Mon", 3, TSFKW_Mon, false}, + {"MS", 2, TSFKW_MS, true}, {"NS", 2, TSFKW_NS, true}, //{"OF", 2, TSFKW_OF, false}, - {"P.M.", 4, TSFKW_P_M, false}, {"PM", 2, TSFKW_PM, false}, + {"P.M.", 4, TSFKW_P_M, false}, {"SS", 2, TSFKW_SS, true}, {"TZH", 3, TSFKW_TZH, false}, //{"TZM", 3, TSFKW_TZM}, @@ -1104,9 +1105,9 @@ static const TSFormatKeyWord formatKeyWords[] = { {"YY", 2, TSFKW_YY, true}, {"Y", 1, TSFKW_Y, true}, //{"a.d.", 4, TSFKW_a_d, false}, - {"a.m.", 4, TSFKW_a_m, false}, //{"ad", 2, TSFKW_ad, false}, {"am", 2, TSFKW_am, false}, + {"a.m.", 4, TSFKW_a_m, false}, //{"b.c.", 4, TSFKW_b_c, false}, //{"bc", 2, TSFKW_bc, false}, {"day", 3, TSFKW_day, false}, @@ -1124,8 +1125,8 @@ static const TSFormatKeyWord formatKeyWords[] = { {"ms", 2, TSFKW_MS, true}, {"ns", 2, TSFKW_NS, true}, //{"of", 2, TSFKW_OF, false}, - {"p.m.", 4, TSFKW_p_m, false}, {"pm", 2, TSFKW_pm, false}, + {"p.m.", 4, TSFKW_p_m, false}, {"ss", 2, TSFKW_SS, true}, {"tzh", 3, TSFKW_TZH, false}, //{"tzm", 3, TSFKW_TZM}, @@ -1139,9 +1140,34 @@ static const TSFormatKeyWord formatKeyWords[] = { }; // clang-format on +#define TS_FROMAT_KEYWORD_INDEX_SIZE ('z' - 'A' + 1) +static const int TSFormatKeywordIndex[TS_FROMAT_KEYWORD_INDEX_SIZE] = { + /*A*/ TSFKW_AM, -1, -1, + /*D*/ TSFKW_DAY, -1, -1, -1, + /*H*/ TSFKW_HH24, -1, -1, -1, -1, + /*M*/ TSFKW_MI, + /*N*/ TSFKW_NS, -1, + /*P*/ TSFKW_PM, -1, -1, + /*S*/ TSFKW_SS, + /*T*/ TSFKW_TZH, + /*U*/ TSFKW_US, -1, -1, -1, + /*Y*/ TSFKW_YYYY, -1, + /*[ \ ] ^ _ `*/ -1, -1, -1, -1, -1, -1, + /*a*/ TSFKW_am, -1, -1, + /*d*/ TSFKW_day, -1, -1, -1, + /*h*/ TSFKW_hh24, -1, -1, -1, -1, + /*m*/ TSFKW_mi, + /*n*/ TSFKW_ns, -1, + /*p*/ TSFKW_pm, -1, -1, + /*s*/ TSFKW_ss, + /*t*/ TSFKW_tzh, + /*u*/ TSFKW_us, -1, -1, -1, + /*y*/ TSFKW_yyyy, -1}; + typedef struct { uint8_t type; - char c[2]; + const char* c; + int32_t len; const TSFormatKeyWord* key; } TSFormatNode; @@ -1169,9 +1195,10 @@ static const char* const long_apms[] = {A_M_STR, P_M_STR, a_m_str, p_m_str, NULL static const TSFormatKeyWord* keywordSearch(const char* str) { if (*str < 'A' || *str > 'z' || (*str > 'Z' && *str < 'a')) return NULL; - int32_t idx = 0; + int32_t idx = TSFormatKeywordIndex[str[0] - 'A']; + if (idx < 0) return NULL; const TSFormatKeyWord* key = &formatKeyWords[idx++]; - while (key->name) { + while (key->name && str[0] == key->name[0]) { if (0 == strncmp(key->name, str, key->len)) { return key; } @@ -1184,74 +1211,110 @@ static bool isSeperatorChar(char c) { return (c > 0x20 && c < 0x7F && !(c >= 'A' && c <= 'Z') && !(c >= 'a' && c <= 'z') && !(c >= '0' && c <= '9')); } -static void parseTsFormat(const char* format_str, SArray* formats) { - while (*format_str) { - const TSFormatKeyWord* key = keywordSearch(format_str); +static void parseTsFormat(const char* formatStr, SArray* formats) { + TSFormatNode* lastOtherFormat = NULL; + while (*formatStr) { + const TSFormatKeyWord* key = keywordSearch(formatStr); if (key) { TSFormatNode format = {.key = key, .type = TS_FORMAT_NODE_TYPE_KEYWORD}; taosArrayPush(formats, &format); - format_str += key->len; + formatStr += key->len; + lastOtherFormat = NULL; } else { - if (*format_str == '"') { + if (*formatStr == '"') { + lastOtherFormat = NULL; // for double quoted string - format_str++; - while (*format_str) { - if (*format_str == '"') { - format_str++; + formatStr++; + TSFormatNode* last = NULL; + while (*formatStr) { + if (*formatStr == '"') { + formatStr++; break; } - if (*format_str == '\\' && *(format_str + 1)) format_str++; - TSFormatNode format = {.type = TS_FORMAT_NODE_TYPE_CHAR, .key = NULL}; - format.c[0] = *format_str; - format.c[1] = '\0'; - taosArrayPush(formats, &format); - format_str++; + if (*formatStr == '\\' && *(formatStr + 1)) { + formatStr++; + last = NULL; // stop expanding last format, create new format + } + if (last) { + // expand + assert(last->type == TS_FORMAT_NODE_TYPE_CHAR); + last->len++; + formatStr++; + } else { + // create new + TSFormatNode format = {.type = TS_FORMAT_NODE_TYPE_CHAR, .key = NULL}; + format.c = formatStr; + format.len = 1; + taosArrayPush(formats, &format); + formatStr++; + last = taosArrayGetLast(formats); + } } } else { // for other strings - if (*format_str == '\\' && *(format_str + 1)) format_str++; - TSFormatNode format = { - .type = isSeperatorChar(*format_str) ? TS_FORMAT_NODE_TYPE_SEPARATOR : TS_FORMAT_NODE_TYPE_CHAR, + if (*formatStr == '\\' && *(formatStr + 1)) { + formatStr++; + lastOtherFormat = NULL; // stop expanding + } else { + if (lastOtherFormat && !isSeperatorChar(*formatStr)) { + // expanding + } else { + // create new + lastOtherFormat = NULL; + } + } + if (lastOtherFormat) { + assert(lastOtherFormat->type == TS_FORMAT_NODE_TYPE_CHAR); + lastOtherFormat->len++; + formatStr++; + } else { + TSFormatNode format = { + .type = isSeperatorChar(*formatStr) ? TS_FORMAT_NODE_TYPE_SEPARATOR : TS_FORMAT_NODE_TYPE_CHAR, .key = NULL}; - format.c[0] = *format_str; - format.c[1] = '\0'; - taosArrayPush(formats, &format); - format_str++; + format.c = formatStr; + format.len = 1; + taosArrayPush(formats, &format); + formatStr++; + if (format.type == TS_FORMAT_NODE_TYPE_CHAR) lastOtherFormat = taosArrayGetLast(formats); + } } } } } -static void tm2char(const SArray* formats, const struct STm* tm, char* s) { +static void tm2char(const SArray* formats, const struct STm* tm, char* s, int32_t outLen) { int32_t size = taosArrayGetSize(formats); + const char* start = s; for (int32_t i = 0; i < size; ++i) { TSFormatNode* format = taosArrayGet(formats, i); if (format->type != TS_FORMAT_NODE_TYPE_KEYWORD) { - strcpy(s, format->c); - s += strlen(s); + if (s - start + format->len + 1 > outLen) break; + strncpy(s, format->c, format->len); + s += format->len; continue; } + if (s - start + 16 > outLen) break; switch (format->key->id) { case TSFKW_AM: case TSFKW_PM: sprintf(s, tm->tm.tm_hour % 24 >= 12 ? "PM" : "AM"); - s += strlen(s); + s += 2; break; case TSFKW_A_M: case TSFKW_P_M: sprintf(s, tm->tm.tm_hour % 24 >= 12 ? "P.M." : "A.M."); - s += strlen(s); + s += 4; break; case TSFKW_am: case TSFKW_pm: sprintf(s, tm->tm.tm_hour % 24 >= 12 ? "pm" : "am"); - s += strlen(s); + s += 2; break; case TSFKW_a_m: case TSFKW_p_m: sprintf(s, tm->tm.tm_hour % 24 >= 12 ? "p.m." : "a.m."); - s += strlen(s); + s += 4; break; case TSFKW_DDD: sprintf(s, "%d", tm->tm.tm_yday); @@ -1259,11 +1322,11 @@ static void tm2char(const SArray* formats, const struct STm* tm, char* s) { break; case TSFKW_DD: sprintf(s, "%02d", tm->tm.tm_mday); - s += strlen(s); + s += 2; break; case TSFKW_D: sprintf(s, "%d", tm->tm.tm_wday + 1); - s += strlen(s); + s += 1; break; case TSFKW_DAY: { // MONDAY, TUESDAY... @@ -1293,13 +1356,13 @@ static void tm2char(const SArray* formats, const struct STm* tm, char* s) { char buf[8] = {0}; for (int32_t i = 0; i < strlen(wd); ++i) buf[i] = toupper(wd[i]); sprintf(s, "%3s", buf); - s += strlen(s); + s += 3; break; } case TSFKW_Dy: // Mon, Tue sprintf(s, "%3s", shortWeekDays[tm->tm.tm_wday]); - s += strlen(s); + s += 3; break; case TSFKW_dy: { // mon, tue @@ -1307,26 +1370,26 @@ static void tm2char(const SArray* formats, const struct STm* tm, char* s) { char buf[8] = {0}; for (int32_t i = 0; i < strlen(wd); ++i) buf[i] = tolower(wd[i]); sprintf(s, "%3s", buf); - s += strlen(s); + s += 3; break; } case TSFKW_HH24: sprintf(s, "%02d", tm->tm.tm_hour); - s += strlen(s); + s += 2; break; case TSFKW_HH: case TSFKW_HH12: // 0 or 12 o'clock in 24H coresponds to 12 o'clock (AM/PM) in 12H sprintf(s, "%02d", tm->tm.tm_hour % 12 == 0 ? 12 : tm->tm.tm_hour % 12); - s += strlen(s); + s += 2; break; case TSFKW_MI: sprintf(s, "%02d", tm->tm.tm_min); - s += strlen(s); + s += 2; break; case TSFKW_MM: sprintf(s, "%02d", tm->tm.tm_mon + 1); - s += strlen(s); + s += 2; break; case TSFKW_MONTH: { const char* mon = fullMonths[tm->tm.tm_mon]; @@ -1370,19 +1433,19 @@ static void tm2char(const SArray* formats, const struct STm* tm, char* s) { } case TSFKW_SS: sprintf(s, "%02d", tm->tm.tm_sec); - s += strlen(s); + s += 2; break; case TSFKW_MS: sprintf(s, "%03" PRId64, tm->fsec / 1000000L); - s += strlen(s); + s += 3; break; case TSFKW_US: sprintf(s, "%06" PRId64, tm->fsec / 1000L); - s += strlen(s); + s += 6; break; case TSFKW_NS: sprintf(s, "%09" PRId64, tm->fsec); - s += strlen(s); + s += 9; break; case TSFKW_TZH: sprintf(s, "%s%02d", tsTimezone < 0 ? "-" : "+", tsTimezone); @@ -1429,6 +1492,7 @@ static const char* tsFormatStr2Int32(int32_t* dest, const char* str, int32_t len char* last; int64_t res; const char* s = str; + if ('\0' == str[0]) return NULL; if (len <= 0) { res = taosStr2Int64(s, &last, 10); s = last; @@ -1523,18 +1587,27 @@ static int32_t char2ts(const char* s, SArray* formats, int64_t* ts, int32_t prec int32_t tzSign = 1, tz = tsTimezone; int32_t err = 0; - for (int32_t i = 0; i < size; ++i) { - while (isspace(*s)) { + for (int32_t i = 0; i < size && *s != '\0'; ++i) { + while (isspace(*s) && *s != '\0') { s++; } + if (!s) break; TSFormatNode* node = taosArrayGet(formats, i); if (node->type == TS_FORMAT_NODE_TYPE_SEPARATOR) { // separator matches any character - if (isSeperatorChar(s[0])) s += strlen(node->c); + if (isSeperatorChar(s[0])) s += node->len; continue; } if (node->type == TS_FORMAT_NODE_TYPE_CHAR) { - if (!isspace(node->c[0])) s += strlen(node->c); + int32_t pos = 0; + // skip leading spaces + while (isspace(node->c[pos]) && node->len > 0) pos++; + while (pos < node->len && *s != '\0') { + if (!isspace(node->c[pos++])) { + while (isspace(*s) && *s != '\0') s++; + if (*s != '\0') s++; // forward together + } + } continue; } assert(node->type == TS_FORMAT_NODE_TYPE_KEYWORD); @@ -1545,7 +1618,7 @@ static int32_t char2ts(const char* s, SArray* formats, int64_t* ts, int32_t prec case TSFKW_p_m: { int32_t idx = strArrayCaseSearch(long_apms, s); if (idx >= 0) { - s += strlen(long_apms[idx]); + s += 4; pm = idx % 2; hour12 = 1; } else { @@ -1558,7 +1631,7 @@ static int32_t char2ts(const char* s, SArray* formats, int64_t* ts, int32_t prec case TSFKW_pm: { int32_t idx = strArrayCaseSearch(apms, s); if (idx >= 0) { - s += strlen(apms[idx]); + s += 2; pm = idx % 2; hour12 = 1; } else { @@ -1793,39 +1866,41 @@ static int32_t char2ts(const char* s, SArray* formats, int64_t* ts, int32_t prec return ret; } -void taosTs2Char(const char* format, int64_t ts, int32_t precision, char* out) { - SArray* formats = taosArrayInit(8, sizeof(TSFormatNode)); - parseTsFormat(format, formats); +void taosTs2Char(const char* format, SArray** formats, int64_t ts, int32_t precision, char* out, int32_t outLen) { + if (!*formats) { + *formats = taosArrayInit(8, sizeof(TSFormatNode)); + parseTsFormat(format, *formats); + } struct STm tm; taosTs2Tm(ts, precision, &tm); - tm2char(formats, &tm, out); - taosArrayDestroy(formats); + tm2char(*formats, &tm, out, outLen); } -int32_t taosChar2Ts(const char* format, const char* tsStr, int64_t* ts, int32_t precision, char* errMsg, +int32_t taosChar2Ts(const char* format, SArray** formats, const char* tsStr, int64_t* ts, int32_t precision, char* errMsg, int32_t errMsgLen) { const char* sErrPos; int32_t fErrIdx; - SArray* formats = taosArrayInit(4, sizeof(TSFormatNode)); - parseTsFormat(format, formats); - int32_t code = char2ts(tsStr, formats, ts, precision, &sErrPos, &fErrIdx); + if (!*formats) { + *formats = taosArrayInit(4, sizeof(TSFormatNode)); + parseTsFormat(format, *formats); + } + int32_t code = char2ts(tsStr, *formats, ts, precision, &sErrPos, &fErrIdx); if (code == -1) { - TSFormatNode* fNode = (taosArrayGet(formats, fErrIdx)); + TSFormatNode* fNode = (taosArrayGet(*formats, fErrIdx)); snprintf(errMsg, errMsgLen, "mismatch format for: %s and %s", sErrPos, - fErrIdx < taosArrayGetSize(formats) ? ((TSFormatNode*)taosArrayGet(formats, fErrIdx))->key->name : ""); + fErrIdx < taosArrayGetSize(*formats) ? ((TSFormatNode*)taosArrayGet(*formats, fErrIdx))->key->name : ""); } else if (code == -2) { snprintf(errMsg, errMsgLen, "timestamp format error: %s -> %s", tsStr, format); } - taosArrayDestroy(formats); return code; } -void TEST_ts2char(const char* format, int64_t ts, int32_t precision, char* out) { +void TEST_ts2char(const char* format, int64_t ts, int32_t precision, char* out, int32_t outLen) { SArray* formats = taosArrayInit(4, sizeof(TSFormatNode)); parseTsFormat(format, formats); struct STm tm; taosTs2Tm(ts, precision, &tm); - tm2char(formats, &tm, out); + tm2char(formats, &tm, out, outLen); taosArrayDestroy(formats); } diff --git a/source/common/test/commonTests.cpp b/source/common/test/commonTests.cpp index 49a16351ca..dc320ebcb2 100644 --- a/source/common/test/commonTests.cpp +++ b/source/common/test/commonTests.cpp @@ -316,8 +316,8 @@ TEST(timeTest, timestamp2tm) { } void test_ts2char(int64_t ts, const char* format, int32_t precison, const char* expected) { - char buf[128] = {0}; - TEST_ts2char(format, ts, precison, buf); + char buf[256] = {0}; + TEST_ts2char(format, ts, precison, buf, 256); printf("ts: %ld format: %s res: [%s], expected: [%s]\n", ts, format, buf, expected); ASSERT_STREQ(expected, buf); } @@ -408,8 +408,8 @@ TEST(timeTest, char2ts) { #ifndef WINDOWS // 2023-1-1 21:10:10.120450780 - ASSERT_EQ(0, TEST_char2ts("yy \"年\"-MM 月-dd \"日\" HH24:MI:ss.ms.us.ns TZH", &ts, TSDB_TIME_PRECISION_NANO, - " 23 年 - 1 月 - 01 日 \t 21:10:10 . 12 . \t 00045 . 00000078 \t+08")); + ASSERT_EQ(0, TEST_char2ts("yy \"年\"-MM 月-dd \"日 子\" HH24:MI:ss.ms.us.ns TZH", &ts, TSDB_TIME_PRECISION_NANO, + " 23 年 - 1 月 - 01 日 子 \t 21:10:10 . 12 . \t 00045 . 00000078 \t+08")); ASSERT_EQ(ts, 1672578610120450780LL); #endif @@ -437,7 +437,7 @@ TEST(timeTest, char2ts) { "2100/january/01 FRIDAY 11:10:10.124456+08")); ASSERT_EQ(ts, 4102456210124456LL); ASSERT_EQ(0, TEST_char2ts("yyyy/Month/dd Dy HH24:MI:ss.usTZH", &ts, TSDB_TIME_PRECISION_MICRO, - "2100/january/01 Fri 11:10:10.124456+08")); + "2100/january/01 Fri 11:10:10.124456+08:00")); ASSERT_EQ(ts, 4102456210124456LL); ASSERT_EQ(0, TEST_char2ts("yyyy/month/dd day HH24:MI:ss.usTZH", &ts, TSDB_TIME_PRECISION_MICRO, @@ -461,7 +461,8 @@ TEST(timeTest, char2ts) { // '/' cannot convert to MM ASSERT_EQ(-1, TEST_char2ts("yyyyMMdd ", &ts, TSDB_TIME_PRECISION_MICRO, "2100/2/1")); // nothing to be converted to dd - ASSERT_EQ(-1, TEST_char2ts("yyyyMMdd ", &ts, TSDB_TIME_PRECISION_MICRO, "210012")); + ASSERT_EQ(0, TEST_char2ts("yyyyMMdd ", &ts, TSDB_TIME_PRECISION_MICRO, "210012")); + ASSERT_EQ(ts, 4131273600000000LL); // 2100-12-1 ASSERT_EQ(-1, TEST_char2ts("yyyyMMdd ", &ts, TSDB_TIME_PRECISION_MICRO, "21001")); ASSERT_EQ(-1, TEST_char2ts("yyyyMM-dd ", &ts, TSDB_TIME_PRECISION_MICRO, "23a1-1")); @@ -481,6 +482,13 @@ TEST(timeTest, char2ts) { ASSERT_EQ(-1, TEST_char2ts("HH12:MI:SS", &ts, TSDB_TIME_PRECISION_MICRO, "21:12:12")); ASSERT_EQ(-1, TEST_char2ts("yyyy/MM1/dd ", &ts, TSDB_TIME_PRECISION_MICRO, "2100111111111/11/2")); ASSERT_EQ(-2, TEST_char2ts("yyyy/MM1/ddTZH", &ts, TSDB_TIME_PRECISION_MICRO, "23/11/2-13")); + ASSERT_EQ(0, TEST_char2ts("yyyy年 MM/ddTZH", &ts, TSDB_TIME_PRECISION_MICRO, "1970年1/1+0")); + ASSERT_EQ(ts, 0); + ASSERT_EQ(-1, TEST_char2ts("yyyy年a MM/dd", &ts, TSDB_TIME_PRECISION_MICRO, "2023年1/2")); + ASSERT_EQ(0, TEST_char2ts("yyyy年 MM/ddTZH", &ts, TSDB_TIME_PRECISION_MICRO, "1970年 1/1+0")); + ASSERT_EQ(ts, 0); + ASSERT_EQ(0, TEST_char2ts("yyyy年 a a a MM/ddTZH", &ts, TSDB_TIME_PRECISION_MICRO, "1970年 a a a 1/1+0")); + ASSERT_EQ(0, TEST_char2ts("yyyy年 a a a a a a a a a a a a a a a MM/ddTZH", &ts, TSDB_TIME_PRECISION_MICRO, "1970年 a ")); } #pragma GCC diagnostic pop diff --git a/source/libs/function/src/builtins.c b/source/libs/function/src/builtins.c index 628a609715..84aff9fa88 100644 --- a/source/libs/function/src/builtins.c +++ b/source/libs/function/src/builtins.c @@ -2107,7 +2107,7 @@ static int32_t translateToChar(SFunctionNode* pFunc, char* pErrBuf, int32_t len) if (!IS_STR_DATA_TYPE(para2Type) || !IS_TIMESTAMP_TYPE(para1Type)) { return invaildFuncParaTypeErrMsg(pErrBuf, len, pFunc->functionName); } - pFunc->node.resType = (SDataType){.bytes = tDataTypes[TSDB_DATA_TYPE_VARCHAR].bytes, .type = TSDB_DATA_TYPE_VARCHAR}; + pFunc->node.resType = (SDataType){.bytes = 4096, .type = TSDB_DATA_TYPE_VARCHAR}; return TSDB_CODE_SUCCESS; } diff --git a/source/libs/scalar/src/sclfunc.c b/source/libs/scalar/src/sclfunc.c index ee2ba47ce8..48886b1eec 100644 --- a/source/libs/scalar/src/sclfunc.c +++ b/source/libs/scalar/src/sclfunc.c @@ -1203,9 +1203,13 @@ int32_t toTimestampFunction(SScalarParam* pInput, int32_t inputNum, SScalarParam char * tsStr = taosMemoryMalloc(TS_FORMAT_MAX_LEN); char * format = taosMemoryMalloc(TS_FORMAT_MAX_LEN); int32_t len, code = TSDB_CODE_SUCCESS; + SArray *formats = NULL; + for (int32_t i = 0; i < pInput[0].numOfRows; ++i) { - if (colDataIsNull_s(pInput[1].columnData, i) || colDataIsNull_s(pInput[0].columnData, i)) + if (colDataIsNull_s(pInput[1].columnData, i) || colDataIsNull_s(pInput[0].columnData, i)) { colDataSetNULL(pOutput->columnData, i); + continue; + } char *tsData = colDataGetData(pInput[0].columnData, i); char *formatData = colDataGetData(pInput[1].columnData, pInput[1].numOfRows > 1 ? i : 0); @@ -1213,11 +1217,17 @@ int32_t toTimestampFunction(SScalarParam* pInput, int32_t inputNum, SScalarParam strncpy(tsStr, varDataVal(tsData), len); tsStr[len] = '\0'; len = TMIN(TS_FORMAT_MAX_LEN - 1, varDataLen(formatData)); - strncpy(format, varDataVal(formatData), len); - format[len] = '\0'; + if (pInput[1].numOfRows > 1 || i == 0) { + strncpy(format, varDataVal(formatData), len); + format[len] = '\0'; + if (formats) { + taosArrayDestroy(formats); + formats = NULL; + } + } int32_t precision = pOutput->columnData->info.precision; char errMsg[128] = {0}; - code = taosChar2Ts(format, tsStr, &ts, precision, errMsg, 128); + code = taosChar2Ts(format, &formats, tsStr, &ts, precision, errMsg, 128); if (code) { qError("func to_timestamp failed %s", errMsg); code = TSDB_CODE_FUNC_TO_TIMESTAMP_FAILED; @@ -1225,6 +1235,7 @@ int32_t toTimestampFunction(SScalarParam* pInput, int32_t inputNum, SScalarParam } colDataSetVal(pOutput->columnData, i, (char *)&ts, false); } + if (formats) taosArrayDestroy(formats); taosMemoryFree(tsStr); taosMemoryFree(format); return code; @@ -1232,22 +1243,32 @@ int32_t toTimestampFunction(SScalarParam* pInput, int32_t inputNum, SScalarParam int32_t toCharFunction(SScalarParam* pInput, int32_t inputNum, SScalarParam* pOutput) { char * format = taosMemoryMalloc(TS_FORMAT_MAX_LEN); - char * out = taosMemoryMalloc(TS_FORMAT_MAX_LEN * 2); + char * out = taosMemoryCalloc(1, TS_FORMAT_MAX_LEN + VARSTR_HEADER_SIZE); int32_t len; + SArray *formats = NULL; for (int32_t i = 0; i < pInput[0].numOfRows; ++i) { - if (colDataIsNull_s(pInput[1].columnData, i) || colDataIsNull_s(pInput[0].columnData, i)) + if (colDataIsNull_s(pInput[1].columnData, i) || colDataIsNull_s(pInput[0].columnData, i)) { colDataSetNULL(pOutput->columnData, i); + continue; + } char *ts = colDataGetData(pInput[0].columnData, i); char *formatData = colDataGetData(pInput[1].columnData, pInput[1].numOfRows > 1 ? i : 0); len = TMIN(TS_FORMAT_MAX_LEN - 1, varDataLen(formatData)); - strncpy(format, varDataVal(formatData), len); - format[len] = '\0'; + if (pInput[1].numOfRows > 1 || i == 0) { + strncpy(format, varDataVal(formatData), len); + format[len] = '\0'; + if (formats) { + taosArrayDestroy(formats); + formats = NULL; + } + } int32_t precision = pInput[0].columnData->info.precision; - taosTs2Char(format, *(int64_t *)ts, precision, varDataVal(out)); + taosTs2Char(format, &formats, *(int64_t *)ts, precision, varDataVal(out), TS_FORMAT_MAX_LEN); varDataSetLen(out, strlen(varDataVal(out))); colDataSetVal(pOutput->columnData, i, out, false); } + if (formats) taosArrayDestroy(formats); taosMemoryFree(format); taosMemoryFree(out); return TSDB_CODE_SUCCESS; diff --git a/tests/system-test/2-query/func_to_char_timestamp.py b/tests/system-test/2-query/func_to_char_timestamp.py index 3d3435d9c7..639811d275 100644 --- a/tests/system-test/2-query/func_to_char_timestamp.py +++ b/tests/system-test/2-query/func_to_char_timestamp.py @@ -60,10 +60,15 @@ class TDTestCase: rowsBatched = 0 sql += " %s%d values "%(ctbPrefix,i) for j in range(rowsPerTbl): - if (i < ctbNum/2): - sql += "(%d, %d, %d, %d,%d,%d,%d,true,'binary%d', 'nchar%d') "%(startTs + j*tsStep, j%10, j%10, j%10, j%10, j%10, j%10, j%10, j%10) + if i % 3 == 0: + ts_format = 'NULL' else: - sql += "(%d, %d, NULL, %d,NULL,%d,%d,true,'binary%d', 'nchar%d') "%(startTs + j*tsStep, j%10, j%10, j%10, j%10, j%10, j%10) + ts_format = "'yyyy-mm-dd hh24:mi:ss'" + + if (i < ctbNum/2): + sql += "(%d, %d, %d, %d,%d,%d,%d,true,'2023-11-01 10:10:%d', %s, 'nchar%d') "%(startTs + j*tsStep, j%10, j%10, j%10, j%10, j%10, j%10, j%10, ts_format, j%10) + else: + sql += "(%d, %d, NULL, %d,NULL,%d,%d,true,NULL , %s, 'nchar%d') "%(startTs + j*tsStep, j%10, j%10, j%10, j%10, ts_format, j%10) rowsBatched += 1 if ((rowsBatched == batchNum) or (j == rowsPerTbl - 1)): tsql.execute(sql) @@ -85,7 +90,7 @@ class TDTestCase: 'stbName': 'meters', 'colPrefix': 'c', 'tagPrefix': 't', - 'colSchema': [{'type': 'INT', 'count':1},{'type': 'BIGINT', 'count':1},{'type': 'FLOAT', 'count':1},{'type': 'DOUBLE', 'count':1},{'type': 'smallint', 'count':1},{'type': 'tinyint', 'count':1},{'type': 'bool', 'count':1},{'type': 'binary', 'len':10, 'count':1},{'type': 'nchar', 'len':10, 'count':1}], + 'colSchema': [{'type': 'INT', 'count':1},{'type': 'BIGINT', 'count':1},{'type': 'FLOAT', 'count':1},{'type': 'DOUBLE', 'count':1},{'type': 'smallint', 'count':1},{'type': 'tinyint', 'count':1},{'type': 'bool', 'count':1},{'type': 'varchar', 'len':1024, 'count':2},{'type': 'nchar', 'len':10, 'count':1}], 'tagSchema': [{'type': 'INT', 'count':1},{'type': 'nchar', 'len':20, 'count':1},{'type': 'binary', 'len':20, 'count':1},{'type': 'BIGINT', 'count':1},{'type': 'smallint', 'count':1},{'type': 'DOUBLE', 'count':1}], 'ctbPrefix': 't', 'ctbStartIdx': 0, @@ -146,6 +151,18 @@ class TDTestCase: tdSql.query("select to_char(ts, 'yy-mon-dd hh24:mi:ss.msa.m.TZH Day') from meters where to_timestamp(to_char(ts, 'yy-mon-dd hh24:mi:ss dy'), 'yy-mon-dd hh24:mi:ss dy') != ts") tdSql.checkRows(0) + tdSql.query("select to_timestamp(c8, 'YYYY-MM-DD hh24:mi:ss') from meters") + tdSql.query("select to_timestamp(c8, c9) from meters") + + format = "YYYY-MM-DD HH:MI:SS" + for i in range(500): + format = format + "1234567890" + tdSql.query("select to_char(ts, '%s') from meters" % (format), queryTimes=1) + time_str = '2023-11-11 10:10:10' + for i in range(500): + time_str = time_str + "1234567890" + tdSql.query("select to_timestamp('%s', '%s')" % (time_str, format)) + def run(self): self.prepareTestEnv() self.test_to_timestamp()