From 394178da688da9bdaa61274abc4da4e25a2178d8 Mon Sep 17 00:00:00 2001 From: afwerar <1296468573@qq.com> Date: Fri, 18 Mar 2022 13:23:48 +0800 Subject: [PATCH] [TD-13767]: only use iconv. --- cmake/cmake.options | 7 + cmake/iconv_CMakeLists.txt.in | 12 + contrib/CMakeLists.txt | 25 +- include/os/osFile.h | 31 +- include/os/osString.h | 23 +- include/util/tdef.h | 2 +- source/common/src/tvariant.c | 10 +- source/libs/parser/src/parInsert.c | 4 +- source/libs/scalar/src/filter.c | 8 +- source/libs/scalar/src/sclvector.c | 2 +- source/os/CMakeLists.txt | 7 +- source/os/src/osFile.c | 17 + source/os/src/osLocale.c | 2 +- source/os/src/osString.c | 714 +++++++++++++++++------------ 14 files changed, 502 insertions(+), 362 deletions(-) create mode 100644 cmake/iconv_CMakeLists.txt.in diff --git a/cmake/cmake.options b/cmake/cmake.options index 1a1a5b5d78..946eb5d258 100644 --- a/cmake/cmake.options +++ b/cmake/cmake.options @@ -18,6 +18,13 @@ IF(${TD_WINDOWS}) ON ) + MESSAGE("build iconv Win32") + option( + BUILD_WITH_ICONV + "If build iconv on Windows" + ON + ) + ENDIF () IF(${TD_LINUX} MATCHES TRUE) diff --git a/cmake/iconv_CMakeLists.txt.in b/cmake/iconv_CMakeLists.txt.in new file mode 100644 index 0000000000..31dfd829fc --- /dev/null +++ b/cmake/iconv_CMakeLists.txt.in @@ -0,0 +1,12 @@ + +# iconv +ExternalProject_Add(iconv + GIT_REPOSITORY https://github.com/win-iconv/win-iconv.git + GIT_TAG v0.0.8 + SOURCE_DIR "${CMAKE_CONTRIB_DIR}/iconv" + BINARY_DIR "" + CONFIGURE_COMMAND "" + BUILD_COMMAND "" + INSTALL_COMMAND "" + TEST_COMMAND "" + ) \ No newline at end of file diff --git a/contrib/CMakeLists.txt b/contrib/CMakeLists.txt index f87f660ab3..9cf68b87f9 100644 --- a/contrib/CMakeLists.txt +++ b/contrib/CMakeLists.txt @@ -83,6 +83,11 @@ if(${BUILD_WITH_NURAFT}) cat("${CMAKE_SUPPORT_DIR}/nuraft_CMakeLists.txt.in" ${CONTRIB_TMP_FILE}) endif(${BUILD_WITH_NURAFT}) +# iconv +if(${BUILD_WITH_ICONV}) + cat("${CMAKE_SUPPORT_DIR}/iconv_CMakeLists.txt.in" ${CONTRIB_TMP_FILE}) +endif(${BUILD_WITH_ICONV}) + # download dependencies configure_file(${CONTRIB_TMP_FILE} "${CMAKE_CONTRIB_DIR}/deps-download/CMakeLists.txt") execute_process(COMMAND "${CMAKE_COMMAND}" -G "${CMAKE_GENERATOR}" . @@ -208,14 +213,10 @@ endif(${BUILD_WITH_TRAFT}) # LIBUV if(${BUILD_WITH_UV}) - if (NOT ${CMAKE_SYSTEM_NAME} MATCHES "Windows") - MESSAGE("Windows need set no-sign-compare") - add_compile_options(-Wno-sign-compare) - endif () - if (${CMAKE_SYSTEM_NAME} MATCHES "Windows") - file(READ "libuv/include/uv.h" CONTENTS) - string(REGEX REPLACE "/([\r]*)\nstruct uv_tcp_s {" "/\\1\ntypedef BOOL (PASCAL *LPFN_CONNECTEX) (SOCKET s, const struct sockaddr* name, int namelen, PVOID lpSendBuffer, DWORD dwSendDataLength,LPDWORD lpdwBytesSent, LPOVERLAPPED lpOverlapped);\\1\nstruct uv_tcp_s {" CONTENTS_NEW "${CONTENTS}") - file(WRITE "libuv/include/uv.h" "${CONTENTS_NEW}") + if (${TD_WINDOWS}) + file(READ "libuv/include/uv.h" CONTENTS) + string(REGEX REPLACE "/([\r]*)\nstruct uv_tcp_s {" "/\\1\ntypedef BOOL (PASCAL *LPFN_CONNECTEX) (SOCKET s, const struct sockaddr* name, int namelen, PVOID lpSendBuffer, DWORD dwSendDataLength,LPDWORD lpdwBytesSent, LPOVERLAPPED lpOverlapped);\\1\nstruct uv_tcp_s {" CONTENTS_NEW "${CONTENTS}") + file(WRITE "libuv/include/uv.h" "${CONTENTS_NEW}") endif () add_subdirectory(libuv) endif(${BUILD_WITH_UV}) @@ -249,10 +250,14 @@ endif(${BUILD_WITH_SQLITE}) # pthread if(${BUILD_PTHREAD}) - ADD_DEFINITIONS("-DPTW32_STATIC_LIB") - add_subdirectory(pthread-win32) + add_definitions(-DPTW32_STATIC_LIB) + add_subdirectory(pthread) endif(${BUILD_PTHREAD}) +# iconv +if(${BUILD_WITH_ICONV}) + add_subdirectory(iconv) +endif(${BUILD_WITH_ICONV}) # ================================================================================================ # Build test diff --git a/include/os/osFile.h b/include/os/osFile.h index 209cecedf8..508a522679 100644 --- a/include/os/osFile.h +++ b/include/os/osFile.h @@ -22,15 +22,6 @@ extern "C" { #include "osSocket.h" -#if defined(WINDOWS) -typedef int32_t FileFd; -typedef int32_t SocketFd; -#else -typedef int32_t FileFd; -typedef int32_t SocketFd; -#endif - -int64_t taosRead(FileFd fd, void *buf, int64_t count); // If the error is in a third-party library, place this header file under the third-party library header file. #ifndef ALLOW_FORBID_FUNC #define open OPEN_FUNC_TAOS_FORBID @@ -42,6 +33,7 @@ int64_t taosRead(FileFd fd, void *buf, int64_t count); #define close CLOSE_FUNC_TAOS_FORBID #define fclose FCLOSE_FUNC_TAOS_FORBID #define fsync FSYNC_FUNC_TAOS_FORBID + #define getline GETLINE_FUNC_TAOS_FORBID // #define fflush FFLUSH_FUNC_TAOS_FORBID #endif @@ -49,15 +41,6 @@ int64_t taosRead(FileFd fd, void *buf, int64_t count); #define PATH_MAX 256 #endif -typedef int32_t FileFd; - -typedef struct TdFile { - pthread_rwlock_t rwlock; - int refId; - FileFd fd; - FILE *fp; -} * TdFilePtr, TdFile; - typedef struct TdFile *TdFilePtr; #define TD_FILE_CTEATE 0x0001 @@ -95,10 +78,6 @@ int64_t taosPReadFile(TdFilePtr pFile, void *buf, int64_t count, int64_t offset) int64_t taosWriteFile(TdFilePtr pFile, const void *buf, int64_t count); void taosFprintfFile(TdFilePtr pFile, const char *format, ...); -#if defined(WINDOWS) -#define __restrict__ -#endif // WINDOWS - int64_t taosGetLineFile(TdFilePtr pFile, char ** __restrict__ ptrBuf); int32_t taosEOFFile(TdFilePtr pFile); @@ -111,15 +90,7 @@ int32_t taosRemoveFile(const char *path); void taosGetTmpfilePath(const char *inputTmpDir, const char *fileNamePrefix, char *dstPath); -#if defined(_TD_DARWIN_64) -typedef int32_t SocketFd; - -int64_t taosSendFile(SocketFd fdDst, FileFd pFileSrc, int64_t *offset, int64_t size); -int64_t taosFSendFile(FILE *pFileOut, FILE *pFileIn, int64_t *offset, int64_t size); -#else -int64_t taosSendFile(SocketFd fdDst, TdFilePtr pFileSrc, int64_t *offset, int64_t size); int64_t taosFSendFile(TdFilePtr pFileOut, TdFilePtr pFileIn, int64_t *offset, int64_t size); -#endif void *taosMmapReadOnlyFile(TdFilePtr pFile, int64_t length); bool taosValidFile(TdFilePtr pFile); diff --git a/include/os/osString.h b/include/os/osString.h index 88160dd69e..80e925e18d 100644 --- a/include/os/osString.h +++ b/include/os/osString.h @@ -20,16 +20,19 @@ extern "C" { #endif +// If the error is in a third-party library, place this header file under the third-party library header file. +#ifndef ALLOW_FORBID_FUNC + #define iconv_open ICONV_OPEN_FUNC_TAOS_FORBID + #define iconv_close ICONV_CLOSE_FUNC_TAOS_FORBID + #define iconv ICONV_FUNC_TAOS_FORBID +#endif + +typedef int32_t TdUcs4; + #if defined(_TD_WINDOWS_64) || defined(_TD_WINDOWS_32) #define tstrdup(str) _strdup(str) - #define tstrndup(str, size) _strndup(str, size) - int32_t tgetline(char **lineptr, size_t *n, FILE *stream); - int32_t twcslen(const wchar_t *wcs); #else #define tstrdup(str) strdup(str) - #define tstrndup(str, size) strndup(str, size) - #define tgetline(lineptr, n, stream) getline(lineptr, n, stream) - #define twcslen wcslen #endif #define tstrncpy(dst, src, size) \ @@ -38,12 +41,12 @@ extern "C" { (dst)[(size)-1] = 0; \ } while (0) +int32_t taosUcs4len(TdUcs4 *ucs4); int64_t taosStr2int64(const char *str); -// USE_LIBICONV -int32_t taosUcs4ToMbs(void *ucs4, int32_t ucs4_max_len, char *mbs); -bool taosMbsToUcs4(const char *mbs, size_t mbs_len, char *ucs4, int32_t ucs4_max_len, int32_t *len); -int32_t tasoUcs4Compare(void *f1_ucs4, void *f2_ucs4, int32_t bytes, int8_t ncharSize); +int32_t taosUcs4ToMbs(TdUcs4 *ucs4, int32_t ucs4_max_len, char *mbs); +bool taosMbsToUcs4(const char *mbs, size_t mbs_len, TdUcs4 *ucs4, int32_t ucs4_max_len, int32_t *len); +int32_t tasoUcs4Compare(TdUcs4 *f1_ucs4, TdUcs4 *f2_ucs4, int32_t bytes); bool taosValidateEncodec(const char *encodec); #ifdef __cplusplus diff --git a/include/util/tdef.h b/include/util/tdef.h index 41a61ceb55..a59a23b5d0 100644 --- a/include/util/tdef.h +++ b/include/util/tdef.h @@ -41,7 +41,7 @@ extern const int32_t TYPE_BYTES[15]; #define DOUBLE_BYTES sizeof(double) #define POINTER_BYTES sizeof(void *) // 8 by default assert(sizeof(ptrdiff_t) == sizseof(void*) #define TSDB_KEYSIZE sizeof(TSKEY) -#define TSDB_NCHAR_SIZE sizeof(int32_t) +#define TSDB_NCHAR_SIZE sizeof(TdUcs4) // NULL definition #define TSDB_DATA_BOOL_NULL 0x02 diff --git a/source/common/src/tvariant.c b/source/common/src/tvariant.c index af6152d3f4..703f4402ef 100644 --- a/source/common/src/tvariant.c +++ b/source/common/src/tvariant.c @@ -343,7 +343,7 @@ int32_t taosVariantToString(SVariant *pVar, char *dst) { case TSDB_DATA_TYPE_NCHAR: { dst[0] = '\''; - taosUcs4ToMbs(pVar->wpz, (twcslen(pVar->wpz) + 1) * TSDB_NCHAR_SIZE, dst + 1); + taosUcs4ToMbs(pVar->wpz, (taosUcs4len(pVar->wpz) + 1) * TSDB_NCHAR_SIZE, dst + 1); int32_t len = (int32_t)strlen(dst); dst[len] = '\''; dst[len + 1] = 0; @@ -460,8 +460,8 @@ static int32_t toNchar(SVariant *pVariant, char **pDest, int32_t *pDestSize) { } if (*pDest == pVariant->pz) { - wchar_t *pWStr = calloc(1, (nLen + 1) * TSDB_NCHAR_SIZE); - bool ret = taosMbsToUcs4(pDst, nLen, (char *)pWStr, (nLen + 1) * TSDB_NCHAR_SIZE, NULL); + TdUcs4 *pWStr = calloc(1, (nLen + 1) * TSDB_NCHAR_SIZE); + bool ret = taosMbsToUcs4(pDst, nLen, pWStr, (nLen + 1) * TSDB_NCHAR_SIZE, NULL); if (!ret) { tfree(pWStr); return -1; @@ -473,7 +473,7 @@ static int32_t toNchar(SVariant *pVariant, char **pDest, int32_t *pDestSize) { } pVariant->wpz = pWStr; - *pDestSize = twcslen(pVariant->wpz); + *pDestSize = taosUcs4len(pVariant->wpz); // shrink the allocate memory, no need to check here. char *tmp = realloc(pVariant->wpz, (*pDestSize + 1) * TSDB_NCHAR_SIZE); @@ -483,7 +483,7 @@ static int32_t toNchar(SVariant *pVariant, char **pDest, int32_t *pDestSize) { } else { int32_t output = 0; - bool ret = taosMbsToUcs4(pDst, nLen, *pDest, (nLen + 1) * TSDB_NCHAR_SIZE, &output); + bool ret = taosMbsToUcs4(pDst, nLen, (TdUcs4*)*pDest, (nLen + 1) * TSDB_NCHAR_SIZE, &output); if (!ret) { return -1; } diff --git a/source/libs/parser/src/parInsert.c b/source/libs/parser/src/parInsert.c index 6db3abb9d6..a37820634f 100644 --- a/source/libs/parser/src/parInsert.c +++ b/source/libs/parser/src/parInsert.c @@ -622,7 +622,7 @@ static FORCE_INLINE int32_t MemRowAppend(const void* value, int32_t len, void* p // if the converted output len is over than pColumnModel->bytes, return error: 'Argument list too long' int32_t output = 0; const char* rowEnd = tdRowEnd(rb->pBuf); - if (!taosMbsToUcs4(value, len, (char*)varDataVal(rowEnd), pa->schema->bytes - VARSTR_HEADER_SIZE, &output)) { + if (!taosMbsToUcs4(value, len, (TdUcs4*)varDataVal(rowEnd), pa->schema->bytes - VARSTR_HEADER_SIZE, &output)) { return TSDB_CODE_TSC_SQL_SYNTAX_ERROR; } varDataSetLen(rowEnd, output); @@ -725,7 +725,7 @@ static int32_t KvRowAppend(const void *value, int32_t len, void *param) { } else if (TSDB_DATA_TYPE_NCHAR == type) { // if the converted output len is over than pColumnModel->bytes, return error: 'Argument list too long' int32_t output = 0; - if (!taosMbsToUcs4(value, len, varDataVal(pa->buf), pa->schema->bytes - VARSTR_HEADER_SIZE, &output)) { + if (!taosMbsToUcs4(value, len, (TdUcs4*)varDataVal(pa->buf), pa->schema->bytes - VARSTR_HEADER_SIZE, &output)) { return TSDB_CODE_TSC_SQL_SYNTAX_ERROR; } diff --git a/source/libs/scalar/src/filter.c b/source/libs/scalar/src/filter.c index 33ec8ab6ef..58b5c8340a 100644 --- a/source/libs/scalar/src/filter.c +++ b/source/libs/scalar/src/filter.c @@ -1813,7 +1813,7 @@ int32_t fltInitValFieldData(SFilterInfo *info) { if(type == TSDB_DATA_TYPE_NCHAR && (unit->compare.optr == OP_TYPE_MATCH || unit->compare.optr == OP_TYPE_NMATCH)){ char newValData[TSDB_REGEX_STRING_DEFAULT_LEN * TSDB_NCHAR_SIZE + VARSTR_HEADER_SIZE] = {0}; - int32_t len = taosUcs4ToMbs(varDataVal(fi->data), varDataLen(fi->data), varDataVal(newValData)); + int32_t len = taosUcs4ToMbs((TdUcs4*)varDataVal(fi->data), varDataLen(fi->data), varDataVal(newValData)); if (len < 0){ qError("filterInitValFieldData taosUcs4ToMbs error 1"); return TSDB_CODE_QRY_APP_ERROR; @@ -2992,7 +2992,7 @@ bool filterExecuteImplMisc(void *pinfo, int32_t numOfRows, int8_t** p, SColumnDa if(info->cunits[uidx].dataType == TSDB_DATA_TYPE_NCHAR && (info->cunits[uidx].optr == OP_TYPE_MATCH || info->cunits[uidx].optr == OP_TYPE_NMATCH)){ char *newColData = calloc(info->cunits[uidx].dataSize * TSDB_NCHAR_SIZE + VARSTR_HEADER_SIZE, 1); - int32_t len = taosUcs4ToMbs(varDataVal(colData), varDataLen(colData), varDataVal(newColData)); + int32_t len = taosUcs4ToMbs((TdUcs4*)varDataVal(colData), varDataLen(colData), varDataVal(newColData)); if (len < 0){ qError("castConvert1 taosUcs4ToMbs error"); }else{ @@ -3052,7 +3052,7 @@ bool filterExecuteImpl(void *pinfo, int32_t numOfRows, int8_t** p, SColumnDataAg } else { if(cunit->dataType == TSDB_DATA_TYPE_NCHAR && (cunit->optr == OP_TYPE_MATCH || cunit->optr == OP_TYPE_NMATCH)){ char *newColData = calloc(cunit->dataSize * TSDB_NCHAR_SIZE + VARSTR_HEADER_SIZE, 1); - int32_t len = taosUcs4ToMbs(varDataVal(colData), varDataLen(colData), varDataVal(newColData)); + int32_t len = taosUcs4ToMbs((TdUcs4*)varDataVal(colData), varDataLen(colData), varDataVal(newColData)); if (len < 0){ qError("castConvert1 taosUcs4ToMbs error"); }else{ @@ -3433,7 +3433,7 @@ int32_t filterConverNcharColumns(SFilterInfo* info, int32_t rows, bool *gotNchar varDataCopy(dst, src); continue; } - bool ret = taosMbsToUcs4(varDataVal(src), varDataLen(src), varDataVal(dst), bufSize, &len); + bool ret = taosMbsToUcs4(varDataVal(src), varDataLen(src), (TdUcs4*)varDataVal(dst), bufSize, &len); if(!ret) { qError("filterConverNcharColumns taosMbsToUcs4 error"); return TSDB_CODE_FAILED; diff --git a/source/libs/scalar/src/sclvector.c b/source/libs/scalar/src/sclvector.c index 0fe7bf6e36..17ac9b19fd 100644 --- a/source/libs/scalar/src/sclvector.c +++ b/source/libs/scalar/src/sclvector.c @@ -316,7 +316,7 @@ int32_t vectorConvertFromVarData(SScalarParam* pIn, SScalarParam* pOut, int32_t tmp = realloc(tmp, bufSize); } - int len = taosUcs4ToMbs(varDataVal(pIn->data), varDataLen(pIn->data), tmp); + int len = taosUcs4ToMbs((TdUcs4*)varDataVal(pIn->data), varDataLen(pIn->data), tmp); if (len < 0){ sclError("castConvert taosUcs4ToMbs error 1"); tfree(tmp); diff --git a/source/os/CMakeLists.txt b/source/os/CMakeLists.txt index ed240015d4..eea3903911 100644 --- a/source/os/CMakeLists.txt +++ b/source/os/CMakeLists.txt @@ -5,9 +5,14 @@ target_include_directories( PUBLIC "${CMAKE_SOURCE_DIR}/include/os" PUBLIC "${CMAKE_SOURCE_DIR}/include" PUBLIC "${CMAKE_SOURCE_DIR}/include/util" - PUBLIC "${CMAKE_SOURCE_DIR}/contrib/pthread-win32" + PUBLIC "${CMAKE_SOURCE_DIR}/contrib/pthread" PUBLIC "${CMAKE_SOURCE_DIR}/contrib/gnuregex" ) +# iconv +find_path(IconvApiIncludes iconv.h PATHS) +if(NOT IconvApiIncludes) + add_definitions(-DDISALLOW_NCHAR_WITHOUT_ICONV) +endif () target_link_libraries( os pthread dl rt m ) diff --git a/source/os/src/osFile.c b/source/os/src/osFile.c index bcbd95e160..5e859de5d6 100644 --- a/source/os/src/osFile.c +++ b/source/os/src/osFile.c @@ -46,6 +46,23 @@ extern int openU(const char *, int, ...); /* MsvcLibX UTF-8 version of open */ #define O_TEXT LINUX_FILE_NO_TEXT_OPTION #endif +#if defined(WINDOWS) +typedef int32_t FileFd; +typedef int32_t SocketFd; +#else +typedef int32_t FileFd; +typedef int32_t SocketFd; +#endif + +typedef int32_t FileFd; + +typedef struct TdFile { + pthread_rwlock_t rwlock; + int refId; + FileFd fd; + FILE *fp; +} * TdFilePtr, TdFile; + #define FILE_WITH_LOCK 1 void taosGetTmpfilePath(const char *inputTmpDir, const char *fileNamePrefix, char *dstPath) { diff --git a/source/os/src/osLocale.c b/source/os/src/osLocale.c index e9d6ed7c54..5f12f9cd3d 100644 --- a/source/os/src/osLocale.c +++ b/source/os/src/osLocale.c @@ -81,7 +81,7 @@ void taosSetSystemLocale(const char *inLocale, const char *inCharSet) { } if (!taosValidateEncodec(inCharSet)) { - printf("Invalid charset:%s, please set the valid charset in config file", inCharSet); + printf("Invalid charset:%s, please set the valid charset in config file\n", inCharSet); exit(-1); } } diff --git a/source/os/src/osString.c b/source/os/src/osString.c index 1052d108af..18f6ceb5a0 100644 --- a/source/os/src/osString.c +++ b/source/os/src/osString.c @@ -13,287 +13,32 @@ * along with this program. If not, see . */ +#define ALLOW_FORBID_FUNC #define _DEFAULT_SOURCE #include "os.h" -#include "tdef.h" -#include -#include +// #include "tdef.h" +// #include +// #include +#ifndef DISALLOW_NCHAR_WITHOUT_ICONV +#include "iconv.h" +#endif int64_t taosStr2int64(const char *str) { char *endptr = NULL; return strtoll(str, &endptr, 10); } -#ifdef USE_LIBICONV -#include "iconv.h" - -int32_t taosUcs4ToMbs(void *ucs4, int32_t ucs4_max_len, char *mbs) { - iconv_t cd = iconv_open(tsCharset, DEFAULT_UNICODE_ENCODEC); - size_t ucs4_input_len = ucs4_max_len; - size_t outLen = ucs4_max_len; - if (iconv(cd, (char **)&ucs4, &ucs4_input_len, &mbs, &outLen) == -1) { - iconv_close(cd); - return -1; - } - - iconv_close(cd); - return (int32_t)(ucs4_max_len - outLen); -} - -bool taosMbsToUcs4(char *mbs, size_t mbsLength, char *ucs4, int32_t ucs4_max_len, int32_t *len) { - memset(ucs4, 0, ucs4_max_len); - iconv_t cd = iconv_open(DEFAULT_UNICODE_ENCODEC, tsCharset); - size_t ucs4_input_len = mbsLength; - size_t outLeft = ucs4_max_len; - if (iconv(cd, &mbs, &ucs4_input_len, &ucs4, &outLeft) == -1) { - iconv_close(cd); - return false; - } - - iconv_close(cd); - if (len != NULL) { - *len = (int32_t)(ucs4_max_len - outLeft); - if (*len < 0) { - return false; - } - } - - return true; -} - -bool taosValidateEncodec(const char *encodec) { - iconv_t cd = iconv_open(encodec, DEFAULT_UNICODE_ENCODEC); - if (cd == (iconv_t)(-1)) { - return false; - } - - iconv_close(cd); - return true; -} - +bool taosCheckNcharValid(void) { +#ifdef DISALLOW_NCHAR_WITHOUT_ICONV + return false; #else - -int32_t taosUcs4ToMbs(void *ucs4, int32_t ucs4_max_len, char *mbs) { - mbstate_t state = {0}; - int32_t len = (int32_t)wcsnrtombs(NULL, (const wchar_t **)&ucs4, ucs4_max_len / 4, 0, &state); - if (len < 0) { - return -1; - } - - memset(&state, 0, sizeof(state)); - len = wcsnrtombs(mbs, (const wchar_t **)&ucs4, ucs4_max_len / 4, (size_t)len, &state); - if (len < 0) { - return -1; - } - - return len; -} - -bool taosMbsToUcs4(const char *mbs, size_t mbsLength, char *ucs4, int32_t ucs4_max_len, int32_t *len) { - memset(ucs4, 0, ucs4_max_len); - mbstate_t state = {0}; - int32_t retlen = mbsnrtowcs((wchar_t *)ucs4, (const char **)&mbs, mbsLength, ucs4_max_len / 4, &state); - *len = retlen; - - return retlen >= 0; -} - -bool taosValidateEncodec(const char *encodec) { return true; -} - #endif - -#if defined(_TD_WINDOWS_64) || defined(_TD_WINDOWS_32) - -/* - * windows implementation - */ - -#ifdef HAVE_CONFIG_H -#include -#endif - -#include -#include -#include -#include -#include - -#if STDC_HEADERS -#include -#else -char *malloc(), *realloc(); -#endif - -/* Always add at least this many bytes when extending the buffer. */ -#define MIN_CHUNK 64 - -/* Read up to (and including) a TERMINATOR from STREAM into *LINEPTR -+ OFFSET (and null-terminate it). *LINEPTR is a pointer returned from -malloc (or NULL), pointing to *N characters of space. It is realloc'd -as necessary. Return the number of characters read (not including the -null terminator), or -1 on error or EOF. On a -1 return, the caller -should check feof(), if not then errno has been set to indicate -the error. */ - -int32_t getstr(char **lineptr, size_t *n, FILE *stream, char terminator, int32_t offset) { - int32_t nchars_avail; /* Allocated but unused chars in *LINEPTR. */ - char * read_pos; /* Where we're reading into *LINEPTR. */ - int32_t ret; - - if (!lineptr || !n || !stream) { - errno = EINVAL; - return -1; - } - - if (!*lineptr) { - *n = MIN_CHUNK; - *lineptr = malloc(*n); - if (!*lineptr) { - errno = ENOMEM; - return -1; - } - } - - nchars_avail = (int32_t)(*n - offset); - read_pos = *lineptr + offset; - - for (;;) { - int32_t save_errno; - register int32_t c = getc(stream); - - save_errno = errno; - - /* We always want at least one char left in the buffer, since we - always (unless we get an error while reading the first char) - NUL-terminate the line buffer. */ - - assert((*lineptr + *n) == (read_pos + nchars_avail)); - if (nchars_avail < 2) { - if (*n > MIN_CHUNK) - *n *= 2; - else - *n += MIN_CHUNK; - - nchars_avail = (int32_t)(*n + *lineptr - read_pos); - char* lineptr1 = realloc(*lineptr, *n); - if (!lineptr1) { - errno = ENOMEM; - return -1; - } - *lineptr = lineptr1; - - read_pos = *n - nchars_avail + *lineptr; - assert((*lineptr + *n) == (read_pos + nchars_avail)); - } - - if (ferror(stream)) { - /* Might like to return partial line, but there is no - place for us to store errno. And we don't want to just - lose errno. */ - errno = save_errno; - return -1; - } - - if (c == EOF) { - /* Return partial line, if any. */ - if (read_pos == *lineptr) - return -1; - else - break; - } - - *read_pos++ = c; - nchars_avail--; - - if (c == terminator) /* Return the line. */ - break; - } - - /* Done - NUL terminate and return the number of chars read. */ - *read_pos = '\0'; - - ret = (int32_t)(read_pos - (*lineptr + offset)); - return ret; } -int32_t tgetline(char **lineptr, size_t *n, FILE *stream) { return getstr(lineptr, n, stream, '\n', 0); } - - -/* - * Get next token from string *stringp, where tokens are possibly-empty - * strings separated by characters from delim. - * - * Writes NULs into the string at *stringp to end tokens. - * delim need not remain constant from call to call. - * On return, *stringp points past the last NUL written (if there might - * be further tokens), or is NULL (if there are definitely no moretokens). - * - * If *stringp is NULL, strsep returns NULL. - */ -char *strsep(char **stringp, const char *delim) { - char * s; - const char *spanp; - int32_t c, sc; - char *tok; - if ((s = *stringp) == NULL) - return (NULL); - for (tok = s;;) { - c = *s++; - spanp = delim; - do { - if ((sc = *spanp++) == c) { - if (c == 0) - s = NULL; - else - s[-1] = 0; - *stringp = s; - return (tok); - } - } while (sc != 0); - } - /* NOTREACHED */ -} - -char *getpass(const char *prefix) { - static char passwd[TSDB_PASSWORD_LEN] = {0}; - memset(passwd, 0, TSDB_PASSWORD_LEN); - //printf("%s", prefix); - - int32_t index = 0; - char ch; - while (index < TSDB_PASSWORD_LEN) { - ch = getch(); - if (ch == '\n' || ch == '\r') { - break; - } else { - passwd[index++] = ch; - } - } - - return passwd; -} - -int32_t twcslen(const wchar_t *wcs) { - int32_t *wstr = (int32_t *)wcs; - if (NULL == wstr) { - return 0; - } - - int32_t n = 0; - while (1) { - if (0 == *wstr++) { - break; - } - n++; - } - - return n; -} -int32_t tasoUcs4Compare(void *f1_ucs4, void *f2_ucs4, int32_t bytes) { - for (int32_t i = 0; i < bytes; i += TSDB_NCHAR_SIZE) { +int32_t tasoUcs4Compare(TdUcs4 *f1_ucs4, TdUcs4 *f2_ucs4, int32_t bytes) { + for (int32_t i = 0; i < bytes; i += sizeof(TdUcs4)) { int32_t f1 = *(int32_t *)((char *)f1_ucs4 + i); int32_t f2 = *(int32_t *)((char *)f2_ucs4 + i); @@ -327,38 +72,413 @@ int32_t tasoUcs4Compare(void *f1_ucs4, void *f2_ucs4, int32_t bytes) { #endif } -/* Copy memory to memory until the specified number of bytes -has been copied, return pointer to following byte. -Overlap is NOT handled correctly. */ -void *mempcpy(void *dest, const void *src, size_t len) { - return (char*)memcpy(dest, src, len) + len; -} - -/* Copy SRC to DEST, returning the address of the terminating '\0' in DEST. */ -char *stpcpy (char *dest, const char *src) { - size_t len = strlen (src); - return (char*)memcpy(dest, src, len + 1) + len; -} - -/* Copy no more than N characters of SRC to DEST, returning the address of - the terminating '\0' in DEST, if any, or else DEST + N. */ -char *stpncpy (char *dest, const char *src, size_t n) { - size_t size = strnlen (src, n); - memcpy (dest, src, size); - dest += size; - if (size == n) - return dest; - return memset (dest, '\0', n - size); -} - +int32_t taosUcs4ToMbs(TdUcs4 *ucs4, int32_t ucs4_max_len, char *mbs) { +#ifdef DISALLOW_NCHAR_WITHOUT_ICONV + return -1; #else + iconv_t cd = iconv_open(tsCharset, DEFAULT_UNICODE_ENCODEC); + size_t ucs4_input_len = ucs4_max_len; + size_t outLen = ucs4_max_len; + if (iconv(cd, (char **)&ucs4, &ucs4_input_len, &mbs, &outLen) == -1) { + iconv_close(cd); + return -1; + } -/* - * linux and darwin implementation - */ - -int32_t tasoUcs4Compare(void *f1_ucs4, void *f2_ucs4, int32_t bytes, int8_t ncharSize) { - return wcsncmp((wchar_t *)f1_ucs4, (wchar_t *)f2_ucs4, bytes / ncharSize); + iconv_close(cd); + return (int32_t)(ucs4_max_len - outLen); +#endif } +bool taosMbsToUcs4(const char *mbs, size_t mbsLength, TdUcs4 *ucs4, int32_t ucs4_max_len, int32_t *len) { +#ifdef DISALLOW_NCHAR_WITHOUT_ICONV + return -1; +#else + memset(ucs4, 0, ucs4_max_len); + iconv_t cd = iconv_open(DEFAULT_UNICODE_ENCODEC, tsCharset); + size_t ucs4_input_len = mbsLength; + size_t outLeft = ucs4_max_len; + if (iconv(cd, (char**)&mbs, &ucs4_input_len, (char**)&ucs4, &outLeft) == -1) { + iconv_close(cd); + return false; + } + + iconv_close(cd); + if (len != NULL) { + *len = (int32_t)(ucs4_max_len - outLeft); + if (*len < 0) { + return false; + } + } + + return true; #endif +} + +bool taosValidateEncodec(const char *encodec) { +#ifdef DISALLOW_NCHAR_WITHOUT_ICONV + return false; +#else + iconv_t cd = iconv_open(encodec, DEFAULT_UNICODE_ENCODEC); + if (cd == (iconv_t)(-1)) { + return false; + } + + iconv_close(cd); + return true; +#endif +} + +int32_t taosUcs4len(TdUcs4 *ucs4) { + TdUcs4 *wstr = (TdUcs4 *)ucs4; + if (NULL == wstr) { + return 0; + } + + int32_t n = 0; + while (1) { + if (0 == *wstr++) { + break; + } + n++; + } + + return n; +} + +// #ifdef USE_LIBICONV +// #include "iconv.h" + +// int32_t taosUcs4ToMbs(void *ucs4, int32_t ucs4_max_len, char *mbs) { +// iconv_t cd = iconv_open(tsCharset, DEFAULT_UNICODE_ENCODEC); +// size_t ucs4_input_len = ucs4_max_len; +// size_t outLen = ucs4_max_len; +// if (iconv(cd, (char **)&ucs4, &ucs4_input_len, &mbs, &outLen) == -1) { +// iconv_close(cd); +// return -1; +// } + +// iconv_close(cd); +// return (int32_t)(ucs4_max_len - outLen); +// } + +// bool taosMbsToUcs4(char *mbs, size_t mbsLength, char *ucs4, int32_t ucs4_max_len, int32_t *len) { +// memset(ucs4, 0, ucs4_max_len); +// iconv_t cd = iconv_open(DEFAULT_UNICODE_ENCODEC, tsCharset); +// size_t ucs4_input_len = mbsLength; +// size_t outLeft = ucs4_max_len; +// if (iconv(cd, &mbs, &ucs4_input_len, &ucs4, &outLeft) == -1) { +// iconv_close(cd); +// return false; +// } + +// iconv_close(cd); +// if (len != NULL) { +// *len = (int32_t)(ucs4_max_len - outLeft); +// if (*len < 0) { +// return false; +// } +// } + +// return true; +// } + +// bool taosValidateEncodec(const char *encodec) { +// iconv_t cd = iconv_open(encodec, DEFAULT_UNICODE_ENCODEC); +// if (cd == (iconv_t)(-1)) { +// return false; +// } + +// iconv_close(cd); +// return true; +// } + +// #else + +// int32_t taosUcs4ToMbs(void *ucs4, int32_t ucs4_max_len, char *mbs) { +// mbstate_t state = {0}; +// int32_t len = (int32_t)wcsnrtombs(NULL, (const wchar_t **)&ucs4, ucs4_max_len / 4, 0, &state); +// if (len < 0) { +// return -1; +// } + +// memset(&state, 0, sizeof(state)); +// len = wcsnrtombs(mbs, (const wchar_t **)&ucs4, ucs4_max_len / 4, (size_t)len, &state); +// if (len < 0) { +// return -1; +// } + +// return len; +// } + +// bool taosMbsToUcs4(const char *mbs, size_t mbsLength, char *ucs4, int32_t ucs4_max_len, int32_t *len) { +// memset(ucs4, 0, ucs4_max_len); +// mbstate_t state = {0}; +// int32_t retlen = mbsnrtowcs((wchar_t *)ucs4, (const char **)&mbs, mbsLength, ucs4_max_len / 4, &state); +// *len = retlen; + +// return retlen >= 0; +// } + +// bool taosValidateEncodec(const char *encodec) { +// return true; +// } + +// #endif + +// #if defined(_TD_WINDOWS_64) || defined(_TD_WINDOWS_32) + +// /* +// * windows implementation +// */ + +// #ifdef HAVE_CONFIG_H +// #include +// #endif + +// #include +// #include +// #include +// #include +// #include + +// #if STDC_HEADERS +// #include +// #else +// char *malloc(), *realloc(); +// #endif + +// /* Always add at least this many bytes when extending the buffer. */ +// #define MIN_CHUNK 64 + +// /* Read up to (and including) a TERMINATOR from STREAM into *LINEPTR +// + OFFSET (and null-terminate it). *LINEPTR is a pointer returned from +// malloc (or NULL), pointing to *N characters of space. It is realloc'd +// as necessary. Return the number of characters read (not including the +// null terminator), or -1 on error or EOF. On a -1 return, the caller +// should check feof(), if not then errno has been set to indicate +// the error. */ + +// int32_t getstr(char **lineptr, size_t *n, FILE *stream, char terminator, int32_t offset) { +// int32_t nchars_avail; /* Allocated but unused chars in *LINEPTR. */ +// char * read_pos; /* Where we're reading into *LINEPTR. */ +// int32_t ret; + +// if (!lineptr || !n || !stream) { +// errno = EINVAL; +// return -1; +// } + +// if (!*lineptr) { +// *n = MIN_CHUNK; +// *lineptr = malloc(*n); +// if (!*lineptr) { +// errno = ENOMEM; +// return -1; +// } +// } + +// nchars_avail = (int32_t)(*n - offset); +// read_pos = *lineptr + offset; + +// for (;;) { +// int32_t save_errno; +// register int32_t c = getc(stream); + +// save_errno = errno; + +// /* We always want at least one char left in the buffer, since we +// always (unless we get an error while reading the first char) +// NUL-terminate the line buffer. */ + +// assert((*lineptr + *n) == (read_pos + nchars_avail)); +// if (nchars_avail < 2) { +// if (*n > MIN_CHUNK) +// *n *= 2; +// else +// *n += MIN_CHUNK; + +// nchars_avail = (int32_t)(*n + *lineptr - read_pos); +// char* lineptr1 = realloc(*lineptr, *n); +// if (!lineptr1) { +// errno = ENOMEM; +// return -1; +// } +// *lineptr = lineptr1; + +// read_pos = *n - nchars_avail + *lineptr; +// assert((*lineptr + *n) == (read_pos + nchars_avail)); +// } + +// if (ferror(stream)) { +// /* Might like to return partial line, but there is no +// place for us to store errno. And we don't want to just +// lose errno. */ +// errno = save_errno; +// return -1; +// } + +// if (c == EOF) { +// /* Return partial line, if any. */ +// if (read_pos == *lineptr) +// return -1; +// else +// break; +// } + +// *read_pos++ = c; +// nchars_avail--; + +// if (c == terminator) /* Return the line. */ +// break; +// } + +// /* Done - NUL terminate and return the number of chars read. */ +// *read_pos = '\0'; + +// ret = (int32_t)(read_pos - (*lineptr + offset)); +// return ret; +// } + +// int32_t tgetline(char **lineptr, size_t *n, FILE *stream) { return getstr(lineptr, n, stream, '\n', 0); } + + +// /* +// * Get next token from string *stringp, where tokens are possibly-empty +// * strings separated by characters from delim. +// * +// * Writes NULs into the string at *stringp to end tokens. +// * delim need not remain constant from call to call. +// * On return, *stringp points past the last NUL written (if there might +// * be further tokens), or is NULL (if there are definitely no moretokens). +// * +// * If *stringp is NULL, strsep returns NULL. +// */ +// char *strsep(char **stringp, const char *delim) { +// char * s; +// const char *spanp; +// int32_t c, sc; +// char *tok; +// if ((s = *stringp) == NULL) +// return (NULL); +// for (tok = s;;) { +// c = *s++; +// spanp = delim; +// do { +// if ((sc = *spanp++) == c) { +// if (c == 0) +// s = NULL; +// else +// s[-1] = 0; +// *stringp = s; +// return (tok); +// } +// } while (sc != 0); +// } +// /* NOTREACHED */ +// } + +// char *getpass(const char *prefix) { +// static char passwd[TSDB_PASSWORD_LEN] = {0}; +// memset(passwd, 0, TSDB_PASSWORD_LEN); +// //printf("%s", prefix); + +// int32_t index = 0; +// char ch; +// while (index < TSDB_PASSWORD_LEN) { +// ch = getch(); +// if (ch == '\n' || ch == '\r') { +// break; +// } else { +// passwd[index++] = ch; +// } +// } + +// return passwd; +// } + +// int32_t twcslen(const wchar_t *wcs) { +// int32_t *wstr = (int32_t *)wcs; +// if (NULL == wstr) { +// return 0; +// } + +// int32_t n = 0; +// while (1) { +// if (0 == *wstr++) { +// break; +// } +// n++; +// } + +// return n; +// } +// int32_t tasoUcs4Compare(void *f1_ucs4, void *f2_ucs4, int32_t bytes) { +// for (int32_t i = 0; i < bytes; i += TSDB_NCHAR_SIZE) { +// int32_t f1 = *(int32_t *)((char *)f1_ucs4 + i); +// int32_t f2 = *(int32_t *)((char *)f2_ucs4 + i); + +// if ((f1 == 0 && f2 != 0) || (f1 != 0 && f2 == 0)) { +// return f1 - f2; +// } else if (f1 == 0 && f2 == 0) { +// return 0; +// } + +// if (f1 != f2) { +// return f1 - f2; +// } +// } + +// return 0; + +// #if 0 +// int32_t ucs4_max_len = bytes + 4; +// char *f1_mbs = calloc(bytes, 1); +// char *f2_mbs = calloc(bytes, 1); +// if (taosUcs4ToMbs(f1_ucs4, ucs4_max_len, f1_mbs) < 0) { +// return -1; +// } +// if (taosUcs4ToMbs(f2_ucs4, ucs4_max_len, f2_mbs) < 0) { +// return -1; +// } +// int32_t ret = strcmp(f1_mbs, f2_mbs); +// free(f1_mbs); +// free(f2_mbs); +// return ret; +// #endif +// } + +// /* Copy memory to memory until the specified number of bytes +// has been copied, return pointer to following byte. +// Overlap is NOT handled correctly. */ +// void *mempcpy(void *dest, const void *src, size_t len) { +// return (char*)memcpy(dest, src, len) + len; +// } + +// /* Copy SRC to DEST, returning the address of the terminating '\0' in DEST. */ +// char *stpcpy (char *dest, const char *src) { +// size_t len = strlen (src); +// return (char*)memcpy(dest, src, len + 1) + len; +// } + +// /* Copy no more than N characters of SRC to DEST, returning the address of +// the terminating '\0' in DEST, if any, or else DEST + N. */ +// char *stpncpy (char *dest, const char *src, size_t n) { +// size_t size = strnlen (src, n); +// memcpy (dest, src, size); +// dest += size; +// if (size == n) +// return dest; +// return memset (dest, '\0', n - size); +// } + +// #else + +// /* +// * linux and darwin implementation +// */ + +// int32_t tasoUcs4Compare(void *f1_ucs4, void *f2_ucs4, int32_t bytes, int8_t ncharSize) { +// return wcsncmp((wchar_t *)f1_ucs4, (wchar_t *)f2_ucs4, bytes / ncharSize); +// } + +// #endif