Merge pull request #25471 from taosdata/feat/TD-29196

feat: add pcre2 lib to precheck geometry wkt
This commit is contained in:
Hongze Cheng 2024-05-14 17:42:15 +08:00 committed by GitHub
commit 7b35a5964e
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
9 changed files with 214 additions and 46 deletions

View File

@ -47,27 +47,20 @@ IF(${TD_WINDOWS})
MESSAGE("build wingetopt Win32")
option(
BUILD_WINGETOPT
BUILD_WINGETOPT
"If build wingetopt on Windows"
ON
)
option(
TDENGINE_3
"TDengine 3.x for taos-tools"
ON
)
option(
BUILD_CRASHDUMP
"If build crashdump on Windows"
ON
)
MESSAGE("build geos Win32")
option(
BUILD_GEOS
"If build geos on Windows"
"If build crashdump on Windows"
ON
)
@ -79,7 +72,7 @@ ENDIF ()
option(
BUILD_GEOS
"If build geos on Windows"
"If build with geos"
ON
)
@ -95,6 +88,12 @@ option(
ON
)
option(
BUILD_PCRE2
"If build with pcre2"
ON
)
option(
JEMALLOC_ENABLED
"If build with jemalloc"
@ -114,14 +113,14 @@ option(
)
option(
BUILD_WITH_LEVELDB
"If build with leveldb"
BUILD_WITH_LEVELDB
"If build with leveldb"
OFF
)
option(
BUILD_WITH_ROCKSDB
"If build with rocksdb"
BUILD_WITH_ROCKSDB
"If build with rocksdb"
ON
)
@ -170,46 +169,46 @@ ENDIF ()
option(
BUILD_WITH_SQLITE
"If build with sqlite"
"If build with sqlite"
OFF
)
option(
BUILD_WITH_BDB
"If build with BDB"
"If build with BDB"
OFF
)
option(
BUILD_WITH_LUCENE
"If build with lucene"
off
BUILD_WITH_LUCENE
"If build with lucene"
off
)
option(
BUILD_WITH_NURAFT
"If build with NuRaft"
"If build with NuRaft"
OFF
)
option(
BUILD_WITH_UV
"If build with libuv"
ON
"If build with libuv"
ON
)
option(
BUILD_WITH_UV_TRANS
"If build with libuv_trans "
ON
"If build with libuv_trans "
ON
)
IF(${TD_LINUX} MATCHES TRUE)
option(
BUILD_DEPENDENCY_TESTS
"If build dependency tests"
BUILD_DEPENDENCY_TESTS
"If build dependency tests"
ON
)
@ -217,14 +216,14 @@ ENDIF ()
option(
BUILD_DOCS
"If use doxygen build documents"
"If use doxygen build documents"
OFF
)
option(
BUILD_WITH_INVERTEDINDEX
"If use invertedIndex"
ON
"If use invertedIndex"
ON
)
option(

View File

@ -0,0 +1,13 @@
# pcre2
ExternalProject_Add(pcre2
GIT_REPOSITORY https://github.com/PCRE2Project/pcre2.git
GIT_TAG pcre2-10.43
SOURCE_DIR "${TD_CONTRIB_DIR}/pcre2"
#BINARY_DIR "${TD_CONTRIB_DIR}/pcre2"
#BUILD_IN_SOURCE TRUE
CONFIGURE_COMMAND ""
BUILD_COMMAND ""
INSTALL_COMMAND ""
TEST_COMMAND ""
)

View File

@ -121,7 +121,7 @@ if (${BUILD_CONTRIB})
cat("${TD_SUPPORT_DIR}/rocksdb_CMakeLists.txt.in" ${CONTRIB_TMP_FILE})
add_definitions(-DUSE_ROCKSDB)
endif()
else()
else()
if (NOT ${TD_LINUX})
if(${BUILD_WITH_ROCKSDB})
cat("${TD_SUPPORT_DIR}/rocksdb_CMakeLists.txt.in" ${CONTRIB_TMP_FILE})
@ -183,6 +183,11 @@ if(${BUILD_GEOS})
cat("${TD_SUPPORT_DIR}/geos_CMakeLists.txt.in" ${CONTRIB_TMP_FILE})
endif()
#
if(${BUILD_PCRE2})
cat("${TD_SUPPORT_DIR}/pcre2_CMakeLists.txt.in" ${CONTRIB_TMP_FILE})
endif()
# download dependencies
configure_file(${CONTRIB_TMP_FILE} "${TD_CONTRIB_DIR}/deps-download/CMakeLists.txt")
execute_process(COMMAND "${CMAKE_COMMAND}" -G "${CMAKE_GENERATOR}" .
@ -270,8 +275,8 @@ unset(CMAKE_PROJECT_INCLUDE_BEFORE)
# add_subdirectory(xz EXCLUDE_FROM_ALL)
# target_include_directories(
# xz
# PUBLIC ${CMAKE_CURRENT_BINARY_DIR}/xz
# PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/xz
# PUBLIC ${CMAKE_CURRENT_BINARY_DIR}/xz
# PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/xz
# )
# leveldb
@ -351,7 +356,7 @@ if (${BUILD_WITH_ROCKSDB})
rocksdb
PUBLIC $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/rocksdb/include>
)
else()
else()
if (NOT ${TD_LINUX})
MESSAGE(STATUS "ROCKSDB CXX STATUS CONFIG: " ${CMAKE_CXX_FLAGS})
MESSAGE(STATUS "ROCKSDB C STATUS CONFIG: " ${CMAKE_C_FLAGS})
@ -400,8 +405,8 @@ if (${BUILD_WITH_ROCKSDB})
rocksdb
PUBLIC $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/rocksdb/include>
)
endif()
endif()
endif()
endif()
@ -605,6 +610,10 @@ if(${BUILD_GEOS})
)
endif(${BUILD_GEOS})
if (${BUILD_PCRE2})
add_subdirectory(pcre2 EXCLUDE_FROM_ALL)
endif(${BUILD_PCRE2})
# ================================================================================================
# Build test
# ================================================================================================

View File

@ -21,6 +21,7 @@ extern "C" {
#endif
#include <geos_c.h>
#include <tpcre2.h>
typedef struct SGeosContext {
GEOSContextHandle_t handle;
@ -31,6 +32,9 @@ typedef struct SGeosContext {
GEOSWKBReader *WKBReader;
GEOSWKBWriter *WKBWriter;
pcre2_code *WKTRegex;
pcre2_match_data *WKTMatchData;
char errMsg[512];
} SGeosContext;

34
include/util/tpcre2.h Normal file
View File

@ -0,0 +1,34 @@
/*
* Copyright (c) 2019 TAOS Data, Inc. <jhtao@taosdata.com>
*
* This program is free software: you can use, redistribute, and/or modify
* it under the terms of the GNU Affero General Public License, version 3
* or later ("AGPL"), as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef _TD_ULIT_PCRE2_H_
#define _TD_ULIT_PCRE2_H_
#define PCRE2_CODE_UNIT_WIDTH 8
#include "pcre2.h"
#ifdef __cplusplus
extern "C" {
#endif
int32_t doRegComp(pcre2_code** ppRegex, pcre2_match_data** ppMatchData, const char* pattern);
int32_t doRegExec(const char* pString, pcre2_code* pRegex, pcre2_match_data* pMatchData);
void destroyRegexes(pcre2_code* pWktRegex, pcre2_match_data* pWktMatchData);
#ifdef __cplusplus
}
#endif
#endif // _TD_UTIL_PAGEDBUF_H_

View File

@ -86,6 +86,67 @@ _exit:
return code;
}
static int initWktRegex(pcre2_code **ppRegex, pcre2_match_data **ppMatchData) {
int ret = 0;
char *wktPatternWithSpace = taosMemoryCalloc(4, 1024);
sprintf(
wktPatternWithSpace,
"^( *)point( *)z?m?( *)((empty)|(\\(( *)(([-+]?[0-9]+\\.?[0-9]*)|([-+]?[0-9]*\\.?[0-9]+))(e[-+]?[0-9]+)?(( "
"*)(([-+]?[0-9]+\\.?[0-9]*)|([-+]?[0-9]*\\.?[0-9]+))(e[-+]?[0-9]+)?){1,3}( *)\\)))|linestring( *)z?m?( "
"*)((empty)|(\\(( *)(([-+]?[0-9]+\\.?[0-9]*)|([-+]?[0-9]*\\.?[0-9]+))(e[-+]?[0-9]+)?(( "
"*)(([-+]?[0-9]+\\.?[0-9]*)|([-+]?[0-9]*\\.?[0-9]+))(e[-+]?[0-9]+)?){1,3}(( *)(,)( "
"*)(([-+]?[0-9]+\\.?[0-9]*)|([-+]?[0-9]*\\.?[0-9]+))(e[-+]?[0-9]+)?(( "
"*)(([-+]?[0-9]+\\.?[0-9]*)|([-+]?[0-9]*\\.?[0-9]+))(e[-+]?[0-9]+)?){1,3}( *))*( *)\\)))|polygon( *)z?m?( "
"*)((empty)|(\\(( *)((empty)|(\\(( *)(([-+]?[0-9]+\\.?[0-9]*)|([-+]?[0-9]*\\.?[0-9]+))(e[-+]?[0-9]+)?(( "
"*)(([-+]?[0-9]+\\.?[0-9]*)|([-+]?[0-9]*\\.?[0-9]+))(e[-+]?[0-9]+)?){1,3}(( *)(,)( "
"*)(([-+]?[0-9]+\\.?[0-9]*)|([-+]?[0-9]*\\.?[0-9]+))(e[-+]?[0-9]+)?(( "
"*)(([-+]?[0-9]+\\.?[0-9]*)|([-+]?[0-9]*\\.?[0-9]+))(e[-+]?[0-9]+)?){1,3}( *))*( *)\\)))(( *)(,)( "
"*)((empty)|(\\(( *)(([-+]?[0-9]+\\.?[0-9]*)|([-+]?[0-9]*\\.?[0-9]+))(e[-+]?[0-9]+)?(( "
"*)(([-+]?[0-9]+\\.?[0-9]*)|([-+]?[0-9]*\\.?[0-9]+))(e[-+]?[0-9]+)?){1,3}(( *)(,)( "
"*)(([-+]?[0-9]+\\.?[0-9]*)|([-+]?[0-9]*\\.?[0-9]+))(e[-+]?[0-9]+)?(( "
"*)(([-+]?[0-9]+\\.?[0-9]*)|([-+]?[0-9]*\\.?[0-9]+))(e[-+]?[0-9]+)?){1,3}( *))*( *)\\)))( *))*( "
"*)\\)))|multipoint( *)z?m?( *)((empty)|(\\(( "
"*)((([-+]?[0-9]+\\.?[0-9]*)|([-+]?[0-9]*\\.?[0-9]+))(e[-+]?[0-9]+)?(( "
"*)(([-+]?[0-9]+\\.?[0-9]*)|([-+]?[0-9]*\\.?[0-9]+))(e[-+]?[0-9]+)?){1,3}|((empty)|(\\(( "
"*)(([-+]?[0-9]+\\.?[0-9]*)|([-+]?[0-9]*\\.?[0-9]+))(e[-+]?[0-9]+)?(( "
"*)(([-+]?[0-9]+\\.?[0-9]*)|([-+]?[0-9]*\\.?[0-9]+))(e[-+]?[0-9]+)?){1,3}( *)\\))))(( *)(,)( "
"*)((([-+]?[0-9]+\\.?[0-9]*)|([-+]?[0-9]*\\.?[0-9]+))(e[-+]?[0-9]+)?(( "
"*)(([-+]?[0-9]+\\.?[0-9]*)|([-+]?[0-9]*\\.?[0-9]+))(e[-+]?[0-9]+)?){1,3}|((empty)|(\\(( "
"*)(([-+]?[0-9]+\\.?[0-9]*)|([-+]?[0-9]*\\.?[0-9]+))(e[-+]?[0-9]+)?(( "
"*)(([-+]?[0-9]+\\.?[0-9]*)|([-+]?[0-9]*\\.?[0-9]+))(e[-+]?[0-9]+)?){1,3}( *)\\))))( *))*( "
"*)\\)))|multilinestring( *)z?m?( *)((empty)|(\\(( *)((empty)|(\\(( "
"*)(([-+]?[0-9]+\\.?[0-9]*)|([-+]?[0-9]*\\.?[0-9]+))(e[-+]?[0-9]+)?(( "
"*)(([-+]?[0-9]+\\.?[0-9]*)|([-+]?[0-9]*\\.?[0-9]+))(e[-+]?[0-9]+)?){1,3}(( *)(,)( "
"*)(([-+]?[0-9]+\\.?[0-9]*)|([-+]?[0-9]*\\.?[0-9]+))(e[-+]?[0-9]+)?(( "
"*)(([-+]?[0-9]+\\.?[0-9]*)|([-+]?[0-9]*\\.?[0-9]+))(e[-+]?[0-9]+)?){1,3}( *))*( *)\\)))(( *)(,)( "
"*)((empty)|(\\(( *)(([-+]?[0-9]+\\.?[0-9]*)|([-+]?[0-9]*\\.?[0-9]+))(e[-+]?[0-9]+)?(( "
"*)(([-+]?[0-9]+\\.?[0-9]*)|([-+]?[0-9]*\\.?[0-9]+))(e[-+]?[0-9]+)?){1,3}(( *)(,)( "
"*)(([-+]?[0-9]+\\.?[0-9]*)|([-+]?[0-9]*\\.?[0-9]+))(e[-+]?[0-9]+)?(( "
"*)(([-+]?[0-9]+\\.?[0-9]*)|([-+]?[0-9]*\\.?[0-9]+))(e[-+]?[0-9]+)?){1,3}( *))*( *)\\)))( *))*( "
"*)\\)))|multipolygon( *)z?m?( *)((empty)|(\\(( *)((empty)|(\\(( *)((empty)|(\\(( "
"*)(([-+]?[0-9]+\\.?[0-9]*)|([-+]?[0-9]*\\.?[0-9]+))(e[-+]?[0-9]+)?(( "
"*)(([-+]?[0-9]+\\.?[0-9]*)|([-+]?[0-9]*\\.?[0-9]+))(e[-+]?[0-9]+)?){1,3}(( *)(,)( "
"*)(([-+]?[0-9]+\\.?[0-9]*)|([-+]?[0-9]*\\.?[0-9]+))(e[-+]?[0-9]+)?(( "
"*)(([-+]?[0-9]+\\.?[0-9]*)|([-+]?[0-9]*\\.?[0-9]+))(e[-+]?[0-9]+)?){1,3}( *))*( *)\\)))(( *)(,)( "
"*)((empty)|(\\(( *)(([-+]?[0-9]+\\.?[0-9]*)|([-+]?[0-9]*\\.?[0-9]+))(e[-+]?[0-9]+)?(( "
"*)(([-+]?[0-9]+\\.?[0-9]*)|([-+]?[0-9]*\\.?[0-9]+))(e[-+]?[0-9]+)?){1,3}(( *)(,)( "
"*)(([-+]?[0-9]+\\.?[0-9]*)|([-+]?[0-9]*\\.?[0-9]+))(e[-+]?[0-9]+)?(( "
"*)(([-+]?[0-9]+\\.?[0-9]*)|([-+]?[0-9]*\\.?[0-9]+))(e[-+]?[0-9]+)?){1,3}( *))*( *)\\)))( *))*( *)\\)))(( *)(,)( "
"*)((empty)|(\\(( *)((empty)|(\\(( *)(([-+]?[0-9]+\\.?[0-9]*)|([-+]?[0-9]*\\.?[0-9]+))(e[-+]?[0-9]+)?(( "
"*)(([-+]?[0-9]+\\.?[0-9]*)|([-+]?[0-9]*\\.?[0-9]+))(e[-+]?[0-9]+)?){1,3}(( *)(,)( "
"*)(([-+]?[0-9]+\\.?[0-9]*)|([-+]?[0-9]*\\.?[0-9]+))(e[-+]?[0-9]+)?(( "
"*)(([-+]?[0-9]+\\.?[0-9]*)|([-+]?[0-9]*\\.?[0-9]+))(e[-+]?[0-9]+)?){1,3}( *))*( *)\\)))(( *)(,)( "
"*)((empty)|(\\(( *)(([-+]?[0-9]+\\.?[0-9]*)|([-+]?[0-9]*\\.?[0-9]+))(e[-+]?[0-9]+)?(( "
"*)(([-+]?[0-9]+\\.?[0-9]*)|([-+]?[0-9]*\\.?[0-9]+))(e[-+]?[0-9]+)?){1,3}(( *)(,)( "
"*)(([-+]?[0-9]+\\.?[0-9]*)|([-+]?[0-9]*\\.?[0-9]+))(e[-+]?[0-9]+)?(( "
"*)(([-+]?[0-9]+\\.?[0-9]*)|([-+]?[0-9]*\\.?[0-9]+))(e[-+]?[0-9]+)?){1,3}( *))*( *)\\)))( *))*( *)\\)))( *))*( "
"*)\\)))|(GEOCOLLECTION\\((?R)(( *)(,)( *)(?R))*( *)\\))( *)$");
ret = doRegComp(ppRegex, ppMatchData, wktPatternWithSpace);
taosMemoryFree(wktPatternWithSpace);
return ret;
}
int32_t initCtxGeomFromText() {
int32_t code = TSDB_CODE_FAILED;
SGeosContext* geosCtx = getThreadLocalGeosCtx();
@ -113,6 +174,10 @@ int32_t initCtxGeomFromText() {
}
}
if (geosCtx->WKTRegex == NULL) {
if (initWktRegex(&geosCtx->WKTRegex, &geosCtx->WKTMatchData) != 0) return code;
}
return TSDB_CODE_SUCCESS;
}
@ -125,6 +190,11 @@ int32_t doGeomFromText(const char *inputWKT, unsigned char **outputGeom, size_t
GEOSGeometry *geom = NULL;
unsigned char *wkb = NULL;
if (doRegExec(inputWKT, geosCtx->WKTRegex, geosCtx->WKTMatchData) != 0) {
code = TSDB_CODE_FUNC_FUNTION_PARA_VALUE;
goto _exit;
}
geom = GEOSWKTReader_read_r(geosCtx->handle, geosCtx->WKTReader, inputWKT);
if (geom == NULL) {
code = TSDB_CODE_FUNC_FUNTION_PARA_VALUE;

View File

@ -21,29 +21,29 @@ target_include_directories(
PRIVATE "${TD_SOURCE_DIR}/utils/TSZ/sz/inc"
PRIVATE "${TD_SOURCE_DIR}/utils/TSZ/zstd/"
PRIVATE "${TD_SOURCE_DIR}/contrib/lzma2/"
PRIVATE "${TD_SOURCE_DIR}/contrib/pcre2/"
)
target_link_directories(
util
PUBLIC "${TD_SOURCE_DIR}/contrib/lzma2"
PUBLIC "${TD_SOURCE_DIR}/contrib/pcre2"
)
if (TD_LINUX)
target_link_libraries(
util
PUBLIC os common
PUBLIC lz4_static fast-lzma2
PUBLIC api cjson geos_c TSZ
)
else()
PUBLIC lz4_static fast-lzma2 pcre2-8
PUBLIC api cjson geos_c TSZ
)
else()
target_link_libraries(
util
PUBLIC os common
PUBLIC lz4_static
PUBLIC api cjson geos_c TSZ
)
PUBLIC lz4_static pcre2-8
PUBLIC api cjson geos_c TSZ
)
endif()
if(${BUILD_TEST})

View File

@ -43,6 +43,10 @@ void destroyThreadLocalGeosCtx() {
tlGeosCtx.WKBWriter = NULL;
}
if (tlGeosCtx.WKTRegex) {
destroyRegexes(tlGeosCtx.WKTRegex, tlGeosCtx.WKTMatchData);
}
if(tlGeosCtx.handle) {
GEOS_finish_r(tlGeosCtx.handle);
tlGeosCtx.handle = NULL;

35
source/util/src/tpcr2.c Normal file
View File

@ -0,0 +1,35 @@
#include "tpcre2.h"
int32_t doRegComp(pcre2_code** ppRegex, pcre2_match_data** ppMatchData, const char* pattern) {
uint32_t options = PCRE2_CASELESS;
int errorcode;
PCRE2_SIZE erroroffset;
*ppRegex = pcre2_compile((PCRE2_SPTR8)pattern, PCRE2_ZERO_TERMINATED, options, &errorcode, &erroroffset, NULL);
if (*ppRegex == NULL) {
PCRE2_UCHAR buffer[256];
pcre2_get_error_message(errorcode, buffer, sizeof(buffer));
return 1;
}
*ppMatchData = pcre2_match_data_create_from_pattern(*ppRegex, NULL);
return 0;
}
int32_t doRegExec(const char* pString, pcre2_code* pRegex, pcre2_match_data* pMatchData) {
int32_t ret = 0;
ret = pcre2_match(pRegex, (PCRE2_SPTR)pString, PCRE2_ZERO_TERMINATED, 0, 0, pMatchData, NULL);
if (ret < 0) {
PCRE2_UCHAR buffer[256];
pcre2_get_error_message(ret, buffer, sizeof(buffer));
return 1;
}
return (ret > 0) ? 0 : 1;
}
void destroyRegexes(pcre2_code* pWktRegex, pcre2_match_data* pWktMatchData) {
pcre2_code_free(pWktRegex);
pcre2_match_data_free(pWktMatchData);
}