diff --git a/cmake/cmake.define b/cmake/cmake.define index 3343798686..56b6b7e1de 100644 --- a/cmake/cmake.define +++ b/cmake/cmake.define @@ -149,6 +149,8 @@ ELSE () CHECK_C_COMPILER_FLAG("-mfma" COMPILER_SUPPORT_FMA) CHECK_C_COMPILER_FLAG("-mavx" COMPILER_SUPPORT_AVX) CHECK_C_COMPILER_FLAG("-mavx2" COMPILER_SUPPORT_AVX2) + CHECK_C_COMPILER_FLAG("-mavx512f" COMPILER_SUPPORT_AVX512F) + CHECK_C_COMPILER_FLAG("-mavx512vbmi" COMPILER_SUPPORT_AVX512BMI) IF (COMPILER_SUPPORT_SSE42) SET(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -msse4.2") @@ -168,7 +170,13 @@ ELSE () SET(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -mavx2") SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mavx2") ENDIF() - MESSAGE(STATUS "SIMD instructions (FMA/AVX/AVX2) is ACTIVATED") + MESSAGE(STATUS "SIMD instructions (FMA/AVX/AVX2/AVX512) is ACTIVATED") + + IF (COMPILER_SUPPORT_AVX512F AND COMPILER_SUPPORT_AVX512BMI) + SET(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -mavx512f -mavx512vbmi") + SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mavx512f -mavx512vbmi") + MESSAGE(STATUS "avx512 supported by gcc") + ENDIF() ENDIF() # build mode diff --git a/include/os/osEnv.h b/include/os/osEnv.h index bc65da47a9..ac4ecd4212 100644 --- a/include/os/osEnv.h +++ b/include/os/osEnv.h @@ -36,11 +36,12 @@ extern int64_t tsStreamMax; extern float tsNumOfCores; extern int64_t tsTotalMemoryKB; extern char *tsProcPath; -extern char tsSIMDBuiltins; +extern char tsSIMDEnable; extern char tsSSE42Enable; extern char tsAVXEnable; extern char tsAVX2Enable; extern char tsFMAEnable; +extern char tsAVX512Enable; extern char tsTagFilterCache; extern char configDir[]; diff --git a/include/os/osSysinfo.h b/include/os/osSysinfo.h index 29b6f07dca..7a1df2b81c 100644 --- a/include/os/osSysinfo.h +++ b/include/os/osSysinfo.h @@ -41,7 +41,7 @@ int32_t taosGetOsReleaseName(char *releaseName, char* sName, char* ver, int32_t int32_t taosGetCpuInfo(char *cpuModel, int32_t maxLen, float *numOfCores); int32_t taosGetCpuCores(float *numOfCores, bool physical); void taosGetCpuUsage(double *cpu_system, double *cpu_engine); -int32_t taosGetCpuInstructions(char* sse42, char* avx, char* avx2, char* fma); +int32_t taosGetCpuInstructions(char* sse42, char* avx, char* avx2, char* fma, char* avx512); int32_t taosGetTotalMemory(int64_t *totalKB); int32_t taosGetProcMemory(int64_t *usedKB); int32_t taosGetSysMemory(int64_t *usedKB); diff --git a/source/common/src/tglobal.c b/source/common/src/tglobal.c index cb67fc1ba3..d12ebb13c2 100644 --- a/source/common/src/tglobal.c +++ b/source/common/src/tglobal.c @@ -511,12 +511,13 @@ static int32_t taosAddSystemCfg(SConfig *pCfg) { if (cfgAddBool(pCfg, "enableCoreFile", 1, CFG_SCOPE_BOTH, CFG_DYN_CLIENT) != 0) return -1; if (cfgAddFloat(pCfg, "numOfCores", tsNumOfCores, 1, 100000, CFG_SCOPE_BOTH, CFG_DYN_NONE) != 0) return -1; - if (cfgAddBool(pCfg, "ssd42", tsSSE42Enable, CFG_SCOPE_BOTH, CFG_DYN_NONE) != 0) return -1; - if (cfgAddBool(pCfg, "avx", tsAVXEnable, CFG_SCOPE_BOTH, CFG_DYN_NONE) != 0) return -1; - if (cfgAddBool(pCfg, "avx2", tsAVX2Enable, CFG_SCOPE_BOTH, CFG_DYN_NONE) != 0) return -1; - if (cfgAddBool(pCfg, "fma", tsFMAEnable, CFG_SCOPE_BOTH, CFG_DYN_NONE) != 0) return -1; - if (cfgAddBool(pCfg, "simdEnable", tsSIMDBuiltins, CFG_SCOPE_BOTH, CFG_DYN_NONE) != 0) return -1; - if (cfgAddBool(pCfg, "tagFilterCache", tsTagFilterCache, CFG_SCOPE_BOTH, CFG_DYN_NONE) != 0) return -1; + if (cfgAddBool(pCfg, "ssd42", tsSSE42Enable, CFG_SCOPE_BOTH) != 0) return -1; + if (cfgAddBool(pCfg, "avx", tsAVXEnable, CFG_SCOPE_BOTH) != 0) return -1; + if (cfgAddBool(pCfg, "avx2", tsAVX2Enable, CFG_SCOPE_BOTH) != 0) return -1; + if (cfgAddBool(pCfg, "fma", tsFMAEnable, CFG_SCOPE_BOTH) != 0) return -1; + if (cfgAddBool(pCfg, "avx512", tsAVX512Enable, CFG_SCOPE_BOTH) != 0) return -1; + if (cfgAddBool(pCfg, "simdEnable", tsSIMDEnable, CFG_SCOPE_BOTH) != 0) return -1; + if (cfgAddBool(pCfg, "tagFilterCache", tsTagFilterCache, CFG_SCOPE_BOTH) != 0) return -1; if (cfgAddInt64(pCfg, "openMax", tsOpenMax, 0, INT64_MAX, CFG_SCOPE_BOTH, CFG_DYN_NONE) != 0) return -1; #if !defined(_ALPINE) @@ -1080,7 +1081,7 @@ static int32_t taosSetServerCfg(SConfig *pCfg) { tsNumOfSnodeWriteThreads = cfgGetItem(pCfg, "numOfSnodeUniqueThreads")->i32; tsRpcQueueMemoryAllowed = cfgGetItem(pCfg, "rpcQueueMemoryAllowed")->i64; - tsSIMDBuiltins = (bool)cfgGetItem(pCfg, "simdEnable")->bval; + tsSIMDEnable = (bool)cfgGetItem(pCfg, "simdEnable")->bval; tsTagFilterCache = (bool)cfgGetItem(pCfg, "tagFilterCache")->bval; tsEnableMonitor = cfgGetItem(pCfg, "monitor")->bval; diff --git a/source/libs/function/src/detail/tavgfunction.c b/source/libs/function/src/detail/tavgfunction.c index 50df1b5067..e626c937da 100644 --- a/source/libs/function/src/detail/tavgfunction.c +++ b/source/libs/function/src/detail/tavgfunction.c @@ -565,7 +565,7 @@ int32_t avgFunction(SqlFunctionCtx* pCtx) { numOfElem = pInput->numOfRows; pAvgRes->count += pInput->numOfRows; - bool simdAvailable = tsAVXEnable && tsSIMDBuiltins && (numOfRows > THRESHOLD_SIZE); + bool simdAvailable = tsAVXEnable && tsSIMDEnable && (numOfRows > THRESHOLD_SIZE); switch(type) { case TSDB_DATA_TYPE_UTINYINT: diff --git a/source/libs/function/src/detail/tminmax.c b/source/libs/function/src/detail/tminmax.c index 3ca1c06303..a6c91a57ce 100644 --- a/source/libs/function/src/detail/tminmax.c +++ b/source/libs/function/src/detail/tminmax.c @@ -370,7 +370,7 @@ static int32_t findFirstValPosition(const SColumnInfoData* pCol, int32_t start, static void handleInt8Col(const void* data, int32_t start, int32_t numOfRows, SMinmaxResInfo* pBuf, bool isMinFunc, bool signVal) { // AVX2 version to speedup the loop - if (tsAVX2Enable && tsSIMDBuiltins) { + if (tsAVX2Enable && tsSIMDEnable) { pBuf->v = i8VectorCmpAVX2(data, numOfRows, isMinFunc, signVal); } else { if (!pBuf->assign) { @@ -404,7 +404,7 @@ static void handleInt8Col(const void* data, int32_t start, int32_t numOfRows, SM static void handleInt16Col(const void* data, int32_t start, int32_t numOfRows, SMinmaxResInfo* pBuf, bool isMinFunc, bool signVal) { // AVX2 version to speedup the loop - if (tsAVX2Enable && tsSIMDBuiltins) { + if (tsAVX2Enable && tsSIMDEnable) { pBuf->v = i16VectorCmpAVX2(data, numOfRows, isMinFunc, signVal); } else { if (!pBuf->assign) { @@ -438,7 +438,7 @@ static void handleInt16Col(const void* data, int32_t start, int32_t numOfRows, S static void handleInt32Col(const void* data, int32_t start, int32_t numOfRows, SMinmaxResInfo* pBuf, bool isMinFunc, bool signVal) { // AVX2 version to speedup the loop - if (tsAVX2Enable && tsSIMDBuiltins) { + if (tsAVX2Enable && tsSIMDEnable) { pBuf->v = i32VectorCmpAVX2(data, numOfRows, isMinFunc, signVal); } else { if (!pBuf->assign) { @@ -502,7 +502,7 @@ static void handleFloatCol(SColumnInfoData* pCol, int32_t start, int32_t numOfRo float* val = (float*)&pBuf->v; // AVX version to speedup the loop - if (tsAVXEnable && tsSIMDBuiltins) { + if (tsAVXEnable && tsSIMDEnable) { *val = floatVectorCmpAVX(pData, numOfRows, isMinFunc); } else { if (!pBuf->assign) { @@ -533,7 +533,7 @@ static void handleDoubleCol(SColumnInfoData* pCol, int32_t start, int32_t numOfR double* val = (double*)&pBuf->v; // AVX version to speedup the loop - if (tsAVXEnable && tsSIMDBuiltins) { + if (tsAVXEnable && tsSIMDEnable) { *val = (double)doubleVectorCmpAVX(pData, numOfRows, isMinFunc); } else { if (!pBuf->assign) { diff --git a/source/os/src/osEnv.c b/source/os/src/osEnv.c index 0fc136c693..54107db325 100644 --- a/source/os/src/osEnv.c +++ b/source/os/src/osEnv.c @@ -37,11 +37,12 @@ float tsNumOfCores = 0; int64_t tsTotalMemoryKB = 0; char *tsProcPath = NULL; -char tsSIMDBuiltins = 0; +char tsSIMDEnable = 0; char tsSSE42Enable = 0; char tsAVXEnable = 0; char tsAVX2Enable = 0; char tsFMAEnable = 0; +char tsAVX512Enable = 0; void osDefaultInit() { taosSeedRand(taosSafeRand()); diff --git a/source/os/src/osSysinfo.c b/source/os/src/osSysinfo.c index 4816ec8f8b..fea7a4f63d 100644 --- a/source/os/src/osSysinfo.c +++ b/source/os/src/osSysinfo.c @@ -250,7 +250,7 @@ void taosGetSystemInfo() { taosGetCpuCores(&tsNumOfCores, false); taosGetTotalMemory(&tsTotalMemoryKB); taosGetCpuUsage(NULL, NULL); - taosGetCpuInstructions(&tsSSE42Enable, &tsAVXEnable, &tsAVX2Enable, &tsFMAEnable); + taosGetCpuInstructions(&tsSSE42Enable, &tsAVXEnable, &tsAVX2Enable, &tsFMAEnable, &tsAVX512Enable); #endif } @@ -602,7 +602,7 @@ void taosGetCpuUsage(double *cpu_system, double *cpu_engine) { : "0"(level)) // todo add for windows and mac -int32_t taosGetCpuInstructions(char* sse42, char* avx, char* avx2, char* fma) { +int32_t taosGetCpuInstructions(char* sse42, char* avx, char* avx2, char* fma, char* avx512) { #ifdef WINDOWS #elif defined(_TD_DARWIN_64) #else @@ -610,12 +610,6 @@ int32_t taosGetCpuInstructions(char* sse42, char* avx, char* avx2, char* fma) { #ifdef _TD_X86_ // Since the compiler is not support avx/avx2 instructions, the global variables always need to be // set to be false -//#if __AVX__ || __AVX2__ -// tsSIMDBuiltins = true; -//#else -// tsSIMDBuiltins = false; -//#endif - uint32_t eax = 0, ebx = 0, ecx = 0, edx = 0; int32_t ret = __get_cpuid(1, &eax, &ebx, &ecx, &edx); @@ -631,6 +625,7 @@ int32_t taosGetCpuInstructions(char* sse42, char* avx, char* avx2, char* fma) { // Ref to https://gcc.gnu.org/bugzilla/show_bug.cgi?id=77756 __cpuid_fix(7u, eax, ebx, ecx, edx); *avx2 = (char) ((ebx & bit_AVX2) == bit_AVX2); + *avx512 = (char)((ebx & bit_AVX512F) == bit_AVX512F); #endif // _TD_X86_ #endif diff --git a/source/util/src/tcompression.c b/source/util/src/tcompression.c index 3fc3ef6be6..dc89a24180 100644 --- a/source/util/src/tcompression.c +++ b/source/util/src/tcompression.c @@ -283,7 +283,7 @@ int32_t tsDecompressINTImp(const char *const input, const int32_t nelements, cha int32_t batch = num >> 2; int32_t remain = num & 0x03; if (selector == 0 || selector == 1) { - if (tsAVX2Enable && tsSIMDBuiltins) { + if (tsAVX2Enable && tsSIMDEnable) { for (int32_t i = 0; i < batch; ++i) { __m256i prev = _mm256_set1_epi64x(prev_value); _mm256_storeu_si256((__m256i *)&p[_pos], prev); @@ -300,7 +300,7 @@ int32_t tsDecompressINTImp(const char *const input, const int32_t nelements, cha } } } else { - if (tsAVX2Enable && tsSIMDBuiltins) { + if (tsAVX2Enable && tsSIMDEnable) { __m256i base = _mm256_set1_epi64x(w); __m256i maskVal = _mm256_set1_epi64x(mask);