diff --git a/cmake/cmake.define b/cmake/cmake.define index 735735f0cc..f1a5cef67e 100644 --- a/cmake/cmake.define +++ b/cmake/cmake.define @@ -180,18 +180,20 @@ ELSE () SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mavx2") ENDIF() MESSAGE(STATUS "SIMD instructions (FMA/AVX/AVX2) is ACTIVATED") + ENDIF() + IF ("${SIMD_AVX512_SUPPORT}" MATCHES "true") IF (COMPILER_SUPPORT_AVX512F AND COMPILER_SUPPORT_AVX512BMI) SET(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -mavx512f -mavx512vbmi") SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mavx512f -mavx512vbmi") - MESSAGE(STATUS "avx512f/avx512bmi supported by compiler") + MESSAGE(STATUS "avx512f/avx512bmi enabled by compiler") ENDIF() -# IF (COMPILER_SUPPORT_AVX512VL) -# SET(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -mavx512vl") -# SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mavx512vl") -# MESSAGE(STATUS "avx512vl supported by compiler") -# ENDIF() + IF (COMPILER_SUPPORT_AVX512VL) + SET(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -mavx512vl") + SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mavx512vl") + MESSAGE(STATUS "avx512vl enabled by compiler") + ENDIF() ENDIF() # build mode diff --git a/include/os/osEnv.h b/include/os/osEnv.h index ac4ecd4212..e3e5da59f5 100644 --- a/include/os/osEnv.h +++ b/include/os/osEnv.h @@ -39,9 +39,10 @@ extern char *tsProcPath; extern char tsSIMDEnable; extern char tsSSE42Enable; extern char tsAVXEnable; -extern char tsAVX2Enable; -extern char tsFMAEnable; -extern char tsAVX512Enable; +extern char tsAVX2Supported; +extern char tsFMASupported; +extern char tsAVX512Supported; +extern char tsAVX512Enable; extern char tsTagFilterCache; extern char configDir[]; diff --git a/source/client/test/clientTests.cpp b/source/client/test/clientTests.cpp index b5bad92dc4..bbd759b3d1 100644 --- a/source/client/test/clientTests.cpp +++ b/source/client/test/clientTests.cpp @@ -828,12 +828,8 @@ TEST(clientCase, projection_query_tables) { // printf("error in create db, reason:%s\n", taos_errstr(pRes)); // } // taos_free_result(pRes); -/* - TAOS_RES* pRes = taos_query(pConn, "select last(ts), ts from cache_1.t1"); -// pRes = taos_query(pConn, "select last(ts), ts from cache_1.no_pk_t1"); - if (taos_errno(pRes) != 0) { - printf("failed to create table tu, reason:%s\n", taos_errstr(pRes)); - } + + pRes= taos_query(pConn, "use abc1"); taos_free_result(pRes); pRes = taos_query(pConn, "create table tu using st2 tags(2)"); @@ -868,7 +864,6 @@ TEST(clientCase, projection_query_tables) { createNewTable(pConn, i, 100000, 0, pstr); } } -*/ pRes = taos_query(pConn, "select * from abc1.st2"); if (taos_errno(pRes) != 0) { diff --git a/source/common/src/tglobal.c b/source/common/src/tglobal.c index c68dc85c29..5b0361f8ca 100644 --- a/source/common/src/tglobal.c +++ b/source/common/src/tglobal.c @@ -596,10 +596,11 @@ static int32_t taosAddSystemCfg(SConfig *pCfg) { if (cfgAddBool(pCfg, "ssd42", tsSSE42Enable, CFG_SCOPE_BOTH, CFG_DYN_NONE) != 0) return -1; if (cfgAddBool(pCfg, "avx", tsAVXEnable, CFG_SCOPE_BOTH, CFG_DYN_NONE) != 0) return -1; - if (cfgAddBool(pCfg, "avx2", tsAVX2Enable, CFG_SCOPE_BOTH, CFG_DYN_NONE) != 0) return -1; - if (cfgAddBool(pCfg, "fma", tsFMAEnable, CFG_SCOPE_BOTH, CFG_DYN_NONE) != 0) return -1; - if (cfgAddBool(pCfg, "avx512", tsAVX512Enable, CFG_SCOPE_BOTH, CFG_DYN_NONE) != 0) return -1; + if (cfgAddBool(pCfg, "avx2", tsAVX2Supported, CFG_SCOPE_BOTH, CFG_DYN_NONE) != 0) return -1; + if (cfgAddBool(pCfg, "fma", tsFMASupported, CFG_SCOPE_BOTH, CFG_DYN_NONE) != 0) return -1; + if (cfgAddBool(pCfg, "avx512", tsAVX512Supported, CFG_SCOPE_BOTH, CFG_DYN_NONE) != 0) return -1; if (cfgAddBool(pCfg, "simdEnable", tsSIMDEnable, CFG_SCOPE_BOTH, CFG_DYN_NONE) != 0) return -1; + if (cfgAddBool(pCfg, "AVX512Enable", tsAVX512Enable, CFG_SCOPE_BOTH, CFG_DYN_NONE) != 0) return -1; if (cfgAddBool(pCfg, "tagFilterCache", tsTagFilterCache, CFG_SCOPE_BOTH, CFG_DYN_NONE) != 0) return -1; if (cfgAddInt64(pCfg, "openMax", tsOpenMax, 0, INT64_MAX, CFG_SCOPE_BOTH, CFG_DYN_NONE) != 0) return -1; diff --git a/source/libs/function/src/detail/tminmax.c b/source/libs/function/src/detail/tminmax.c index a6c91a57ce..e36157b565 100644 --- a/source/libs/function/src/detail/tminmax.c +++ b/source/libs/function/src/detail/tminmax.c @@ -370,7 +370,7 @@ static int32_t findFirstValPosition(const SColumnInfoData* pCol, int32_t start, static void handleInt8Col(const void* data, int32_t start, int32_t numOfRows, SMinmaxResInfo* pBuf, bool isMinFunc, bool signVal) { // AVX2 version to speedup the loop - if (tsAVX2Enable && tsSIMDEnable) { + if (tsAVX2Supported && tsSIMDEnable) { pBuf->v = i8VectorCmpAVX2(data, numOfRows, isMinFunc, signVal); } else { if (!pBuf->assign) { @@ -404,7 +404,7 @@ static void handleInt8Col(const void* data, int32_t start, int32_t numOfRows, SM static void handleInt16Col(const void* data, int32_t start, int32_t numOfRows, SMinmaxResInfo* pBuf, bool isMinFunc, bool signVal) { // AVX2 version to speedup the loop - if (tsAVX2Enable && tsSIMDEnable) { + if (tsAVX2Supported && tsSIMDEnable) { pBuf->v = i16VectorCmpAVX2(data, numOfRows, isMinFunc, signVal); } else { if (!pBuf->assign) { @@ -438,7 +438,7 @@ static void handleInt16Col(const void* data, int32_t start, int32_t numOfRows, S static void handleInt32Col(const void* data, int32_t start, int32_t numOfRows, SMinmaxResInfo* pBuf, bool isMinFunc, bool signVal) { // AVX2 version to speedup the loop - if (tsAVX2Enable && tsSIMDEnable) { + if (tsAVX2Supported && tsSIMDEnable) { pBuf->v = i32VectorCmpAVX2(data, numOfRows, isMinFunc, signVal); } else { if (!pBuf->assign) { diff --git a/source/os/src/osEnv.c b/source/os/src/osEnv.c index 54107db325..ea88d5307b 100644 --- a/source/os/src/osEnv.c +++ b/source/os/src/osEnv.c @@ -38,11 +38,12 @@ int64_t tsTotalMemoryKB = 0; char *tsProcPath = NULL; char tsSIMDEnable = 0; +char tsAVX512Enable = 0; char tsSSE42Enable = 0; char tsAVXEnable = 0; -char tsAVX2Enable = 0; -char tsFMAEnable = 0; -char tsAVX512Enable = 0; +char tsAVX2Supported = 0; +char tsFMASupported = 0; +char tsAVX512Supported = 0; void osDefaultInit() { taosSeedRand(taosSafeRand()); diff --git a/source/os/src/osSysinfo.c b/source/os/src/osSysinfo.c index 187461826a..71cbf5541f 100644 --- a/source/os/src/osSysinfo.c +++ b/source/os/src/osSysinfo.c @@ -250,7 +250,7 @@ void taosGetSystemInfo() { taosGetCpuCores(&tsNumOfCores, false); taosGetTotalMemory(&tsTotalMemoryKB); taosGetCpuUsage(NULL, NULL); - taosGetCpuInstructions(&tsSSE42Enable, &tsAVXEnable, &tsAVX2Enable, &tsFMAEnable, &tsAVX512Enable); + taosGetCpuInstructions(&tsSSE42Enable, &tsAVXEnable, &tsAVX2Supported, &tsFMASupported, &tsAVX512Supported); #endif } diff --git a/source/util/src/tcompression.c b/source/util/src/tcompression.c index 4635ec340d..053a716721 100644 --- a/source/util/src/tcompression.c +++ b/source/util/src/tcompression.c @@ -822,9 +822,9 @@ int32_t tsDecompressTimestampImp(const char *const input, const int32_t nelement memcpy(output, input + 1, nelements * longBytes); return nelements * longBytes; } else if (input[0] == 1) { // Decompress - if (tsSIMDEnable && tsAVX512Enable) { + if (tsSIMDEnable && tsAVX512Supported && tsAVX512Enable) { tsDecompressTimestampAvx512(input, nelements, output, false); - } else if (tsSIMDEnable && tsAVX2Enable) { + } else if (tsSIMDEnable && tsAVX2Supported) { tsDecompressTimestampAvx2(input, nelements, output, false); } else { int64_t *ostream = (int64_t *)output; @@ -1198,9 +1198,9 @@ int32_t tsDecompressFloatImp(const char *const input, const int32_t nelements, c return nelements * FLOAT_BYTES; } - if (tsSIMDEnable && tsAVX2Enable) { + if (tsSIMDEnable && tsAVX2Supported) { tsDecompressFloatImplAvx2(input, nelements, output); - } else if (tsSIMDEnable && tsAVX512Enable) { + } else if (tsSIMDEnable && tsAVX512Supported && tsAVX512Enable) { tsDecompressFloatImplAvx512(input, nelements, output); } else { // alternative implementation without SIMD instructions. tsDecompressFloatHelper(input, nelements, (float *)output); @@ -2713,7 +2713,7 @@ int32_t tsDecompressBigint(void *pIn, int32_t nIn, int32_t nEle, void *pOut, int int8_t alvl = tsGetCompressL2Level(l2, lvl); \ return compressL2Dict[l2].comprFn(pIn, nIn, pOut, nOut, type, alvl); \ } else { \ - uTrace("dencode:%s, dcompress:%s, level:%d, type:%s", "disabled", compressL2Dict[l1].name, lvl, \ + uTrace("dencode:%s, decompress:%s, level:%d, type:%s", "disabled", compressL2Dict[l1].name, lvl, \ tDataTypes[type].name); \ return compressL2Dict[l2].decomprFn(pIn, nIn, pOut, nOut, type); \ } \ diff --git a/source/util/src/tdecompress.c b/source/util/src/tdecompress.c index d3cb3118d2..38f277fb48 100644 --- a/source/util/src/tdecompress.c +++ b/source/util/src/tdecompress.c @@ -74,12 +74,12 @@ int32_t tsDecompressIntImpl_Hw(const char *const input, const int32_t nelements, int32_t batch = 0; int32_t remain = 0; - if (tsSIMDEnable && tsAVX512Enable) { + if (tsSIMDEnable && tsAVX512Supported && tsAVX512Enable) { #if __AVX512F__ batch = num >> 3; remain = num & 0x07; #endif - } else if (tsSIMDEnable && tsAVX2Enable) { + } else if (tsSIMDEnable && tsAVX2Supported) { #if __AVX2__ batch = num >> 2; remain = num & 0x03; @@ -87,7 +87,7 @@ int32_t tsDecompressIntImpl_Hw(const char *const input, const int32_t nelements, } if (selector == 0 || selector == 1) { - if (tsSIMDEnable && tsAVX512Enable) { + if (tsSIMDEnable && tsAVX512Supported && tsAVX512Enable) { #if __AVX512F__ for (int32_t i = 0; i < batch; ++i) { __m512i prev = _mm512_set1_epi64(prevValue); @@ -98,7 +98,7 @@ int32_t tsDecompressIntImpl_Hw(const char *const input, const int32_t nelements, p[_pos++] = prevValue; } #endif - } else if (tsSIMDEnable && tsAVX2Enable) { + } else if (tsSIMDEnable && tsAVX2Supported) { for (int32_t i = 0; i < batch; ++i) { __m256i prev = _mm256_set1_epi64x(prevValue); _mm256_storeu_si256((__m256i *)&p[_pos], prev); @@ -116,7 +116,7 @@ int32_t tsDecompressIntImpl_Hw(const char *const input, const int32_t nelements, } } } else { - if (tsSIMDEnable && tsAVX512Enable) { + if (tsSIMDEnable && tsAVX512Supported && tsAVX512Enable) { #if __AVX512F__ __m512i sum_mask1 = _mm512_set_epi64(6, 6, 4, 4, 2, 2, 0, 0); __m512i sum_mask2 = _mm512_set_epi64(5, 5, 5, 5, 1, 1, 1, 1); @@ -182,7 +182,7 @@ int32_t tsDecompressIntImpl_Hw(const char *const input, const int32_t nelements, v += bit; } #endif - } else if (tsSIMDEnable && tsAVX2Enable) { + } else if (tsSIMDEnable && tsAVX2Supported) { __m256i base = _mm256_set1_epi64x(w); __m256i maskVal = _mm256_set1_epi64x(mask); @@ -331,16 +331,16 @@ int32_t tsDecompressFloatImplAvx2(const char *const input, const int32_t nelemen int32_t tsDecompressTimestampAvx2(const char *const input, const int32_t nelements, char *const output, bool bigEndian) { -#if 0 int64_t *ostream = (int64_t *)output; int32_t ipos = 1, opos = 0; + +#if __AVX2__ __m128i prevVal = _mm_setzero_si128(); __m128i prevDelta = _mm_setzero_si128(); -#if __AVX2__ int32_t batch = nelements >> 1; int32_t remainder = nelements & 0x01; - __mmask16 mask2[16] = {0, 0x0001, 0x0003, 0x0007, 0x000f, 0x001f, 0x003f, 0x007f, 0x00ff}; +// __mmask16 mask2[16] = {0, 0x0001, 0x0003, 0x0007, 0x000f, 0x001f, 0x003f, 0x007f, 0x00ff}; int32_t i = 0; if (batch > 1) { @@ -398,8 +398,6 @@ int32_t tsDecompressTimestampAvx2(const char *const input, const int32_t nelemen int8_t nbytes1 = flags & INT8MASK(4); // range of nbytes starts from 0 to 7 int8_t nbytes2 = (flags >> 4) & INT8MASK(4); -// __m128i data1 = _mm_maskz_loadu_epi8(mask2[nbytes1], (const void*)(input + ipos)); -// __m128i data2 = _mm_maskz_loadu_epi8(mask2[nbytes2], (const void*)(input + ipos + nbytes1)); __m128i data1; if (nbytes1 == 0) { data1 = _mm_setzero_si128(); @@ -471,7 +469,6 @@ int32_t tsDecompressTimestampAvx2(const char *const input, const int32_t nelemen ostream[opos++] = prevVal[1] + prevDeltaX; } } -#endif #endif return 0; } diff --git a/source/util/test/decompressTest.cpp b/source/util/test/decompressTest.cpp index 2ddef3f595..c982f450ea 100644 --- a/source/util/test/decompressTest.cpp +++ b/source/util/test/decompressTest.cpp @@ -4,9 +4,16 @@ #include #include "ttypes.h" -namespace {} // namespace +namespace { + +} // namespace + +TEST(utilTest, decompress_ts_test) { + { + tsSIMDEnable = 1; + tsAVX2Supported = 1; + } -TEST(utilTest, decompress_test) { int64_t tsList[10] = {1700000000, 1700000100, 1700000200, 1700000300, 1700000400, 1700000500, 1700000600, 1700000700, 1700000800, 1700000900}; @@ -57,6 +64,49 @@ TEST(utilTest, decompress_test) { } } +TEST(utilTest, decompress_bigint_avx2_test) { + { + tsSIMDEnable = 1; + tsAVX2Supported = 1; + } + + int64_t tsList[10] = {1700000000, 1700000100, 1700000200, 1700000300, 1700000400, + 1700000500, 1700000600, 1700000700, 1700000800, 1700000900}; + + char* pOutput[10 * sizeof(int64_t)] = {0}; + int32_t len = tsCompressBigint(tsList, sizeof(tsList), sizeof(tsList) / sizeof(tsList[0]), pOutput, 10, + ONE_STAGE_COMP, NULL, 0); + + char* decompOutput[10 * 8] = {0}; + + tsDecompressBigint(pOutput, len, 10, decompOutput, sizeof(int64_t) * 10, ONE_STAGE_COMP, NULL, 0); + + for (int32_t i = 0; i < 10; ++i) { + std::cout << ((int64_t*)decompOutput)[i] << std::endl; + } +} + +TEST(utilTest, decompress_int_avx2_test) { + { + tsSIMDEnable = 1; + tsAVX2Supported = 1; + } + + int32_t tsList[10] = {17000000, 17000001, 17000002, 17000003, 17000004, + 17000005, 17000006, 17000007, 17000008, 17000009}; + + char* pOutput[10 * sizeof(int32_t)] = {0}; + int32_t len = + tsCompressInt(tsList, sizeof(tsList), sizeof(tsList) / sizeof(tsList[0]), pOutput, 10, ONE_STAGE_COMP, NULL, 0); + + char* decompOutput[10 * 8] = {0}; + tsDecompressInt(pOutput, len, 10, decompOutput, sizeof(int32_t) * 10, ONE_STAGE_COMP, NULL, 0); + + for (int32_t i = 0; i < 10; ++i) { + std::cout << ((int32_t*)decompOutput)[i] << std::endl; + } +} + TEST(utilTest, decompress_perf_test) { int32_t num = 10000;