diff --git a/cmake/cmake.define b/cmake/cmake.define
index 89d1853263..a3c2daf43b 100644
--- a/cmake/cmake.define
+++ b/cmake/cmake.define
@@ -151,6 +151,7 @@ ELSE ()
     CHECK_C_COMPILER_FLAG("-mavx2" COMPILER_SUPPORT_AVX2)
     CHECK_C_COMPILER_FLAG("-mavx512f" COMPILER_SUPPORT_AVX512F)
     CHECK_C_COMPILER_FLAG("-mavx512vbmi" COMPILER_SUPPORT_AVX512BMI)
+    CHECK_C_COMPILER_FLAG("-mavx512vl" COMPILER_SUPPORT_AVX512VL)
 
     IF (COMPILER_SUPPORT_SSE42)
         SET(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -msse4.2")
@@ -158,11 +159,11 @@ ELSE ()
     ENDIF()
 
     IF ("${SIMD_SUPPORT}" MATCHES "true")
-	IF (COMPILER_SUPPORT_FMA)
+        IF (COMPILER_SUPPORT_FMA)
             SET(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -mfma")
             SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mfma")
-	ENDIF()
-	IF (COMPILER_SUPPORT_AVX)
+        ENDIF()
+        IF (COMPILER_SUPPORT_AVX)
             SET(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -mavx")
             SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mavx")
         ENDIF()
@@ -174,8 +175,15 @@ ELSE ()
 
         IF (COMPILER_SUPPORT_AVX512F AND COMPILER_SUPPORT_AVX512BMI)
-            SET(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -mavx512f -mavx512vbmi -mavx512vl")
-            SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mavx512f -mavx512vbmi -mavx512vl")
-            MESSAGE(STATUS "avx512 supported by gcc")
+            SET(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -mavx512f -mavx512vbmi")
+            SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mavx512f -mavx512vbmi")
+            MESSAGE(STATUS "avx512f/avx512bmi supported by gcc")
+        ENDIF()
+
+        # -mavx512vl is appended only here, behind its own compiler probe, so it is never passed unchecked or twice.
+        IF (COMPILER_SUPPORT_AVX512VL)
+            SET(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -mavx512vl")
+            SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mavx512vl")
+            MESSAGE(STATUS "avx512vl supported by gcc")
         ENDIF()
     ENDIF()
 
diff --git a/source/util/src/tdecompress.c b/source/util/src/tdecompress.c
index 1353357ce8..41e3bf065d 100644
--- a/source/util/src/tdecompress.c
+++ b/source/util/src/tdecompress.c
@@ -111,7 +111,7 @@ int32_t tsDecompressIntImpl_Hw(const char *const input, const int32_t nelements,
       __m256i signmask = _mm256_and_si256(_mm256_set1_epi64x(1), zigzagVal);
       signmask = _mm256_sub_epi64(_mm256_setzero_si256(), signmask);
 
-      // get the four zigzag values here
+      // zigzag-decode the four 64-bit lanes: (v >> 1) ^ -(v & 1)
       __m256i delta = _mm256_xor_si256(_mm256_srli_epi64(zigzagVal, 1), signmask);
 
       // calculate the cumulative sum (prefix sum) for each number
@@ -254,10 +254,12 @@ int32_t tsDecompressTimestampAvx512(const char* const input, const int32_t nelem
   __m128i prevDelta = _mm_setzero_si128();
 
   // _mm_maskz_loadu_epi8
-#if __AVX512F__
+#if defined(__AVX512VL__) && defined(__AVX512BW__)
 
   int32_t batch = nelements >> 1;
   int32_t remainder = nelements & 0x01;
+  // mask2[n] has the low n bits set (n = 0..8): byte-select mask for _mm_maskz_loadu_epi8; entries 9..15 are zero
+  __mmask16 mask2[16] = {0, 0x0001, 0x0003, 0x0007, 0x000f, 0x001f, 0x003f, 0x007f, 0x00ff};
 
   int32_t i = 0;
   if (batch > 1) {
@@ -267,7 +269,6 @@ int32_t tsDecompressTimestampAvx512(const char* const input, const int32_t nelem
       int8_t nbytes1 = flags & INT8MASK(4);  // range of nbytes starts from 0 to 7
       int8_t nbytes2 = (flags >> 4) & INT8MASK(4);
 
-      __mmask16 mask2[16] = {0, 0x0001, 0x0003, 0x0007, 0x000f, 0x001f, 0x003f, 0x007f, 0x00ff};
       __m128i data1 = _mm_maskz_loadu_epi8(mask2[nbytes1], (const void*)(input + ipos));
       __m128i data2 = _mm_maskz_loadu_epi8(mask2[nbytes2], (const void*)(input + ipos + nbytes1));
       data2 = _mm_broadcastq_epi64(data2);
@@ -305,7 +306,6 @@ int32_t tsDecompressTimestampAvx512(const char* const input, const int32_t nelem
     int8_t nbytes1 = flags & INT8MASK(4);  // range of nbytes starts from 0 to 7
     int8_t nbytes2 = (flags >> 4) & INT8MASK(4);
 
-    __mmask16 mask2[16] = {0, 0x0001, 0x0003, 0x0007, 0x000f, 0x001f, 0x003f, 0x007f, 0x00ff};
     __m128i data1 = _mm_maskz_loadu_epi8(mask2[nbytes1], (const void*)(input + ipos));
     __m128i data2 = _mm_maskz_loadu_epi8(mask2[nbytes2], (const void*)(input + ipos + nbytes1));
     data2 = _mm_broadcastq_epi64(data2);
@@ -357,10 +357,8 @@ int32_t tsDecompressTimestampAvx512(const char* const input, const int32_t nelem
     if (opos == 0) {
       ostream[opos++] = deltaOfDelta;
     } else {
-      int64_t prevV = prevVal[1];
-
       int64_t prevDeltaX = deltaOfDelta + prevDelta[1];
-      ostream[opos++] = prevV + prevDeltaX;
+      ostream[opos++] = prevVal[1] + prevDeltaX;
     }
   }
 
diff --git a/source/util/test/decompressTest.cpp b/source/util/test/decompressTest.cpp
index 0c4f660002..378b67edbb 100644
--- a/source/util/test/decompressTest.cpp
+++ b/source/util/test/decompressTest.cpp
@@ -53,7 +53,7 @@ TEST(utilTest, decompress_test) {
 }
 
 TEST(utilTest, decompress_perf_test) {
-  int32_t num = 100000;
+  int32_t num = 10000;
 
   int64_t* pList = static_cast<int64_t*>(taosMemoryCalloc(num, sizeof(int64_t)));
   int64_t iniVal = 1700000000;
@@ -71,7 +71,7 @@ TEST(utilTest, decompress_perf_test) {
   char* pOutput = static_cast<char*>(taosMemoryMalloc(num * sizeof(int64_t)));
 
   int64_t st = taosGetTimestampUs();
-  for(int32_t k = 0; k < 10; ++k) {
+  for(int32_t k = 0; k < 10000; ++k) {
     tsDecompressTimestamp(px, len, num, pOutput, sizeof(int64_t) * num, ONE_STAGE_COMP, NULL, 0);
   }
 
@@ -80,7 +80,7 @@ TEST(utilTest, decompress_perf_test) {
 
   memset(pOutput, 0, num * sizeof(int64_t));
   st = taosGetTimestampUs();
-  for(int32_t k = 0; k < 10; ++k) {
+  for(int32_t k = 0; k < 10000; ++k) {
     tsDecompressTimestampAvx512(px, num, pOutput, false);
   }
 