fix(test): do some internal refactor.
This commit is contained in:
parent
5dfeac4a11
commit
cf835c5ce4
|
@ -1,5 +1,5 @@
|
||||||
cmake_minimum_required(VERSION 3.0)
|
cmake_minimum_required(VERSION 3.0)
|
||||||
set(CMAKE_VERBOSE_MAKEFILE FALSE)
|
set(CMAKE_VERBOSE_MAKEFILE TRUE)
|
||||||
set(TD_BUILD_TAOSA_INTERNAL FALSE)
|
set(TD_BUILD_TAOSA_INTERNAL FALSE)
|
||||||
|
|
||||||
#set output directory
|
#set output directory
|
||||||
|
@ -151,6 +151,7 @@ ELSE ()
|
||||||
CHECK_C_COMPILER_FLAG("-mavx2" COMPILER_SUPPORT_AVX2)
|
CHECK_C_COMPILER_FLAG("-mavx2" COMPILER_SUPPORT_AVX2)
|
||||||
CHECK_C_COMPILER_FLAG("-mavx512f" COMPILER_SUPPORT_AVX512F)
|
CHECK_C_COMPILER_FLAG("-mavx512f" COMPILER_SUPPORT_AVX512F)
|
||||||
CHECK_C_COMPILER_FLAG("-mavx512vbmi" COMPILER_SUPPORT_AVX512BMI)
|
CHECK_C_COMPILER_FLAG("-mavx512vbmi" COMPILER_SUPPORT_AVX512BMI)
|
||||||
|
CHECK_C_COMPILER_FLAG("-mavx512vl" COMPILER_SUPPORT_AVX512VL)
|
||||||
|
|
||||||
IF (COMPILER_SUPPORT_SSE42)
|
IF (COMPILER_SUPPORT_SSE42)
|
||||||
SET(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -msse4.2")
|
SET(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -msse4.2")
|
||||||
|
@ -175,7 +176,13 @@ ELSE ()
|
||||||
IF (COMPILER_SUPPORT_AVX512F AND COMPILER_SUPPORT_AVX512BMI)
|
IF (COMPILER_SUPPORT_AVX512F AND COMPILER_SUPPORT_AVX512BMI)
|
||||||
SET(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -mavx512f -mavx512vbmi -mavx512vl")
|
SET(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -mavx512f -mavx512vbmi -mavx512vl")
|
||||||
SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mavx512f -mavx512vbmi -mavx512vl")
|
SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mavx512f -mavx512vbmi -mavx512vl")
|
||||||
MESSAGE(STATUS "avx512 supported by gcc")
|
MESSAGE(STATUS "avx512f/avx512bmi supported by gcc")
|
||||||
|
ENDIF()
|
||||||
|
|
||||||
|
IF (COMPILER_SUPPORT_AVX512VL)
|
||||||
|
SET(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -mavx512vl")
|
||||||
|
SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mavx512vl")
|
||||||
|
MESSAGE(STATUS "avx512vl supported by gcc")
|
||||||
ENDIF()
|
ENDIF()
|
||||||
ENDIF()
|
ENDIF()
|
||||||
|
|
||||||
|
|
|
@ -111,7 +111,7 @@ int32_t tsDecompressIntImpl_Hw(const char *const input, const int32_t nelements,
|
||||||
__m256i signmask = _mm256_and_si256(_mm256_set1_epi64x(1), zigzagVal);
|
__m256i signmask = _mm256_and_si256(_mm256_set1_epi64x(1), zigzagVal);
|
||||||
signmask = _mm256_sub_epi64(_mm256_setzero_si256(), signmask);
|
signmask = _mm256_sub_epi64(_mm256_setzero_si256(), signmask);
|
||||||
|
|
||||||
// get the four zigzag values here
|
// get four zigzag values here
|
||||||
__m256i delta = _mm256_xor_si256(_mm256_srli_epi64(zigzagVal, 1), signmask);
|
__m256i delta = _mm256_xor_si256(_mm256_srli_epi64(zigzagVal, 1), signmask);
|
||||||
|
|
||||||
// calculate the cumulative sum (prefix sum) for each number
|
// calculate the cumulative sum (prefix sum) for each number
|
||||||
|
@ -254,10 +254,11 @@ int32_t tsDecompressTimestampAvx512(const char* const input, const int32_t nelem
|
||||||
__m128i prevDelta = _mm_setzero_si128();
|
__m128i prevDelta = _mm_setzero_si128();
|
||||||
|
|
||||||
// _mm_maskz_loadu_epi8
|
// _mm_maskz_loadu_epi8
|
||||||
#if __AVX512F__
|
#if __AVX512VL__
|
||||||
|
|
||||||
int32_t batch = nelements >> 1;
|
int32_t batch = nelements >> 1;
|
||||||
int32_t remainder = nelements & 0x01;
|
int32_t remainder = nelements & 0x01;
|
||||||
|
__mmask16 mask2[16] = {0, 0x0001, 0x0003, 0x0007, 0x000f, 0x001f, 0x003f, 0x007f, 0x00ff};
|
||||||
|
|
||||||
int32_t i = 0;
|
int32_t i = 0;
|
||||||
if (batch > 1) {
|
if (batch > 1) {
|
||||||
|
@ -267,7 +268,6 @@ int32_t tsDecompressTimestampAvx512(const char* const input, const int32_t nelem
|
||||||
int8_t nbytes1 = flags & INT8MASK(4); // range of nbytes starts from 0 to 7
|
int8_t nbytes1 = flags & INT8MASK(4); // range of nbytes starts from 0 to 7
|
||||||
int8_t nbytes2 = (flags >> 4) & INT8MASK(4);
|
int8_t nbytes2 = (flags >> 4) & INT8MASK(4);
|
||||||
|
|
||||||
__mmask16 mask2[16] = {0, 0x0001, 0x0003, 0x0007, 0x000f, 0x001f, 0x003f, 0x007f, 0x00ff};
|
|
||||||
__m128i data1 = _mm_maskz_loadu_epi8(mask2[nbytes1], (const void*)(input + ipos));
|
__m128i data1 = _mm_maskz_loadu_epi8(mask2[nbytes1], (const void*)(input + ipos));
|
||||||
__m128i data2 = _mm_maskz_loadu_epi8(mask2[nbytes2], (const void*)(input + ipos + nbytes1));
|
__m128i data2 = _mm_maskz_loadu_epi8(mask2[nbytes2], (const void*)(input + ipos + nbytes1));
|
||||||
data2 = _mm_broadcastq_epi64(data2);
|
data2 = _mm_broadcastq_epi64(data2);
|
||||||
|
@ -305,7 +305,6 @@ int32_t tsDecompressTimestampAvx512(const char* const input, const int32_t nelem
|
||||||
int8_t nbytes1 = flags & INT8MASK(4); // range of nbytes starts from 0 to 7
|
int8_t nbytes1 = flags & INT8MASK(4); // range of nbytes starts from 0 to 7
|
||||||
int8_t nbytes2 = (flags >> 4) & INT8MASK(4);
|
int8_t nbytes2 = (flags >> 4) & INT8MASK(4);
|
||||||
|
|
||||||
__mmask16 mask2[16] = {0, 0x0001, 0x0003, 0x0007, 0x000f, 0x001f, 0x003f, 0x007f, 0x00ff};
|
|
||||||
__m128i data1 = _mm_maskz_loadu_epi8(mask2[nbytes1], (const void*)(input + ipos));
|
__m128i data1 = _mm_maskz_loadu_epi8(mask2[nbytes1], (const void*)(input + ipos));
|
||||||
__m128i data2 = _mm_maskz_loadu_epi8(mask2[nbytes2], (const void*)(input + ipos + nbytes1));
|
__m128i data2 = _mm_maskz_loadu_epi8(mask2[nbytes2], (const void*)(input + ipos + nbytes1));
|
||||||
data2 = _mm_broadcastq_epi64(data2);
|
data2 = _mm_broadcastq_epi64(data2);
|
||||||
|
@ -357,10 +356,8 @@ int32_t tsDecompressTimestampAvx512(const char* const input, const int32_t nelem
|
||||||
if (opos == 0) {
|
if (opos == 0) {
|
||||||
ostream[opos++] = deltaOfDelta;
|
ostream[opos++] = deltaOfDelta;
|
||||||
} else {
|
} else {
|
||||||
int64_t prevV = prevVal[1];
|
|
||||||
|
|
||||||
int64_t prevDeltaX = deltaOfDelta + prevDelta[1];
|
int64_t prevDeltaX = deltaOfDelta + prevDelta[1];
|
||||||
ostream[opos++] = prevV + prevDeltaX;
|
ostream[opos++] = prevVal[1] + prevDeltaX;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -53,7 +53,7 @@ TEST(utilTest, decompress_test) {
|
||||||
}
|
}
|
||||||
|
|
||||||
TEST(utilTest, decompress_perf_test) {
|
TEST(utilTest, decompress_perf_test) {
|
||||||
int32_t num = 100000;
|
int32_t num = 10000;
|
||||||
|
|
||||||
int64_t* pList = static_cast<int64_t*>(taosMemoryCalloc(num, sizeof(int64_t)));
|
int64_t* pList = static_cast<int64_t*>(taosMemoryCalloc(num, sizeof(int64_t)));
|
||||||
int64_t iniVal = 1700000000;
|
int64_t iniVal = 1700000000;
|
||||||
|
@ -71,7 +71,7 @@ TEST(utilTest, decompress_perf_test) {
|
||||||
char* pOutput = static_cast<char*>(taosMemoryMalloc(num * sizeof(int64_t)));
|
char* pOutput = static_cast<char*>(taosMemoryMalloc(num * sizeof(int64_t)));
|
||||||
|
|
||||||
int64_t st = taosGetTimestampUs();
|
int64_t st = taosGetTimestampUs();
|
||||||
for(int32_t k = 0; k < 10; ++k) {
|
for(int32_t k = 0; k < 10000; ++k) {
|
||||||
tsDecompressTimestamp(px, len, num, pOutput, sizeof(int64_t) * num, ONE_STAGE_COMP, NULL, 0);
|
tsDecompressTimestamp(px, len, num, pOutput, sizeof(int64_t) * num, ONE_STAGE_COMP, NULL, 0);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -80,7 +80,7 @@ TEST(utilTest, decompress_perf_test) {
|
||||||
|
|
||||||
memset(pOutput, 0, num * sizeof(int64_t));
|
memset(pOutput, 0, num * sizeof(int64_t));
|
||||||
st = taosGetTimestampUs();
|
st = taosGetTimestampUs();
|
||||||
for(int32_t k = 0; k < 10; ++k) {
|
for(int32_t k = 0; k < 10000; ++k) {
|
||||||
tsDecompressTimestampAvx512(px, num, pOutput, false);
|
tsDecompressTimestampAvx512(px, num, pOutput, false);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue