fix(util): uncomment the timestamp decode function with AVX2 instructions, and do some internal refactor.
This commit is contained in:
parent
6e2a5b9292
commit
04281bcd07
|
@ -180,18 +180,20 @@ ELSE ()
|
|||
SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mavx2")
|
||||
ENDIF()
|
||||
MESSAGE(STATUS "SIMD instructions (FMA/AVX/AVX2) is ACTIVATED")
|
||||
ENDIF()
|
||||
|
||||
IF ("${SIMD_AVX512_SUPPORT}" MATCHES "true")
|
||||
IF (COMPILER_SUPPORT_AVX512F AND COMPILER_SUPPORT_AVX512BMI)
|
||||
SET(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -mavx512f -mavx512vbmi")
|
||||
SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mavx512f -mavx512vbmi")
|
||||
MESSAGE(STATUS "avx512f/avx512bmi supported by compiler")
|
||||
MESSAGE(STATUS "avx512f/avx512bmi enabled by compiler")
|
||||
ENDIF()
|
||||
|
||||
# IF (COMPILER_SUPPORT_AVX512VL)
|
||||
# SET(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -mavx512vl")
|
||||
# SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mavx512vl")
|
||||
# MESSAGE(STATUS "avx512vl supported by compiler")
|
||||
# ENDIF()
|
||||
IF (COMPILER_SUPPORT_AVX512VL)
|
||||
SET(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -mavx512vl")
|
||||
SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mavx512vl")
|
||||
MESSAGE(STATUS "avx512vl enabled by compiler")
|
||||
ENDIF()
|
||||
ENDIF()
|
||||
|
||||
# build mode
|
||||
|
|
|
@ -39,8 +39,9 @@ extern char *tsProcPath;
|
|||
extern char tsSIMDEnable;
|
||||
extern char tsSSE42Enable;
|
||||
extern char tsAVXEnable;
|
||||
extern char tsAVX2Enable;
|
||||
extern char tsFMAEnable;
|
||||
extern char tsAVX2Supported;
|
||||
extern char tsFMASupported;
|
||||
extern char tsAVX512Supported;
|
||||
extern char tsAVX512Enable;
|
||||
extern char tsTagFilterCache;
|
||||
|
||||
|
|
|
@ -828,12 +828,8 @@ TEST(clientCase, projection_query_tables) {
|
|||
// printf("error in create db, reason:%s\n", taos_errstr(pRes));
|
||||
// }
|
||||
// taos_free_result(pRes);
|
||||
/*
|
||||
TAOS_RES* pRes = taos_query(pConn, "select last(ts), ts from cache_1.t1");
|
||||
// pRes = taos_query(pConn, "select last(ts), ts from cache_1.no_pk_t1");
|
||||
if (taos_errno(pRes) != 0) {
|
||||
printf("failed to create table tu, reason:%s\n", taos_errstr(pRes));
|
||||
}
|
||||
|
||||
pRes= taos_query(pConn, "use abc1");
|
||||
taos_free_result(pRes);
|
||||
|
||||
pRes = taos_query(pConn, "create table tu using st2 tags(2)");
|
||||
|
@ -868,7 +864,6 @@ TEST(clientCase, projection_query_tables) {
|
|||
createNewTable(pConn, i, 100000, 0, pstr);
|
||||
}
|
||||
}
|
||||
*/
|
||||
|
||||
pRes = taos_query(pConn, "select * from abc1.st2");
|
||||
if (taos_errno(pRes) != 0) {
|
||||
|
|
|
@ -596,10 +596,11 @@ static int32_t taosAddSystemCfg(SConfig *pCfg) {
|
|||
|
||||
if (cfgAddBool(pCfg, "ssd42", tsSSE42Enable, CFG_SCOPE_BOTH, CFG_DYN_NONE) != 0) return -1;
|
||||
if (cfgAddBool(pCfg, "avx", tsAVXEnable, CFG_SCOPE_BOTH, CFG_DYN_NONE) != 0) return -1;
|
||||
if (cfgAddBool(pCfg, "avx2", tsAVX2Enable, CFG_SCOPE_BOTH, CFG_DYN_NONE) != 0) return -1;
|
||||
if (cfgAddBool(pCfg, "fma", tsFMAEnable, CFG_SCOPE_BOTH, CFG_DYN_NONE) != 0) return -1;
|
||||
if (cfgAddBool(pCfg, "avx512", tsAVX512Enable, CFG_SCOPE_BOTH, CFG_DYN_NONE) != 0) return -1;
|
||||
if (cfgAddBool(pCfg, "avx2", tsAVX2Supported, CFG_SCOPE_BOTH, CFG_DYN_NONE) != 0) return -1;
|
||||
if (cfgAddBool(pCfg, "fma", tsFMASupported, CFG_SCOPE_BOTH, CFG_DYN_NONE) != 0) return -1;
|
||||
if (cfgAddBool(pCfg, "avx512", tsAVX512Supported, CFG_SCOPE_BOTH, CFG_DYN_NONE) != 0) return -1;
|
||||
if (cfgAddBool(pCfg, "simdEnable", tsSIMDEnable, CFG_SCOPE_BOTH, CFG_DYN_NONE) != 0) return -1;
|
||||
if (cfgAddBool(pCfg, "AVX512Enable", tsAVX512Enable, CFG_SCOPE_BOTH, CFG_DYN_NONE) != 0) return -1;
|
||||
if (cfgAddBool(pCfg, "tagFilterCache", tsTagFilterCache, CFG_SCOPE_BOTH, CFG_DYN_NONE) != 0) return -1;
|
||||
|
||||
if (cfgAddInt64(pCfg, "openMax", tsOpenMax, 0, INT64_MAX, CFG_SCOPE_BOTH, CFG_DYN_NONE) != 0) return -1;
|
||||
|
|
|
@ -370,7 +370,7 @@ static int32_t findFirstValPosition(const SColumnInfoData* pCol, int32_t start,
|
|||
static void handleInt8Col(const void* data, int32_t start, int32_t numOfRows, SMinmaxResInfo* pBuf, bool isMinFunc,
|
||||
bool signVal) {
|
||||
// AVX2 version to speedup the loop
|
||||
if (tsAVX2Enable && tsSIMDEnable) {
|
||||
if (tsAVX2Supported && tsSIMDEnable) {
|
||||
pBuf->v = i8VectorCmpAVX2(data, numOfRows, isMinFunc, signVal);
|
||||
} else {
|
||||
if (!pBuf->assign) {
|
||||
|
@ -404,7 +404,7 @@ static void handleInt8Col(const void* data, int32_t start, int32_t numOfRows, SM
|
|||
static void handleInt16Col(const void* data, int32_t start, int32_t numOfRows, SMinmaxResInfo* pBuf, bool isMinFunc,
|
||||
bool signVal) {
|
||||
// AVX2 version to speedup the loop
|
||||
if (tsAVX2Enable && tsSIMDEnable) {
|
||||
if (tsAVX2Supported && tsSIMDEnable) {
|
||||
pBuf->v = i16VectorCmpAVX2(data, numOfRows, isMinFunc, signVal);
|
||||
} else {
|
||||
if (!pBuf->assign) {
|
||||
|
@ -438,7 +438,7 @@ static void handleInt16Col(const void* data, int32_t start, int32_t numOfRows, S
|
|||
static void handleInt32Col(const void* data, int32_t start, int32_t numOfRows, SMinmaxResInfo* pBuf, bool isMinFunc,
|
||||
bool signVal) {
|
||||
// AVX2 version to speedup the loop
|
||||
if (tsAVX2Enable && tsSIMDEnable) {
|
||||
if (tsAVX2Supported && tsSIMDEnable) {
|
||||
pBuf->v = i32VectorCmpAVX2(data, numOfRows, isMinFunc, signVal);
|
||||
} else {
|
||||
if (!pBuf->assign) {
|
||||
|
|
|
@ -38,11 +38,12 @@ int64_t tsTotalMemoryKB = 0;
|
|||
char *tsProcPath = NULL;
|
||||
|
||||
char tsSIMDEnable = 0;
|
||||
char tsAVX512Enable = 0;
|
||||
char tsSSE42Enable = 0;
|
||||
char tsAVXEnable = 0;
|
||||
char tsAVX2Enable = 0;
|
||||
char tsFMAEnable = 0;
|
||||
char tsAVX512Enable = 0;
|
||||
char tsAVX2Supported = 0;
|
||||
char tsFMASupported = 0;
|
||||
char tsAVX512Supported = 0;
|
||||
|
||||
void osDefaultInit() {
|
||||
taosSeedRand(taosSafeRand());
|
||||
|
|
|
@ -250,7 +250,7 @@ void taosGetSystemInfo() {
|
|||
taosGetCpuCores(&tsNumOfCores, false);
|
||||
taosGetTotalMemory(&tsTotalMemoryKB);
|
||||
taosGetCpuUsage(NULL, NULL);
|
||||
taosGetCpuInstructions(&tsSSE42Enable, &tsAVXEnable, &tsAVX2Enable, &tsFMAEnable, &tsAVX512Enable);
|
||||
taosGetCpuInstructions(&tsSSE42Enable, &tsAVXEnable, &tsAVX2Supported, &tsFMASupported, &tsAVX512Supported);
|
||||
#endif
|
||||
}
|
||||
|
||||
|
|
|
@ -822,9 +822,9 @@ int32_t tsDecompressTimestampImp(const char *const input, const int32_t nelement
|
|||
memcpy(output, input + 1, nelements * longBytes);
|
||||
return nelements * longBytes;
|
||||
} else if (input[0] == 1) { // Decompress
|
||||
if (tsSIMDEnable && tsAVX512Enable) {
|
||||
if (tsSIMDEnable && tsAVX512Supported && tsAVX512Enable) {
|
||||
tsDecompressTimestampAvx512(input, nelements, output, false);
|
||||
} else if (tsSIMDEnable && tsAVX2Enable) {
|
||||
} else if (tsSIMDEnable && tsAVX2Supported) {
|
||||
tsDecompressTimestampAvx2(input, nelements, output, false);
|
||||
} else {
|
||||
int64_t *ostream = (int64_t *)output;
|
||||
|
@ -1198,9 +1198,9 @@ int32_t tsDecompressFloatImp(const char *const input, const int32_t nelements, c
|
|||
return nelements * FLOAT_BYTES;
|
||||
}
|
||||
|
||||
if (tsSIMDEnable && tsAVX2Enable) {
|
||||
if (tsSIMDEnable && tsAVX2Supported) {
|
||||
tsDecompressFloatImplAvx2(input, nelements, output);
|
||||
} else if (tsSIMDEnable && tsAVX512Enable) {
|
||||
} else if (tsSIMDEnable && tsAVX512Supported && tsAVX512Enable) {
|
||||
tsDecompressFloatImplAvx512(input, nelements, output);
|
||||
} else { // alternative implementation without SIMD instructions.
|
||||
tsDecompressFloatHelper(input, nelements, (float *)output);
|
||||
|
@ -2713,7 +2713,7 @@ int32_t tsDecompressBigint(void *pIn, int32_t nIn, int32_t nEle, void *pOut, int
|
|||
int8_t alvl = tsGetCompressL2Level(l2, lvl); \
|
||||
return compressL2Dict[l2].comprFn(pIn, nIn, pOut, nOut, type, alvl); \
|
||||
} else { \
|
||||
uTrace("dencode:%s, dcompress:%s, level:%d, type:%s", "disabled", compressL2Dict[l1].name, lvl, \
|
||||
uTrace("dencode:%s, decompress:%s, level:%d, type:%s", "disabled", compressL2Dict[l1].name, lvl, \
|
||||
tDataTypes[type].name); \
|
||||
return compressL2Dict[l2].decomprFn(pIn, nIn, pOut, nOut, type); \
|
||||
} \
|
||||
|
|
|
@ -74,12 +74,12 @@ int32_t tsDecompressIntImpl_Hw(const char *const input, const int32_t nelements,
|
|||
|
||||
int32_t batch = 0;
|
||||
int32_t remain = 0;
|
||||
if (tsSIMDEnable && tsAVX512Enable) {
|
||||
if (tsSIMDEnable && tsAVX512Supported && tsAVX512Enable) {
|
||||
#if __AVX512F__
|
||||
batch = num >> 3;
|
||||
remain = num & 0x07;
|
||||
#endif
|
||||
} else if (tsSIMDEnable && tsAVX2Enable) {
|
||||
} else if (tsSIMDEnable && tsAVX2Supported) {
|
||||
#if __AVX2__
|
||||
batch = num >> 2;
|
||||
remain = num & 0x03;
|
||||
|
@ -87,7 +87,7 @@ int32_t tsDecompressIntImpl_Hw(const char *const input, const int32_t nelements,
|
|||
}
|
||||
|
||||
if (selector == 0 || selector == 1) {
|
||||
if (tsSIMDEnable && tsAVX512Enable) {
|
||||
if (tsSIMDEnable && tsAVX512Supported && tsAVX512Enable) {
|
||||
#if __AVX512F__
|
||||
for (int32_t i = 0; i < batch; ++i) {
|
||||
__m512i prev = _mm512_set1_epi64(prevValue);
|
||||
|
@ -98,7 +98,7 @@ int32_t tsDecompressIntImpl_Hw(const char *const input, const int32_t nelements,
|
|||
p[_pos++] = prevValue;
|
||||
}
|
||||
#endif
|
||||
} else if (tsSIMDEnable && tsAVX2Enable) {
|
||||
} else if (tsSIMDEnable && tsAVX2Supported) {
|
||||
for (int32_t i = 0; i < batch; ++i) {
|
||||
__m256i prev = _mm256_set1_epi64x(prevValue);
|
||||
_mm256_storeu_si256((__m256i *)&p[_pos], prev);
|
||||
|
@ -116,7 +116,7 @@ int32_t tsDecompressIntImpl_Hw(const char *const input, const int32_t nelements,
|
|||
}
|
||||
}
|
||||
} else {
|
||||
if (tsSIMDEnable && tsAVX512Enable) {
|
||||
if (tsSIMDEnable && tsAVX512Supported && tsAVX512Enable) {
|
||||
#if __AVX512F__
|
||||
__m512i sum_mask1 = _mm512_set_epi64(6, 6, 4, 4, 2, 2, 0, 0);
|
||||
__m512i sum_mask2 = _mm512_set_epi64(5, 5, 5, 5, 1, 1, 1, 1);
|
||||
|
@ -182,7 +182,7 @@ int32_t tsDecompressIntImpl_Hw(const char *const input, const int32_t nelements,
|
|||
v += bit;
|
||||
}
|
||||
#endif
|
||||
} else if (tsSIMDEnable && tsAVX2Enable) {
|
||||
} else if (tsSIMDEnable && tsAVX2Supported) {
|
||||
__m256i base = _mm256_set1_epi64x(w);
|
||||
__m256i maskVal = _mm256_set1_epi64x(mask);
|
||||
|
||||
|
@ -331,16 +331,16 @@ int32_t tsDecompressFloatImplAvx2(const char *const input, const int32_t nelemen
|
|||
|
||||
int32_t tsDecompressTimestampAvx2(const char *const input, const int32_t nelements, char *const output,
|
||||
bool bigEndian) {
|
||||
#if 0
|
||||
int64_t *ostream = (int64_t *)output;
|
||||
int32_t ipos = 1, opos = 0;
|
||||
|
||||
#if __AVX2__
|
||||
__m128i prevVal = _mm_setzero_si128();
|
||||
__m128i prevDelta = _mm_setzero_si128();
|
||||
|
||||
#if __AVX2__
|
||||
int32_t batch = nelements >> 1;
|
||||
int32_t remainder = nelements & 0x01;
|
||||
__mmask16 mask2[16] = {0, 0x0001, 0x0003, 0x0007, 0x000f, 0x001f, 0x003f, 0x007f, 0x00ff};
|
||||
// __mmask16 mask2[16] = {0, 0x0001, 0x0003, 0x0007, 0x000f, 0x001f, 0x003f, 0x007f, 0x00ff};
|
||||
|
||||
int32_t i = 0;
|
||||
if (batch > 1) {
|
||||
|
@ -398,8 +398,6 @@ int32_t tsDecompressTimestampAvx2(const char *const input, const int32_t nelemen
|
|||
int8_t nbytes1 = flags & INT8MASK(4); // range of nbytes starts from 0 to 7
|
||||
int8_t nbytes2 = (flags >> 4) & INT8MASK(4);
|
||||
|
||||
// __m128i data1 = _mm_maskz_loadu_epi8(mask2[nbytes1], (const void*)(input + ipos));
|
||||
// __m128i data2 = _mm_maskz_loadu_epi8(mask2[nbytes2], (const void*)(input + ipos + nbytes1));
|
||||
__m128i data1;
|
||||
if (nbytes1 == 0) {
|
||||
data1 = _mm_setzero_si128();
|
||||
|
@ -471,7 +469,6 @@ int32_t tsDecompressTimestampAvx2(const char *const input, const int32_t nelemen
|
|||
ostream[opos++] = prevVal[1] + prevDeltaX;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
#endif
|
||||
return 0;
|
||||
}
|
||||
|
|
|
@ -4,9 +4,16 @@
|
|||
#include <random>
|
||||
#include "ttypes.h"
|
||||
|
||||
namespace {} // namespace
|
||||
namespace {
|
||||
|
||||
} // namespace
|
||||
|
||||
TEST(utilTest, decompress_ts_test) {
|
||||
{
|
||||
tsSIMDEnable = 1;
|
||||
tsAVX2Supported = 1;
|
||||
}
|
||||
|
||||
TEST(utilTest, decompress_test) {
|
||||
int64_t tsList[10] = {1700000000, 1700000100, 1700000200, 1700000300, 1700000400,
|
||||
1700000500, 1700000600, 1700000700, 1700000800, 1700000900};
|
||||
|
||||
|
@ -57,6 +64,49 @@ TEST(utilTest, decompress_test) {
|
|||
}
|
||||
}
|
||||
|
||||
TEST(utilTest, decompress_bigint_avx2_test) {
|
||||
{
|
||||
tsSIMDEnable = 1;
|
||||
tsAVX2Supported = 1;
|
||||
}
|
||||
|
||||
int64_t tsList[10] = {1700000000, 1700000100, 1700000200, 1700000300, 1700000400,
|
||||
1700000500, 1700000600, 1700000700, 1700000800, 1700000900};
|
||||
|
||||
char* pOutput[10 * sizeof(int64_t)] = {0};
|
||||
int32_t len = tsCompressBigint(tsList, sizeof(tsList), sizeof(tsList) / sizeof(tsList[0]), pOutput, 10,
|
||||
ONE_STAGE_COMP, NULL, 0);
|
||||
|
||||
char* decompOutput[10 * 8] = {0};
|
||||
|
||||
tsDecompressBigint(pOutput, len, 10, decompOutput, sizeof(int64_t) * 10, ONE_STAGE_COMP, NULL, 0);
|
||||
|
||||
for (int32_t i = 0; i < 10; ++i) {
|
||||
std::cout << ((int64_t*)decompOutput)[i] << std::endl;
|
||||
}
|
||||
}
|
||||
|
||||
TEST(utilTest, decompress_int_avx2_test) {
|
||||
{
|
||||
tsSIMDEnable = 1;
|
||||
tsAVX2Supported = 1;
|
||||
}
|
||||
|
||||
int32_t tsList[10] = {17000000, 17000001, 17000002, 17000003, 17000004,
|
||||
17000005, 17000006, 17000007, 17000008, 17000009};
|
||||
|
||||
char* pOutput[10 * sizeof(int32_t)] = {0};
|
||||
int32_t len =
|
||||
tsCompressInt(tsList, sizeof(tsList), sizeof(tsList) / sizeof(tsList[0]), pOutput, 10, ONE_STAGE_COMP, NULL, 0);
|
||||
|
||||
char* decompOutput[10 * 8] = {0};
|
||||
tsDecompressInt(pOutput, len, 10, decompOutput, sizeof(int32_t) * 10, ONE_STAGE_COMP, NULL, 0);
|
||||
|
||||
for (int32_t i = 0; i < 10; ++i) {
|
||||
std::cout << ((int32_t*)decompOutput)[i] << std::endl;
|
||||
}
|
||||
}
|
||||
|
||||
TEST(utilTest, decompress_perf_test) {
|
||||
int32_t num = 10000;
|
||||
|
||||
|
|
Loading…
Reference in New Issue