fix(util): uncomment the timestamp decode function with AVX2 instructions, and do some internal refactor.

This commit is contained in:
Haojun Liao 2024-06-19 15:06:09 +08:00
parent 6e2a5b9292
commit 04281bcd07
10 changed files with 92 additions and 45 deletions

View File

@ -180,18 +180,20 @@ ELSE ()
SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mavx2")
ENDIF()
MESSAGE(STATUS "SIMD instructions (FMA/AVX/AVX2) is ACTIVATED")
ENDIF()
IF ("${SIMD_AVX512_SUPPORT}" MATCHES "true")
IF (COMPILER_SUPPORT_AVX512F AND COMPILER_SUPPORT_AVX512BMI)
SET(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -mavx512f -mavx512vbmi")
SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mavx512f -mavx512vbmi")
MESSAGE(STATUS "avx512f/avx512bmi supported by compiler")
MESSAGE(STATUS "avx512f/avx512bmi enabled by compiler")
ENDIF()
# IF (COMPILER_SUPPORT_AVX512VL)
# SET(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -mavx512vl")
# SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mavx512vl")
# MESSAGE(STATUS "avx512vl supported by compiler")
# ENDIF()
IF (COMPILER_SUPPORT_AVX512VL)
SET(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -mavx512vl")
SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mavx512vl")
MESSAGE(STATUS "avx512vl enabled by compiler")
ENDIF()
ENDIF()
# build mode

View File

@ -39,8 +39,9 @@ extern char *tsProcPath;
extern char tsSIMDEnable;
extern char tsSSE42Enable;
extern char tsAVXEnable;
extern char tsAVX2Enable;
extern char tsFMAEnable;
extern char tsAVX2Supported;
extern char tsFMASupported;
extern char tsAVX512Supported;
extern char tsAVX512Enable;
extern char tsTagFilterCache;

View File

@ -828,12 +828,8 @@ TEST(clientCase, projection_query_tables) {
// printf("error in create db, reason:%s\n", taos_errstr(pRes));
// }
// taos_free_result(pRes);
/*
TAOS_RES* pRes = taos_query(pConn, "select last(ts), ts from cache_1.t1");
// pRes = taos_query(pConn, "select last(ts), ts from cache_1.no_pk_t1");
if (taos_errno(pRes) != 0) {
printf("failed to create table tu, reason:%s\n", taos_errstr(pRes));
}
pRes= taos_query(pConn, "use abc1");
taos_free_result(pRes);
pRes = taos_query(pConn, "create table tu using st2 tags(2)");
@ -868,7 +864,6 @@ TEST(clientCase, projection_query_tables) {
createNewTable(pConn, i, 100000, 0, pstr);
}
}
*/
pRes = taos_query(pConn, "select * from abc1.st2");
if (taos_errno(pRes) != 0) {

View File

@ -596,10 +596,11 @@ static int32_t taosAddSystemCfg(SConfig *pCfg) {
if (cfgAddBool(pCfg, "ssd42", tsSSE42Enable, CFG_SCOPE_BOTH, CFG_DYN_NONE) != 0) return -1;
if (cfgAddBool(pCfg, "avx", tsAVXEnable, CFG_SCOPE_BOTH, CFG_DYN_NONE) != 0) return -1;
if (cfgAddBool(pCfg, "avx2", tsAVX2Enable, CFG_SCOPE_BOTH, CFG_DYN_NONE) != 0) return -1;
if (cfgAddBool(pCfg, "fma", tsFMAEnable, CFG_SCOPE_BOTH, CFG_DYN_NONE) != 0) return -1;
if (cfgAddBool(pCfg, "avx512", tsAVX512Enable, CFG_SCOPE_BOTH, CFG_DYN_NONE) != 0) return -1;
if (cfgAddBool(pCfg, "avx2", tsAVX2Supported, CFG_SCOPE_BOTH, CFG_DYN_NONE) != 0) return -1;
if (cfgAddBool(pCfg, "fma", tsFMASupported, CFG_SCOPE_BOTH, CFG_DYN_NONE) != 0) return -1;
if (cfgAddBool(pCfg, "avx512", tsAVX512Supported, CFG_SCOPE_BOTH, CFG_DYN_NONE) != 0) return -1;
if (cfgAddBool(pCfg, "simdEnable", tsSIMDEnable, CFG_SCOPE_BOTH, CFG_DYN_NONE) != 0) return -1;
if (cfgAddBool(pCfg, "AVX512Enable", tsAVX512Enable, CFG_SCOPE_BOTH, CFG_DYN_NONE) != 0) return -1;
if (cfgAddBool(pCfg, "tagFilterCache", tsTagFilterCache, CFG_SCOPE_BOTH, CFG_DYN_NONE) != 0) return -1;
if (cfgAddInt64(pCfg, "openMax", tsOpenMax, 0, INT64_MAX, CFG_SCOPE_BOTH, CFG_DYN_NONE) != 0) return -1;

View File

@ -370,7 +370,7 @@ static int32_t findFirstValPosition(const SColumnInfoData* pCol, int32_t start,
static void handleInt8Col(const void* data, int32_t start, int32_t numOfRows, SMinmaxResInfo* pBuf, bool isMinFunc,
bool signVal) {
// AVX2 version to speedup the loop
if (tsAVX2Enable && tsSIMDEnable) {
if (tsAVX2Supported && tsSIMDEnable) {
pBuf->v = i8VectorCmpAVX2(data, numOfRows, isMinFunc, signVal);
} else {
if (!pBuf->assign) {
@ -404,7 +404,7 @@ static void handleInt8Col(const void* data, int32_t start, int32_t numOfRows, SM
static void handleInt16Col(const void* data, int32_t start, int32_t numOfRows, SMinmaxResInfo* pBuf, bool isMinFunc,
bool signVal) {
// AVX2 version to speedup the loop
if (tsAVX2Enable && tsSIMDEnable) {
if (tsAVX2Supported && tsSIMDEnable) {
pBuf->v = i16VectorCmpAVX2(data, numOfRows, isMinFunc, signVal);
} else {
if (!pBuf->assign) {
@ -438,7 +438,7 @@ static void handleInt16Col(const void* data, int32_t start, int32_t numOfRows, S
static void handleInt32Col(const void* data, int32_t start, int32_t numOfRows, SMinmaxResInfo* pBuf, bool isMinFunc,
bool signVal) {
// AVX2 version to speedup the loop
if (tsAVX2Enable && tsSIMDEnable) {
if (tsAVX2Supported && tsSIMDEnable) {
pBuf->v = i32VectorCmpAVX2(data, numOfRows, isMinFunc, signVal);
} else {
if (!pBuf->assign) {

View File

@ -38,11 +38,12 @@ int64_t tsTotalMemoryKB = 0;
char *tsProcPath = NULL;
char tsSIMDEnable = 0;
char tsAVX512Enable = 0;
char tsSSE42Enable = 0;
char tsAVXEnable = 0;
char tsAVX2Enable = 0;
char tsFMAEnable = 0;
char tsAVX512Enable = 0;
char tsAVX2Supported = 0;
char tsFMASupported = 0;
char tsAVX512Supported = 0;
void osDefaultInit() {
taosSeedRand(taosSafeRand());

View File

@ -250,7 +250,7 @@ void taosGetSystemInfo() {
taosGetCpuCores(&tsNumOfCores, false);
taosGetTotalMemory(&tsTotalMemoryKB);
taosGetCpuUsage(NULL, NULL);
taosGetCpuInstructions(&tsSSE42Enable, &tsAVXEnable, &tsAVX2Enable, &tsFMAEnable, &tsAVX512Enable);
taosGetCpuInstructions(&tsSSE42Enable, &tsAVXEnable, &tsAVX2Supported, &tsFMASupported, &tsAVX512Supported);
#endif
}

View File

@ -822,9 +822,9 @@ int32_t tsDecompressTimestampImp(const char *const input, const int32_t nelement
memcpy(output, input + 1, nelements * longBytes);
return nelements * longBytes;
} else if (input[0] == 1) { // Decompress
if (tsSIMDEnable && tsAVX512Enable) {
if (tsSIMDEnable && tsAVX512Supported && tsAVX512Enable) {
tsDecompressTimestampAvx512(input, nelements, output, false);
} else if (tsSIMDEnable && tsAVX2Enable) {
} else if (tsSIMDEnable && tsAVX2Supported) {
tsDecompressTimestampAvx2(input, nelements, output, false);
} else {
int64_t *ostream = (int64_t *)output;
@ -1198,9 +1198,9 @@ int32_t tsDecompressFloatImp(const char *const input, const int32_t nelements, c
return nelements * FLOAT_BYTES;
}
if (tsSIMDEnable && tsAVX2Enable) {
if (tsSIMDEnable && tsAVX2Supported) {
tsDecompressFloatImplAvx2(input, nelements, output);
} else if (tsSIMDEnable && tsAVX512Enable) {
} else if (tsSIMDEnable && tsAVX512Supported && tsAVX512Enable) {
tsDecompressFloatImplAvx512(input, nelements, output);
} else { // alternative implementation without SIMD instructions.
tsDecompressFloatHelper(input, nelements, (float *)output);
@ -2713,7 +2713,7 @@ int32_t tsDecompressBigint(void *pIn, int32_t nIn, int32_t nEle, void *pOut, int
int8_t alvl = tsGetCompressL2Level(l2, lvl); \
return compressL2Dict[l2].comprFn(pIn, nIn, pOut, nOut, type, alvl); \
} else { \
uTrace("dencode:%s, dcompress:%s, level:%d, type:%s", "disabled", compressL2Dict[l1].name, lvl, \
uTrace("dencode:%s, decompress:%s, level:%d, type:%s", "disabled", compressL2Dict[l1].name, lvl, \
tDataTypes[type].name); \
return compressL2Dict[l2].decomprFn(pIn, nIn, pOut, nOut, type); \
} \

View File

@ -74,12 +74,12 @@ int32_t tsDecompressIntImpl_Hw(const char *const input, const int32_t nelements,
int32_t batch = 0;
int32_t remain = 0;
if (tsSIMDEnable && tsAVX512Enable) {
if (tsSIMDEnable && tsAVX512Supported && tsAVX512Enable) {
#if __AVX512F__
batch = num >> 3;
remain = num & 0x07;
#endif
} else if (tsSIMDEnable && tsAVX2Enable) {
} else if (tsSIMDEnable && tsAVX2Supported) {
#if __AVX2__
batch = num >> 2;
remain = num & 0x03;
@ -87,7 +87,7 @@ int32_t tsDecompressIntImpl_Hw(const char *const input, const int32_t nelements,
}
if (selector == 0 || selector == 1) {
if (tsSIMDEnable && tsAVX512Enable) {
if (tsSIMDEnable && tsAVX512Supported && tsAVX512Enable) {
#if __AVX512F__
for (int32_t i = 0; i < batch; ++i) {
__m512i prev = _mm512_set1_epi64(prevValue);
@ -98,7 +98,7 @@ int32_t tsDecompressIntImpl_Hw(const char *const input, const int32_t nelements,
p[_pos++] = prevValue;
}
#endif
} else if (tsSIMDEnable && tsAVX2Enable) {
} else if (tsSIMDEnable && tsAVX2Supported) {
for (int32_t i = 0; i < batch; ++i) {
__m256i prev = _mm256_set1_epi64x(prevValue);
_mm256_storeu_si256((__m256i *)&p[_pos], prev);
@ -116,7 +116,7 @@ int32_t tsDecompressIntImpl_Hw(const char *const input, const int32_t nelements,
}
}
} else {
if (tsSIMDEnable && tsAVX512Enable) {
if (tsSIMDEnable && tsAVX512Supported && tsAVX512Enable) {
#if __AVX512F__
__m512i sum_mask1 = _mm512_set_epi64(6, 6, 4, 4, 2, 2, 0, 0);
__m512i sum_mask2 = _mm512_set_epi64(5, 5, 5, 5, 1, 1, 1, 1);
@ -182,7 +182,7 @@ int32_t tsDecompressIntImpl_Hw(const char *const input, const int32_t nelements,
v += bit;
}
#endif
} else if (tsSIMDEnable && tsAVX2Enable) {
} else if (tsSIMDEnable && tsAVX2Supported) {
__m256i base = _mm256_set1_epi64x(w);
__m256i maskVal = _mm256_set1_epi64x(mask);
@ -331,16 +331,16 @@ int32_t tsDecompressFloatImplAvx2(const char *const input, const int32_t nelemen
int32_t tsDecompressTimestampAvx2(const char *const input, const int32_t nelements, char *const output,
bool bigEndian) {
#if 0
int64_t *ostream = (int64_t *)output;
int32_t ipos = 1, opos = 0;
#if __AVX2__
__m128i prevVal = _mm_setzero_si128();
__m128i prevDelta = _mm_setzero_si128();
#if __AVX2__
int32_t batch = nelements >> 1;
int32_t remainder = nelements & 0x01;
__mmask16 mask2[16] = {0, 0x0001, 0x0003, 0x0007, 0x000f, 0x001f, 0x003f, 0x007f, 0x00ff};
// __mmask16 mask2[16] = {0, 0x0001, 0x0003, 0x0007, 0x000f, 0x001f, 0x003f, 0x007f, 0x00ff};
int32_t i = 0;
if (batch > 1) {
@ -398,8 +398,6 @@ int32_t tsDecompressTimestampAvx2(const char *const input, const int32_t nelemen
int8_t nbytes1 = flags & INT8MASK(4); // range of nbytes starts from 0 to 7
int8_t nbytes2 = (flags >> 4) & INT8MASK(4);
// __m128i data1 = _mm_maskz_loadu_epi8(mask2[nbytes1], (const void*)(input + ipos));
// __m128i data2 = _mm_maskz_loadu_epi8(mask2[nbytes2], (const void*)(input + ipos + nbytes1));
__m128i data1;
if (nbytes1 == 0) {
data1 = _mm_setzero_si128();
@ -471,7 +469,6 @@ int32_t tsDecompressTimestampAvx2(const char *const input, const int32_t nelemen
ostream[opos++] = prevVal[1] + prevDeltaX;
}
}
#endif
#endif
return 0;
}

View File

@ -4,9 +4,16 @@
#include <random>
#include "ttypes.h"
namespace {} // namespace
namespace {
} // namespace
TEST(utilTest, decompress_ts_test) {
{
tsSIMDEnable = 1;
tsAVX2Supported = 1;
}
TEST(utilTest, decompress_test) {
int64_t tsList[10] = {1700000000, 1700000100, 1700000200, 1700000300, 1700000400,
1700000500, 1700000600, 1700000700, 1700000800, 1700000900};
@ -57,6 +64,49 @@ TEST(utilTest, decompress_test) {
}
}
TEST(utilTest, decompress_bigint_avx2_test) {
{
tsSIMDEnable = 1;
tsAVX2Supported = 1;
}
int64_t tsList[10] = {1700000000, 1700000100, 1700000200, 1700000300, 1700000400,
1700000500, 1700000600, 1700000700, 1700000800, 1700000900};
char* pOutput[10 * sizeof(int64_t)] = {0};
int32_t len = tsCompressBigint(tsList, sizeof(tsList), sizeof(tsList) / sizeof(tsList[0]), pOutput, 10,
ONE_STAGE_COMP, NULL, 0);
char* decompOutput[10 * 8] = {0};
tsDecompressBigint(pOutput, len, 10, decompOutput, sizeof(int64_t) * 10, ONE_STAGE_COMP, NULL, 0);
for (int32_t i = 0; i < 10; ++i) {
std::cout << ((int64_t*)decompOutput)[i] << std::endl;
}
}
TEST(utilTest, decompress_int_avx2_test) {
{
tsSIMDEnable = 1;
tsAVX2Supported = 1;
}
int32_t tsList[10] = {17000000, 17000001, 17000002, 17000003, 17000004,
17000005, 17000006, 17000007, 17000008, 17000009};
char* pOutput[10 * sizeof(int32_t)] = {0};
int32_t len =
tsCompressInt(tsList, sizeof(tsList), sizeof(tsList) / sizeof(tsList[0]), pOutput, 10, ONE_STAGE_COMP, NULL, 0);
char* decompOutput[10 * 8] = {0};
tsDecompressInt(pOutput, len, 10, decompOutput, sizeof(int32_t) * 10, ONE_STAGE_COMP, NULL, 0);
for (int32_t i = 0; i < 10; ++i) {
std::cout << ((int32_t*)decompOutput)[i] << std::endl;
}
}
TEST(utilTest, decompress_perf_test) {
int32_t num = 10000;