fix(tsdb): add SIMD support for timestamp decompression.

This commit is contained in:
Haojun Liao 2023-11-26 23:53:05 +08:00
parent 9194c0c0cd
commit 1dd5cd17a0
3 changed files with 100 additions and 53 deletions

View File

@ -181,8 +181,8 @@ ELSE ()
MESSAGE(STATUS "SIMD instructions (FMA/AVX/AVX2) is ACTIVATED")
IF (COMPILER_SUPPORT_AVX512F AND COMPILER_SUPPORT_AVX512BMI)
SET(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -mavx512f -mavx512vbmi")
SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mavx512f -mavx512vbmi")
SET(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -mavx512f -mavx512vbmi -mavx512vl")
SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mavx512f -mavx512vbmi -mavx512vl")
MESSAGE(STATUS "avx512 supported by gcc")
ENDIF()
ENDIF()

View File

@ -139,6 +139,7 @@ int32_t getWordLength(char type);
int32_t tsDecompressIntImpl_Hw(const char *const input, const int32_t nelements, char *const output, const char type);
int32_t tsDecompressFloatImplAvx512(const char *const input, const int32_t nelements, char *const output);
int32_t tsDecompressFloatImplAvx2(const char *const input, const int32_t nelements, char *const output);
int32_t tsDecompressTimestampAvx512(const char* const input, const int32_t nelements, char *const output, bool bigEndian);
/*************************************************************************
* STREAM COMPRESSION

View File

@ -247,75 +247,121 @@ int32_t tsDecompressFloatImplAvx2(const char *const input, const int32_t nelemen
return 0;
}
int32_t tsDecompressTimestampAvx2(const char* const input, const int32_t nelements, char *const output, bool bigEndian) {
int32_t tsDecompressTimestampAvx512(const char* const input, const int32_t nelements, char *const output, bool bigEndian) {
int64_t *ostream = (int64_t *)output;
int32_t ipos = 1, opos = 0;
int8_t nbytes = 0;
int64_t prevValue = 0;
int64_t prevDelta = 0;
int64_t deltaOfDelta = 0;
int32_t longBytes = LONG_BYTES;
__m128i prevVal = _mm_setzero_si128();
__m128i prevDelta = _mm_setzero_si128();
// _mm_maskz_loadu_epi8
#if __AVX512F__
// _mm_blendv_epi8
int32_t batch = nelements >> 4;
int32_t remainder = nelements & 0x03;
int32_t batch = nelements >> 1;
int32_t remainder = nelements & 0x01;
for(int32_t i = 0; i < batch; ++i) {
int32_t i = 0;
if (batch > 1) {
// first loop
uint8_t flags = input[ipos++];
// Decode dd1
uint64_t dd1 = 0;
nbytes = flags & INT8MASK(4); // range of nbytes starts from 0 to 7
// __m128i mask = {};//[0], []
int8_t nbytes1 = flags & INT8MASK(4); // range of nbytes starts from 0 to 7
int8_t nbytes2 = (flags >> 4) & INT8MASK(4);
if (nbytes == 0) {
deltaOfDelta = 0;
} else {
if (bigEndian) {
memcpy(((char *)(&dd1)) + longBytes - nbytes, input + ipos, nbytes);
} else {
memcpy(&dd1, input + ipos, nbytes);
}
deltaOfDelta = ZIGZAG_DECODE(int64_t, dd1);
}
__mmask16 mask2[16] = {0, 0x0001, 0x0003, 0x0007, 0x000f, 0x001f, 0x003f, 0x007f, 0x00ff};
__m128i data1 = _mm_maskz_loadu_epi8(mask2[nbytes1], (const void*)(input + ipos));
__m128i data2 = _mm_maskz_loadu_epi8(mask2[nbytes2], (const void*)(input + ipos + nbytes1));
data2 = _mm_broadcastq_epi64(data2);
ipos += nbytes;
prevDelta += deltaOfDelta;
prevValue += prevDelta;
ostream[opos++] = prevValue;
__m128i zzVal = _mm_blend_epi32(data2, data1, 0x03);
// Decode dd2
uint64_t dd2 = 0;
nbytes = (flags >> 4) & INT8MASK(4);
if (nbytes == 0) {
deltaOfDelta = 0;
} else {
if (bigEndian) {
memcpy(((char *)(&dd2)) + longBytes - nbytes, input + ipos, nbytes);
} else {
memcpy(&dd2, input + ipos, nbytes);
}
// zigzag_decoding
deltaOfDelta = ZIGZAG_DECODE(int64_t, dd2);
}
// ZIGZAG_DECODE(T, v) (((v) >> 1) ^ -((T)((v)&1)))
__m128i signmask = _mm_and_si128(_mm_set1_epi64x(1), zzVal);
signmask = _mm_sub_epi64(_mm_setzero_si128(), signmask);
ipos += nbytes;
prevDelta += deltaOfDelta;
prevValue += prevDelta;
ostream[opos++] = prevValue;
// get two zigzag values here
__m128i deltaOfDelta = _mm_xor_si128(_mm_srli_epi64(zzVal, 1), signmask);
if (opos == nelements) {
return nelements * longBytes;
}
__m128i deltaCurrent = _mm_add_epi64(deltaOfDelta, prevDelta);
deltaCurrent = _mm_add_epi64(_mm_slli_si128(deltaCurrent, 8), deltaCurrent);
__m128i val = _mm_add_epi64(deltaCurrent, prevVal);
_mm_storeu_si128((__m128i *)&ostream[opos], val);
// keep the previous value
prevVal = _mm_set1_epi64x(val[1]);
// keep the previous delta of delta, for the first item
prevDelta = _mm_set1_epi64x(deltaOfDelta[1]);
opos += 2;
ipos += nbytes1 + nbytes2;
i += 1;
}
// the remain
for(; i < batch; ++i) {
uint8_t flags = input[ipos++];
int8_t nbytes1 = flags & INT8MASK(4); // range of nbytes starts from 0 to 7
int8_t nbytes2 = (flags >> 4) & INT8MASK(4);
__mmask16 mask2[16] = {0, 0x0001, 0x0003, 0x0007, 0x000f, 0x001f, 0x003f, 0x007f, 0x00ff};
__m128i data1 = _mm_maskz_loadu_epi8(mask2[nbytes1], (const void*)(input + ipos));
__m128i data2 = _mm_maskz_loadu_epi8(mask2[nbytes2], (const void*)(input + ipos + nbytes1));
data2 = _mm_broadcastq_epi64(data2);
__m128i zzVal = _mm_blend_epi32(data2, data1, 0x03);
// ZIGZAG_DECODE(T, v) (((v) >> 1) ^ -((T)((v)&1)))
__m128i signmask = _mm_and_si128(_mm_set1_epi64x(1), zzVal);
signmask = _mm_sub_epi64(_mm_setzero_si128(), signmask);
// get two zigzag values here
__m128i deltaOfDelta = _mm_xor_si128(_mm_srli_epi64(zzVal, 1), signmask);
__m128i deltaCurrent = _mm_add_epi64(deltaOfDelta, prevDelta);
deltaCurrent = _mm_add_epi64(_mm_slli_si128(deltaCurrent, 8), deltaCurrent);
__m128i val = _mm_add_epi64(deltaCurrent, prevVal);
_mm_storeu_si128((__m128i *)&ostream[opos], val);
// keep the previous value
prevVal = _mm_set1_epi64x(val[1]);
// keep the previous delta of delta
__m128i delta = _mm_add_epi64(_mm_slli_si128(deltaOfDelta, 8), deltaOfDelta);
prevDelta = _mm_set1_epi64x(_mm_add_epi64(delta, prevDelta)[1]);
opos += 2;
ipos += nbytes1 + nbytes2;
}
if (remainder > 0) {
uint64_t dd = 0;
uint8_t flags = input[ipos++];
int32_t nbytes = flags & INT8MASK(4);
int64_t deltaOfDelta = 0;
if (nbytes == 0) {
deltaOfDelta = 0;
} else {
// if (is_bigendian()) {
// memcpy(((char *)(&dd1)) + longBytes - nbytes, input + ipos, nbytes);
// } else {
memcpy(&dd, input + ipos, nbytes);
// }
deltaOfDelta = ZIGZAG_DECODE(int64_t, dd);
}
ipos += nbytes;
if (opos == 0) {
ostream[opos++] = deltaOfDelta;
} else {
int64_t prevV = prevVal[1];
int64_t prevDeltaX = deltaOfDelta + prevDelta[1];
ostream[opos++] = prevV + prevDeltaX;
}
}
#endif