fix(tsdb): add SIMD support for timestamp decompression.
This commit is contained in:
parent
9194c0c0cd
commit
1dd5cd17a0
|
@ -181,8 +181,8 @@ ELSE ()
|
|||
MESSAGE(STATUS "SIMD instructions (FMA/AVX/AVX2) is ACTIVATED")
|
||||
|
||||
IF (COMPILER_SUPPORT_AVX512F AND COMPILER_SUPPORT_AVX512BMI)
|
||||
SET(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -mavx512f -mavx512vbmi")
|
||||
SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mavx512f -mavx512vbmi")
|
||||
SET(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -mavx512f -mavx512vbmi -mavx512vl")
|
||||
SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mavx512f -mavx512vbmi -mavx512vl")
|
||||
MESSAGE(STATUS "avx512 supported by gcc")
|
||||
ENDIF()
|
||||
ENDIF()
|
||||
|
|
|
@ -139,6 +139,7 @@ int32_t getWordLength(char type);
|
|||
int32_t tsDecompressIntImpl_Hw(const char *const input, const int32_t nelements, char *const output, const char type);
|
||||
int32_t tsDecompressFloatImplAvx512(const char *const input, const int32_t nelements, char *const output);
|
||||
int32_t tsDecompressFloatImplAvx2(const char *const input, const int32_t nelements, char *const output);
|
||||
int32_t tsDecompressTimestampAvx512(const char* const input, const int32_t nelements, char *const output, bool bigEndian);
|
||||
|
||||
/*************************************************************************
|
||||
* STREAM COMPRESSION
|
||||
|
|
|
@ -247,75 +247,121 @@ int32_t tsDecompressFloatImplAvx2(const char *const input, const int32_t nelemen
|
|||
return 0;
|
||||
}
|
||||
|
||||
int32_t tsDecompressTimestampAvx2(const char* const input, const int32_t nelements, char *const output, bool bigEndian) {
|
||||
int32_t tsDecompressTimestampAvx512(const char* const input, const int32_t nelements, char *const output, bool bigEndian) {
|
||||
int64_t *ostream = (int64_t *)output;
|
||||
int32_t ipos = 1, opos = 0;
|
||||
int8_t nbytes = 0;
|
||||
|
||||
int64_t prevValue = 0;
|
||||
int64_t prevDelta = 0;
|
||||
|
||||
int64_t deltaOfDelta = 0;
|
||||
int32_t longBytes = LONG_BYTES;
|
||||
__m128i prevVal = _mm_setzero_si128();
|
||||
__m128i prevDelta = _mm_setzero_si128();
|
||||
|
||||
// _mm_maskz_loadu_epi8
|
||||
#if __AVX512F__
|
||||
|
||||
// _mm_blendv_epi8
|
||||
int32_t batch = nelements >> 4;
|
||||
int32_t remainder = nelements & 0x03;
|
||||
int32_t batch = nelements >> 1;
|
||||
int32_t remainder = nelements & 0x01;
|
||||
|
||||
for(int32_t i = 0; i < batch; ++i) {
|
||||
int32_t i = 0;
|
||||
if (batch > 1) {
|
||||
// first loop
|
||||
uint8_t flags = input[ipos++];
|
||||
|
||||
// Decode dd1
|
||||
uint64_t dd1 = 0;
|
||||
nbytes = flags & INT8MASK(4); // range of nbytes starts from 0 to 7
|
||||
// __m128i mask = {};//[0], []
|
||||
int8_t nbytes1 = flags & INT8MASK(4); // range of nbytes starts from 0 to 7
|
||||
int8_t nbytes2 = (flags >> 4) & INT8MASK(4);
|
||||
|
||||
if (nbytes == 0) {
|
||||
deltaOfDelta = 0;
|
||||
} else {
|
||||
if (bigEndian) {
|
||||
memcpy(((char *)(&dd1)) + longBytes - nbytes, input + ipos, nbytes);
|
||||
} else {
|
||||
memcpy(&dd1, input + ipos, nbytes);
|
||||
}
|
||||
deltaOfDelta = ZIGZAG_DECODE(int64_t, dd1);
|
||||
__mmask16 mask2[16] = {0, 0x0001, 0x0003, 0x0007, 0x000f, 0x001f, 0x003f, 0x007f, 0x00ff};
|
||||
__m128i data1 = _mm_maskz_loadu_epi8(mask2[nbytes1], (const void*)(input + ipos));
|
||||
__m128i data2 = _mm_maskz_loadu_epi8(mask2[nbytes2], (const void*)(input + ipos + nbytes1));
|
||||
data2 = _mm_broadcastq_epi64(data2);
|
||||
|
||||
__m128i zzVal = _mm_blend_epi32(data2, data1, 0x03);
|
||||
|
||||
// ZIGZAG_DECODE(T, v) (((v) >> 1) ^ -((T)((v)&1)))
|
||||
__m128i signmask = _mm_and_si128(_mm_set1_epi64x(1), zzVal);
|
||||
signmask = _mm_sub_epi64(_mm_setzero_si128(), signmask);
|
||||
|
||||
// get two zigzag values here
|
||||
__m128i deltaOfDelta = _mm_xor_si128(_mm_srli_epi64(zzVal, 1), signmask);
|
||||
|
||||
__m128i deltaCurrent = _mm_add_epi64(deltaOfDelta, prevDelta);
|
||||
deltaCurrent = _mm_add_epi64(_mm_slli_si128(deltaCurrent, 8), deltaCurrent);
|
||||
|
||||
__m128i val = _mm_add_epi64(deltaCurrent, prevVal);
|
||||
_mm_storeu_si128((__m128i *)&ostream[opos], val);
|
||||
|
||||
// keep the previous value
|
||||
prevVal = _mm_set1_epi64x(val[1]);
|
||||
|
||||
// keep the previous delta of delta, for the first item
|
||||
prevDelta = _mm_set1_epi64x(deltaOfDelta[1]);
|
||||
|
||||
opos += 2;
|
||||
ipos += nbytes1 + nbytes2;
|
||||
i += 1;
|
||||
}
|
||||
|
||||
ipos += nbytes;
|
||||
prevDelta += deltaOfDelta;
|
||||
prevValue += prevDelta;
|
||||
ostream[opos++] = prevValue;
|
||||
// process the remaining batches
|
||||
for(; i < batch; ++i) {
|
||||
uint8_t flags = input[ipos++];
|
||||
|
||||
// Decode dd2
|
||||
uint64_t dd2 = 0;
|
||||
nbytes = (flags >> 4) & INT8MASK(4);
|
||||
if (nbytes == 0) {
|
||||
deltaOfDelta = 0;
|
||||
} else {
|
||||
if (bigEndian) {
|
||||
memcpy(((char *)(&dd2)) + longBytes - nbytes, input + ipos, nbytes);
|
||||
} else {
|
||||
memcpy(&dd2, input + ipos, nbytes);
|
||||
}
|
||||
// zigzag_decoding
|
||||
deltaOfDelta = ZIGZAG_DECODE(int64_t, dd2);
|
||||
}
|
||||
int8_t nbytes1 = flags & INT8MASK(4); // range of nbytes starts from 0 to 7
|
||||
int8_t nbytes2 = (flags >> 4) & INT8MASK(4);
|
||||
|
||||
ipos += nbytes;
|
||||
prevDelta += deltaOfDelta;
|
||||
prevValue += prevDelta;
|
||||
ostream[opos++] = prevValue;
|
||||
__mmask16 mask2[16] = {0, 0x0001, 0x0003, 0x0007, 0x000f, 0x001f, 0x003f, 0x007f, 0x00ff};
|
||||
__m128i data1 = _mm_maskz_loadu_epi8(mask2[nbytes1], (const void*)(input + ipos));
|
||||
__m128i data2 = _mm_maskz_loadu_epi8(mask2[nbytes2], (const void*)(input + ipos + nbytes1));
|
||||
data2 = _mm_broadcastq_epi64(data2);
|
||||
|
||||
if (opos == nelements) {
|
||||
return nelements * longBytes;
|
||||
}
|
||||
__m128i zzVal = _mm_blend_epi32(data2, data1, 0x03);
|
||||
|
||||
// ZIGZAG_DECODE(T, v) (((v) >> 1) ^ -((T)((v)&1)))
|
||||
__m128i signmask = _mm_and_si128(_mm_set1_epi64x(1), zzVal);
|
||||
signmask = _mm_sub_epi64(_mm_setzero_si128(), signmask);
|
||||
|
||||
// get two zigzag values here
|
||||
__m128i deltaOfDelta = _mm_xor_si128(_mm_srli_epi64(zzVal, 1), signmask);
|
||||
|
||||
__m128i deltaCurrent = _mm_add_epi64(deltaOfDelta, prevDelta);
|
||||
deltaCurrent = _mm_add_epi64(_mm_slli_si128(deltaCurrent, 8), deltaCurrent);
|
||||
|
||||
__m128i val = _mm_add_epi64(deltaCurrent, prevVal);
|
||||
_mm_storeu_si128((__m128i *)&ostream[opos], val);
|
||||
|
||||
// keep the previous value
|
||||
prevVal = _mm_set1_epi64x(val[1]);
|
||||
|
||||
// keep the previous delta of delta
|
||||
__m128i delta = _mm_add_epi64(_mm_slli_si128(deltaOfDelta, 8), deltaOfDelta);
|
||||
prevDelta = _mm_set1_epi64x(_mm_add_epi64(delta, prevDelta)[1]);
|
||||
|
||||
opos += 2;
|
||||
ipos += nbytes1 + nbytes2;
|
||||
}
|
||||
|
||||
if (remainder > 0) {
|
||||
uint64_t dd = 0;
|
||||
uint8_t flags = input[ipos++];
|
||||
|
||||
int32_t nbytes = flags & INT8MASK(4);
|
||||
int64_t deltaOfDelta = 0;
|
||||
if (nbytes == 0) {
|
||||
deltaOfDelta = 0;
|
||||
} else {
|
||||
// if (is_bigendian()) {
|
||||
// memcpy(((char *)(&dd1)) + longBytes - nbytes, input + ipos, nbytes);
|
||||
// } else {
|
||||
memcpy(&dd, input + ipos, nbytes);
|
||||
// }
|
||||
deltaOfDelta = ZIGZAG_DECODE(int64_t, dd);
|
||||
}
|
||||
|
||||
ipos += nbytes;
|
||||
if (opos == 0) {
|
||||
ostream[opos++] = deltaOfDelta;
|
||||
} else {
|
||||
int64_t prevV = prevVal[1];
|
||||
|
||||
int64_t prevDeltaX = deltaOfDelta + prevDelta[1];
|
||||
ostream[opos++] = prevV + prevDeltaX;
|
||||
}
|
||||
}
|
||||
|
||||
#endif
|
||||
|
|
Loading…
Reference in New Issue