refactor: add avx support zigzag decode.
This commit is contained in:
parent
972f9b6948
commit
f90fa07ea9
|
@ -47,6 +47,7 @@ int32_t taosByteArrayToHexStr(char bytes[], int32_t len, char hexstr[]);
|
||||||
int32_t taosHexStrToByteArray(char hexstr[], char bytes[]);
|
int32_t taosHexStrToByteArray(char hexstr[], char bytes[]);
|
||||||
|
|
||||||
int32_t tintToHex(uint64_t val, char hex[]);
|
int32_t tintToHex(uint64_t val, char hex[]);
|
||||||
|
int32_t tintToStr(uint64_t val, size_t radix, char str[]);
|
||||||
|
|
||||||
char *taosIpStr(uint32_t ipInt);
|
char *taosIpStr(uint32_t ipInt);
|
||||||
uint32_t ip2uint(const char *const ip_addr);
|
uint32_t ip2uint(const char *const ip_addr);
|
||||||
|
|
|
@ -235,6 +235,7 @@ void tsdbHeadFileName(STsdb *pTsdb, SDiskID did, int32_t fid, SHeadFile *pHeadF,
|
||||||
void tsdbDataFileName(STsdb *pTsdb, SDiskID did, int32_t fid, SDataFile *pDataF, char fname[]);
|
void tsdbDataFileName(STsdb *pTsdb, SDiskID did, int32_t fid, SDataFile *pDataF, char fname[]);
|
||||||
void tsdbSttFileName(STsdb *pTsdb, SDiskID did, int32_t fid, SSttFile *pSttF, char fname[]);
|
void tsdbSttFileName(STsdb *pTsdb, SDiskID did, int32_t fid, SSttFile *pSttF, char fname[]);
|
||||||
void tsdbSmaFileName(STsdb *pTsdb, SDiskID did, int32_t fid, SSmaFile *pSmaF, char fname[]);
|
void tsdbSmaFileName(STsdb *pTsdb, SDiskID did, int32_t fid, SSmaFile *pSmaF, char fname[]);
|
||||||
|
|
||||||
// SDelFile
|
// SDelFile
|
||||||
void tsdbDelFileName(STsdb *pTsdb, SDelFile *pFile, char fname[]);
|
void tsdbDelFileName(STsdb *pTsdb, SDelFile *pFile, char fname[]);
|
||||||
// tsdbFS.c ==============================================================================================
|
// tsdbFS.c ==============================================================================================
|
||||||
|
|
|
@ -93,8 +93,32 @@ static int32_t tGetSmaFile(uint8_t *p, SSmaFile *pSmaFile) {
|
||||||
|
|
||||||
// EXPOSED APIS ==================================================
|
// EXPOSED APIS ==================================================
|
||||||
void tsdbHeadFileName(STsdb *pTsdb, SDiskID did, int32_t fid, SHeadFile *pHeadF, char fname[]) {
|
void tsdbHeadFileName(STsdb *pTsdb, SDiskID did, int32_t fid, SHeadFile *pHeadF, char fname[]) {
|
||||||
snprintf(fname, TSDB_FILENAME_LEN - 1, "%s%s%s%sv%df%dver%" PRId64 "%s", tfsGetDiskPath(pTsdb->pVnode->pTfs, did),
|
const char* p1 = tfsGetDiskPath(pTsdb->pVnode->pTfs, did);
|
||||||
TD_DIRSEP, pTsdb->path, TD_DIRSEP, TD_VID(pTsdb->pVnode), fid, pHeadF->commitID, ".head");
|
int32_t len = strlen(p1);
|
||||||
|
|
||||||
|
char* p = memcpy(fname, p1, len);
|
||||||
|
p += len;
|
||||||
|
|
||||||
|
*(p++) = TD_DIRSEP[0];
|
||||||
|
len = strlen(pTsdb->path);
|
||||||
|
|
||||||
|
memcpy(p, pTsdb->path, len);
|
||||||
|
p += len;
|
||||||
|
|
||||||
|
*(p++) = TD_DIRSEP[0];
|
||||||
|
*(p++) = 'v';
|
||||||
|
|
||||||
|
p += tintToStr(TD_VID(pTsdb->pVnode), 10, p);
|
||||||
|
*(p++) = 'f';
|
||||||
|
|
||||||
|
p += tintToStr(fid, 10, p);
|
||||||
|
|
||||||
|
memcpy(p, "ver", 3);
|
||||||
|
p += 3;
|
||||||
|
|
||||||
|
p += tintToStr(pHeadF->commitID, 10, p);
|
||||||
|
memcpy(p, ".head", 5);
|
||||||
|
p[5] = 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
void tsdbDataFileName(STsdb *pTsdb, SDiskID did, int32_t fid, SDataFile *pDataF, char fname[]) {
|
void tsdbDataFileName(STsdb *pTsdb, SDiskID did, int32_t fid, SDataFile *pDataF, char fname[]) {
|
||||||
|
|
|
@ -308,30 +308,79 @@ int32_t tsDecompressINTImp(const char *const input, const int32_t nelements, cha
|
||||||
int32_t batch = num >> 2;
|
int32_t batch = num >> 2;
|
||||||
int32_t remain = num & 0x03;
|
int32_t remain = num & 0x03;
|
||||||
#if 1
|
#if 1
|
||||||
|
#if 1
|
||||||
|
__m256i base = _mm256_set1_epi64x(w);
|
||||||
|
__m256i mask_ = _mm256_set1_epi64x(mask);
|
||||||
|
|
||||||
|
__m256i shiftBits = _mm256_set_epi64x(bit * 3 + 4, bit * 2 + 4, bit + 4, 4);
|
||||||
|
__m256i inc = _mm256_set1_epi64x(bit << 2);
|
||||||
|
|
||||||
|
for(int32_t i = 0; i < batch; ++i) {
|
||||||
|
__m256i after = _mm256_srlv_epi64(base, shiftBits);
|
||||||
|
__m256i zz = _mm256_and_si256(after, mask_);
|
||||||
|
printf("1\n");
|
||||||
|
|
||||||
|
//#define ZIGZAG_DECODE(T, v) (((v) >> 1) ^ -((T)((v)&1))) // zigzag decode
|
||||||
|
__m256i signmask = _mm256_and_si256(_mm256_set_epi64x(1, 1, 1, 1), zz);
|
||||||
|
signmask = _mm256_sub_epi64(_mm256_setzero_si256(), signmask);
|
||||||
|
|
||||||
|
// now here we get the four zigzag value
|
||||||
|
__m256i final = _mm256_xor_si256(_mm256_srli_epi64(zz, 1), signmask);
|
||||||
|
|
||||||
|
// calculate the cumulative sum (prefix sum)
|
||||||
|
// decode[0] = prev_value + final[0]
|
||||||
|
// decode[1] = decode[0] + final[1] -----> prev_value + final[0] + final[1]
|
||||||
|
// decode[2] = decode[1] + final[2] -----> prev_value + final[0] + final[1] + final[2]
|
||||||
|
// decode[3] = decode[2] + final[3] -----> prev_value + final[0] + final[1] + final[2] + final[3]
|
||||||
|
|
||||||
|
printf("2\n");
|
||||||
|
|
||||||
|
__m128i prev = _mm_set1_epi64x(prev_value);
|
||||||
|
final = _mm256_add_epi64(final, _mm256_slli_si256(final, 8));
|
||||||
|
// x = 1, 2, 3, 4
|
||||||
|
// + 0, 1, 2, 3
|
||||||
|
// = 1, 3, 5, 7
|
||||||
|
_mm256_storeu_si256((__m256i *)&p[_pos], final);
|
||||||
|
|
||||||
|
__m128i first = _mm_loadu_si128((__m128i *)&p[_pos]);
|
||||||
|
__m128i sec = _mm_add_epi64(_mm_loadu_si128((__m128i *)&p[_pos + 2]), first);
|
||||||
|
sec = _mm_add_epi64(sec, prev);
|
||||||
|
first = _mm_add_epi64(first, prev);
|
||||||
|
|
||||||
|
_mm_storeu_si128((__m128i *)&p[_pos], first);
|
||||||
|
_mm_storeu_si128((__m128i *)&p[_pos + 2], sec);
|
||||||
|
|
||||||
|
shiftBits = _mm256_add_epi64(shiftBits, inc);
|
||||||
|
prev_value = p[_pos + 3];
|
||||||
|
_pos += 4;
|
||||||
|
|
||||||
|
printf("3\n");
|
||||||
|
}
|
||||||
|
#else
|
||||||
// manual unrolling, to erase the hotspot
|
// manual unrolling, to erase the hotspot
|
||||||
|
uint64_t zz[4];
|
||||||
|
|
||||||
for (int32_t i = 0; i < batch; ++i) {
|
for (int32_t i = 0; i < batch; ++i) {
|
||||||
zigzag_value = ((w >> v) & mask);
|
zigzag_value = ((w >> v) & mask);
|
||||||
prev_value += ZIGZAG_DECODE(int64_t, zigzag_value);
|
zz[0] = ZIGZAG_DECODE(int64_t, zigzag_value);
|
||||||
|
|
||||||
p[_pos++] = prev_value;
|
|
||||||
v += bit;
|
v += bit;
|
||||||
|
|
||||||
zigzag_value = ((w >> v) & mask);
|
zigzag_value = ((w >> v) & mask);
|
||||||
prev_value += ZIGZAG_DECODE(int64_t, zigzag_value);
|
zz[1] = ZIGZAG_DECODE(int64_t, zigzag_value);
|
||||||
|
|
||||||
p[_pos++] = prev_value;
|
|
||||||
v += bit;
|
v += bit;
|
||||||
|
|
||||||
zigzag_value = ((w >> v) & mask);
|
zigzag_value = ((w >> v) & mask);
|
||||||
prev_value += ZIGZAG_DECODE(int64_t, zigzag_value);
|
zz[2] = ZIGZAG_DECODE(int64_t, zigzag_value);
|
||||||
|
|
||||||
p[_pos++] = prev_value;
|
|
||||||
v += bit;
|
v += bit;
|
||||||
|
|
||||||
zigzag_value = ((w >> v) & mask);
|
zigzag_value = ((w >> v) & mask);
|
||||||
prev_value += ZIGZAG_DECODE(int64_t, zigzag_value);
|
zz[3] = ZIGZAG_DECODE(int64_t, zigzag_value);
|
||||||
|
|
||||||
p[_pos++] = prev_value;
|
p[_pos] = prev_value + zz[0];
|
||||||
|
p[_pos + 1] = p[_pos] + zz[1];
|
||||||
|
p[_pos + 2] = p[_pos + 1] + zz[2];
|
||||||
|
p[_pos + 3] = p[_pos + 2] + zz[3];
|
||||||
|
prev_value = p[_pos + 3];
|
||||||
v += bit;
|
v += bit;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -345,12 +394,12 @@ int32_t tsDecompressINTImp(const char *const input, const int32_t nelements, cha
|
||||||
}
|
}
|
||||||
|
|
||||||
count += num;
|
count += num;
|
||||||
|
#endif
|
||||||
|
|
||||||
#else
|
#else
|
||||||
for (int32_t i = 0; i < elems && count < nelements; i++, count++) {
|
for (int32_t i = 0; i < elems && count < nelements; i++, count++) {
|
||||||
zigzag_value = ((w >> (4 + v)) & mask);
|
zigzag_value = ((w >> v) & mask);
|
||||||
|
prev_value += ZIGZAG_DECODE(int64_t, zigzag_value);
|
||||||
int64_t diff = ZIGZAG_DECODE(int64_t, zigzag_value);
|
|
||||||
prev_value = diff + prev_value;
|
|
||||||
|
|
||||||
p[_pos++] = prev_value;
|
p[_pos++] = prev_value;
|
||||||
v += bit;
|
v += bit;
|
||||||
|
|
|
@ -336,6 +336,29 @@ int32_t tintToHex(uint64_t val, char hex[]) {
|
||||||
return j;
|
return j;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Convert an unsigned integer to its textual representation in the given radix.
 *
 * Digits are written most-significant first using the lowercase alphabet
 * "0123456789abcdef". The output is NOT NUL-terminated, so a caller can keep
 * appending at str + <return value>.
 *
 * @param val   value to convert. The parameter is unsigned: a negative signed
 *              argument is converted to a large positive value before formatting.
 * @param radix numeric base; must be in the range [2, 16].
 * @param str   destination buffer; needs room for up to 64 characters
 *              (worst case: UINT64_MAX in radix 2).
 * @return number of characters written to str, or 0 if radix is out of range.
 */
int32_t tintToStr(uint64_t val, size_t radix, char str[]) {
  if (radix < 2 || radix > 16) {
    return 0;
  }

  const char* s = "0123456789abcdef";
  char        buf[65] = {0};

  int32_t  i = 0;
  uint64_t v = val;

  // do/while (not while) so that val == 0 still produces the single digit "0"
  do {
    buf[i++] = s[v % radix];
    v /= radix;
  } while (v > 0);

  // digits were collected least-significant first; copy them out in reverse order
  for (int32_t j = 0; j < i; ++j) {
    str[j] = buf[i - j - 1];
  }

  return i;
}
|
||||||
|
|
||||||
int32_t taosByteArrayToHexStr(char bytes[], int32_t len, char hexstr[]) {
|
int32_t taosByteArrayToHexStr(char bytes[], int32_t len, char hexstr[]) {
|
||||||
int32_t i;
|
int32_t i;
|
||||||
char hexval[16] = {'0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'a', 'b', 'c', 'd', 'e', 'f'};
|
char hexval[16] = {'0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'a', 'b', 'c', 'd', 'e', 'f'};
|
||||||
|
|
Loading…
Reference in New Issue