compression
This commit is contained in:
parent
6c777907c0
commit
d5ac976e95
|
@ -57,6 +57,8 @@ static const int TEST_NUMBER = 1;
|
||||||
#define SIMPLE8B_MAX_INT64 ((uint64_t)2305843009213693951L)
|
// Threshold compared against zigzag-encoded values; equals 2^61 - 1 (2305843009213693951).
#define SIMPLE8B_MAX_INT64 ((uint64_t)((UINT64_C(1) << 61) - 1))
|
||||||
|
|
||||||
#define safeInt64Add(a, b) (((a >= 0) && (b <= INT64_MAX - a)) || ((a < 0) && (b >= INT64_MIN - a)))
|
// Evaluates to non-zero when a + b fits in int64_t, i.e. the addition cannot
// overflow or underflow. Both arguments are fully parenthesized so that
// compound expressions (e.g. `x - y`) expand correctly. Note that `a` and `b`
// are evaluated more than once, so avoid arguments with side effects.
#define safeInt64Add(a, b) \
  ((((a) >= 0) && ((b) <= INT64_MAX - (a))) || (((a) < 0) && ((b) >= INT64_MIN - (a))))
|
||||||
|
// Zigzag-encode the signed value `v` of type `T` into the unsigned type u##T
// (e.g. T = int64_t -> uint64_t): 0 -> 0, -1 -> 1, 1 -> 2, -2 -> 3, ...
// The whole expansion is parenthesized so the macro can be used safely inside
// larger expressions. Relies on `>>` of a negative signed value being an
// arithmetic shift (implementation-defined in C). `v` is evaluated twice.
#define ZIGZAG_ENCODE(T, v) ((((u##T)((v) >> (sizeof(T) * 8 - 1))) ^ (((u##T)(v)) << 1)))  // zigzag encode
|
||||||
|
// Reverse a zigzag encoding: maps the unsigned code `v` back to the signed
// value (0 -> 0, 1 -> -1, 2 -> 1, 3 -> -2, ...). `T` is the signed type used
// to build the sign mask. The whole expansion is parenthesized so the macro
// can be used safely inside larger expressions. `v` is evaluated twice.
#define ZIGZAG_DECODE(T, v) (((v) >> 1) ^ -((T)((v)&1)))  // zigzag decode
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Compress Integer (Simple8B).
|
* Compress Integer (Simple8B).
|
||||||
|
@ -87,7 +89,7 @@ int tsCompressINTImp(const char *const input, const int nelements, char *const o
|
||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
perror("Wrong integer types.\n");
|
perror("Wrong integer types.\n");
|
||||||
exit(1);
|
return -1;
|
||||||
}
|
}
|
||||||
|
|
||||||
int byte_limit = nelements * word_length + 1;
|
int byte_limit = nelements * word_length + 1;
|
||||||
|
@ -122,7 +124,7 @@ int tsCompressINTImp(const char *const input, const int nelements, char *const o
|
||||||
|
|
||||||
int64_t diff = curr_value - prev_value_tmp;
|
int64_t diff = curr_value - prev_value_tmp;
|
||||||
// Zigzag encode the value.
|
// Zigzag encode the value.
|
||||||
uint64_t zigzag_value = (diff >> (LONG_BYTES * BITS_PER_BYTE - 1)) ^ (diff << 1);
|
uint64_t zigzag_value = ZIGZAG_ENCODE(int64_t, diff);
|
||||||
|
|
||||||
if (zigzag_value >= SIMPLE8B_MAX_INT64) goto _copy_and_exit;
|
if (zigzag_value >= SIMPLE8B_MAX_INT64) goto _copy_and_exit;
|
||||||
|
|
||||||
|
@ -168,7 +170,7 @@ int tsCompressINTImp(const char *const input, const int nelements, char *const o
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
int64_t diff = curr_value - prev_value;
|
int64_t diff = curr_value - prev_value;
|
||||||
uint64_t zigzag_value = (diff >> (LONG_BYTES * BITS_PER_BYTE - 1)) ^ (diff << 1);
|
uint64_t zigzag_value = ZIGZAG_ENCODE(int64_t, diff);
|
||||||
buffer |= ((zigzag_value & INT64MASK(bit)) << (bit * k + 4));
|
buffer |= ((zigzag_value & INT64MASK(bit)) << (bit * k + 4));
|
||||||
i++;
|
i++;
|
||||||
prev_value = curr_value;
|
prev_value = curr_value;
|
||||||
|
@ -208,7 +210,7 @@ int tsDecompressINTImp(const char *const input, const int nelements, char *const
|
||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
perror("Wrong integer types.\n");
|
perror("Wrong integer types.\n");
|
||||||
exit(1);
|
return -1;
|
||||||
}
|
}
|
||||||
|
|
||||||
// If not compressed.
|
// If not compressed.
|
||||||
|
@ -245,30 +247,30 @@ int tsDecompressINTImp(const char *const input, const int nelements, char *const
|
||||||
} else {
|
} else {
|
||||||
zigzag_value = ((w >> (4 + bit * i)) & INT64MASK(bit));
|
zigzag_value = ((w >> (4 + bit * i)) & INT64MASK(bit));
|
||||||
}
|
}
|
||||||
int64_t diff = (zigzag_value >> 1) ^ -(zigzag_value & 1);
|
int64_t diff = ZIGZAG_DECODE(int64_t, zigzag_value);
|
||||||
int64_t curr_value = diff + prev_value;
|
int64_t curr_value = diff + prev_value;
|
||||||
prev_value = curr_value;
|
prev_value = curr_value;
|
||||||
|
|
||||||
switch (type) {
|
switch (type) {
|
||||||
case TSDB_DATA_TYPE_BIGINT:
|
case TSDB_DATA_TYPE_BIGINT:
|
||||||
*((int64_t *)output + _pos) = curr_value;
|
*((int64_t *)output + _pos) = (int64_t)curr_value;
|
||||||
_pos++;
|
_pos++;
|
||||||
break;
|
break;
|
||||||
case TSDB_DATA_TYPE_INT:
|
case TSDB_DATA_TYPE_INT:
|
||||||
*((int32_t *)output + _pos) = curr_value;
|
*((int32_t *)output + _pos) = (int32_t)curr_value;
|
||||||
_pos++;
|
_pos++;
|
||||||
break;
|
break;
|
||||||
case TSDB_DATA_TYPE_SMALLINT:
|
case TSDB_DATA_TYPE_SMALLINT:
|
||||||
*((int16_t *)output + _pos) = curr_value;
|
*((int16_t *)output + _pos) = (int16_t)curr_value;
|
||||||
_pos++;
|
_pos++;
|
||||||
break;
|
break;
|
||||||
case TSDB_DATA_TYPE_TINYINT:
|
case TSDB_DATA_TYPE_TINYINT:
|
||||||
*((int8_t *)output + _pos) = curr_value;
|
*((int8_t *)output + _pos) = (int8_t)curr_value;
|
||||||
_pos++;
|
_pos++;
|
||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
perror("Wrong integer types.\n");
|
perror("Wrong integer types.\n");
|
||||||
exit(1);
|
return -1;
|
||||||
}
|
}
|
||||||
count++;
|
count++;
|
||||||
if (count == nelements) break;
|
if (count == nelements) break;
|
||||||
|
@ -306,7 +308,7 @@ int tsCompressBoolImp(const char *const input, const int nelements, char *const
|
||||||
output[pos] |= t;
|
output[pos] |= t;
|
||||||
} else {
|
} else {
|
||||||
perror("Wrong bool value.\n");
|
perror("Wrong bool value.\n");
|
||||||
exit(1);
|
return -1;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -362,7 +364,7 @@ int tsCompressBoolRLEImp(const char *const input, const int nelements, char *con
|
||||||
output[_pos++] = (counter << 1) | INT8MASK(0);
|
output[_pos++] = (counter << 1) | INT8MASK(0);
|
||||||
} else {
|
} else {
|
||||||
perror("Wrong bool value!\n");
|
perror("Wrong bool value!\n");
|
||||||
exit(1);
|
return -1;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -414,7 +416,7 @@ int tsDecompressStringImp(const char *const input, int compressedSize, char *con
|
||||||
char msg[128] = {0};
|
char msg[128] = {0};
|
||||||
sprintf(msg, "decomp_size:%d, Error decompress in LZ4 algorithm!\n", decompressed_size);
|
sprintf(msg, "decomp_size:%d, Error decompress in LZ4 algorithm!\n", decompressed_size);
|
||||||
perror(msg);
|
perror(msg);
|
||||||
exit(EXIT_FAILURE);
|
return -1;
|
||||||
}
|
}
|
||||||
|
|
||||||
return decompressed_size;
|
return decompressed_size;
|
||||||
|
@ -424,7 +426,7 @@ int tsDecompressStringImp(const char *const input, int compressedSize, char *con
|
||||||
return compressedSize - 1;
|
return compressedSize - 1;
|
||||||
} else {
|
} else {
|
||||||
perror("Wrong compressed string indicator!\n");
|
perror("Wrong compressed string indicator!\n");
|
||||||
exit(EXIT_FAILURE);
|
return -1;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -451,21 +453,21 @@ int tsCompressTimestampImp(const char *const input, const int nelements, char *c
|
||||||
if (!safeInt64Add(curr_delta, -prev_delta)) goto _exit_over;
|
if (!safeInt64Add(curr_delta, -prev_delta)) goto _exit_over;
|
||||||
int64_t delta_of_delta = curr_delta - prev_delta;
|
int64_t delta_of_delta = curr_delta - prev_delta;
|
||||||
// zigzag encode the value.
|
// zigzag encode the value.
|
||||||
uint64_t zigzag_value = (delta_of_delta >> (LONG_BYTES * BITS_PER_BYTE - 1)) ^ (delta_of_delta << 1);
|
uint64_t zigzag_value = ZIGZAG_ENCODE(int64_t, delta_of_delta);
|
||||||
if (i % 2 == 0) {
|
if (i % 2 == 0) {
|
||||||
flags = 0;
|
flags = 0;
|
||||||
dd1 = zigzag_value;
|
dd1 = zigzag_value;
|
||||||
if (dd1 == 0) {
|
if (dd1 == 0) {
|
||||||
flag1 = 0;
|
flag1 = 0;
|
||||||
} else {
|
} else {
|
||||||
flag1 = LONG_BYTES - BUILDIN_CLZL(dd1) / BITS_PER_BYTE;
|
flag1 = (uint8_t)(LONG_BYTES - BUILDIN_CLZL(dd1) / BITS_PER_BYTE);
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
dd2 = zigzag_value;
|
dd2 = zigzag_value;
|
||||||
if (dd2 == 0) {
|
if (dd2 == 0) {
|
||||||
flag2 = 0;
|
flag2 = 0;
|
||||||
} else {
|
} else {
|
||||||
flag2 = LONG_BYTES - BUILDIN_CLZL(dd2) / BITS_PER_BYTE;
|
flag2 = (uint8_t)(LONG_BYTES - BUILDIN_CLZL(dd2) / BITS_PER_BYTE);
|
||||||
}
|
}
|
||||||
flags = flag1 | (flag2 << 4);
|
flags = flag1 | (flag2 << 4);
|
||||||
// Encode the flag.
|
// Encode the flag.
|
||||||
|
@ -552,7 +554,7 @@ int tsDecompressTimestampImp(const char *const input, const int nelements, char
|
||||||
} else {
|
} else {
|
||||||
memcpy(&dd1, input + ipos, nbytes);
|
memcpy(&dd1, input + ipos, nbytes);
|
||||||
}
|
}
|
||||||
delta_of_delta = (dd1 >> 1) ^ -(dd1 & 1);
|
delta_of_delta = ZIGZAG_DECODE(int64_t, dd1);
|
||||||
}
|
}
|
||||||
ipos += nbytes;
|
ipos += nbytes;
|
||||||
if (opos == 0) {
|
if (opos == 0) {
|
||||||
|
@ -578,7 +580,7 @@ int tsDecompressTimestampImp(const char *const input, const int nelements, char
|
||||||
memcpy(&dd2, input + ipos, nbytes);
|
memcpy(&dd2, input + ipos, nbytes);
|
||||||
}
|
}
|
||||||
// zigzag_decoding
|
// zigzag_decoding
|
||||||
delta_of_delta = (dd2 >> 1) ^ -(dd2 & 1);
|
delta_of_delta = ZIGZAG_DECODE(int64_t, dd2);
|
||||||
}
|
}
|
||||||
ipos += nbytes;
|
ipos += nbytes;
|
||||||
prev_delta = delta_of_delta + prev_delta;
|
prev_delta = delta_of_delta + prev_delta;
|
||||||
|
@ -640,12 +642,12 @@ int tsCompressDoubleImp(const char *const input, const int nelements, char *cons
|
||||||
uint8_t flag;
|
uint8_t flag;
|
||||||
|
|
||||||
if (trailing_zeros > leading_zeros) {
|
if (trailing_zeros > leading_zeros) {
|
||||||
nbytes = LONG_BYTES - trailing_zeros / BITS_PER_BYTE;
|
nbytes = (uint8_t)(LONG_BYTES - trailing_zeros / BITS_PER_BYTE);
|
||||||
|
|
||||||
if (nbytes > 0) nbytes--;
|
if (nbytes > 0) nbytes--;
|
||||||
flag = ((uint8_t)1 << 3) | nbytes;
|
flag = ((uint8_t)1 << 3) | nbytes;
|
||||||
} else {
|
} else {
|
||||||
nbytes = LONG_BYTES - leading_zeros / BITS_PER_BYTE;
|
nbytes = (uint8_t)(LONG_BYTES - leading_zeros / BITS_PER_BYTE);
|
||||||
if (nbytes > 0) nbytes--;
|
if (nbytes > 0) nbytes--;
|
||||||
flag = nbytes;
|
flag = nbytes;
|
||||||
}
|
}
|
||||||
|
@ -787,12 +789,12 @@ int tsCompressFloatImp(const char *const input, const int nelements, char *const
|
||||||
uint8_t flag;
|
uint8_t flag;
|
||||||
|
|
||||||
if (trailing_zeros > leading_zeros) {
|
if (trailing_zeros > leading_zeros) {
|
||||||
nbytes = FLOAT_BYTES - trailing_zeros / BITS_PER_BYTE;
|
nbytes = (uint8_t)(FLOAT_BYTES - trailing_zeros / BITS_PER_BYTE);
|
||||||
|
|
||||||
if (nbytes > 0) nbytes--;
|
if (nbytes > 0) nbytes--;
|
||||||
flag = ((uint8_t)1 << 3) | nbytes;
|
flag = ((uint8_t)1 << 3) | nbytes;
|
||||||
} else {
|
} else {
|
||||||
nbytes = FLOAT_BYTES - leading_zeros / BITS_PER_BYTE;
|
nbytes = (uint8_t)(FLOAT_BYTES - leading_zeros / BITS_PER_BYTE);
|
||||||
if (nbytes > 0) nbytes--;
|
if (nbytes > 0) nbytes--;
|
||||||
flag = nbytes;
|
flag = nbytes;
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue