From 4ea7d139ba22b506d7c302d0ce1b6f841b23f545 Mon Sep 17 00:00:00 2001 From: Ganlin Zhao Date: Wed, 30 Nov 2022 14:48:39 +0800 Subject: [PATCH 1/2] fix(query): fix avg calculation error after SIMD optimize TD-20803 --- .../libs/function/src/detail/tavgfunction.c | 24 +++++++++++++++---- 1 file changed, 20 insertions(+), 4 deletions(-) diff --git a/source/libs/function/src/detail/tavgfunction.c b/source/libs/function/src/detail/tavgfunction.c index 4cafbd4e6e..e8bd5f8d3c 100644 --- a/source/libs/function/src/detail/tavgfunction.c +++ b/source/libs/function/src/detail/tavgfunction.c @@ -502,7 +502,11 @@ int32_t avgFunction(SqlFunctionCtx* pCtx) { i8VectorSumAVX2(plist, numOfRows, type, pAvgRes); } else { for (int32_t i = pInput->startRowIndex; i < pInput->numOfRows + pInput->startRowIndex; ++i) { - pAvgRes->sum.usum += plist[i]; + if (type == TSDB_DATA_TYPE_TINYINT) { + pAvgRes->sum.isum += plist[i]; + } else { + pAvgRes->sum.usum += (uint8_t)plist[i]; + } } } break; @@ -517,7 +521,11 @@ int32_t avgFunction(SqlFunctionCtx* pCtx) { i16VectorSumAVX2(plist, numOfRows, type, pAvgRes); } else { for (int32_t i = pInput->startRowIndex; i < pInput->numOfRows + pInput->startRowIndex; ++i) { - pAvgRes->sum.isum += plist[i]; + if (type == TSDB_DATA_TYPE_SMALLINT) { + pAvgRes->sum.isum += plist[i]; + } else { + pAvgRes->sum.usum += (uint16_t)plist[i]; + } } } break; @@ -532,7 +540,11 @@ int32_t avgFunction(SqlFunctionCtx* pCtx) { i32VectorSumAVX2(plist, numOfRows, type, pAvgRes); } else { for (int32_t i = pInput->startRowIndex; i < pInput->numOfRows + pInput->startRowIndex; ++i) { - pAvgRes->sum.isum += plist[i]; + if (type == TSDB_DATA_TYPE_INT) { + pAvgRes->sum.isum += plist[i]; + } else { + pAvgRes->sum.usum += (uint32_t)plist[i]; + } } } break; @@ -547,7 +559,11 @@ int32_t avgFunction(SqlFunctionCtx* pCtx) { i64VectorSumAVX2(plist, numOfRows, pAvgRes); } else { for (int32_t i = pInput->startRowIndex; i < pInput->numOfRows + pInput->startRowIndex; ++i) { - pAvgRes->sum.isum 
+= plist[i]; + if (type == TSDB_DATA_TYPE_BIGINT) { + pAvgRes->sum.isum += plist[i]; + } else { + pAvgRes->sum.isum += (uint64_t)plist[i]; + } } } break; From 4048988908b906ccbdd5dd324d5eac005112a051 Mon Sep 17 00:00:00 2001 From: Ganlin Zhao Date: Wed, 30 Nov 2022 14:48:39 +0800 Subject: [PATCH 2/2] fix(query): fix avg calculation error after SIMD optimize TD-20803 --- .../libs/function/src/detail/tavgfunction.c | 93 +++++++++++++------ 1 file changed, 63 insertions(+), 30 deletions(-) diff --git a/source/libs/function/src/detail/tavgfunction.c b/source/libs/function/src/detail/tavgfunction.c index e8bd5f8d3c..f06bafafe3 100644 --- a/source/libs/function/src/detail/tavgfunction.c +++ b/source/libs/function/src/detail/tavgfunction.c @@ -133,6 +133,14 @@ static void i8VectorSumAVX2(const int8_t* plist, int32_t numOfRows, int32_t type sum = _mm256_add_epi64(sum, extVal); p += width; } + + // let sum up the final results + const int64_t* q = (const int64_t*)&sum; + pRes->sum.isum += q[0] + q[1] + q[2] + q[3]; + + for (int32_t j = 0; j < remainder; ++j) { + pRes->sum.isum += plist[j + rounds * width]; + } } else { const uint8_t* p = (const uint8_t*)plist; @@ -142,16 +150,16 @@ static void i8VectorSumAVX2(const int8_t* plist, int32_t numOfRows, int32_t type sum = _mm256_add_epi64(sum, extVal); p += width; } + + // let sum up the final results + const uint64_t* q = (const uint64_t*)&sum; + pRes->sum.usum += q[0] + q[1] + q[2] + q[3]; + + for (int32_t j = 0; j < remainder; ++j) { + pRes->sum.usum += (uint8_t)plist[j + rounds * width]; + } } - // let sum up the final results - const int64_t* q = (const int64_t*)&sum; - pRes->sum.isum += q[0] + q[1] + q[2] + q[3]; - - int32_t startIndex = rounds * width; - for (int32_t j = 0; j < remainder; ++j) { - pRes->sum.isum += plist[j + startIndex]; - } #endif } @@ -176,8 +184,16 @@ static void i16VectorSumAVX2(const int16_t* plist, int32_t numOfRows, int32_t ty sum = _mm256_add_epi64(sum, extVal); p += width; } + + // let sum up the final 
results + const int64_t* q = (const int64_t*)&sum; + pRes->sum.isum += q[0] + q[1] + q[2] + q[3]; + + for (int32_t j = 0; j < remainder; ++j) { + pRes->sum.isum += plist[j + rounds * width]; + } } else { - const uint8_t* p = (const uint8_t*)plist; + const uint16_t* p = (const uint16_t*)plist; for(int32_t i = 0; i < rounds; ++i) { __m128i val = _mm_lddqu_si128((__m128i*)p); @@ -185,16 +201,16 @@ static void i16VectorSumAVX2(const int16_t* plist, int32_t numOfRows, int32_t ty sum = _mm256_add_epi64(sum, extVal); p += width; } + + // let sum up the final results + const uint64_t* q = (const uint64_t*)&sum; + pRes->sum.usum += q[0] + q[1] + q[2] + q[3]; + + for (int32_t j = 0; j < remainder; ++j) { + pRes->sum.usum += (uint16_t)plist[j + rounds * width]; + } } - // let sum up the final results - const int64_t* q = (const int64_t*)&sum; - pRes->sum.isum += q[0] + q[1] + q[2] + q[3]; - - int32_t startIndex = rounds * width; - for (int32_t j = 0; j < remainder; ++j) { - pRes->sum.isum += plist[j + startIndex]; - } #endif } @@ -219,6 +235,14 @@ static void i32VectorSumAVX2(const int32_t* plist, int32_t numOfRows, int32_t ty sum = _mm256_add_epi64(sum, extVal); p += width; } + + // let sum up the final results + const int64_t* q = (const int64_t*)&sum; + pRes->sum.isum += q[0] + q[1] + q[2] + q[3]; + + for (int32_t j = 0; j < remainder; ++j) { + pRes->sum.isum += plist[j + rounds * width]; + } } else { const uint32_t* p = (const uint32_t*)plist; @@ -228,16 +252,16 @@ static void i32VectorSumAVX2(const int32_t* plist, int32_t numOfRows, int32_t ty sum = _mm256_add_epi64(sum, extVal); p += width; } + + // let sum up the final results + const uint64_t* q = (const uint64_t*)&sum; + pRes->sum.usum += q[0] + q[1] + q[2] + q[3]; + + for (int32_t j = 0; j < remainder; ++j) { + pRes->sum.usum += (uint32_t)plist[j + rounds * width]; + } } - // let sum up the final results - const int64_t* q = (const int64_t*)&sum; - pRes->sum.isum += q[0] + q[1] + q[2] + q[3]; - - int32_t startIndex = rounds * width; - for 
(int32_t j = 0; j < remainder; ++j) { - pRes->sum.isum += plist[j + startIndex]; - } #endif } @@ -262,13 +286,22 @@ static void i64VectorSumAVX2(const int64_t* plist, int32_t numOfRows, SAvgRes* p } // let sum up the final results - const int64_t* q = (const int64_t*)&sum; - pRes->sum.isum += q[0] + q[1] + q[2] + q[3]; + if (type == TSDB_DATA_TYPE_BIGINT) { + const int64_t* q = (const int64_t*)&sum; + pRes->sum.isum += q[0] + q[1] + q[2] + q[3]; - int32_t startIndex = rounds * width; - for (int32_t j = 0; j < remainder; ++j) { - pRes->sum.isum += plist[j + startIndex]; + for (int32_t j = 0; j < remainder; ++j) { + pRes->sum.isum += plist[j + rounds * width]; + } + } else { + const uint64_t* q = (const uint64_t*)&sum; + pRes->sum.usum += q[0] + q[1] + q[2] + q[3]; + + for (int32_t j = 0; j < remainder; ++j) { + pRes->sum.usum += (uint64_t)plist[j + rounds * width]; + } } + #endif }