From 1e25eac4c7257f00006fcaf2f866039b2faf5f7c Mon Sep 17 00:00:00 2001
From: Haojun Liao <hjliao@taosdata.com>
Date: Sun, 20 Nov 2022 23:11:12 +0800
Subject: [PATCH] refactor: add AVX2 min/max kernels for int8/int16 and extract handleInt8Col

---
 source/libs/function/src/detail/tminmax.c | 577 +++++++++++++++-------
 1 file changed, 396 insertions(+), 181 deletions(-)

diff --git a/source/libs/function/src/detail/tminmax.c b/source/libs/function/src/detail/tminmax.c
index d239315e0e..ed297e2b66 100644
--- a/source/libs/function/src/detail/tminmax.c
+++ b/source/libs/function/src/detail/tminmax.c
@@ -20,68 +20,59 @@
 #include "tglobal.h"
 
 static int32_t i32VectorCmpAVX2(const int32_t* pData, int32_t numOfRows, bool isMinFunc) {
-  int32_t v = 0;
+  int32_t        v = 0;
+  const int32_t  bitWidth = 256;
+  const int32_t* p = pData;
+  // lanes per 256-bit vector, number of tail rows, number of whole vectors
+  int32_t width = (bitWidth>>3u) / sizeof(int32_t);
+  int32_t remain = numOfRows % width;
+  int32_t rounds = numOfRows / width;
 
 #if __AVX2__
-  int32_t startElem = 0;//((uint64_t)plist) & ((1<<8u)-1);
-  int32_t bitWidth = 8;
-
-  int32_t remain = (numOfRows - startElem) % bitWidth;
-  int32_t rounds = (numOfRows - startElem) / bitWidth;
-  const int32_t* p = &pData[startElem];
-
   __m256i next;
   __m256i initialVal = _mm256_loadu_si256((__m256i*)p);
-  p += bitWidth;
+  p += width;
 
   if (!isMinFunc) {  // max function
-    for (int32_t i = 0; i < rounds; ++i) {
+    for (int32_t i = 1; i < rounds; ++i) {
       next = _mm256_lddqu_si256((__m256i*)p);
       initialVal = _mm256_max_epi32(initialVal, next);
-      p += bitWidth;
+      p += width;
     }
 
     // let sum up the final results
     const int32_t* q = (const int32_t*)&initialVal;
-
     v = TMAX(q[0], q[1]);
-    v = TMAX(v, q[2]);
-    v = TMAX(v, q[3]);
-    v = TMAX(v, q[4]);
-    v = TMAX(v, q[5]);
-    v = TMAX(v, q[6]);
-    v = TMAX(v, q[7]);
+    for (int32_t k = 1; k < width; ++k) {
+      v = TMAX(v, q[k]);
+    }
 
-    // calculate the front and the reminder items in array list
-    startElem += rounds * bitWidth;
+    // fold in the tail rows; index from pData because p has already moved past the vectors
+    int32_t start = rounds * width;
     for (int32_t j = 0; j < remain; ++j) {
-      if (v < p[j + startElem]) {
-        v = p[j + startElem];
+      if (v < pData[j + start]) {
+        v = pData[j + start];
       }
     }
   } else {  // min function
-    for (int32_t i = 0; i < rounds; ++i) {
+    for (int32_t i = 1; i < rounds; ++i) {
       next = _mm256_lddqu_si256((__m256i*)p);
       initialVal = _mm256_min_epi32(initialVal, next);
-      p += bitWidth;
+      p += width;
     }
 
     // let sum up the final results
     const int32_t* q = (const int32_t*)&initialVal;
-
     v = TMIN(q[0], q[1]);
-    v = TMIN(v, q[2]);
-    v = TMIN(v, q[3]);
-    v = TMIN(v, q[4]);
-    v = TMIN(v, q[5]);
-    v = TMIN(v, q[6]);
-    v = TMIN(v, q[7]);
+    for (int32_t k = 1; k < width; ++k) {
+      v = TMIN(v, q[k]);
+    }
 
-    // calculate the front and the remainder items in array list
-    startElem += rounds * bitWidth;
+    // fold in the tail rows; index from pData because p has already moved past the vectors
+    int32_t start = rounds * width;
     for (int32_t j = 0; j < remain; ++j) {
-      if (v > p[j + startElem]) {
-        v = p[j + startElem];
+      if (v > pData[j + start]) {
+        v = pData[j + start];
       }
     }
   }
@@ -92,69 +83,59 @@ static int32_t i32VectorCmpAVX2(const int32_t* pData, int32_t numOfRows, bool is
 
 static float floatVectorCmpAVX(const float* pData, int32_t numOfRows, bool isMinFunc) {
   float v = 0;
+  const int32_t bitWidth = 256;
+  const float* p = pData;
+  // lanes per 256-bit vector, number of tail rows, number of whole vectors
+  int32_t width = (bitWidth>>3u) / sizeof(float);
+  int32_t remain = numOfRows % width;
+  int32_t rounds = numOfRows / width;
 
 #if __AVX__
-  int32_t startElem = 0;//((uint64_t)plist) & ((1<<8u)-1);
-  int32_t i = 0;
-
-  int32_t bitWidth = 8;
-
-  int32_t remain = (numOfRows - startElem) % bitWidth;
-  int32_t rounds = (numOfRows - startElem) / bitWidth;
-  const float* p = &pData[startElem];
 
   __m256 next;
   __m256 initialVal = _mm256_loadu_ps(p);
-  p += bitWidth;
+  p += width;
 
   if (!isMinFunc) {  // max function
-    for (; i < rounds; ++i) {
+    for (int32_t i = 1; i < rounds; ++i) {
       next = _mm256_loadu_ps(p);
       initialVal = _mm256_max_ps(initialVal, next);
-      p += bitWidth;
+      p += width;
     }
 
     // let sum up the final results
     const float* q = (const float*)&initialVal;
-
     v = TMAX(q[0], q[1]);
-    v = TMAX(v, q[2]);
-    v = TMAX(v, q[3]);
-    v = TMAX(v, q[4]);
-    v = TMAX(v, q[5]);
-    v = TMAX(v, q[6]);
-    v = TMAX(v, q[7]);
+    for (int32_t k = 1; k < width; ++k) {
+      v = TMAX(v, q[k]);
+    }
 
-    // calculate the front and the reminder items in array list
-    startElem += rounds * bitWidth;
+    // fold in the tail rows; index from pData because p has already moved past the vectors
+    int32_t start = rounds * width;
     for (int32_t j = 0; j < remain; ++j) {
-      if (v < p[j + startElem]) {
-        v = p[j + startElem];
+      if (v < pData[j + start]) {
+        v = pData[j + start];
       }
     }
   } else {  // min function
-    for (; i < rounds; ++i) {
+    for (int32_t i = 1; i < rounds; ++i) {
       next = _mm256_loadu_ps(p);
       initialVal = _mm256_min_ps(initialVal, next);
-      p += bitWidth;
+      p += width;
     }
 
     // let sum up the final results
     const float* q = (const float*)&initialVal;
-
     v = TMIN(q[0], q[1]);
-    v = TMIN(v, q[2]);
-    v = TMIN(v, q[3]);
-    v = TMIN(v, q[4]);
-    v = TMIN(v, q[5]);
-    v = TMIN(v, q[6]);
-    v = TMIN(v, q[7]);
+    for (int32_t k = 1; k < width; ++k) {
+      v = TMIN(v, q[k]);
+    }
 
-    // calculate the front and the reminder items in array list
-    startElem += rounds * bitWidth;
+    // fold in the tail rows; index from pData because p has already moved past the vectors
+    int32_t start = rounds * width;
     for (int32_t j = 0; j < remain; ++j) {
-      if (v > p[j + startElem]) {
-        v = p[j + startElem];
+      if (v > pData[j + start]) {
+        v = pData[j + start];
       }
     }
   }
@@ -163,6 +144,195 @@ static float floatVectorCmpAVX(const float* pData, int32_t numOfRows, bool isMin
   return v;
 }
 
+// Returns the minimum (isMinFunc) or maximum of pData[0 .. numOfRows - 1], or 0 when numOfRows <= 0.
+// Complete 32-lane vectors are reduced with AVX2 when it is compiled in; every row that is not part
+// of a complete vector (all rows if AVX2 is not compiled in) is folded in by the scalar loop.
+static int8_t i8VectorCmpAVX2(const int8_t* pData, int32_t numOfRows, bool isMinFunc) {
+  if (numOfRows <= 0) {
+    return 0;
+  }
+
+  int8_t  v = pData[0];
+  int32_t done = 0;  // number of leading rows already folded into v
+
+#if __AVX2__
+  const int32_t width = (int32_t)(sizeof(__m256i) / sizeof(int8_t));
+  const int32_t rounds = numOfRows / width;
+
+  // never issue a 256-bit load unless a complete vector of rows exists
+  if (rounds > 0) {
+    __m256i acc = _mm256_loadu_si256((const __m256i*)pData);
+
+    // vector 0 is already in acc: continue with vector 1 and stop after vector rounds - 1
+    if (isMinFunc) {
+      for (int32_t i = 1; i < rounds; ++i) {
+        acc = _mm256_min_epi8(acc, _mm256_lddqu_si256((const __m256i*)(pData + i * width)));
+      }
+    } else {
+      for (int32_t i = 1; i < rounds; ++i) {
+        acc = _mm256_max_epi8(acc, _mm256_lddqu_si256((const __m256i*)(pData + i * width)));
+      }
+    }
+
+    // spill the lanes to memory and reduce them horizontally
+    int8_t lanes[sizeof(__m256i) / sizeof(int8_t)];
+    _mm256_storeu_si256((__m256i*)lanes, acc);
+
+    v = lanes[0];
+    for (int32_t k = 1; k < width; ++k) {
+      if (isMinFunc ? (lanes[k] < v) : (lanes[k] > v)) {
+        v = lanes[k];
+      }
+    }
+
+    done = rounds * width;
+  }
+#endif
+
+  // scalar tail, indexed from pData so it always starts right after the vectorised rows
+  if (isMinFunc) {
+    for (int32_t j = done; j < numOfRows; ++j) {
+      if (pData[j] < v) {
+        v = pData[j];
+      }
+    }
+  } else {
+    for (int32_t j = done; j < numOfRows; ++j) {
+      if (pData[j] > v) {
+        v = pData[j];
+      }
+    }
+  }
+
+  return v;
+}
+
+// Returns the minimum (isMinFunc) or maximum of pData[0 .. numOfRows - 1], or 0 when numOfRows <= 0.
+// Complete 16-lane vectors are reduced with AVX2 when it is compiled in; every row that is not part
+// of a complete vector (all rows if AVX2 is not compiled in) is folded in by the scalar loop, so
+// the function never reads outside the numOfRows rows it was given.
+static int16_t i16VectorCmpAVX2(const int16_t* pData, int32_t numOfRows, bool isMinFunc) {
+  if (numOfRows <= 0) {
+    return 0;
+  }
+
+  int16_t v = pData[0];
+  int32_t done = 0;  // number of leading rows already folded into v
+
+#if __AVX2__
+  const int32_t width = (int32_t)(sizeof(__m256i) / sizeof(int16_t));
+  const int32_t rounds = numOfRows / width;
+
+  // never issue a 256-bit load unless a complete vector of rows exists
+  if (rounds > 0) {
+    __m256i acc = _mm256_loadu_si256((const __m256i*)pData);
+
+    // vector 0 is already in acc: continue with vector 1 and stop after vector rounds - 1
+    if (isMinFunc) {
+      for (int32_t i = 1; i < rounds; ++i) {
+        acc = _mm256_min_epi16(acc, _mm256_lddqu_si256((const __m256i*)(pData + i * width)));
+      }
+    } else {
+      for (int32_t i = 1; i < rounds; ++i) {
+        acc = _mm256_max_epi16(acc, _mm256_lddqu_si256((const __m256i*)(pData + i * width)));
+      }
+    }
+
+    // spill the lanes to memory and reduce them horizontally
+    int16_t lanes[sizeof(__m256i) / sizeof(int16_t)];
+    _mm256_storeu_si256((__m256i*)lanes, acc);
+
+    v = lanes[0];
+    for (int32_t k = 1; k < width; ++k) {
+      if (isMinFunc ? (lanes[k] < v) : (lanes[k] > v)) {
+        v = lanes[k];
+      }
+    }
+
+    done = rounds * width;
+  }
+#endif
+
+  // scalar tail, indexed from pData so it always starts right after the vectorised rows
+  if (isMinFunc) {
+    for (int32_t j = done; j < numOfRows; ++j) {
+      if (pData[j] < v) {
+        v = pData[j];
+      }
+    }
+  } else {
+    for (int32_t j = done; j < numOfRows; ++j) {
+      if (pData[j] > v) {
+        v = pData[j];
+      }
+    }
+  }
+
+  return v;
+}
+
+//static int64_t i64VectorCmpAVX2(const int64_t* pData, int32_t numOfRows, bool isMinFunc) {
+//  int64_t        v = 0;
+//  const int32_t  bitWidth = 256;
+//  const int64_t* p = pData;
+//
+//  int32_t width = (bitWidth>>3u) / sizeof(int64_t);
+//  int32_t remain = numOfRows % width;
+//  int32_t rounds = numOfRows / width;
+//
+//#if __AVX2__
+//  __m256i next;
+//  __m256i initialVal = _mm256_loadu_si256((__m256i*)p);
+//  p += width;
+//
+//  if (!isMinFunc) {  // max function
+//    for (int32_t i = 0; i < rounds; ++i) {
+//      next = _mm256_lddqu_si256((__m256i*)p);
+//      initialVal = _mm256_max_epi64(initialVal, next);
+//      p += width;
+//    }
+//
+//    // let sum up the final results
+//    const int64_t* q = (const int64_t*)&initialVal;
+//    v = TMAX(q[0], q[1]);
+//    for(int32_t k = 1; k < width; ++k) {
+//      v = TMAX(v, q[k]);
+//    }
+//
+//    // calculate the front and the remainder items in array list
+//    int32_t start = rounds * width;
+//    for (int32_t j = 0; j < remain; ++j) {
+//      if (v < p[j + start]) {
+//        v = p[j + start];
+//      }
+//    }
+//  } else {  // min function
+//    for (int32_t i = 0; i < rounds; ++i) {
+//      next = _mm256_lddqu_si256((__m256i*)p);
+//      initialVal = _mm256_min_epi64(initialVal, next);
+//      p += width;
+//    }
+//
+//    // let sum up the final results
+//    const int64_t* q = (const int64_t*)&initialVal;
+//    v = TMIN(q[0], q[1]);
+//    for(int32_t k = 1; k < width; ++k) {
+//      v = TMIN(v, q[k]);
+//    }
+//
+//    // calculate the front and the remainder items in array list
+//    int32_t start = rounds * width;
+//    for (int32_t j = 0; j < remain; ++j) {
+//      if (v > p[j + start]) {
+//        v = p[j + start];
+//      }
+//    }
+//  }
+//#endif
+//
+//  return v;
+//}
+
 static int32_t handleInt32Col(SColumnInfoData* pCol, int32_t start, int32_t numOfRows, SqlFunctionCtx* pCtx,
                               SMinmaxResInfo* pBuf, bool isMinFunc) {
   int32_t* pData = (int32_t*)pCol->pData;
@@ -170,56 +340,56 @@ static int32_t handleInt32Col(SColumnInfoData* pCol, int32_t start, int32_t numO
 
   int32_t numOfElems = 0;
   if (pCol->hasNull || numOfRows <= 8 || pCtx->subsidiaries.num > 0) {
-    if (isMinFunc) {  // min
-      for (int32_t i = start; i < start + numOfRows; ++i) {
-        if (colDataIsNull_f(pCol->nullbitmap, i)) {
-          continue;
-        }
-
-        if (!pBuf->assign) {
-          *val = pData[i];
-          if (pCtx->subsidiaries.num > 0) {
-            pBuf->tuplePos = saveTupleData(pCtx, i, pCtx->pSrcBlock, NULL);
-          }
-          pBuf->assign = true;
-        } else {
-          if (*val > pData[i]) {
-            *val = pData[i];
-            if (pCtx->subsidiaries.num > 0) {
-              updateTupleData(pCtx, i, pCtx->pSrcBlock, &pBuf->tuplePos);
-            }
-          }
-        }
-
-        numOfElems += 1;
-      }
-    } else {  // max function
-      for (int32_t i = start; i < start + numOfRows; ++i) {
-        if (colDataIsNull_f(pCol->nullbitmap, i)) {
-          continue;
-        }
-
-        if (!pBuf->assign) {
-          *val = pData[i];
-          if (pCtx->subsidiaries.num > 0) {
-            pBuf->tuplePos = saveTupleData(pCtx, i, pCtx->pSrcBlock, NULL);
-          }
-          pBuf->assign = true;
-        } else {
-          // ignore the equivalent data value
-          // NOTE: An faster version to avoid one additional comparison with FPU.
-            if (*val < pData[i]) {
-              *val = pData[i];
-              if (pCtx->subsidiaries.num > 0) {
-                updateTupleData(pCtx, i, pCtx->pSrcBlock, &pBuf->tuplePos);
-              }
-            }
-        }
-
-        numOfElems += 1;
+    int32_t i = start;
+    while (i < (start + numOfRows)) {
+      if (!colDataIsNull_f(pCol->nullbitmap, i)) {
+        break;
       }
+      i += 1;
     }
-  } else { // not has null value
+
+    if ((i < (start + numOfRows)) && (!pBuf->assign)) {
+      *val = pData[i];
+      if (pCtx->subsidiaries.num > 0) {
+        pBuf->tuplePos = saveTupleData(pCtx, i, pCtx->pSrcBlock, NULL);
+      }
+      pBuf->assign = true;
+      // not counted here: the loop below restarts at row i and counts it exactly once
+    }
+
+    if (isMinFunc) {  // min
+      for (; i < start + numOfRows; ++i) {
+        if (colDataIsNull_f(pCol->nullbitmap, i)) {
+          continue;
+        }
+
+        if (*val > pData[i]) {
+          *val = pData[i];
+          if (pCtx->subsidiaries.num > 0) {
+            updateTupleData(pCtx, i, pCtx->pSrcBlock, &pBuf->tuplePos);
+          }
+        }
+        numOfElems += 1;
+      }
+
+    } else {  // max function
+      for (; i < start + numOfRows; ++i) {
+        if (colDataIsNull_f(pCol->nullbitmap, i)) {
+          continue;
+        }
+        // ignore the equivalent data value
+        // NOTE: An faster version to avoid one additional comparison with FPU.
+        if (*val < pData[i]) {
+          *val = pData[i];
+          if (pCtx->subsidiaries.num > 0) {
+            updateTupleData(pCtx, i, pCtx->pSrcBlock, &pBuf->tuplePos);
+          }
+        }
+        numOfElems += 1;
+      }
+
+    }
+  } else {  // not has null value
     // AVX2 version to speedup the loop
     if (tsAVX2Enable && tsSIMDEnable) {
       *val = i32VectorCmpAVX2(pData, numOfRows, isMinFunc);
@@ -257,56 +427,55 @@ static int32_t handleFloatCol(SColumnInfoData* pCol, int32_t start, int32_t numO
 
   int32_t numOfElems = 0;
   if (pCol->hasNull || numOfRows < 8 || pCtx->subsidiaries.num > 0) {
+    int32_t i = start;
+    while (i < (start + numOfRows)) {
+      if (!colDataIsNull_f(pCol->nullbitmap, i)) {
+        break;
+      }
+      i += 1;
+    }
+
+    if ((i < (start + numOfRows)) && (!pBuf->assign)) {
+      *val = pData[i];
+      if (pCtx->subsidiaries.num > 0) {
+        pBuf->tuplePos = saveTupleData(pCtx, i, pCtx->pSrcBlock, NULL);
+      }
+      pBuf->assign = true;
+      // not counted here: the loop below restarts at row i and counts it exactly once
+    }
+
     if (isMinFunc) {  // min
-      for (int32_t i = start; i < start + numOfRows; ++i) {
+      for (; i < start + numOfRows; ++i) {
         if (colDataIsNull_f(pCol->nullbitmap, i)) {
           continue;
         }
 
-        if (!pBuf->assign) {
+        if (*val > pData[i]) {
           *val = pData[i];
           if (pCtx->subsidiaries.num > 0) {
-            pBuf->tuplePos = saveTupleData(pCtx, i, pCtx->pSrcBlock, NULL);
-          }
-          pBuf->assign = true;
-        } else {
-          if (*val > pData[i]) {
-            *val = pData[i];
-            if (pCtx->subsidiaries.num > 0) {
-              updateTupleData(pCtx, i, pCtx->pSrcBlock, &pBuf->tuplePos);
-            }
+            updateTupleData(pCtx, i, pCtx->pSrcBlock, &pBuf->tuplePos);
           }
         }
-
         numOfElems += 1;
       }
     } else {  // max function
-      for (int32_t i = start; i < start + numOfRows; ++i) {
+      for (; i < start + numOfRows; ++i) {
         if (colDataIsNull_f(pCol->nullbitmap, i)) {
           continue;
         }
 
-        if (!pBuf->assign) {
+        // ignore the equivalent data value
+        // NOTE: An faster version to avoid one additional comparison with FPU.
+        if (*val < pData[i]) {
           *val = pData[i];
           if (pCtx->subsidiaries.num > 0) {
-            pBuf->tuplePos = saveTupleData(pCtx, i, pCtx->pSrcBlock, NULL);
-          }
-          pBuf->assign = true;
-        } else {
-          // ignore the equivalent data value
-          // NOTE: An faster version to avoid one additional comparison with FPU.
-          if (*val < pData[i]) {
-            *val = pData[i];
-            if (pCtx->subsidiaries.num > 0) {
-              updateTupleData(pCtx, i, pCtx->pSrcBlock, &pBuf->tuplePos);
-            }
+            updateTupleData(pCtx, i, pCtx->pSrcBlock, &pBuf->tuplePos);
           }
         }
-
         numOfElems += 1;
       }
     }
-  } else { // not has null value
+  } else {  // not has null value
     // AVX version to speedup the loop
     if (tsAVXEnable && tsSIMDEnable) {
       *val = (double) floatVectorCmpAVX(pData, numOfRows, isMinFunc);
@@ -337,6 +506,93 @@ static int32_t handleFloatCol(SColumnInfoData* pCol, int32_t start, int32_t numO
   return numOfElems;
 }
 
+// Folds rows [start, start + numOfRows) of a TINYINT/BOOL column into the running min/max held in
+// pBuf->v and returns the number of non-NULL rows visited.
+static int32_t handleInt8Col(SColumnInfoData* pCol, int32_t start, int32_t numOfRows, SqlFunctionCtx* pCtx,
+                             SMinmaxResInfo* pBuf, bool isMinFunc) {
+  const int8_t* pData = (const int8_t*)pCol->pData;
+  int8_t*       val = (int8_t*)&pBuf->v;
+  const int32_t end = start + numOfRows;
+  const int32_t simdWidth = 32;  // int8 lanes in one 256-bit vector
+
+  int32_t numOfElems = 0;
+  // the vector kernel handles neither NULL rows nor tuple bookkeeping, and it needs at least one
+  // complete vector of rows, so all of those cases take the row-by-row path
+  if (pCol->hasNull || numOfRows < simdWidth || pCtx->subsidiaries.num > 0) {
+    // skip leading NULL rows; the bitmap is consulted only when the column reports NULLs
+    int32_t i = start;
+    while ((i < end) && pCol->hasNull && colDataIsNull_f(pCol->nullbitmap, i)) {
+      i += 1;
+    }
+
+    // seed the result with the first non-NULL row when nothing has been assigned yet; the row
+    // is counted by the loop below, which restarts at i, so it must not be counted here
+    if ((i < end) && (!pBuf->assign)) {
+      *val = pData[i];
+      if (pCtx->subsidiaries.num > 0) {
+        pBuf->tuplePos = saveTupleData(pCtx, i, pCtx->pSrcBlock, NULL);
+      }
+      pBuf->assign = true;
+    }
+
+    if (isMinFunc) {  // min
+      for (; i < end; ++i) {
+        if (pCol->hasNull && colDataIsNull_f(pCol->nullbitmap, i)) {
+          continue;
+        }
+        if (*val > pData[i]) {
+          *val = pData[i];
+          if (pCtx->subsidiaries.num > 0) {
+            updateTupleData(pCtx, i, pCtx->pSrcBlock, &pBuf->tuplePos);
+          }
+        }
+        numOfElems += 1;
+      }
+    } else {  // max
+      for (; i < end; ++i) {
+        if (pCol->hasNull && colDataIsNull_f(pCol->nullbitmap, i)) {
+          continue;
+        }
+        if (*val < pData[i]) {
+          *val = pData[i];
+          if (pCtx->subsidiaries.num > 0) {
+            updateTupleData(pCtx, i, pCtx->pSrcBlock, &pBuf->tuplePos);
+          }
+        }
+        numOfElems += 1;
+      }
+    }
+  } else {  // no NULLs, no subsidiary columns, at least one full vector of rows
+    int8_t blockVal = pData[start];
+    if (tsAVX2Enable && tsSIMDEnable) {
+      // hand the kernel the first row of this range, not the first row of the column
+      blockVal = i8VectorCmpAVX2(pData + start, numOfRows, isMinFunc);
+    } else if (isMinFunc) {  // min
+      for (int32_t i = start + 1; i < end; ++i) {
+        if (blockVal > pData[i]) {
+          blockVal = pData[i];
+        }
+      }
+    } else {  // max
+      for (int32_t i = start + 1; i < end; ++i) {
+        if (blockVal < pData[i]) {
+          blockVal = pData[i];
+        }
+      }
+    }
+
+    // merge with a value already stored in pBuf (pBuf->assign set) instead of overwriting it
+    if ((!pBuf->assign) || (isMinFunc ? (*val > blockVal) : (*val < blockVal))) {
+      *val = blockVal;
+      pBuf->assign = true;
+    }
+
+    numOfElems = numOfRows;
+  }
+
+  return numOfElems;
+}
+
 static int32_t findRowIndex(int32_t start, int32_t num, SColumnInfoData* pCol, const char* tval) {
   // the data is loaded, not only the block SMA value
   for (int32_t i = start; i < num + start; ++i) {
@@ -463,42 +719,7 @@ int32_t doMinMaxHelper(SqlFunctionCtx* pCtx, int32_t isMinFunc) {
 
   if (IS_SIGNED_NUMERIC_TYPE(type) || type == TSDB_DATA_TYPE_BOOL) {
     if (type == TSDB_DATA_TYPE_TINYINT || type == TSDB_DATA_TYPE_BOOL) {
-      int8_t* pData = (int8_t*)pCol->pData;
-      int8_t* val = (int8_t*)&pBuf->v;
-
-      for (int32_t i = start; i < start + numOfRows; ++i) {
-        if ((pCol->hasNull) && colDataIsNull_f(pCol->nullbitmap, i)) {
-          continue;
-        }
-
-        if (!pBuf->assign) {
-          *val = pData[i];
-          if (pCtx->subsidiaries.num > 0) {
-            pBuf->tuplePos = saveTupleData(pCtx, i, pCtx->pSrcBlock, NULL);
-          }
-          pBuf->assign = true;
-        } else {
-          // ignore the equivalent data value
-          // NOTE: An faster version to avoid one additional comparison with FPU.
-          if (isMinFunc) {  // min
-            if (*val > pData[i]) {
-              *val = pData[i];
-              if (pCtx->subsidiaries.num > 0) {
-                updateTupleData(pCtx, i, pCtx->pSrcBlock, &pBuf->tuplePos);
-              }
-            }
-          } else {  // max
-            if (*val < pData[i]) {
-              *val = pData[i];
-              if (pCtx->subsidiaries.num > 0) {
-                updateTupleData(pCtx, i, pCtx->pSrcBlock, &pBuf->tuplePos);
-              }
-            }
-          }
-        }
-
-        numOfElems += 1;
-      }
+      numOfElems = handleInt8Col(pCol, start, numOfRows, pCtx, pBuf, isMinFunc);
     } else if (type == TSDB_DATA_TYPE_SMALLINT) {
       int16_t* pData = (int16_t*)pCol->pData;
       int16_t* val = (int16_t*)&pBuf->v;
@@ -537,9 +758,6 @@ int32_t doMinMaxHelper(SqlFunctionCtx* pCtx, int32_t isMinFunc) {
         numOfElems += 1;
       }
     } else if (type == TSDB_DATA_TYPE_INT) {
-      int32_t* pData = (int32_t*)pCol->pData;
-      int32_t* val = (int32_t*)&pBuf->v;
-
       numOfElems = handleInt32Col(pCol, start, numOfRows, pCtx, pBuf, isMinFunc);
 #if 0
       for (int32_t i = start; i < start + numOfRows; ++i) {
@@ -803,9 +1021,6 @@ int32_t doMinMaxHelper(SqlFunctionCtx* pCtx, int32_t isMinFunc) {
       numOfElems += 1;
     }
   } else if (type == TSDB_DATA_TYPE_FLOAT) {
-    float* pData = (float*)pCol->pData;
-    float* val = (float*)&pBuf->v;
-
     numOfElems = handleFloatCol(pCol, start, numOfRows, pCtx, pBuf, isMinFunc);
 #if 0
     for (int32_t i = start; i < start + numOfRows; ++i) {