refactor: do some internal refactor.
This commit is contained in:
parent
7b53b8142e
commit
c888cbf068
|
@ -115,7 +115,7 @@ typedef struct SInputColumnInfoData {
|
|||
int32_t startRowIndex; // handle started row index
|
||||
int32_t numOfRows; // the number of rows needs to be handled
|
||||
int32_t numOfInputCols; // PTS is not included
|
||||
bool colDataAggIsSet; // if agg is set or not
|
||||
bool colDataSMAIsSet; // if agg is set or not
|
||||
SColumnInfoData *pPTS; // primary timestamp column
|
||||
SColumnInfoData **pData;
|
||||
SColumnDataAgg **pColumnDataAgg;
|
||||
|
|
|
@ -349,13 +349,13 @@ typedef struct {
|
|||
} SFunctionCtxStatus;
|
||||
|
||||
static void functionCtxSave(SqlFunctionCtx* pCtx, SFunctionCtxStatus* pStatus) {
|
||||
pStatus->hasAgg = pCtx->input.colDataAggIsSet;
|
||||
pStatus->hasAgg = pCtx->input.colDataSMAIsSet;
|
||||
pStatus->numOfRows = pCtx->input.numOfRows;
|
||||
pStatus->startOffset = pCtx->input.startRowIndex;
|
||||
}
|
||||
|
||||
static void functionCtxRestore(SqlFunctionCtx* pCtx, SFunctionCtxStatus* pStatus) {
|
||||
pCtx->input.colDataAggIsSet = pStatus->hasAgg;
|
||||
pCtx->input.colDataSMAIsSet = pStatus->hasAgg;
|
||||
pCtx->input.numOfRows = pStatus->numOfRows;
|
||||
pCtx->input.startRowIndex = pStatus->startOffset;
|
||||
}
|
||||
|
@ -372,8 +372,8 @@ void doApplyFunctions(SExecTaskInfo* taskInfo, SqlFunctionCtx* pCtx, SColumnInfo
|
|||
|
||||
// not a whole block involved in query processing, statistics data can not be used
|
||||
// NOTE: the original value of isSet have been changed here
|
||||
if (pCtx[k].input.colDataAggIsSet && forwardStep < numOfTotal) {
|
||||
pCtx[k].input.colDataAggIsSet = false;
|
||||
if (pCtx[k].input.colDataSMAIsSet && forwardStep < numOfTotal) {
|
||||
pCtx[k].input.colDataSMAIsSet = false;
|
||||
}
|
||||
|
||||
if (fmIsWindowPseudoColumnFunc(pCtx[k].functionId)) {
|
||||
|
@ -486,7 +486,7 @@ static int32_t doSetInputDataBlock(SExprSupp* pExprSup, SSDataBlock* pBlock, int
|
|||
|
||||
SInputColumnInfoData* pInput = &pCtx[i].input;
|
||||
pInput->uid = pBlock->info.uid;
|
||||
pInput->colDataAggIsSet = false;
|
||||
pInput->colDataSMAIsSet = false;
|
||||
|
||||
SExprInfo* pOneExpr = &pExprSup->pExprInfo[i];
|
||||
for (int32_t j = 0; j < pOneExpr->base.numOfParams; ++j) {
|
||||
|
@ -798,7 +798,7 @@ void setBlockSMAInfo(SqlFunctionCtx* pCtx, SExprInfo* pExprInfo, SSDataBlock* pB
|
|||
pInput->totalRows = numOfRows;
|
||||
|
||||
if (pBlock->pBlockAgg != NULL) {
|
||||
pInput->colDataAggIsSet = true;
|
||||
pInput->colDataSMAIsSet = true;
|
||||
|
||||
for (int32_t j = 0; j < pExprInfo->base.numOfParams; ++j) {
|
||||
SFunctParam* pFuncParam = &pExprInfo->base.pParam[j];
|
||||
|
@ -807,7 +807,7 @@ void setBlockSMAInfo(SqlFunctionCtx* pCtx, SExprInfo* pExprInfo, SSDataBlock* pB
|
|||
int32_t slotId = pFuncParam->pCol->slotId;
|
||||
pInput->pColumnDataAgg[j] = pBlock->pBlockAgg[slotId];
|
||||
if (pInput->pColumnDataAgg[j] == NULL) {
|
||||
pInput->colDataAggIsSet = false;
|
||||
pInput->colDataSMAIsSet = false;
|
||||
}
|
||||
|
||||
// Here we set the column info data since the data type for each column data is required, but
|
||||
|
@ -818,7 +818,7 @@ void setBlockSMAInfo(SqlFunctionCtx* pCtx, SExprInfo* pExprInfo, SSDataBlock* pB
|
|||
}
|
||||
}
|
||||
} else {
|
||||
pInput->colDataAggIsSet = false;
|
||||
pInput->colDataSMAIsSet = false;
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -498,13 +498,13 @@ static int32_t getNumOfElems(SqlFunctionCtx* pCtx) {
|
|||
int32_t numOfElem = 0;
|
||||
|
||||
/*
|
||||
* 1. column data missing (schema modified) causes pInputCol->hasNull == true. pInput->colDataAggIsSet == true;
|
||||
* 2. for general non-primary key columns, pInputCol->hasNull may be true or false, pInput->colDataAggIsSet == true;
|
||||
* 3. for primary key column, pInputCol->hasNull always be false, pInput->colDataAggIsSet == false;
|
||||
* 1. column data missing (schema modified) causes pInputCol->hasNull == true. pInput->colDataSMAIsSet == true;
|
||||
* 2. for general non-primary key columns, pInputCol->hasNull may be true or false, pInput->colDataSMAIsSet == true;
|
||||
* 3. for primary key column, pInputCol->hasNull always be false, pInput->colDataSMAIsSet == false;
|
||||
*/
|
||||
SInputColumnInfoData* pInput = &pCtx->input;
|
||||
SColumnInfoData* pInputCol = pInput->pData[0];
|
||||
if (pInput->colDataAggIsSet && pInput->totalRows == pInput->numOfRows) {
|
||||
if (pInput->colDataSMAIsSet && pInput->totalRows == pInput->numOfRows) {
|
||||
numOfElem = pInput->numOfRows - pInput->pColumnDataAgg[0]->numOfNull;
|
||||
ASSERT(numOfElem >= 0);
|
||||
} else {
|
||||
|
@ -593,7 +593,7 @@ int32_t sumFunction(SqlFunctionCtx* pCtx) {
|
|||
goto _sum_over;
|
||||
}
|
||||
|
||||
if (pInput->colDataAggIsSet) {
|
||||
if (pInput->colDataSMAIsSet) {
|
||||
numOfElem = pInput->numOfRows - pAgg->numOfNull;
|
||||
ASSERT(numOfElem >= 0);
|
||||
|
||||
|
@ -658,7 +658,7 @@ int32_t sumInvertFunction(SqlFunctionCtx* pCtx) {
|
|||
|
||||
SSumRes* pSumRes = GET_ROWCELL_INTERBUF(GET_RES_INFO(pCtx));
|
||||
|
||||
if (pInput->colDataAggIsSet) {
|
||||
if (pInput->colDataSMAIsSet) {
|
||||
numOfElem = pInput->numOfRows - pAgg->numOfNull;
|
||||
ASSERT(numOfElem >= 0);
|
||||
|
||||
|
@ -770,7 +770,7 @@ bool getSumFuncEnv(SFunctionNode* UNUSED_PARAM(pFunc), SFuncExecEnv* pEnv) {
|
|||
// goto _avg_over;
|
||||
// }
|
||||
//
|
||||
// if (pInput->colDataAggIsSet) {
|
||||
// if (pInput->colDataSMAIsSet) {
|
||||
// numOfElem = numOfRows - pAgg->numOfNull;
|
||||
// ASSERT(numOfElem >= 0);
|
||||
//
|
||||
|
@ -1161,7 +1161,7 @@ bool getMinmaxFuncEnv(SFunctionNode* UNUSED_PARAM(pFunc), SFuncExecEnv* pEnv) {
|
|||
// }
|
||||
//
|
||||
// // data in current data block are qualified to the query
|
||||
// if (pInput->colDataAggIsSet) {
|
||||
// if (pInput->colDataSMAIsSet) {
|
||||
// numOfElems = pInput->numOfRows - pAgg->numOfNull;
|
||||
// ASSERT(pInput->numOfRows == pInput->totalRows && numOfElems >= 0);
|
||||
// if (numOfElems == 0) {
|
||||
|
@ -2471,7 +2471,7 @@ int32_t percentileFunction(SqlFunctionCtx* pCtx) {
|
|||
|
||||
// the first stage, only acquire the min/max value
|
||||
if (pInfo->stage == 0) {
|
||||
if (pCtx->input.colDataAggIsSet) {
|
||||
if (pCtx->input.colDataSMAIsSet) {
|
||||
double tmin = 0.0, tmax = 0.0;
|
||||
if (IS_SIGNED_NUMERIC_TYPE(type)) {
|
||||
tmin = (double)GET_INT64_VAL(&pAgg->min);
|
||||
|
@ -2933,14 +2933,14 @@ int32_t firstFunction(SqlFunctionCtx* pCtx) {
|
|||
pInfo->bytes = pInputCol->info.bytes;
|
||||
|
||||
// All null data column, return directly.
|
||||
if (pInput->colDataAggIsSet && (pInput->pColumnDataAgg[0]->numOfNull == pInput->totalRows)) {
|
||||
if (pInput->colDataSMAIsSet && (pInput->pColumnDataAgg[0]->numOfNull == pInput->totalRows)) {
|
||||
ASSERT(pInputCol->hasNull == true);
|
||||
// save selectivity value for column consisted of all null values
|
||||
firstlastSaveTupleData(pCtx->pSrcBlock, pInput->startRowIndex, pCtx, pInfo);
|
||||
return 0;
|
||||
}
|
||||
|
||||
SColumnDataAgg* pColAgg = (pInput->colDataAggIsSet) ? pInput->pColumnDataAgg[0] : NULL;
|
||||
SColumnDataAgg* pColAgg = (pInput->colDataSMAIsSet) ? pInput->pColumnDataAgg[0] : NULL;
|
||||
|
||||
TSKEY startKey = getRowPTs(pInput->pPTS, 0);
|
||||
TSKEY endKey = getRowPTs(pInput->pPTS, pInput->totalRows - 1);
|
||||
|
@ -3037,14 +3037,14 @@ int32_t lastFunction(SqlFunctionCtx* pCtx) {
|
|||
pInfo->bytes = bytes;
|
||||
|
||||
// All null data column, return directly.
|
||||
if (pInput->colDataAggIsSet && (pInput->pColumnDataAgg[0]->numOfNull == pInput->totalRows)) {
|
||||
if (pInput->colDataSMAIsSet && (pInput->pColumnDataAgg[0]->numOfNull == pInput->totalRows)) {
|
||||
ASSERT(pInputCol->hasNull == true);
|
||||
// save selectivity value for column consisted of all null values
|
||||
firstlastSaveTupleData(pCtx->pSrcBlock, pInput->startRowIndex, pCtx, pInfo);
|
||||
return 0;
|
||||
}
|
||||
|
||||
SColumnDataAgg* pColAgg = (pInput->colDataAggIsSet) ? pInput->pColumnDataAgg[0] : NULL;
|
||||
SColumnDataAgg* pColAgg = (pInput->colDataSMAIsSet) ? pInput->pColumnDataAgg[0] : NULL;
|
||||
|
||||
TSKEY startKey = getRowPTs(pInput->pPTS, 0);
|
||||
TSKEY endKey = getRowPTs(pInput->pPTS, pInput->totalRows - 1);
|
||||
|
@ -3988,7 +3988,7 @@ int32_t spreadFunction(SqlFunctionCtx* pCtx) {
|
|||
|
||||
SSpreadInfo* pInfo = GET_ROWCELL_INTERBUF(GET_RES_INFO(pCtx));
|
||||
|
||||
if (pInput->colDataAggIsSet) {
|
||||
if (pInput->colDataSMAIsSet) {
|
||||
numOfElems = pInput->numOfRows - pAgg->numOfNull;
|
||||
if (numOfElems == 0) {
|
||||
goto _spread_over;
|
||||
|
@ -4163,7 +4163,7 @@ int32_t elapsedFunction(SqlFunctionCtx* pCtx) {
|
|||
goto _elapsed_over;
|
||||
}
|
||||
|
||||
if (pInput->colDataAggIsSet) {
|
||||
if (pInput->colDataSMAIsSet) {
|
||||
if (pInfo->min == TSKEY_MAX) {
|
||||
pInfo->min = GET_INT64_VAL(&pAgg->min);
|
||||
pInfo->max = GET_INT64_VAL(&pAgg->max);
|
||||
|
|
|
@ -48,15 +48,14 @@ typedef struct SAvgRes {
|
|||
int16_t type; // store the original input type, used in merge function
|
||||
} SAvgRes;
|
||||
|
||||
static void floatVectorSumAVX(const SInputColumnInfoData* pInput, const float* plist, SAvgRes* pRes) {
|
||||
static void floatVectorSumAVX(const float* plist, int32_t numOfRows, SAvgRes* pRes) {
|
||||
#if __AVX__
|
||||
// find the start position that are aligned to 32bytes address in memory
|
||||
int32_t startIndex = 0; //((uint64_t)plist) & ((1<<8u)-1);
|
||||
int32_t bitWidth = 8;
|
||||
int32_t remainder = numOfRows % bitWidth;
|
||||
int32_t rounds = numOfRows / bitWidth;
|
||||
|
||||
int32_t remain = (pInput->numOfRows - startIndex) % bitWidth;
|
||||
int32_t rounds = (pInput->numOfRows - startIndex) / bitWidth;
|
||||
const float* p = &plist[startIndex];
|
||||
const float* p = plist;
|
||||
|
||||
__m256 val;
|
||||
__m256 sum = _mm256_setzero_ps();
|
||||
|
@ -71,18 +70,126 @@ static void floatVectorSumAVX(const SInputColumnInfoData* pInput, const float* p
|
|||
const float* q = (const float*)&sum;
|
||||
pRes->sum.dsum += q[0] + q[1] + q[2] + q[3] + q[4] + q[5] + q[6] + q[7];
|
||||
|
||||
// calculate the front and the remainder items in array list
|
||||
for (int32_t j = 0; j < startIndex; ++j) {
|
||||
pRes->sum.dsum += plist[j];
|
||||
int32_t startIndex = rounds * bitWidth;
|
||||
for (int32_t j = 0; j < remainder; ++j) {
|
||||
pRes->sum.dsum += plist[j + startIndex];
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
static void doubleVectorSumAVX(const double* plist, int32_t numOfRows, SAvgRes* pRes) {
|
||||
#if __AVX__
|
||||
// find the start position that are aligned to 32bytes address in memory
|
||||
int32_t bitWidth = 4;
|
||||
int32_t remainder = numOfRows % bitWidth;
|
||||
int32_t rounds = numOfRows / bitWidth;
|
||||
|
||||
const double* p = plist;
|
||||
|
||||
__m256d val;
|
||||
__m256d sum = _mm256_setzero_pd();
|
||||
|
||||
for (int32_t i = 0; i < rounds; ++i) {
|
||||
val = _mm256_loadu_pd(p);
|
||||
sum = _mm256_add_pd(sum, val);
|
||||
p += bitWidth;
|
||||
}
|
||||
|
||||
startIndex += rounds * bitWidth;
|
||||
for (int32_t j = 0; j < remain; ++j) {
|
||||
// let sum up the final results
|
||||
const double* q = (const double*)&sum;
|
||||
pRes->sum.dsum += q[0] + q[1] + q[2] + q[3];
|
||||
|
||||
int32_t startIndex = rounds * bitWidth;
|
||||
for (int32_t j = 0; j < remainder; ++j) {
|
||||
pRes->sum.dsum += plist[j + startIndex];
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
/*
 * Add the sum of plist[0 .. numOfRows) to pRes->sum.isum.
 *
 * plist      int8 values to be summed; no alignment is required (unaligned loads are used)
 * numOfRows  number of elements in plist
 * pRes       accumulator; only pRes->sum.isum is updated
 *
 * With AVX2 compiled in, 16 values are consumed per round and widened step by step
 * (int8 -> int16 -> int32 -> int64) so that no intermediate lane can overflow. Elements
 * that do not fill a complete round are added one by one. Without AVX2 a plain scalar
 * loop is used, so the function never silently skips the input.
 */
static void i8VectorSumAVX2(const int8_t* plist, int32_t numOfRows, SAvgRes* pRes) {
#if __AVX2__
  // one unaligned 128-bit load supplies exactly 16 int8 values per round
  const int32_t bitWidth = 16;
  int32_t       remainder = numOfRows % bitWidth;
  int32_t       rounds = numOfRows / bitWidth;

  const int8_t* p = plist;
  const __m256i ones = _mm256_set1_epi16(1);

  // four int64 partial sums
  __m256i sum = _mm256_setzero_si256();

  for (int32_t i = 0; i < rounds; ++i) {
    __m128i val = _mm_lddqu_si128((const __m128i*)p);

    // sign-extend to 16 x int16, then add adjacent pairs into 8 x int32 (multiply by 1 and add)
    __m256i val16 = _mm256_cvtepi8_epi16(val);
    __m256i val32 = _mm256_madd_epi16(val16, ones);

    // sign-extend both 128-bit halves to 4 x int64 before accumulating
    __m256i lo = _mm256_cvtepi32_epi64(_mm256_castsi256_si128(val32));
    __m256i hi = _mm256_cvtepi32_epi64(_mm256_extracti128_si256(val32, 1));

    sum = _mm256_add_epi64(sum, _mm256_add_epi64(lo, hi));
    p += bitWidth;
  }

  // fold the four 64-bit lanes into the final result
  int64_t lanes[4];
  _mm256_storeu_si256((__m256i*)lanes, sum);
  pRes->sum.isum += lanes[0] + lanes[1] + lanes[2] + lanes[3];

  // the tail elements that do not fill a complete round
  int32_t startIndex = rounds * bitWidth;
  for (int32_t j = 0; j < remainder; ++j) {
    pRes->sum.isum += plist[j + startIndex];
  }
#else
  // AVX2 is not available at compile time: fall back to the scalar loop
  for (int32_t i = 0; i < numOfRows; ++i) {
    pRes->sum.isum += plist[i];
  }
#endif
}
|
||||
|
||||
/*
 * Add the sum of plist[0 .. numOfRows) to pRes->sum.isum.
 *
 * plist      int32 values to be summed; no alignment is required (unaligned loads are used)
 * numOfRows  number of elements in plist
 * pRes       accumulator; only pRes->sum.isum is updated
 *
 * With AVX2 compiled in, 8 values are loaded per round and sign-extended to int64 before
 * they are accumulated, so the partial sums are kept in 64-bit lanes and can be read back
 * as int64 values. Elements that do not fill a complete round are added one by one.
 * Without AVX2 a plain scalar loop is used, so the function never silently skips the input.
 */
static void i32VectorSumAVX2(const int32_t* plist, int32_t numOfRows, SAvgRes* pRes) {
#if __AVX2__
  // one unaligned 256-bit load supplies exactly 8 int32 values per round
  const int32_t bitWidth = 8;
  int32_t       remainder = numOfRows % bitWidth;
  int32_t       rounds = numOfRows / bitWidth;

  const int32_t* p = plist;

  // four int64 partial sums
  __m256i sum = _mm256_setzero_si256();

  for (int32_t i = 0; i < rounds; ++i) {
    __m256i val = _mm256_lddqu_si256((const __m256i*)p);

    // sign-extend both 128-bit halves to 4 x int64 before accumulating
    __m256i lo = _mm256_cvtepi32_epi64(_mm256_castsi256_si128(val));
    __m256i hi = _mm256_cvtepi32_epi64(_mm256_extracti128_si256(val, 1));

    sum = _mm256_add_epi64(sum, _mm256_add_epi64(lo, hi));
    p += bitWidth;
  }

  // fold the four 64-bit lanes into the final result
  int64_t lanes[4];
  _mm256_storeu_si256((__m256i*)lanes, sum);
  pRes->sum.isum += lanes[0] + lanes[1] + lanes[2] + lanes[3];

  // the tail elements that do not fill a complete round
  int32_t startIndex = rounds * bitWidth;
  for (int32_t j = 0; j < remainder; ++j) {
    pRes->sum.isum += plist[j + startIndex];
  }
#else
  // AVX2 is not available at compile time: fall back to the scalar loop
  for (int32_t i = 0; i < numOfRows; ++i) {
    pRes->sum.isum += plist[i];
  }
#endif
}
|
||||
|
||||
/*
 * Add the sum of plist[0 .. numOfRows) to pRes->sum.isum.
 *
 * When AVX2 is compiled in, four int64 values are loaded (unaligned) and added per round;
 * the four lanes are folded into pRes->sum.isum afterwards and the leftover elements are
 * added one by one. When AVX2 is not compiled in, the body is empty and pRes is untouched.
 */
static void i64VectorSumAVX2(const int64_t* plist, int32_t numOfRows, SAvgRes* pRes) {
#if __AVX2__
  const int32_t lanesPerVec = 4;  // int64 values held by one 256-bit register
  const int32_t fullVecs = numOfRows / lanesPerVec;
  const int32_t leftover = numOfRows % lanesPerVec;

  __m256i acc = _mm256_setzero_si256();

  // vectorized part: one 256-bit load and one packed 64-bit add per round
  const int64_t* cursor = plist;
  for (int32_t round = 0; round < fullVecs; ++round, cursor += lanesPerVec) {
    acc = _mm256_add_epi64(acc, _mm256_lddqu_si256((__m256i*)cursor));
  }

  // fold the four partial sums into the result
  const int64_t* lane = (const int64_t*)&acc;
  pRes->sum.isum += lane[0] + lane[1] + lane[2] + lane[3];

  // scalar part: the elements behind the last complete round
  const int64_t* tail = plist + fullVecs * lanesPerVec;
  for (int32_t k = 0; k < leftover; ++k) {
    pRes->sum.isum += tail[k];
  }
#endif
}
|
||||
|
||||
static int32_t handleFloatCols(const SColumnInfoData* pCol, const SInputColumnInfoData* pInput, SAvgRes* pRes) {
|
||||
int32_t numOfElems = 0;
|
||||
float* plist = (float*)pCol->pData;
|
||||
|
@ -105,7 +212,7 @@ static int32_t handleFloatCols(const SColumnInfoData* pCol, const SInputColumnIn
|
|||
|
||||
// 3. If the CPU supports AVX, let's employ AVX instructions to speedup this loop
|
||||
if (tsAVXEnable && tsSIMDEnable) {
|
||||
floatVectorSumAVX(pInput, plist, pRes);
|
||||
floatVectorSumAVX(plist, pInput->numOfRows, pRes);
|
||||
} else {
|
||||
for (int32_t i = pInput->startRowIndex; i < pInput->numOfRows + pInput->startRowIndex; ++i) {
|
||||
pRes->sum.dsum += plist[i];
|
||||
|
@ -133,8 +240,25 @@ bool avgFunctionSetup(SqlFunctionCtx* pCtx, SResultRowEntryInfo* pResultInfo) {
|
|||
return true;
|
||||
}
|
||||
|
||||
/*
 * Update the avg accumulator from the pre-computed block statistics in pAgg instead of
 * scanning the rows.
 *
 * pRes       accumulator: count is increased by the number of non-null rows, and the sum
 *            member matching the type class (isum / usum / dsum) is increased by pAgg->sum
 * numOfRows  number of rows covered by pAgg
 * type       data type of the input column; a type that is neither signed, unsigned nor
 *            float only updates the count
 * pAgg       block statistics; numOfNull and sum are read
 *
 * Returns the number of non-null rows (numOfRows - pAgg->numOfNull).
 */
static int32_t calculateAvgBySMAInfo(SAvgRes* pRes, int32_t numOfRows, int32_t type, const SColumnDataAgg* pAgg) {
  int32_t nonNullRows = numOfRows - pAgg->numOfNull;
  ASSERT(nonNullRows >= 0);

  pRes->count += nonNullRows;

  if (IS_SIGNED_NUMERIC_TYPE(type)) {
    pRes->sum.isum += pAgg->sum;
    return nonNullRows;
  }

  if (IS_UNSIGNED_NUMERIC_TYPE(type)) {
    pRes->sum.usum += pAgg->sum;
    return nonNullRows;
  }

  if (IS_FLOAT_TYPE(type)) {
    // for float types the sum field is read through GET_DOUBLE_VAL rather than as an integer
    pRes->sum.dsum += GET_DOUBLE_VAL((const char*)&(pAgg->sum));
  }

  return nonNullRows;
}
|
||||
|
||||
int32_t avgFunction(SqlFunctionCtx* pCtx) {
|
||||
int32_t numOfElem = 0;
|
||||
const int32_t THRESHOLD_SIZE = 8;
|
||||
|
||||
SInputColumnInfoData* pInput = &pCtx->input;
|
||||
SColumnDataAgg* pAgg = pInput->pColumnDataAgg[0];
|
||||
|
@ -154,19 +278,149 @@ int32_t avgFunction(SqlFunctionCtx* pCtx) {
|
|||
goto _avg_over;
|
||||
}
|
||||
|
||||
if (pInput->colDataAggIsSet) {
|
||||
numOfElem = numOfRows - pAgg->numOfNull;
|
||||
ASSERT(numOfElem >= 0);
|
||||
if (pInput->colDataSMAIsSet) { // try to use SMA if available
|
||||
numOfElem = calculateAvgBySMAInfo(pAvgRes, numOfRows, type, pAgg);
|
||||
} else if (!pCol->hasNull) { // try to employ the simd instructions to speed up the loop
|
||||
numOfElem = pInput->numOfRows;
|
||||
pAvgRes->count += pInput->numOfRows;
|
||||
|
||||
pAvgRes->count += numOfElem;
|
||||
if (IS_SIGNED_NUMERIC_TYPE(type)) {
|
||||
pAvgRes->sum.isum += pAgg->sum;
|
||||
} else if (IS_UNSIGNED_NUMERIC_TYPE(type)) {
|
||||
pAvgRes->sum.usum += pAgg->sum;
|
||||
} else if (IS_FLOAT_TYPE(type)) {
|
||||
pAvgRes->sum.dsum += GET_DOUBLE_VAL((const char*)&(pAgg->sum));
|
||||
bool simdAvaiable = tsAVXEnable && tsSIMDEnable && (numOfRows > THRESHOLD_SIZE);
|
||||
|
||||
switch(type) {
|
||||
case TSDB_DATA_TYPE_TINYINT: {
|
||||
const int8_t* plist = (const int8_t*) pCol->pData;
|
||||
|
||||
// 1. If the CPU supports AVX, let's employ AVX instructions to speedup this loop
|
||||
if (simdAvaiable) {
|
||||
i8VectorSumAVX2(plist, numOfRows, pAvgRes);
|
||||
} else {
|
||||
for (int32_t i = pInput->startRowIndex; i < pInput->numOfRows + pInput->startRowIndex; ++i) {
|
||||
pAvgRes->sum.isum += plist[i];
|
||||
}
|
||||
} else { // computing based on the true data block
|
||||
}
|
||||
break;
|
||||
}
|
||||
case TSDB_DATA_TYPE_SMALLINT: {
|
||||
const double* plist = (const double*)pCol->pData;
|
||||
|
||||
// 1. If the CPU supports AVX, let's employ AVX instructions to speedup this loop
|
||||
if (simdAvaiable) {
|
||||
doubleVectorSumAVX(plist, numOfRows, pAvgRes);
|
||||
} else {
|
||||
for (int32_t i = pInput->startRowIndex; i < pInput->numOfRows + pInput->startRowIndex; ++i) {
|
||||
pAvgRes->sum.isum += plist[i];
|
||||
}
|
||||
}
|
||||
break;
|
||||
}
|
||||
case TSDB_DATA_TYPE_INT: {
|
||||
const int32_t* plist = (const int32_t*) pCol->pData;
|
||||
|
||||
// 1. If the CPU supports AVX, let's employ AVX instructions to speedup this loop
|
||||
if (simdAvaiable) {
|
||||
i32VectorSumAVX2(plist, numOfRows, pAvgRes);
|
||||
} else {
|
||||
for (int32_t i = pInput->startRowIndex; i < pInput->numOfRows + pInput->startRowIndex; ++i) {
|
||||
pAvgRes->sum.isum += plist[i];
|
||||
}
|
||||
}
|
||||
break;
|
||||
}
|
||||
case TSDB_DATA_TYPE_BIGINT: {
|
||||
const int64_t* plist = (const int64_t*) pCol->pData;
|
||||
|
||||
// 1. If the CPU supports AVX, let's employ AVX instructions to speedup this loop
|
||||
if (simdAvaiable) {
|
||||
i64VectorSumAVX2(plist, numOfRows, pAvgRes);
|
||||
} else {
|
||||
for (int32_t i = pInput->startRowIndex; i < pInput->numOfRows + pInput->startRowIndex; ++i) {
|
||||
pAvgRes->sum.isum += plist[i];
|
||||
}
|
||||
}
|
||||
break;
|
||||
}
|
||||
case TSDB_DATA_TYPE_FLOAT: {
|
||||
const float* plist = (const float*) pCol->pData;
|
||||
|
||||
// 1. If the CPU supports AVX, let's employ AVX instructions to speedup this loop
|
||||
if (simdAvaiable) {
|
||||
floatVectorSumAVX(plist, numOfRows, pAvgRes);
|
||||
} else {
|
||||
for (int32_t i = pInput->startRowIndex; i < pInput->numOfRows + pInput->startRowIndex; ++i) {
|
||||
pAvgRes->sum.dsum += plist[i];
|
||||
}
|
||||
}
|
||||
break;
|
||||
}
|
||||
case TSDB_DATA_TYPE_DOUBLE: {
|
||||
const double* plist = (const double*) pCol->pData;
|
||||
|
||||
// 1. If the CPU supports AVX, let's employ AVX instructions to speedup this loop
|
||||
if (simdAvaiable) {
|
||||
doubleVectorSumAVX(plist, numOfRows, pAvgRes);
|
||||
} else {
|
||||
for (int32_t i = pInput->startRowIndex; i < pInput->numOfRows + pInput->startRowIndex; ++i) {
|
||||
pAvgRes->sum.dsum += plist[i];
|
||||
}
|
||||
}
|
||||
break;
|
||||
}
|
||||
case TSDB_DATA_TYPE_UTINYINT: {
|
||||
const double* plist = (const double*) pCol->pData;
|
||||
|
||||
// 1. If the CPU supports AVX, let's employ AVX instructions to speedup this loop
|
||||
if (simdAvaiable) {
|
||||
doubleVectorSumAVX(plist, numOfRows, pAvgRes);
|
||||
} else {
|
||||
for (int32_t i = pInput->startRowIndex; i < pInput->numOfRows + pInput->startRowIndex; ++i) {
|
||||
pAvgRes->sum.usum += plist[i];
|
||||
}
|
||||
}
|
||||
break;
|
||||
}
|
||||
case TSDB_DATA_TYPE_USMALLINT: {
|
||||
const double* plist = (const double*) pCol->pData;
|
||||
|
||||
// 1. If the CPU supports AVX, let's employ AVX instructions to speedup this loop
|
||||
if (simdAvaiable) {
|
||||
doubleVectorSumAVX(plist, numOfRows, pAvgRes);
|
||||
} else {
|
||||
for (int32_t i = pInput->startRowIndex; i < pInput->numOfRows + pInput->startRowIndex; ++i) {
|
||||
pAvgRes->sum.usum += plist[i];
|
||||
}
|
||||
}
|
||||
break;
|
||||
}
|
||||
case TSDB_DATA_TYPE_UINT: {
|
||||
const double* plist = (const double*) pCol->pData;
|
||||
|
||||
// 1. If the CPU supports AVX, let's employ AVX instructions to speedup this loop
|
||||
if (simdAvaiable) {
|
||||
doubleVectorSumAVX(plist, numOfRows, pAvgRes);
|
||||
} else {
|
||||
for (int32_t i = pInput->startRowIndex; i < pInput->numOfRows + pInput->startRowIndex; ++i) {
|
||||
pAvgRes->sum.usum += plist[i];
|
||||
}
|
||||
}
|
||||
break;
|
||||
}
|
||||
case TSDB_DATA_TYPE_UBIGINT: {
|
||||
const double* plist = (const double*) pCol->pData;
|
||||
|
||||
// 1. If the CPU supports AVX, let's employ AVX instructions to speedup this loop
|
||||
if (simdAvaiable) {
|
||||
doubleVectorSumAVX(plist, numOfRows, pAvgRes);
|
||||
} else {
|
||||
for (int32_t i = pInput->startRowIndex; i < pInput->numOfRows + pInput->startRowIndex; ++i) {
|
||||
pAvgRes->sum.usum += plist[i];
|
||||
}
|
||||
}
|
||||
break;
|
||||
}
|
||||
default:
|
||||
ASSERT(0);
|
||||
}
|
||||
} else {
|
||||
switch (type) {
|
||||
case TSDB_DATA_TYPE_TINYINT: {
|
||||
int8_t* plist = (int8_t*)pCol->pData;
|
||||
|
|
|
@ -36,7 +36,7 @@ static int32_t i32VectorCmpAVX2(const int32_t* pData, int32_t numOfRows, bool is
|
|||
|
||||
if (!isMinFunc) { // max function
|
||||
for (int32_t i = 0; i < rounds; ++i) {
|
||||
next = _mm256_loadu_si256((__m256i*)p);
|
||||
next = _mm256_lddqu_si256((__m256i*)p);
|
||||
initialVal = _mm256_max_epi32(initialVal, next);
|
||||
p += bitWidth;
|
||||
}
|
||||
|
@ -61,7 +61,7 @@ static int32_t i32VectorCmpAVX2(const int32_t* pData, int32_t numOfRows, bool is
|
|||
}
|
||||
} else { // min function
|
||||
for (int32_t i = 0; i < rounds; ++i) {
|
||||
next = _mm256_loadu_si256((__m256i*)p);
|
||||
next = _mm256_lddqu_si256((__m256i*)p);
|
||||
initialVal = _mm256_min_epi32(initialVal, next);
|
||||
p += bitWidth;
|
||||
}
|
||||
|
@ -369,7 +369,7 @@ int32_t doMinMaxHelper(SqlFunctionCtx* pCtx, int32_t isMinFunc) {
|
|||
}
|
||||
|
||||
// data in current data block are qualified to the query
|
||||
if (pInput->colDataAggIsSet) {
|
||||
if (pInput->colDataSMAIsSet) {
|
||||
numOfElems = pInput->numOfRows - pAgg->numOfNull;
|
||||
ASSERT(pInput->numOfRows == pInput->totalRows && numOfElems >= 0);
|
||||
if (numOfElems == 0) {
|
||||
|
|
Loading…
Reference in New Issue