Merge pull request #17972 from taosdata/fix/TD-20246

refactor: optimize interp linear interpolation ignore NULL values during calculation
This commit is contained in:
dapan1121 2022-11-21 10:15:37 +08:00 committed by GitHub
commit d3a75048c2
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 1074 additions and 222 deletions

View File

@ -776,7 +776,6 @@ typedef struct STimeSliceOperatorInfo {
SArray* pPrevRow; // SArray<SGroupValue>
SArray* pNextRow; // SArray<SGroupValue>
SArray* pLinearInfo; // SArray<SFillLinearInfo>
bool fillLastPoint;
bool isPrevRowSet;
bool isNextRowSet;
int32_t fillType; // fill type

View File

@ -36,7 +36,8 @@ typedef struct SFillColInfo {
typedef struct SFillLinearInfo {
SPoint start;
SPoint end;
bool hasNull;
bool isStartSet;
bool isEndSet;
int16_t type;
int32_t bytes;
} SFillLinearInfo;

View File

@ -1942,10 +1942,8 @@ static void doKeepPrevRows(STimeSliceOperatorInfo* pSliceInfo, const SSDataBlock
for (int32_t i = 0; i < numOfCols; ++i) {
SColumnInfoData* pColInfoData = taosArrayGet(pBlock->pDataBlock, i);
// null data should not be kept since it can not be used to perform interpolation
if (!colDataIsNull_s(pColInfoData, i)) {
SGroupKeys* pkey = taosArrayGet(pSliceInfo->pPrevRow, i);
SGroupKeys* pkey = taosArrayGet(pSliceInfo->pPrevRow, i);
if (!colDataIsNull_s(pColInfoData, rowIndex)) {
pkey->isNull = false;
char* val = colDataGetData(pColInfoData, rowIndex);
if (!IS_VAR_DATA_TYPE(pkey->type)) {
@ -1953,6 +1951,8 @@ static void doKeepPrevRows(STimeSliceOperatorInfo* pSliceInfo, const SSDataBlock
} else {
memcpy(pkey->pData, val, varDataLen(val));
}
} else {
pkey->isNull = true;
}
}
@ -1964,10 +1964,8 @@ static void doKeepNextRows(STimeSliceOperatorInfo* pSliceInfo, const SSDataBlock
for (int32_t i = 0; i < numOfCols; ++i) {
SColumnInfoData* pColInfoData = taosArrayGet(pBlock->pDataBlock, i);
// null data should not be kept since it can not be used to perform interpolation
if (!colDataIsNull_s(pColInfoData, i)) {
SGroupKeys* pkey = taosArrayGet(pSliceInfo->pNextRow, i);
SGroupKeys* pkey = taosArrayGet(pSliceInfo->pNextRow, i);
if (!colDataIsNull_s(pColInfoData, rowIndex)) {
pkey->isNull = false;
char* val = colDataGetData(pColInfoData, rowIndex);
if (!IS_VAR_DATA_TYPE(pkey->type)) {
@ -1975,50 +1973,51 @@ static void doKeepNextRows(STimeSliceOperatorInfo* pSliceInfo, const SSDataBlock
} else {
memcpy(pkey->pData, val, varDataLen(val));
}
} else {
pkey->isNull = true;
}
}
pSliceInfo->isNextRowSet = true;
}
static void doKeepLinearInfo(STimeSliceOperatorInfo* pSliceInfo, const SSDataBlock* pBlock, int32_t rowIndex,
bool isLastRow) {
static void doKeepLinearInfo(STimeSliceOperatorInfo* pSliceInfo, const SSDataBlock* pBlock, int32_t rowIndex) {
int32_t numOfCols = taosArrayGetSize(pBlock->pDataBlock);
bool fillLastPoint = pSliceInfo->fillLastPoint;
for (int32_t i = 0; i < numOfCols; ++i) {
SColumnInfoData* pColInfoData = taosArrayGet(pBlock->pDataBlock, i);
SColumnInfoData* pTsCol = taosArrayGet(pBlock->pDataBlock, pSliceInfo->tsCol.slotId);
SFillLinearInfo* pLinearInfo = taosArrayGet(pSliceInfo->pLinearInfo, i);
// null data should not be kept since it can not be used to perform interpolation
if (!colDataIsNull_s(pColInfoData, i)) {
if (isLastRow) {
// null value is represented by using key = INT64_MIN for now.
// TODO: optimize to ignore null values for linear interpolation.
if (!pLinearInfo->isStartSet) {
if (!colDataIsNull_s(pColInfoData, rowIndex)) {
pLinearInfo->start.key = *(int64_t*)colDataGetData(pTsCol, rowIndex);
memcpy(pLinearInfo->start.val, colDataGetData(pColInfoData, rowIndex), pLinearInfo->bytes);
} else if (fillLastPoint) {
}
pLinearInfo->isStartSet = true;
} else if (!pLinearInfo->isEndSet) {
if (!colDataIsNull_s(pColInfoData, rowIndex)) {
pLinearInfo->end.key = *(int64_t*)colDataGetData(pTsCol, rowIndex);
memcpy(pLinearInfo->end.val, colDataGetData(pColInfoData, rowIndex), pLinearInfo->bytes);
}
pLinearInfo->isEndSet = true;
} else {
pLinearInfo->start.key = pLinearInfo->end.key;
memcpy(pLinearInfo->start.val, pLinearInfo->end.val, pLinearInfo->bytes);
if (!colDataIsNull_s(pColInfoData, rowIndex)) {
pLinearInfo->end.key = *(int64_t*)colDataGetData(pTsCol, rowIndex);
memcpy(pLinearInfo->end.val, colDataGetData(pColInfoData, rowIndex), pLinearInfo->bytes);
} else {
pLinearInfo->start.key = *(int64_t*)colDataGetData(pTsCol, rowIndex);
pLinearInfo->end.key = *(int64_t*)colDataGetData(pTsCol, rowIndex + 1);
char* val;
val = colDataGetData(pColInfoData, rowIndex);
memcpy(pLinearInfo->start.val, val, pLinearInfo->bytes);
val = colDataGetData(pColInfoData, rowIndex + 1);
memcpy(pLinearInfo->end.val, val, pLinearInfo->bytes);
pLinearInfo->end.key = INT64_MIN;
}
pLinearInfo->hasNull = false;
} else {
pLinearInfo->hasNull = true;
}
}
pSliceInfo->fillLastPoint = isLastRow;
}
static void genInterpolationResult(STimeSliceOperatorInfo* pSliceInfo, SExprSupp* pExprSup, SSDataBlock* pResBlock) {
static bool genInterpolationResult(STimeSliceOperatorInfo* pSliceInfo, SExprSupp* pExprSup, SSDataBlock* pResBlock, bool beforeTs) {
int32_t rows = pResBlock->info.rows;
blockDataEnsureCapacity(pResBlock, rows + 1);
// todo set the correct primary timestamp column
@ -2037,7 +2036,6 @@ static void genInterpolationResult(STimeSliceOperatorInfo* pSliceInfo, SExprSupp
}
int32_t srcSlot = pExprInfo->base.pParam[0].pCol->slotId;
// SColumnInfoData* pSrc = taosArrayGet(pBlock->pDataBlock, srcSlot);
switch (pSliceInfo->fillType) {
case TSDB_FILL_NULL: {
colDataAppendNULL(pDst, rows);
@ -2069,21 +2067,26 @@ static void genInterpolationResult(STimeSliceOperatorInfo* pSliceInfo, SExprSupp
SPoint start = pLinearInfo->start;
SPoint end = pLinearInfo->end;
SPoint current = {.key = pSliceInfo->current};
current.val = taosMemoryCalloc(pLinearInfo->bytes, 1);
// before interp range, do not fill
if (start.key == INT64_MIN || end.key == INT64_MAX) {
// do not interpolate before ts range, only increate pSliceInfo->current
if (beforeTs && !pLinearInfo->isEndSet) {
return true;
}
if (!pLinearInfo->isStartSet || !pLinearInfo->isEndSet) {
hasInterp = false;
break;
}
if (pLinearInfo->hasNull) {
if (start.key == INT64_MIN || end.key == INT64_MIN) {
colDataAppendNULL(pDst, rows);
} else {
taosGetLinearInterpolationVal(&current, pLinearInfo->type, &start, &end, pLinearInfo->type);
colDataAppend(pDst, rows, (char*)current.val, false);
break;
}
current.val = taosMemoryCalloc(pLinearInfo->bytes, 1);
taosGetLinearInterpolationVal(&current, pLinearInfo->type, &start, &end, pLinearInfo->type);
colDataAppend(pDst, rows, (char*)current.val, false);
taosMemoryFree(current.val);
break;
}
@ -2094,7 +2097,11 @@ static void genInterpolationResult(STimeSliceOperatorInfo* pSliceInfo, SExprSupp
}
SGroupKeys* pkey = taosArrayGet(pSliceInfo->pPrevRow, srcSlot);
colDataAppend(pDst, rows, pkey->pData, false);
if (pkey->isNull == false) {
colDataAppend(pDst, rows, pkey->pData, false);
} else {
colDataAppendNULL(pDst, rows);
}
break;
}
@ -2105,7 +2112,11 @@ static void genInterpolationResult(STimeSliceOperatorInfo* pSliceInfo, SExprSupp
}
SGroupKeys* pkey = taosArrayGet(pSliceInfo->pNextRow, srcSlot);
colDataAppend(pDst, rows, pkey->pData, false);
if (pkey->isNull == false) {
colDataAppend(pDst, rows, pkey->pData, false);
} else {
colDataAppendNULL(pDst, rows);
}
break;
}
@ -2118,8 +2129,40 @@ static void genInterpolationResult(STimeSliceOperatorInfo* pSliceInfo, SExprSupp
if (hasInterp) {
pResBlock->info.rows += 1;
}
return hasInterp;
}
static void addCurrentRowToResult(STimeSliceOperatorInfo* pSliceInfo, SExprSupp* pExprSup, SSDataBlock* pResBlock,
SSDataBlock* pSrcBlock, int32_t index) {
blockDataEnsureCapacity(pResBlock, pResBlock->info.rows + 1);
for (int32_t j = 0; j < pExprSup->numOfExprs; ++j) {
SExprInfo* pExprInfo = &pExprSup->pExprInfo[j];
int32_t dstSlot = pExprInfo->base.resSchema.slotId;
SColumnInfoData* pDst = taosArrayGet(pResBlock->pDataBlock, dstSlot);
if (IS_TIMESTAMP_TYPE(pExprInfo->base.resSchema.type)) {
colDataAppend(pDst, pResBlock->info.rows, (char*)&pSliceInfo->current, false);
} else {
int32_t srcSlot = pExprInfo->base.pParam[0].pCol->slotId;
SColumnInfoData* pSrc = taosArrayGet(pSrcBlock->pDataBlock, srcSlot);
if (colDataIsNull_s(pSrc, index)) {
colDataAppendNULL(pDst, pResBlock->info.rows);
continue;
}
char* v = colDataGetData(pSrc, index);
colDataAppend(pDst, pResBlock->info.rows, v, false);
}
}
pResBlock->info.rows += 1;
return;
}
static int32_t initPrevRowsKeeper(STimeSliceOperatorInfo* pInfo, SSDataBlock* pBlock) {
if (pInfo->pPrevRow != NULL) {
return TSDB_CODE_SUCCESS;
@ -2190,24 +2233,19 @@ static int32_t initFillLinearInfo(STimeSliceOperatorInfo* pInfo, SSDataBlock* pB
SFillLinearInfo linearInfo = {0};
linearInfo.start.key = INT64_MIN;
linearInfo.end.key = INT64_MAX;
linearInfo.end.key = INT64_MIN;
linearInfo.start.val = taosMemoryCalloc(1, pColInfo->info.bytes);
linearInfo.end.val = taosMemoryCalloc(1, pColInfo->info.bytes);
linearInfo.hasNull = false;
linearInfo.isStartSet = false;
linearInfo.isEndSet = false;
linearInfo.type = pColInfo->info.type;
linearInfo.bytes = pColInfo->info.bytes;
taosArrayPush(pInfo->pLinearInfo, &linearInfo);
}
pInfo->fillLastPoint = false;
return TSDB_CODE_SUCCESS;
}
static bool needToFillLastPoint(STimeSliceOperatorInfo* pSliceInfo) {
return (pSliceInfo->fillLastPoint == true && pSliceInfo->fillType == TSDB_FILL_LINEAR);
}
static int32_t initKeeperInfo(STimeSliceOperatorInfo* pInfo, SSDataBlock* pBlock) {
int32_t code;
code = initPrevRowsKeeper(pInfo, pBlock);
@ -2263,195 +2301,73 @@ static SSDataBlock* doTimeslice(SOperatorInfo* pOperator) {
for (int32_t i = 0; i < pBlock->info.rows; ++i) {
int64_t ts = *(int64_t*)colDataGetData(pTsCol, i);
if (i == 0 && needToFillLastPoint(pSliceInfo)) { // first row in current block
doKeepLinearInfo(pSliceInfo, pBlock, i, false);
while (pSliceInfo->current < ts && pSliceInfo->current <= pSliceInfo->win.ekey) {
genInterpolationResult(pSliceInfo, &pOperator->exprSupp, pResBlock);
pSliceInfo->current =
taosTimeAdd(pSliceInfo->current, pInterval->interval, pInterval->intervalUnit, pInterval->precision);
}
}
if (pSliceInfo->current > pSliceInfo->win.ekey) {
setOperatorCompleted(pOperator);
break;
}
if (ts == pSliceInfo->current) {
blockDataEnsureCapacity(pResBlock, pResBlock->info.rows + 1);
for (int32_t j = 0; j < pOperator->exprSupp.numOfExprs; ++j) {
SExprInfo* pExprInfo = &pOperator->exprSupp.pExprInfo[j];
addCurrentRowToResult(pSliceInfo, &pOperator->exprSupp, pResBlock, pBlock, i);
int32_t dstSlot = pExprInfo->base.resSchema.slotId;
SColumnInfoData* pDst = taosArrayGet(pResBlock->pDataBlock, dstSlot);
if (IS_TIMESTAMP_TYPE(pExprInfo->base.resSchema.type)) {
colDataAppend(pDst, pResBlock->info.rows, (char*)&pSliceInfo->current, false);
} else {
int32_t srcSlot = pExprInfo->base.pParam[0].pCol->slotId;
SColumnInfoData* pSrc = taosArrayGet(pBlock->pDataBlock, srcSlot);
if (colDataIsNull_s(pSrc, i)) {
colDataAppendNULL(pDst, pResBlock->info.rows);
continue;
}
char* v = colDataGetData(pSrc, i);
colDataAppend(pDst, pResBlock->info.rows, v, false);
}
}
pResBlock->info.rows += 1;
doKeepPrevRows(pSliceInfo, pBlock, i);
doKeepLinearInfo(pSliceInfo, pBlock, i);
// for linear interpolation, always fill value between this and next points;
// if its the first point in data block, also fill values between previous(if there's any) and this point;
// if its the last point in data block, no need to fill, but reserve this point as the start value and do
// the interpolation when processing next data block.
if (pSliceInfo->fillType == TSDB_FILL_LINEAR) {
pSliceInfo->current =
taosTimeAdd(pSliceInfo->current, pInterval->interval, pInterval->intervalUnit, pInterval->precision);
if (i < pBlock->info.rows - 1) {
doKeepLinearInfo(pSliceInfo, pBlock, i, false);
int64_t nextTs = *(int64_t*)colDataGetData(pTsCol, i + 1);
if (nextTs > pSliceInfo->current) {
while (pSliceInfo->current < nextTs && pSliceInfo->current <= pSliceInfo->win.ekey) {
genInterpolationResult(pSliceInfo, &pOperator->exprSupp, pResBlock);
pSliceInfo->current = taosTimeAdd(pSliceInfo->current, pInterval->interval, pInterval->intervalUnit,
pInterval->precision);
}
if (pSliceInfo->current > pSliceInfo->win.ekey) {
setOperatorCompleted(pOperator);
break;
}
}
} else { // it is the last row of current block
// store ts value as start, and calculate interp value when processing next block
doKeepLinearInfo(pSliceInfo, pBlock, i, true);
}
} else { // non-linear interpolation
pSliceInfo->current =
taosTimeAdd(pSliceInfo->current, pInterval->interval, pInterval->intervalUnit, pInterval->precision);
if (pSliceInfo->current > pSliceInfo->win.ekey) {
setOperatorCompleted(pOperator);
break;
}
pSliceInfo->current =
taosTimeAdd(pSliceInfo->current, pInterval->interval, pInterval->intervalUnit, pInterval->precision);
if (pSliceInfo->current > pSliceInfo->win.ekey) {
setOperatorCompleted(pOperator);
break;
}
} else if (ts < pSliceInfo->current) {
// in case of interpolation window starts and ends between two datapoints, fill(prev) need to interpolate
doKeepPrevRows(pSliceInfo, pBlock, i);
doKeepLinearInfo(pSliceInfo, pBlock, i);
if (pSliceInfo->fillType == TSDB_FILL_LINEAR) {
// no need to increate pSliceInfo->current here
// pSliceInfo->current =
// taosTimeAdd(pSliceInfo->current, pInterval->interval, pInterval->intervalUnit, pInterval->precision);
if (i < pBlock->info.rows - 1) {
doKeepLinearInfo(pSliceInfo, pBlock, i, false);
int64_t nextTs = *(int64_t*)colDataGetData(pTsCol, i + 1);
if (nextTs > pSliceInfo->current) {
while (pSliceInfo->current < nextTs && pSliceInfo->current <= pSliceInfo->win.ekey) {
genInterpolationResult(pSliceInfo, &pOperator->exprSupp, pResBlock);
pSliceInfo->current = taosTimeAdd(pSliceInfo->current, pInterval->interval, pInterval->intervalUnit,
pInterval->precision);
}
if (pSliceInfo->current > pSliceInfo->win.ekey) {
setOperatorCompleted(pOperator);
if (i < pBlock->info.rows - 1) {
// in case of interpolation window starts and ends between two datapoints, fill(next) need to interpolate
doKeepNextRows(pSliceInfo, pBlock, i + 1);
int64_t nextTs = *(int64_t*)colDataGetData(pTsCol, i + 1);
if (nextTs > pSliceInfo->current) {
while (pSliceInfo->current < nextTs && pSliceInfo->current <= pSliceInfo->win.ekey) {
if (!genInterpolationResult(pSliceInfo, &pOperator->exprSupp, pResBlock, false) && pSliceInfo->fillType == TSDB_FILL_LINEAR) {
break;
} else {
pSliceInfo->current =
taosTimeAdd(pSliceInfo->current, pInterval->interval, pInterval->intervalUnit, pInterval->precision);
}
}
if (pSliceInfo->current > pSliceInfo->win.ekey) {
setOperatorCompleted(pOperator);
break;
}
} else {
// store ts value as start, and calculate interp value when processing next block
doKeepLinearInfo(pSliceInfo, pBlock, i, true);
}
} else { // non-linear interpolation
if (i < pBlock->info.rows - 1) {
// in case of interpolation window starts and ends between two datapoints, fill(next) need to interpolate
doKeepNextRows(pSliceInfo, pBlock, i + 1);
int64_t nextTs = *(int64_t*)colDataGetData(pTsCol, i + 1);
if (nextTs > pSliceInfo->current) {
while (pSliceInfo->current < nextTs && pSliceInfo->current <= pSliceInfo->win.ekey) {
genInterpolationResult(pSliceInfo, &pOperator->exprSupp, pResBlock);
pSliceInfo->current = taosTimeAdd(pSliceInfo->current, pInterval->interval, pInterval->intervalUnit,
pInterval->precision);
}
if (pSliceInfo->current > pSliceInfo->win.ekey) {
setOperatorCompleted(pOperator);
break;
}
} else {
// ignore current row, and do nothing
}
} else { // it is the last row of current block
doKeepPrevRows(pSliceInfo, pBlock, i);
// ignore current row, and do nothing
}
} else { // it is the last row of current block
doKeepPrevRows(pSliceInfo, pBlock, i);
}
} else { // ts > pSliceInfo->current
// in case of interpolation window starts and ends between two datapoints, fill(next) need to interpolate
doKeepNextRows(pSliceInfo, pBlock, i);
doKeepLinearInfo(pSliceInfo, pBlock, i);
while (pSliceInfo->current < ts && pSliceInfo->current <= pSliceInfo->win.ekey) {
genInterpolationResult(pSliceInfo, &pOperator->exprSupp, pResBlock);
pSliceInfo->current =
taosTimeAdd(pSliceInfo->current, pInterval->interval, pInterval->intervalUnit, pInterval->precision);
if (!genInterpolationResult(pSliceInfo, &pOperator->exprSupp, pResBlock, true) && pSliceInfo->fillType == TSDB_FILL_LINEAR) {
break;
} else {
pSliceInfo->current =
taosTimeAdd(pSliceInfo->current, pInterval->interval, pInterval->intervalUnit, pInterval->precision);
}
}
// add current row if timestamp match
if (ts == pSliceInfo->current && pSliceInfo->current <= pSliceInfo->win.ekey) {
blockDataEnsureCapacity(pResBlock, pResBlock->info.rows + 1);
for (int32_t j = 0; j < pOperator->exprSupp.numOfExprs; ++j) {
SExprInfo* pExprInfo = &pOperator->exprSupp.pExprInfo[j];
int32_t dstSlot = pExprInfo->base.resSchema.slotId;
SColumnInfoData* pDst = taosArrayGet(pResBlock->pDataBlock, dstSlot);
if (IS_TIMESTAMP_TYPE(pExprInfo->base.resSchema.type)) {
colDataAppend(pDst, pResBlock->info.rows, (char*)&pSliceInfo->current, false);
} else {
int32_t srcSlot = pExprInfo->base.pParam[0].pCol->slotId;
SColumnInfoData* pSrc = taosArrayGet(pBlock->pDataBlock, srcSlot);
if (colDataIsNull_s(pSrc, i)) {
colDataAppendNULL(pDst, pResBlock->info.rows);
continue;
}
char* v = colDataGetData(pSrc, i);
colDataAppend(pDst, pResBlock->info.rows, v, false);
}
}
pResBlock->info.rows += 1;
addCurrentRowToResult(pSliceInfo, &pOperator->exprSupp, pResBlock, pBlock, i);
doKeepPrevRows(pSliceInfo, pBlock, i);
if (pSliceInfo->fillType == TSDB_FILL_LINEAR) {
pSliceInfo->current =
taosTimeAdd(pSliceInfo->current, pInterval->interval, pInterval->intervalUnit, pInterval->precision);
if (i < pBlock->info.rows - 1) {
doKeepLinearInfo(pSliceInfo, pBlock, i, false);
int64_t nextTs = *(int64_t*)colDataGetData(pTsCol, i + 1);
if (nextTs > pSliceInfo->current) {
while (pSliceInfo->current < nextTs && pSliceInfo->current <= pSliceInfo->win.ekey) {
genInterpolationResult(pSliceInfo, &pOperator->exprSupp, pResBlock);
pSliceInfo->current = taosTimeAdd(pSliceInfo->current, pInterval->interval, pInterval->intervalUnit,
pInterval->precision);
}
if (pSliceInfo->current > pSliceInfo->win.ekey) {
setOperatorCompleted(pOperator);
break;
}
}
} else { // it is the last row of current block
// store ts value as start, and calculate interp value when processing next block
doKeepLinearInfo(pSliceInfo, pBlock, i, true);
}
} else { // non-linear interpolation
pSliceInfo->current =
taosTimeAdd(pSliceInfo->current, pInterval->interval, pInterval->intervalUnit, pInterval->precision);
}
pSliceInfo->current =
taosTimeAdd(pSliceInfo->current, pInterval->interval, pInterval->intervalUnit, pInterval->precision);
}
if (pSliceInfo->current > pSliceInfo->win.ekey) {
@ -2466,7 +2382,7 @@ static SSDataBlock* doTimeslice(SOperatorInfo* pOperator) {
// except for fill(next), fill(linear)
while (pSliceInfo->current <= pSliceInfo->win.ekey && pSliceInfo->fillType != TSDB_FILL_NEXT &&
pSliceInfo->fillType != TSDB_FILL_LINEAR) {
genInterpolationResult(pSliceInfo, &pOperator->exprSupp, pResBlock);
genInterpolationResult(pSliceInfo, &pOperator->exprSupp, pResBlock, false);
pSliceInfo->current =
taosTimeAdd(pSliceInfo->current, pInterval->interval, pInterval->intervalUnit, pInterval->precision);
}

File diff suppressed because it is too large Load Diff