Merge pull request #15946 from taosdata/feature/3.0_row_optimize

enh: row iter and fetch optimization
This commit is contained in:
Cary Xu 2022-08-11 09:46:27 +08:00 committed by GitHub
commit a6a1cfddd5
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
7 changed files with 155 additions and 165 deletions

View File

@ -200,8 +200,6 @@ struct STag {
#if 1 //================================================================================================================================================
// Introduced in 3.0: a bitmap is used to distinguish None/Null/Norm values, whereas versions below 3.0 use only Null/Norm and have no bitmap.
#define TD_SUPPORT_BITMAP
#define TD_SUPPORT_READ2
#define TD_SUPPORT_BACK2 // suppport back compatibility of 2.0
#define TASSERT(x) ASSERT(x)

View File

@ -319,17 +319,13 @@ typedef struct {
col_id_t kvIdx; // [0, nKvCols)
} STSRowIter;
void tdSTSRowIterReset(STSRowIter *pIter, STSRow *pRow);
void tdSTSRowIterInit(STSRowIter *pIter, STSchema *pSchema);
void tdSTSRowIterReset(STSRowIter *pIter, STSRow *pRow);
bool tdSTSRowIterFetch(STSRowIter *pIter, col_id_t colId, col_type_t colType, SCellVal *pVal);
bool tdSTSRowIterNext(STSRowIter *pIter, SCellVal *pVal);
int32_t tdSTSRowNew(SArray *pArray, STSchema *pTSchema, STSRow **ppRow);
bool tdSTSRowGetVal(STSRowIter *pIter, col_id_t colId, col_type_t colType, SCellVal *pVal);
bool tdGetTpRowDataOfCol(STSRowIter *pIter, col_type_t colType, int32_t offset, SCellVal *pVal);
bool tdGetKvRowValOfColEx(STSRowIter *pIter, col_id_t colId, col_type_t colType, col_id_t *nIdx, SCellVal *pVal);
bool tdSTSRowIterNext(STSRowIter *pIter, col_id_t colId, col_type_t colType, SCellVal *pVal);
bool tdSTpRowGetVal(STSRow *pRow, col_id_t colId, col_type_t colType, int32_t flen, uint32_t offset, col_id_t colIdx,
SCellVal *pVal);
bool tdSKvRowGetVal(STSRow *pRow, col_id_t colId, col_id_t colIdx, SCellVal *pVal);
void tdSCellValPrint(SCellVal *pVal, int8_t colType);
void tdSRowPrint(STSRow *row, STSchema *pSchema, const char *tag);
#ifdef __cplusplus

View File

@ -33,8 +33,12 @@ const uint8_t tdVTypeByte[2][3] = {{
// declaration
static uint8_t tdGetBitmapByte(uint8_t byte);
static int32_t tdCompareColId(const void *arg1, const void *arg2);
static FORCE_INLINE int32_t compareKvRowColId(const void *key1, const void *key2);
static bool tdSTSRowIterGetTpVal(STSRowIter *pIter, col_type_t colType, int32_t offset, SCellVal *pVal);
static bool tdSTSRowIterGetKvVal(STSRowIter *pIter, col_id_t colId, col_id_t *nIdx, SCellVal *pVal);
static bool tdSTpRowGetVal(STSRow *pRow, col_id_t colId, col_type_t colType, int32_t flen, uint32_t offset,
col_id_t colIdx, SCellVal *pVal);
static bool tdSKvRowGetVal(STSRow *pRow, col_id_t colId, col_id_t colIdx, SCellVal *pVal);
static void tdSCellValPrint(SCellVal *pVal, int8_t colType);
// implementation
/**
@ -330,14 +334,14 @@ void tdSRowPrint(STSRow *row, STSchema *pSchema, const char *tag) {
tdSTSRowIterInit(&iter, pSchema);
tdSTSRowIterReset(&iter, row);
printf("%s >>>type:%d,sver:%d ", tag, (int32_t)TD_ROW_TYPE(row), (int32_t)TD_ROW_SVER(row));
for (int i = 0; i < pSchema->numOfCols; ++i) {
STColumn *stCol = pSchema->columns + i;
SCellVal sVal = {255, NULL};
if (!tdSTSRowIterNext(&iter, stCol->colId, stCol->type, &sVal)) {
STColumn *cols = (STColumn *)&iter.pSchema->columns;
while (true) {
SCellVal sVal = {.valType = 255, NULL};
if (!tdSTSRowIterNext(&iter, &sVal)) {
break;
}
ASSERT(sVal.valType == 0 || sVal.valType == 1 || sVal.valType == 2);
tdSCellValPrint(&sVal, stCol->type);
tdSCellValPrint(&sVal, cols[iter.colIdx - 1].type);
}
printf("\n");
}
@ -420,6 +424,16 @@ void tdSCellValPrint(SCellVal *pVal, int8_t colType) {
}
}
/**
 * @brief Three-way comparison of a column id key against a KV row index entry.
 *
 * @param key1 Pointer to the col_id_t being searched for.
 * @param key2 Pointer to the SKvRowIdx entry whose colId is compared against the key.
 * @return 1 if the key is greater than the entry's colId, -1 if it is smaller, 0 if equal.
 */
static FORCE_INLINE int32_t compareKvRowColId(const void *key1, const void *key2) {
  const col_id_t   wanted = *(const col_id_t *)key1;
  const SKvRowIdx *pEntry = (const SKvRowIdx *)key2;

  if (wanted == pEntry->colId) {
    return 0;
  }
  return (wanted > pEntry->colId) ? 1 : -1;
}
bool tdSKvRowGetVal(STSRow *pRow, col_id_t colId, col_id_t colIdx, SCellVal *pVal) {
if (colId == PRIMARYKEY_TIMESTAMP_COL_ID) {
tdRowSetVal(pVal, TD_VTYPE_NORM, TD_ROW_KEY_ADDR(pRow));
@ -456,7 +470,7 @@ bool tdSTpRowGetVal(STSRow *pRow, col_id_t colId, col_type_t colType, int32_t fl
return true;
}
bool tdSTSRowIterNext(STSRowIter *pIter, col_id_t colId, col_type_t colType, SCellVal *pVal) {
bool tdSTSRowIterFetch(STSRowIter *pIter, col_id_t colId, col_type_t colType, SCellVal *pVal) {
if (colId == PRIMARYKEY_TIMESTAMP_COL_ID) {
pVal->val = &pIter->pRow->ts;
pVal->valType = TD_VTYPE_NORM;
@ -477,10 +491,10 @@ bool tdSTSRowIterNext(STSRowIter *pIter, col_id_t colId, col_type_t colType, SCe
return false;
}
}
tdGetTpRowDataOfCol(pIter, pCol->type, pCol->offset - sizeof(TSKEY), pVal);
tdSTSRowIterGetTpVal(pIter, pCol->type, pCol->offset - sizeof(TSKEY), pVal);
++pIter->colIdx;
} else if (TD_IS_KV_ROW(pIter->pRow)) {
return tdGetKvRowValOfColEx(pIter, colId, colType, &pIter->kvIdx, pVal);
return tdSTSRowIterGetKvVal(pIter, colId, &pIter->kvIdx, pVal);
} else {
pVal->valType = TD_VTYPE_NONE;
terrno = TSDB_CODE_INVALID_PARA;
@ -489,13 +503,69 @@ bool tdSTSRowIterNext(STSRowIter *pIter, col_id_t colId, col_type_t colType, SCe
return true;
}
bool tdGetKvRowValOfColEx(STSRowIter *pIter, col_id_t colId, col_type_t colType, col_id_t *nIdx, SCellVal *pVal) {
/**
 * @brief Read the cell of the column at the iterator's current position and advance to the next column.
 *
 * Columns are visited in schema order (pSchema->columns[colIdx]). The primary timestamp column is
 * served directly from STSRow.ts; every other column is read from the row body by the helper that
 * matches the row type (tuple or KV).
 *
 * @param pIter Iterator positioned on a row (tdSRowPrint calls tdSTSRowIterInit() and
 *              tdSTSRowIterReset() before iterating).
 * @param pVal  Output cell: valType and, where applicable, the val pointer.
 * @return true if a column was visited; false once colIdx has reached pSchema->numOfCols.
 */
bool tdSTSRowIterNext(STSRowIter *pIter, SCellVal *pVal) {
  if (pIter->colIdx >= pIter->pSchema->numOfCols) {
    return false;
  }

  STColumn *pCol = &pIter->pSchema->columns[pIter->colIdx];

  if (pCol->colId == PRIMARYKEY_TIMESTAMP_COL_ID) {
    pVal->val = &pIter->pRow->ts;
    pVal->valType = TD_VTYPE_NORM;
    ++pIter->colIdx;
    return true;
  }

  if (TD_IS_TP_ROW(pIter->pRow)) {
    // Column offsets in the schema include the timestamp key, which is not part of the row data area.
    tdSTSRowIterGetTpVal(pIter, pCol->type, pCol->offset - sizeof(TSKEY), pVal);
  } else if (TD_IS_KV_ROW(pIter->pRow)) {
    // KV rows are a supported layout: do not assert here (tdSTSRowIterFetch takes the same path).
    tdSTSRowIterGetKvVal(pIter, pCol->colId, &pIter->kvIdx, pVal);
  } else {
    // Neither tuple nor KV: unknown row type.
    ASSERT(0);
  }
  ++pIter->colIdx;

  return true;
}
/**
 * @brief Read one cell of a tuple (TP) row for the iterator's current column.
 *
 * When pRow->statis is 0 the bitmap is not consulted and the cell is reported as TD_VTYPE_NORM.
 * Otherwise the value type is looked up in the bitmap at index (colIdx - 1)
 * (NOTE(review): presumably because the timestamp column has no bitmap slot — confirm).
 * The val pointer is only written when the resulting type is TD_VTYPE_NORM.
 *
 * @param pIter   Iterator holding the row, its bitmap and the current column index.
 * @param colType Column data type; var-length types are stored as an offset that is resolved
 *                relative to the row start, fixed-length types are stored in place.
 * @param offset  Byte offset of the column inside TD_ROW_DATA(pRow).
 * @param pVal    Output cell value.
 * @return true on success; false if the bitmap lookup fails, in which case pVal->valType is
 *         TD_VTYPE_NONE. (Previously the function returned status codes through its bool return
 *         type, which reported success as false.)
 */
bool tdSTSRowIterGetTpVal(STSRowIter *pIter, col_type_t colType, int32_t offset, SCellVal *pVal) {
  STSRow *pRow = pIter->pRow;

  if (pRow->statis == 0) {
    pVal->valType = TD_VTYPE_NORM;
  } else if (tdGetBitmapValType(pIter->pBitmap, pIter->colIdx - 1, &pVal->valType, 0) != TSDB_CODE_SUCCESS) {
    pVal->valType = TD_VTYPE_NONE;
    return false;
  }

  if (pVal->valType == TD_VTYPE_NORM) {
    if (IS_VAR_DATA_TYPE(colType)) {
      pVal->val = POINTER_SHIFT(pRow, *(VarDataOffsetT *)POINTER_SHIFT(TD_ROW_DATA(pRow), offset));
    } else {
      pVal->val = POINTER_SHIFT(TD_ROW_DATA(pRow), offset);
    }
  }

  return true;
}
bool tdSTSRowIterGetKvVal(STSRowIter *pIter, col_id_t colId, col_id_t *nIdx, SCellVal *pVal) {
STSRow *pRow = pIter->pRow;
SKvRowIdx *pKvIdx = NULL;
bool colFound = false;
col_id_t kvNCols = tdRowGetNCols(pRow) - 1;
void *pColIdx = TD_ROW_COL_IDX(pRow);
while (*nIdx < kvNCols) {
pKvIdx = (SKvRowIdx *)POINTER_SHIFT(TD_ROW_COL_IDX(pRow), *nIdx * sizeof(SKvRowIdx));
pKvIdx = (SKvRowIdx *)POINTER_SHIFT(pColIdx, *nIdx * sizeof(SKvRowIdx));
if (pKvIdx->colId == colId) {
++(*nIdx);
pVal->val = POINTER_SHIFT(pRow, pKvIdx->offset);
@ -518,48 +588,13 @@ bool tdGetKvRowValOfColEx(STSRowIter *pIter, col_id_t colId, col_type_t colType,
}
}
#ifdef TD_SUPPORT_BITMAP
int16_t colIdx = -1;
if (pKvIdx) colIdx = POINTER_DISTANCE(pKvIdx, TD_ROW_COL_IDX(pRow)) / sizeof(SKvRowIdx);
if (tdGetBitmapValType(pIter->pBitmap, colIdx, &pVal->valType, 0) != TSDB_CODE_SUCCESS) {
if (tdGetBitmapValType(pIter->pBitmap, pIter->kvIdx - 1, &pVal->valType, 0) != TSDB_CODE_SUCCESS) {
pVal->valType = TD_VTYPE_NONE;
}
#else
pVal->valType = isNull(pVal->val, colType) ? TD_VTYPE_NULL : TD_VTYPE_NORM;
#endif
return true;
}
bool tdGetTpRowDataOfCol(STSRowIter *pIter, col_type_t colType, int32_t offset, SCellVal *pVal) {
STSRow *pRow = pIter->pRow;
if (IS_VAR_DATA_TYPE(colType)) {
pVal->val = POINTER_SHIFT(pRow, *(VarDataOffsetT *)POINTER_SHIFT(TD_ROW_DATA(pRow), offset));
} else {
pVal->val = POINTER_SHIFT(TD_ROW_DATA(pRow), offset);
}
#ifdef TD_SUPPORT_BITMAP
if (tdGetBitmapValType(pIter->pBitmap, pIter->colIdx - 1, &pVal->valType, 0) != TSDB_CODE_SUCCESS) {
pVal->valType = TD_VTYPE_NONE;
}
#else
pVal->valType = isNull(pVal->val, colType) ? TD_VTYPE_NULL : TD_VTYPE_NORM;
#endif
return true;
}
static FORCE_INLINE int32_t compareKvRowColId(const void *key1, const void *key2) {
if (*(col_id_t *)key1 > ((SKvRowIdx *)key2)->colId) {
return 1;
} else if (*(col_id_t *)key1 < ((SKvRowIdx *)key2)->colId) {
return -1;
} else {
return 0;
}
}
int32_t tdSTSRowNew(SArray *pArray, STSchema *pTSchema, STSRow **ppRow) {
STColumn *pTColumn;
SColVal *pColVal;
@ -673,6 +708,19 @@ int32_t tdSTSRowNew(SArray *pArray, STSchema *pTSchema, STSRow **ppRow) {
return 0;
}
/**
 * @brief Three-way comparison of a column id key against a schema column.
 *
 * NOTE(review): the key is dereferenced as int32_t; confirm that callers pass a pointer to a
 * 32-bit value rather than to a narrower column id type.
 *
 * @param arg1 Pointer to the column id being searched for (read as int32_t).
 * @param arg2 Pointer to the STColumn whose colId is compared against the key.
 * @return -1 if the key is smaller than the column's colId, 0 if equal, 1 if greater.
 */
static FORCE_INLINE int32_t tdCompareColId(const void *arg1, const void *arg2) {
  const int32_t   target = *(const int32_t *)arg1;
  const STColumn *pColumn = (const STColumn *)arg2;

  if (target == pColumn->colId) {
    return 0;
  }
  return (target < pColumn->colId) ? -1 : 1;
}
bool tdSTSRowGetVal(STSRowIter *pIter, col_id_t colId, col_type_t colType, SCellVal *pVal) {
if (colId == PRIMARYKEY_TIMESTAMP_COL_ID) {
pVal->val = &pIter->pRow->ts;
@ -712,19 +760,6 @@ bool tdSTSRowGetVal(STSRowIter *pIter, col_id_t colId, col_type_t colType, SCell
return true;
}
static int32_t tdCompareColId(const void *arg1, const void *arg2) {
int32_t colId = *(int32_t *)arg1;
STColumn *pCol = (STColumn *)arg2;
if (colId < pCol->colId) {
return -1;
} else if (colId == pCol->colId) {
return 0;
} else {
return 1;
}
}
int32_t tdGetBitmapValTypeII(const void *pBitmap, int16_t colIdx, TDRowValT *pValType) {
if (!pBitmap || colIdx < 0) {
TASSERT(0);
@ -938,7 +973,7 @@ int32_t tdAppendColValToRow(SRowBuilder *pBuilder, col_id_t colId, int8_t colTyp
break;
case TD_VTYPE_NONE:
if (!pBuilder->hasNone) pBuilder->hasNone = true;
break;
return TSDB_CODE_SUCCESS;
default:
ASSERT(0);
break;
@ -970,13 +1005,11 @@ int32_t tdAppendColValToKvRow(SRowBuilder *pBuilder, TDRowValT valType, const vo
STSRow *row = pBuilder->pBuf;
// No need to store None/Null values.
if (valType == TD_VTYPE_NORM) {
// ts key stored in STSRow.ts
SKvRowIdx *pColIdx = (SKvRowIdx *)POINTER_SHIFT(TD_ROW_COL_IDX(row), offset);
char *ptr = (char *)POINTER_SHIFT(row, TD_ROW_LEN(row));
pColIdx->colId = colId;
pColIdx->offset = TD_ROW_LEN(row); // the offset include the TD_ROW_HEAD_LEN
if (valType == TD_VTYPE_NORM) {
char *ptr = (char *)POINTER_SHIFT(row, TD_ROW_LEN(row));
if (IS_VAR_DATA_TYPE(colType)) {
if (isCopyVarData) {
memcpy(ptr, val, varDataTLen(val));
@ -987,26 +1020,6 @@ int32_t tdAppendColValToKvRow(SRowBuilder *pBuilder, TDRowValT valType, const vo
TD_ROW_LEN(row) += TYPE_BYTES[colType];
}
}
#ifdef TD_SUPPORT_BACK2
// NULL/None value
else {
SKvRowIdx *pColIdx = (SKvRowIdx *)POINTER_SHIFT(TD_ROW_COL_IDX(row), offset);
char *ptr = (char *)POINTER_SHIFT(row, TD_ROW_LEN(row));
pColIdx->colId = colId;
pColIdx->offset = TD_ROW_LEN(row); // the offset include the TD_ROW_HEAD_LEN
const void *nullVal = getNullValue(colType);
if (IS_VAR_DATA_TYPE(colType)) {
if (isCopyVarData) {
memcpy(ptr, nullVal, varDataTLen(nullVal));
}
TD_ROW_LEN(row) += varDataTLen(nullVal);
} else {
memcpy(ptr, nullVal, TYPE_BYTES[colType]);
TD_ROW_LEN(row) += TYPE_BYTES[colType];
}
}
#endif
return 0;
}
@ -1044,24 +1057,6 @@ int32_t tdAppendColValToTpRow(SRowBuilder *pBuilder, TDRowValT valType, const vo
memcpy(POINTER_SHIFT(TD_ROW_DATA(row), offset), val, TYPE_BYTES[colType]);
}
}
#ifdef TD_SUPPORT_BACK2
// NULL/None value
else {
// TODO: Null value for new data types imported since 3.0 need to be defined.
const void *nullVal = getNullValue(colType);
if (IS_VAR_DATA_TYPE(colType)) {
// ts key stored in STSRow.ts
*(VarDataOffsetT *)POINTER_SHIFT(TD_ROW_DATA(row), offset) = TD_ROW_LEN(row);
if (isCopyVarData) {
memcpy(POINTER_SHIFT(row, TD_ROW_LEN(row)), nullVal, varDataTLen(nullVal));
}
TD_ROW_LEN(row) += varDataTLen(nullVal);
} else {
memcpy(POINTER_SHIFT(TD_ROW_DATA(row), offset), nullVal, TYPE_BYTES[colType]);
}
}
#endif
return 0;
}
@ -1329,7 +1324,7 @@ void tdSTSRowIterReset(STSRowIter *pIter, STSRow *pRow) {
pIter->pRow = pRow;
pIter->pBitmap = tdGetBitmapAddr(pRow, pRow->type, pIter->pSchema->flen, tdRowGetNCols(pRow));
pIter->offset = 0;
pIter->colIdx = PRIMARYKEY_TIMESTAMP_COL_ID;
pIter->colIdx = 0; // PRIMARYKEY_TIMESTAMP_COL_ID;
pIter->kvIdx = 0;
}

View File

@ -325,7 +325,7 @@ int32_t tqRetrieveDataBlock(SSDataBlock* pBlock, STqReader* pReader) {
for (int32_t i = 0; i < colActual; i++) {
SColumnInfoData* pColData = taosArrayGet(pBlock->pDataBlock, i);
SCellVal sVal = {0};
if (!tdSTSRowIterNext(&iter, pColData->info.colId, pColData->info.type, &sVal)) {
if (!tdSTSRowIterFetch(&iter, pColData->info.colId, pColData->info.type, &sVal)) {
break;
}
if (colDataAppend(pColData, curRow, sVal.val, sVal.valType != TD_VTYPE_NORM) < 0) {

View File

@ -182,7 +182,7 @@ static int32_t setColumnIdSlotList(STsdbReader* pReader, SSDataBlock* pBlock) {
if (IS_VAR_DATA_TYPE(pCol->info.type)) {
pSupInfo->buildBuf[i] = taosMemoryMalloc(pCol->info.bytes);
tsdbInfo("-------------------%d\n", pCol->info.bytes);
// tsdbInfo("-------------------%d\n", pCol->info.bytes);
}
}

View File

@ -13,8 +13,8 @@
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include "os.h"
#include "tsimplehash.h"
#include "os.h"
#include "taoserror.h"
#define SHASH_DEFAULT_LOAD_FACTOR 0.75
@ -25,7 +25,6 @@
#define GET_SHASH_NODE_DATA(_n) ((char *)(_n) + sizeof(SHNode))
#define HASH_INDEX(v, c) ((v) & ((c)-1))
#define HASH_NEED_RESIZE(_h) ((_h)->size >= (_h)->capacity * SHASH_DEFAULT_LOAD_FACTOR)
#define FREE_HASH_NODE(_n) \
do { \
@ -108,7 +107,7 @@ static SHNode *doCreateHashNode(const void *key, size_t keyLen, const void *pDat
}
static void taosHashTableResize(SSHashObj *pHashObj) {
if (!HASH_NEED_RESIZE(pHashObj)) {
if (!SHASH_NEED_RESIZE(pHashObj)) {
return;
}
@ -134,13 +133,14 @@ static void taosHashTableResize(SSHashObj *pHashObj) {
for (int32_t idx = 0; idx < pHashObj->capacity; ++idx) {
SHNode *pNode = pHashObj->hashList[idx];
SHNode *pNext;
SHNode *pPrev = NULL;
if (pNode == NULL) {
continue;
}
SHNode *pNext;
SHNode *pPrev = NULL;
while (pNode != NULL) {
void *key = GET_SHASH_NODE_KEY(pNode, pHashObj->dataLen);
uint32_t hashVal = (*pHashObj->hashFp)(key, (uint32_t)pHashObj->dataLen);
@ -166,7 +166,8 @@ static void taosHashTableResize(SSHashObj *pHashObj) {
int64_t et = taosGetTimestampUs();
// uDebug("hash table resize completed, new capacity:%d, load factor:%f, elapsed time:%fms", (int32_t)pHashObj->capacity,
// uDebug("hash table resize completed, new capacity:%d, load factor:%f, elapsed time:%fms",
// (int32_t)pHashObj->capacity,
// ((double)pHashObj->size) / pHashObj->capacity, (et - st) / 1000.0);
}
@ -230,9 +231,7 @@ static FORCE_INLINE SHNode *doSearchInEntryList(SSHashObj *pHashObj, const void
return pNode;
}
static FORCE_INLINE bool taosHashTableEmpty(const SSHashObj *pHashObj) {
return tSimpleHashGetSize(pHashObj) == 0;
}
static FORCE_INLINE bool taosHashTableEmpty(const SSHashObj *pHashObj) { return tSimpleHashGetSize(pHashObj) == 0; }
void *tSimpleHashGet(SSHashObj *pHashObj, const void *key) {
if (pHashObj == NULL || taosHashTableEmpty(pHashObj) || key == NULL) {

View File

@ -1192,7 +1192,10 @@ static int parseOneRow(SInsertParseContext* pCxt, STableDataBlocks* pDataBlocks,
pBuilder->hasNone = true;
}
tdSRowEnd(pBuilder);
*gotRow = true;
#ifdef TD_DEBUG_PRINT_ROW
STSchema* pSTSchema = tdGetSTSChemaFromSSChema(schema, spd->numOfCols, 1);
tdSRowPrint(row, pSTSchema, __func__);
@ -1201,7 +1204,6 @@ static int parseOneRow(SInsertParseContext* pCxt, STableDataBlocks* pDataBlocks,
}
// *len = pBuilder->extendedRowSize;
tdSRowEnd(pBuilder);
return TSDB_CODE_SUCCESS;
}