enh: add asof

This commit is contained in:
dapan1121 2024-01-11 19:25:12 +08:00
parent e784a5c210
commit 8d1f5ff479
14 changed files with 473 additions and 14 deletions

View File

@ -477,6 +477,7 @@ typedef struct SSortMergeJoinPhysiNode {
EJoinSubType subType;
SNode* pWindowOffset;
SNode* pJLimit;
int32_t asofOp;
int32_t leftPrimSlotId;
int32_t rightPrimSlotId;
SNodeList* pEqLeft;

View File

@ -540,6 +540,7 @@ int32_t* taosGetErrno();
#define TSDB_CODE_QRY_QWORKER_QUIT TAOS_DEF_ERROR_CODE(0, 0x0730)
#define TSDB_CODE_QRY_GEO_NOT_SUPPORT_ERROR TAOS_DEF_ERROR_CODE(0, 0x0731)
#define TSDB_CODE_QRY_EXECUTOR_INTERNAL_ERROR TAOS_DEF_ERROR_CODE(0, 0x0732)
#define TSDB_CODE_QRY_INVALID_JOIN_CONDITION TAOS_DEF_ERROR_CODE(0, 0x0733)
// grant
#define TSDB_CODE_GRANT_EXPIRED TAOS_DEF_ERROR_CODE(0, 0x0800)

View File

@ -149,7 +149,7 @@ typedef enum EOperatorType {
OP_TYPE_BIT_OR,
// binary comparison operator
OP_TYPE_GREATER_THAN = 40,
OP_TYPE_GREATER_THAN = 40, // MUST KEEP IT FIRST AT COMPARE SECTION
OP_TYPE_GREATER_EQUAL,
OP_TYPE_LOWER_THAN,
OP_TYPE_LOWER_EQUAL,
@ -170,6 +170,7 @@ typedef enum EOperatorType {
OP_TYPE_IS_NOT_TRUE,
OP_TYPE_IS_NOT_FALSE,
OP_TYPE_IS_NOT_UNKNOWN,
OP_TYPE_COMPARE_MAX_VALUE = 149, // MUST KEEP IT LAST AT COMPARE SECTION
// json operator
OP_TYPE_JSON_GET_VALUE = 150,

View File

@ -20,8 +20,8 @@ extern "C" {
#endif
#if 1
#define MJOIN_DEFAULT_BLK_ROWS_NUM 2 //4096
#define MJOIN_HJOIN_CART_THRESHOLD 2
#define MJOIN_DEFAULT_BLK_ROWS_NUM 10 //4096
#define MJOIN_HJOIN_CART_THRESHOLD 10
#define MJOIN_BLK_SIZE_LIMIT 0 //10485760
#define MJOIN_ROW_BITMAP_SIZE (2 * 1048576)
#else
@ -172,7 +172,15 @@ typedef struct SMJoinMergeCtx {
} SMJoinMergeCtx;
// Context stored in the join operator's ctx.windowCtx slot (see mJoinInitWindowCtx).
// NOTE(review): apart from pJoin, the field notes below are inferred from names only —
// confirm against the executor code that reads them.
typedef struct SMJoinWindowCtx {
struct SMJoinOperatorInfo* pJoin;  // back-pointer to the operator that owns this context
int32_t asofOp;  // presumably the ASOF comparison operator, mirroring asofOp in SSortMergeJoinPhysiNode — TODO confirm
bool asofEqRow;  // presumably whether a row with an equal key counts as a match — TODO confirm
int64_t jLimit;  // presumably the JLIMIT value, cf. pJLimit in SSortMergeJoinPhysiNode — TODO confirm
SSDataBlock* finBlk;  // presumably the final output block — TODO confirm
int64_t resRowsNum;  // presumably the number of rows held in resArray — TODO confirm
int32_t resRowOffset;  // presumably a read offset into the buffered result rows — TODO confirm
SArray* resArray;  // presumably buffered result rows — TODO confirm element type
} SMJoinWindowCtx;

View File

@ -932,23 +932,22 @@ static int32_t mFullJoinHandleMergeGrpRemains(SMJoinMergeCtx* pCtx) {
}
int32_t bitBytes = BitmapLen(pGrpRows->endIdx - pGrpRows->beginIdx + 1);
baseIdx = 8 * pNMatch->bitIdx;
rowNum = pGrpRows->endIdx - pGrpRows->beginIdx + 1;
for (; pNMatch->bitIdx < bitBytes; ++pNMatch->bitIdx) {
if (0 == build->pRowBitmap[pGrpRows->rowBitmapOffset + pNMatch->bitIdx]) {
continue;
}
baseIdx = 8 * pNMatch->bitIdx;
char *v = &build->pRowBitmap[pGrpRows->rowBitmapOffset + pNMatch->bitIdx];
while (*v && !BLK_IS_FULL(pCtx->finBlk)) {
uint8_t n = lowest_bit_bitmap[((*v & (*v - 1)) ^ *v) % 11];
if (baseIdx + n > pGrpRows->endIdx) {
if (pGrpRows->beginIdx + baseIdx + n > pGrpRows->endIdx) {
MJOIN_SET_ROW_BITMAP(build->pRowBitmap, pGrpRows->rowBitmapOffset + pNMatch->bitIdx, n);
continue;
}
ASSERT(baseIdx + n <= pGrpRows->endIdx);
MJ_ERR_RET(mFullJoinOutputMergeRow(pCtx, pGrpRows, baseIdx + n));
MJ_ERR_RET(mFullJoinOutputMergeRow(pCtx, pGrpRows, pGrpRows->beginIdx + baseIdx + n));
MJOIN_SET_ROW_BITMAP(build->pRowBitmap, pGrpRows->rowBitmapOffset + pNMatch->bitIdx, n);
if (++pGrpRows->rowMatchNum == rowNum) {
@ -1715,6 +1714,10 @@ _return:
}
/**
 * Initialise the window-join context embedded in a merge-join operator.
 *
 * Only the back-pointer from the context to its owning operator is set.
 *
 * @param pJoin      Operator whose ctx.windowCtx is initialised.
 * @param pJoinNode  Physical plan node of the join; not consulted here.
 * @return TSDB_CODE_SUCCESS always.
 */
int32_t mJoinInitWindowCtx(SMJoinOperatorInfo* pJoin, SSortMergeJoinPhysiNode* pJoinNode) {
  (void)pJoinNode;

  pJoin->ctx.windowCtx.pJoin = pJoin;

  return TSDB_CODE_SUCCESS;
}

View File

@ -66,7 +66,7 @@ enum {
};
#define COL_DISPLAY_WIDTH 18
#define JT_MAX_LOOP 3000
#define JT_MAX_LOOP 30000
#define LEFT_BLK_ID 0
#define RIGHT_BLK_ID 1
@ -2198,7 +2198,7 @@ void runSingleTest(char* caseName, SJoinTestParam* param) {
bool contLoop = true;
SSortMergeJoinPhysiNode* pNode = createDummySortMergeJoinPhysiNode(param->joinType, param->subType, param->cond, param->filter, param->asc);
createDummyBlkList(20, 20, 20, 20, 3);
createDummyBlkList(50, 50, 50, 50, 10);
while (contLoop) {
rerunBlockedHere();
@ -2435,7 +2435,7 @@ TEST(leftOuterJoin, fullCondTest) {
#endif
#endif
#if 0
#if 1
#if 1
TEST(fullOuterJoin, noCondTest) {
SJoinTestParam param;
@ -2538,7 +2538,7 @@ TEST(fullOuterJoin, fullCondTest) {
#endif
#endif
#if 0
#if 1
#if 1
TEST(leftSemiJoin, noCondTest) {
SJoinTestParam param;
@ -2642,7 +2642,7 @@ TEST(leftSemiJoin, fullCondTest) {
#endif
#if 1
#if 0
#if 1
TEST(leftAntiJoin, noCondTest) {
SJoinTestParam param;
char* caseName = "leftAntiJoin:noCondTest";

View File

@ -55,6 +55,19 @@ typedef struct SSysTableShowAdapter {
const char* pShowCols[2];
} SSysTableShowAdapter;
// Accumulator for facts gathered while walking a join ON condition.
// NOTE(review): no code visible in this change reads or writes this struct
// (joinCondsValidater takes a plain int32_t* as its context), so every field
// note below is inferred from the name only — confirm before relying on it.
typedef struct SCollectJoinCondsContext {
bool inOp;  // presumably set while the walker is inside an operator node — TODO confirm
int32_t primCondNum;  // presumably the count of primary-key (timestamp) conditions — TODO confirm
int32_t logicAndNum;  // presumably the count of AND logic nodes — TODO confirm
int32_t logicOrNum;  // presumably the count of OR logic nodes — TODO confirm
int32_t eqCondNum;  // presumably the count of equality conditions — TODO confirm
int32_t neqCondNum;  // presumably the count of non-equality conditions — TODO confirm
bool primDisorder;  // presumably flags primary-key conditions seen out of the expected order — TODO confirm
int32_t code;  // presumably the resulting error code — TODO confirm
} SCollectJoinCondsContext;
// clang-format off
static const SSysTableShowAdapter sysTableShowAdapter[] = {
{
@ -3156,6 +3169,79 @@ static int32_t translateJoinTable(STranslateContext* pCxt, SJoinTableNode* pJoin
return code;
}
EDealRes joinCondsValidater(SNode* pNode, void* pContext) {
switch (nodeType(pNode)) {
case QUERY_NODE_LOGIC_CONDITION: {
SLogicConditionNode* pLogic = (SLogicConditionNode*)pNode;
if (LOGIC_COND_TYPE_AND != pLogic->condType) {
break;
}
return DEAL_RES_CONTINUE;
}
case QUERY_NODE_OPERATOR: {
SOperatorNode* pOp = (SOperatorNode*)pNode;
if (OP_TYPE_EQUAL < pOp->opType || OP_TYPE_GREATER_THAN > pOp->opType) {
break;
}
if ((QUERY_NODE_COLUMN != nodeType(pOp->pLeft) && QUERY_NODE_FUNCTION != nodeType(pOp->pLeft)) ||
(QUERY_NODE_COLUMN != nodeType(pOp->pRight) && QUERY_NODE_FUNCTION != nodeType(pOp->pRight))){
break;
}
if (QUERY_NODE_COLUMN == nodeType(pOp->pLeft)) {
SColumnNode* pCol = (SColumnNode*)pOp->pLeft;
if (PRIMARYKEY_TIMESTAMP_COL_ID != pCol->colId && OP_TYPE_EQUAL != pOp->opType) {
break;
}
}
if (QUERY_NODE_COLUMN == nodeType(pOp->pRight)) {
SColumnNode* pCol = (SColumnNode*)pOp->pRight;
if (PRIMARYKEY_TIMESTAMP_COL_ID != pCol->colId && OP_TYPE_EQUAL != pOp->opType) {
break;
}
}
if (QUERY_NODE_FUNCTION == nodeType(pOp->pLeft) && FUNCTION_TYPE_TIMETRUNCATE == ((SFunctionNode*)pOp->pLeft)->funcType) {
SFunctionNode* pFunc = (SFunctionNode*)pOp->pLeft;
SNode* pParam = nodesListGetNode(pFunc->pParameterList, 0);
if (QUERY_NODE_COLUMN != nodeType(pParam)) {
break;
}
SColumnNode* pCol = (SColumnNode*)pParam;
if (PRIMARYKEY_TIMESTAMP_COL_ID != pCol->colId) {
break;
}
}
if (QUERY_NODE_FUNCTION == nodeType(pOp->pRight) && FUNCTION_TYPE_TIMETRUNCATE == ((SFunctionNode*)pOp->pRight)->funcType) {
SFunctionNode* pFunc = (SFunctionNode*)pOp->pRight;
SNode* pParam = nodesListGetNode(pFunc->pParameterList, 0);
if (QUERY_NODE_COLUMN != nodeType(pParam)) {
break;
}
SColumnNode* pCol = (SColumnNode*)pParam;
if (PRIMARYKEY_TIMESTAMP_COL_ID != pCol->colId) {
break;
}
}
return DEAL_RES_IGNORE_CHILD;
}
default:
break;
}
*(int32_t*)pContext = TSDB_CODE_QRY_INVALID_JOIN_CONDITION;
return DEAL_RES_ERROR;
}
/**
 * Validate the ON condition of an ASOF or WINDOW join.
 *
 * Other join sub-types are accepted without inspection. For ASOF/WINDOW joins
 * the ON expression is walked with joinCondsValidater, which stores an error
 * code into the local status when it rejects a node.
 *
 * @param pCxt        Translation context (not consulted here).
 * @param pJoinTable  Join table node whose subType and pOnCond are examined.
 * @return TSDB_CODE_SUCCESS when no check is needed; otherwise the status left
 *         by the walker (0 unless a node was rejected).
 */
int32_t validateJoinConds(STranslateContext* pCxt, SJoinTableNode* pJoinTable) {
  const bool restricted =
      (JOIN_STYPE_ASOF == pJoinTable->subType) || (JOIN_STYPE_WIN == pJoinTable->subType);
  if (!restricted) {
    return TSDB_CODE_SUCCESS;
  }

  int32_t status = 0;
  nodesWalkExpr(pJoinTable->pOnCond, joinCondsValidater, &status);
  return status;
}
int32_t translateTable(STranslateContext* pCxt, SNode** pTable, SNode* pJoinParent) {
int32_t code = TSDB_CODE_SUCCESS;
switch (nodeType(*pTable)) {
@ -3231,6 +3317,8 @@ int32_t translateTable(STranslateContext* pCxt, SNode** pTable, SNode* pJoinPare
pJoinTable->table.precision = calcJoinTablePrecision(pJoinTable);
pJoinTable->table.singleTable = joinTableIsSingleTable(pJoinTable);
code = translateExpr(pCxt, &pJoinTable->pOnCond);
}
if (TSDB_CODE_SUCCESS == code) {
pJoinTable->hasSubQuery = (nodeType(pJoinTable->pLeft) != QUERY_NODE_REAL_TABLE) ||
(nodeType(pJoinTable->pRight) != QUERY_NODE_REAL_TABLE);
if (nodeType(pJoinTable->pLeft) == QUERY_NODE_JOIN_TABLE) {
@ -3239,6 +3327,7 @@ int32_t translateTable(STranslateContext* pCxt, SNode** pTable, SNode* pJoinPare
if (nodeType(pJoinTable->pRight) == QUERY_NODE_JOIN_TABLE) {
((SJoinTableNode*)pJoinTable->pRight)->isLowLevelJoin = true;
}
code = validateJoinConds(pCxt, pJoinTable);
}
break;
}

View File

@ -727,9 +727,22 @@ static int32_t pdcPushDownCondToChild(SOptimizeContext* pCxt, SLogicNode* pChild
}
static bool pdcJoinIsPrim(SNode* pNode, SSHashObj* pTables) {
if (QUERY_NODE_COLUMN != nodeType(pNode)) {
// Only a bare column or a function call can refer to the primary key. The two
// type tests must be joined with `&&`: with `||` the condition holds for every
// node (no node is both a column and a function), so the function would always
// return false and the timetruncate handling below could never run.
if (QUERY_NODE_COLUMN != nodeType(pNode) && QUERY_NODE_FUNCTION != nodeType(pNode)) {
  return false;
}

// For timetruncate(col, ...) look through the call and continue with its first
// parameter, which must itself be a column.
if (QUERY_NODE_FUNCTION == nodeType(pNode)) {
  SFunctionNode* pFunc = (SFunctionNode*)pNode;
  if (FUNCTION_TYPE_TIMETRUNCATE != pFunc->funcType) {
    return false;
  }
  SListCell* pCell = nodesListGetCell(pFunc->pParameterList, 0);
  if (NULL == pCell || NULL == pCell->pNode || QUERY_NODE_COLUMN != nodeType(pCell->pNode)) {
    return false;
  }
  pNode = pCell->pNode;
}
SColumnNode* pCol = (SColumnNode*)pNode;
if (PRIMARYKEY_TIMESTAMP_COL_ID != pCol->colId || TSDB_SYSTEM_TABLE == pCol->tableType) {
return false;
@ -743,7 +756,7 @@ static bool pdcJoinIsPrimEqualCond(SJoinLogicNode* pJoin, SNode* pCond) {
}
SOperatorNode* pOper = (SOperatorNode*)pCond;
if (OP_TYPE_EQUAL != pOper->opType) {
if (OP_TYPE_EQUAL != pOper->opType && JOIN_STYPE_ASOF != pJoin->subType) {
return false;
}

View File

@ -427,6 +427,8 @@ TAOS_DEFINE_ERROR(TSDB_CODE_QRY_JOB_NOT_EXIST, "Job not exist")
TAOS_DEFINE_ERROR(TSDB_CODE_QRY_QWORKER_QUIT, "Vnode/Qnode is quitting")
TAOS_DEFINE_ERROR(TSDB_CODE_QRY_GEO_NOT_SUPPORT_ERROR, "Geometry not support in this operator")
TAOS_DEFINE_ERROR(TSDB_CODE_QRY_EXECUTOR_INTERNAL_ERROR, "Executor internal error")
TAOS_DEFINE_ERROR(TSDB_CODE_QRY_EXECUTOR_INTERNAL_ERROR, "Executor internal error")
TAOS_DEFINE_ERROR(TSDB_CODE_QRY_INVALID_JOIN_CONDITION, "Not supported join on condition")
// grant
TAOS_DEFINE_ERROR(TSDB_CODE_GRANT_EXPIRED, "License expired")

View File

@ -61,6 +61,10 @@ run tsim/join/inner_join.sim
run tsim/join/left_join.sim
run tsim/join/right_join.sim
run tsim/join/full_join.sim
run tsim/join/left_semi_join.sim
run tsim/join/right_semi_join.sim
run tsim/join/left_anti_join.sim
run tsim/join/right_anti_join.sim
print ================== restart server to commit data into disk
system sh/exec.sh -n dnode1 -s stop -x SIGINT
@ -71,5 +75,9 @@ run tsim/join/inner_join.sim
run tsim/join/left_join.sim
run tsim/join/right_join.sim
run tsim/join/full_join.sim
run tsim/join/left_semi_join.sim
run tsim/join/right_semi_join.sim
run tsim/join/left_anti_join.sim
run tsim/join/right_anti_join.sim
system sh/exec.sh -n dnode1 -s stop -x SIGINT

View File

@ -0,0 +1,88 @@
# LEFT ANTI JOIN checks, run against database test0.
# NOTE(review): tables sta, tba1 and tba2 are assumed to be created and filled by the calling script — confirm.
sql connect
sql use test0;
# Self anti join on sta with an extra ON filter on a.ts: 5 rows are expected, with a.ts from 16:29:02 to 16:29:05 and b.ts NULL.
sql select a.ts, b.ts from sta a left anti join sta b on a.ts = b.ts and a.ts < '2023-11-17 16:29:02' order by a.ts
if $rows != 5 then
return -1
endi
if $data00 != @23-11-17 16:29:02.000@ then
return -1
endi
if $data01 != NULL then
return -1
endi
if $data10 != @23-11-17 16:29:03.000@ then
return -1
endi
if $data11 != NULL then
return -1
endi
if $data20 != @23-11-17 16:29:03.000@ then
return -1
endi
if $data21 != NULL then
return -1
endi
if $data30 != @23-11-17 16:29:04.000@ then
return -1
endi
if $data31 != NULL then
return -1
endi
if $data40 != @23-11-17 16:29:05.000@ then
return -1
endi
if $data41 != NULL then
return -1
endi
# Non-equi ON term (a.col1 != b.col1) plus a WHERE filter on a.ts: exactly one row (a.col1 = 3, b.col1 NULL) is expected.
sql select a.col1, b.col1 from sta a left anti join sta b on a.ts = b.ts and a.col1 != b.col1 where a.ts < '2023-11-17 16:29:02' order by a.col1;
if $rows != 1 then
return -1
endi
if $data00 != 3 then
return -1
endi
if $data01 != NULL then
return -1
endi
# Plain self anti join on ts: no rows are expected.
sql select a.col1, b.col1 from sta a left anti join sta b on a.ts = b.ts;
if $rows != 0 then
return -1
endi
# tba1 anti tba2: two rows are expected (a.ts 16:29:02 and 16:29:04), b.ts NULL.
sql select a.ts, b.ts from tba1 a left anti join tba2 b on a.ts = b.ts order by a.ts;
if $rows != 2 then
return -1
endi
if $data00 != @23-11-17 16:29:02.000@ then
return -1
endi
if $data01 != NULL then
return -1
endi
if $data10 != @23-11-17 16:29:04.000@ then
return -1
endi
if $data11 != NULL then
return -1
endi
# tba2 anti tba1: two rows are expected (a.col1 3 and 7), b.col1 NULL.
sql select a.col1, b.col1 from tba2 a left anti join tba1 b on a.ts = b.ts order by a.col1;
if $rows != 2 then
return -1
endi
if $data00 != 3 then
return -1
endi
if $data01 != NULL then
return -1
endi
if $data10 != 7 then
return -1
endi
if $data11 != NULL then
return -1
endi

View File

@ -0,0 +1,76 @@
# LEFT SEMI JOIN checks, run against database test0.
# NOTE(review): tables sta, tba1 and tba2 are assumed to be created and filled by the calling script — confirm.
sql connect
sql use test0;
# Self semi join on sta with an extra ON filter on a.ts: 3 rows are expected (16:29:00 twice, then 16:29:01), with b.ts equal to a.ts.
sql select a.ts, b.ts from sta a left semi join sta b on a.ts = b.ts and a.ts < '2023-11-17 16:29:02' order by a.ts
if $rows != 3 then
return -1
endi
if $data00 != @23-11-17 16:29:00.000@ then
return -1
endi
if $data01 != @23-11-17 16:29:00.000@ then
return -1
endi
if $data10 != @23-11-17 16:29:00.000@ then
return -1
endi
if $data11 != @23-11-17 16:29:00.000@ then
return -1
endi
if $data20 != @23-11-17 16:29:01.000@ then
return -1
endi
if $data21 != @23-11-17 16:29:01.000@ then
return -1
endi
# WHERE filters on both sides: 2 rows are expected, with a.col1 values 1 and 2.
sql select a.col1, b.col1 from sta a left semi join sta b on a.ts = b.ts where a.ts < '2023-11-17 16:29:02' and b.ts < '2023-11-17 16:29:01' order by a.col1;
if $rows != 2 then
return -1
endi
if $data00 != 1 then
return -1
endi
if $data10 != 2 then
return -1
endi
# Plain self semi join on ts: 8 rows are expected.
sql select a.col1, b.col1 from sta a left semi join sta b on a.ts = b.ts;
if $rows != 8 then
return -1
endi
# tba1 semi tba2: 2 rows are expected, (a.col1, b.col1) = (1, 2) and (4, 5).
sql select a.col1, b.col1 from tba1 a left semi join tba2 b on a.ts = b.ts order by a.col1;
if $rows != 2 then
return -1
endi
if $data00 != 1 then
return -1
endi
if $data01 != 2 then
return -1
endi
if $data10 != 4 then
return -1
endi
if $data11 != 5 then
return -1
endi
# tba2 semi tba1: 2 rows are expected, (a.col1, b.col1) = (2, 1) and (5, 4).
sql select a.col1, b.col1 from tba2 a left semi join tba1 b on a.ts = b.ts order by a.col1;
if $rows != 2 then
return -1
endi
if $data00 != 2 then
return -1
endi
if $data01 != 1 then
return -1
endi
if $data10 != 5 then
return -1
endi
if $data11 != 4 then
return -1
endi

View File

@ -0,0 +1,93 @@
# RIGHT ANTI JOIN checks, run against database test0.
# NOTE(review): tables sta, tba1 and tba2 are assumed to be created and filled by the calling script — confirm.
# Every row returned by a right anti join has NULL left-side columns, so queries whose
# row order is asserted sort by the right-side column that the assertions inspect.
sql connect
sql use test0;
# Self anti join on sta with an extra ON filter on a.ts: 5 rows are expected, a.ts NULL and b.ts from 16:29:02 to 16:29:05.
sql select a.ts, b.ts from sta a right anti join sta b on a.ts = b.ts and a.ts < '2023-11-17 16:29:02' order by b.ts
if $rows != 5 then
return -1
endi
if $data00 != NULL then
return -1
endi
if $data01 != @23-11-17 16:29:02.000@ then
return -1
endi
if $data10 != NULL then
return -1
endi
if $data11 != @23-11-17 16:29:03.000@ then
return -1
endi
if $data20 != NULL then
return -1
endi
if $data21 != @23-11-17 16:29:03.000@ then
return -1
endi
if $data30 != NULL then
return -1
endi
if $data31 != @23-11-17 16:29:04.000@ then
return -1
endi
if $data40 != NULL then
return -1
endi
if $data41 != @23-11-17 16:29:05.000@ then
return -1
endi
# WHERE filter on the (always NULL) left side: no rows are expected.
sql select a.col1, b.col1 from sta a right anti join sta b on a.ts = b.ts and a.col1 != b.col1 where a.ts < '2023-11-17 16:29:02' order by a.col1;
if $rows != 0 then
return -1
endi
# Same join with the WHERE filter on the right side: exactly one row (a.col1 NULL, b.col1 = 3) is expected.
sql select a.col1, b.col1 from sta a right anti join sta b on a.ts = b.ts and a.col1 != b.col1 where b.ts < '2023-11-17 16:29:02' order by a.col1;
if $rows != 1 then
return -1
endi
if $data00 != NULL then
return -1
endi
if $data01 != 3 then
return -1
endi
# Plain self anti join on ts: no rows are expected.
sql select a.col1, b.col1 from sta a right anti join sta b on a.ts = b.ts;
if $rows != 0 then
return -1
endi
# tba1 right anti tba2: two rows are expected (b.ts 16:29:01 and 16:29:05), a.ts NULL.
sql select a.ts, b.ts from tba1 a right anti join tba2 b on a.ts = b.ts order by b.ts;
if $rows != 2 then
return -1
endi
if $data00 != NULL then
return -1
endi
if $data01 != @23-11-17 16:29:01.000@ then
return -1
endi
if $data10 != NULL then
return -1
endi
if $data11 != @23-11-17 16:29:05.000@ then
return -1
endi
# tba2 right anti tba1: two rows are expected (b.col1 3 and 5), a.col1 NULL.
sql select a.col1, b.col1 from tba2 a right anti join tba1 b on a.ts = b.ts order by b.col1;
if $rows != 2 then
return -1
endi
if $data00 != NULL then
return -1
endi
if $data01 != 3 then
return -1
endi
if $data10 != NULL then
return -1
endi
if $data11 != 5 then
return -1
endi

View File

@ -0,0 +1,76 @@
# RIGHT SEMI JOIN checks, run against database test0.
# NOTE(review): tables sta, tba1 and tba2 are assumed to be created and filled by the calling script — confirm.
sql connect
sql use test0;
# Self semi join on sta with an extra ON filter on b.ts: 3 rows are expected (16:29:00 twice, then 16:29:01), with a.ts equal to b.ts.
sql select a.ts, b.ts from sta a right semi join sta b on a.ts = b.ts and b.ts < '2023-11-17 16:29:02' order by a.ts
if $rows != 3 then
return -1
endi
if $data00 != @23-11-17 16:29:00.000@ then
return -1
endi
if $data01 != @23-11-17 16:29:00.000@ then
return -1
endi
if $data10 != @23-11-17 16:29:00.000@ then
return -1
endi
if $data11 != @23-11-17 16:29:00.000@ then
return -1
endi
if $data20 != @23-11-17 16:29:01.000@ then
return -1
endi
if $data21 != @23-11-17 16:29:01.000@ then
return -1
endi
# WHERE filters on both sides: 2 rows are expected, with b.col1 values 1 and 2.
sql select a.col1, b.col1 from sta a right semi join sta b on a.ts = b.ts where a.ts < '2023-11-17 16:29:02' and b.ts < '2023-11-17 16:29:01' order by b.col1;
if $rows != 2 then
return -1
endi
if $data01 != 1 then
return -1
endi
if $data11 != 2 then
return -1
endi
# Plain self semi join on ts: 8 rows are expected.
sql select a.col1, b.col1 from sta a right semi join sta b on a.ts = b.ts;
if $rows != 8 then
return -1
endi
# tba1 right semi tba2: 2 rows are expected, (a.col1, b.col1) = (1, 2) and (4, 5).
sql select a.col1, b.col1 from tba1 a right semi join tba2 b on a.ts = b.ts order by b.col1;
if $rows != 2 then
return -1
endi
if $data00 != 1 then
return -1
endi
if $data01 != 2 then
return -1
endi
if $data10 != 4 then
return -1
endi
if $data11 != 5 then
return -1
endi
# tba2 right semi tba1: 2 rows are expected, (a.col1, b.col1) = (2, 1) and (5, 4).
sql select a.col1, b.col1 from tba2 a right semi join tba1 b on a.ts = b.ts order by a.col1;
if $rows != 2 then
return -1
endi
if $data00 != 2 then
return -1
endi
if $data01 != 1 then
return -1
endi
if $data10 != 5 then
return -1
endi
if $data11 != 4 then
return -1
endi