Merge pull request #22934 from taosdata/fix/xsren/TD-26274/mergeSort_main

Use a stable merge sort instead of qsort when inserting unordered data
This commit is contained in:
dapan1121 2023-09-19 14:17:47 +08:00 committed by GitHub
commit aabd7a51f7
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
10 changed files with 230 additions and 9 deletions

View File

@ -108,7 +108,7 @@ int32_t tBufferReserve(SBuffer *pBuffer, int64_t nData, void **ppData);
int32_t tRowBuild(SArray *aColVal, const STSchema *pTSchema, SRow **ppRow);
int32_t tRowGet(SRow *pRow, STSchema *pTSchema, int32_t iCol, SColVal *pColVal);
void tRowDestroy(SRow *pRow);
void tRowSort(SArray *aRowP);
int32_t tRowSort(SArray *aRowP);
int32_t tRowMerge(SArray *aRowP, STSchema *pTSchema, int8_t flag);
int32_t tRowUpsertColData(SRow *pRow, STSchema *pTSchema, SColData *aColData, int32_t nColData, int32_t flag);

View File

@ -54,6 +54,17 @@ typedef int32_t (*__ext_compar_fn_t)(const void *p1, const void *p2, const void
*/
void taosqsort(void *src, int64_t numOfElem, int64_t size, const void *param, __ext_compar_fn_t comparFn);
/**
 * merge sort driven by a plain two-argument compare function (no extra
 * user parameter is passed to comparFn)
 *
 * @param src        array to be sorted; the sorted result is written back into it
 * @param numOfElem  number of elements in src
 * @param size       size of a single element
 * @param comparFn   compare function called with pointers to two elements
 * @return int32_t 0 for success, other for failure.
 */
int32_t taosMergeSort(void *src, int64_t numOfElem, int64_t size, __compar_fn_t comparFn);
/**
* binary search, with range support
*

View File

@ -214,12 +214,19 @@ void taosArrayDestroyEx(SArray* pArray, FDelete fp);
void taosArraySwap(SArray* a, SArray* b);
/**
* sort the array
* sort the array use qsort
* @param pArray
* @param compar
*/
void taosArraySort(SArray* pArray, __compar_fn_t comparFn);
/**
 * sort the array using merge sort
 * @param pArray    array whose elements are sorted
 * @param comparFn  compare function called with pointers to two elements
 * @return int32_t  the result code of the underlying merge sort
 */
int32_t taosArrayMSort(SArray* pArray, __compar_fn_t comparFn);
/**
* search the array
* @param pArray

View File

@ -610,9 +610,13 @@ _exit:
return code;
}
void tRowSort(SArray *aRowP) {
if (TARRAY_SIZE(aRowP) <= 1) return;
taosArraySort(aRowP, tRowPCmprFn);
/**
 * Sort the row pointers held in aRowP with taosArrayMSort, ordering them
 * by tRowPCmprFn.
 *
 * @param aRowP array of rows to sort
 * @return 0 when the array holds at most one element; otherwise the code
 *         returned by taosArrayMSort (a failure is also logged).
 */
int32_t tRowSort(SArray *aRowP) {
  if (TARRAY_SIZE(aRowP) > 1) {
    int32_t sortCode = taosArrayMSort(aRowP, tRowPCmprFn);
    if (sortCode != TSDB_CODE_SUCCESS) {
      uError("taosArrayMSort failed caused by %d", sortCode);
    }
    return sortCode;
  }

  // zero or one row: nothing to reorder
  return 0;
}
int32_t tRowMerge(SArray *aRowP, STSchema *pTSchema, int8_t flag) {

View File

@ -289,8 +289,8 @@ int32_t buildSubmitReqFromBlock(SDataInserterHandle* pInserter, SSubmitReq2** pp
}
if (disorderTs) {
tRowSort(tbData.aRowP);
if ((terrno = tRowMerge(tbData.aRowP, (STSchema*)pTSchema, 0)) != 0) {
if ((tRowSort(tbData.aRowP) != TSDB_CODE_SUCCESS) ||
(terrno = tRowMerge(tbData.aRowP, (STSchema*)pTSchema, 0)) != 0) {
goto _end;
}
}

View File

@ -495,9 +495,9 @@ int32_t insMergeTableDataCxt(SHashObj* pTableHash, SArray** pVgDataBlocks) {
tColDataSortMerge(pTableCxt->pData->aCol);
} else {
if (!pTableCxt->ordered) {
tRowSort(pTableCxt->pData->aRowP);
code = tRowSort(pTableCxt->pData->aRowP);
}
if (!pTableCxt->ordered || pTableCxt->duplicateTs) {
if (code == TSDB_CODE_SUCCESS && (!pTableCxt->ordered || pTableCxt->duplicateTs)) {
code = tRowMerge(pTableCxt->pData->aRowP, pTableCxt->pSchema, 0);
}
}

View File

@ -273,3 +273,86 @@ void taosheapsort(void *base, int32_t size, int32_t len, const void *parcompar,
taosMemoryFree(buf);
}
/**
 * Merge two adjacent, already sorted runs of src into one sorted run.
 *
 * The left run covers element indices [start, leftend], the right run covers
 * [leftend + 1, end]. Both runs are first copied into tmp and then merged
 * back into src[start..end].
 *
 * @param src      array holding both runs
 * @param start    index of the first element of the left run
 * @param leftend  index of the last element of the left run
 * @param end      index of the last element of the right run
 * @param size     size of one element
 * @param param    passed through unchanged as the third argument of comparFn
 * @param comparFn compare function; a result <= 0 takes the left element
 * @param tmp      scratch buffer, must hold at least (end - start + 1) elements
 */
static void taosMerge(void *src, int32_t start, int32_t leftend, int32_t end, int64_t size, const void *param,
                      __ext_compar_fn_t comparFn, void *tmp) {
  const int32_t nLeft = leftend - start + 1;
  const int32_t nRight = end - leftend;

  // Snapshot both runs: the left copy sits at the front of tmp, the right copy directly behind it.
  void *leftCopy = tmp;
  void *rightCopy = (char *)tmp + (nLeft * size);
  memcpy(leftCopy, elePtrAt(src, size, start), nLeft * size);
  memcpy(rightCopy, elePtrAt(src, size, leftend + 1), nRight * size);

  int32_t li = 0;
  int32_t ri = 0;
  for (int32_t out = start; out <= end; ++out) {
    int32_t takeLeft;
    if (ri >= nRight) {
      takeLeft = 1;  // right run exhausted, drain the left one
    } else if (li >= nLeft) {
      takeLeft = 0;  // left run exhausted, drain the right one
    } else {
      // On a tie the left element goes first.
      takeLeft = (comparFn(elePtrAt(leftCopy, size, li), elePtrAt(rightCopy, size, ri), param) <= 0);
    }

    if (takeLeft) {
      memcpy(elePtrAt(src, size, out), elePtrAt(leftCopy, size, li), size);
      ++li;
    } else {
      memcpy(elePtrAt(src, size, out), elePtrAt(rightCopy, size, ri), size);
      ++ri;
    }
  }
}
/**
 * Bottom-up merge sort of src.
 *
 * The array is first cut into runs of THRESHOLD_SIZE elements, each sorted
 * with tInsertSort; the runs are then merged pairwise with taosMerge, with
 * the run length doubling on every pass.
 *
 * Element indices are handed to tInsertSort/taosMerge as int32_t, so arrays
 * with more than INT32_MAX elements are not supported.
 *
 * @param src       array to sort; the result is written back into it
 * @param numOfElem number of elements in src
 * @param size      size of one element
 * @param param     passed through unchanged as the third argument of comparFn
 * @param comparFn  three-argument compare function
 * @return 0 on success, TSDB_CODE_OUT_OF_MEMORY if a scratch buffer cannot be allocated
 */
static int32_t taosMergeSortHelper(void *src, int64_t numOfElem, int64_t size, const void *param,
                                   __ext_compar_fn_t comparFn) {
  // short array sort, instead of merge sort process
  const int32_t THRESHOLD_SIZE = 6;

  // Zero or one element is already sorted: skip the scratch allocations entirely.
  if (numOfElem <= 1) return 0;

  char *buf = taosMemoryCalloc(1, size);  // prepare the swap buffer
  if (buf == NULL) return TSDB_CODE_OUT_OF_MEMORY;

  // Counters are 64-bit so that index arithmetic such as `start + THRESHOLD_SIZE - 1`
  // cannot overflow a 32-bit int when numOfElem is close to INT32_MAX.
  for (int64_t start = 0; start < numOfElem - 1; start += THRESHOLD_SIZE) {
    int64_t runLast = start + THRESHOLD_SIZE - 1;
    int64_t end = (runLast <= numOfElem - 1) ? runLast : numOfElem - 1;
    tInsertSort(src, size, (int32_t)start, (int32_t)end, param, comparFn, buf);
  }
  taosMemoryFreeClear(buf);

  if (numOfElem > THRESHOLD_SIZE) {
    void *tmp = taosMemoryMalloc(numOfElem * size);
    if (tmp == NULL) return TSDB_CODE_OUT_OF_MEMORY;

    // currSize is the length of the sorted runs being merged in this pass.
    for (int64_t currSize = THRESHOLD_SIZE; currSize <= numOfElem - 1; currSize = 2 * currSize) {
      for (int64_t leftStart = 0; leftStart < numOfElem - 1; leftStart += 2 * currSize) {
        int64_t leftend = leftStart + currSize - 1;
        int64_t pairLast = leftStart + 2 * currSize - 1;
        int64_t rightEnd = (pairLast < numOfElem - 1) ? pairLast : (numOfElem - 1);

        // No right-hand run left to merge with: this pass is finished.
        if (leftend >= rightEnd) break;

        taosMerge(src, (int32_t)leftStart, (int32_t)leftend, (int32_t)rightEnd, size, param, comparFn, tmp);
      }
    }

    taosMemoryFreeClear(tmp);
  }

  return 0;
}
int32_t msortHelper(const void *p1, const void *p2, const void *param) {
__compar_fn_t comparFn = param;
return comparFn(p1, p2);
}
/**
 * Merge sort with a plain two-argument compare function.
 *
 * comparFn is smuggled through the helper's opaque `param` slot and called
 * back via msortHelper.
 *
 * @return the result of taosMergeSortHelper
 */
int32_t taosMergeSort(void *src, int64_t numOfElem, int64_t size, __compar_fn_t comparFn) {
  return taosMergeSortHelper(src, numOfElem, size, (void *)comparFn, msortHelper);
}

View File

@ -417,6 +417,10 @@ void taosArraySort(SArray* pArray, __compar_fn_t compar) {
taosSort(pArray->pData, pArray->size, pArray->elemSize, compar);
}
/**
 * Sort the elements of pArray with taosMergeSort, using compar to order them.
 *
 * @return the code returned by taosMergeSort
 */
int32_t taosArrayMSort(SArray* pArray, __compar_fn_t compar) {
  int32_t code = taosMergeSort(pArray->pData, pArray->size, pArray->elemSize, compar);
  return code;
}
void* taosArraySearch(const SArray* pArray, const void* key, __compar_fn_t comparFn, int32_t flags) {
return taosbsearch(key, pArray->pData, pArray->size, pArray->elemSize, comparFn, flags);
}

View File

@ -84,3 +84,11 @@ add_test(
NAME pageBufferTest
COMMAND pageBufferTest
)
# talgoTest
add_executable(talgoTest "talgoTest.cpp")
target_link_libraries(talgoTest os util gtest_main)
add_test(
NAME talgoTest
COMMAND talgoTest
)

View File

@ -0,0 +1,104 @@
#include <gtest/gtest.h>
#include <stdlib.h>
#include <vector>
#include "talgo.h"
// Element type used by the sort tests.
struct TestStruct {
int a;    // key compared by cmpFunc
float b;  // not compared by cmpFunc; lets a test tell elements with equal `a` apart
};
// Define a custom comparison function for testing
int cmpFunc(const void* a, const void* b) {
const TestStruct* pa = reinterpret_cast<const TestStruct*>(a);
const TestStruct* pb = reinterpret_cast<const TestStruct*>(b);
if (pa->a < pb->a) {
return -1;
} else if (pa->a > pb->a) {
return 1;
} else {
return 0;
}
}
// Sorts a small array with taosMergeSort and checks both the key order and
// that elements with equal keys keep their input order.
TEST(utilTest, taosMSort) {
  // Three elements share key 3 (payloads 6, 2, 5) so their relative order is observable.
  TestStruct arr[] = {{4, 2.5}, {3, 6}, {2, 1.5}, {3, 2}, {1, 3.5}, {3, 5}};
  const int count = static_cast<int>(sizeof(arr) / sizeof(arr[0]));

  // Sort the array using taosMergeSort; it reports failure through its return value.
  ASSERT_EQ(taosMergeSort(arr, count, sizeof(TestStruct), cmpFunc), 0);

  for (int i = 0; i < count; i++) {
    printf("%d: %d %f\n", i, arr[i].a, arr[i].b);
  }

  // Check that the array is sorted correctly
  EXPECT_EQ(arr[0].a, 1);
  EXPECT_EQ(arr[1].a, 2);
  EXPECT_EQ(arr[2].a, 3);
  EXPECT_EQ(arr[2].b, 6);
  EXPECT_EQ(arr[3].a, 3);
  EXPECT_EQ(arr[3].b, 2);
  EXPECT_EQ(arr[4].a, 3);
  EXPECT_EQ(arr[4].b, 5);
  EXPECT_EQ(arr[5].a, 4);
}
// Three-way comparison of two ints passed by pointer: returns -1, 0 or 1.
int cmpInt(const void* a, const void* b) {
  const int lhs = *(const int*)a;
  const int rhs = *(const int*)b;
  return (lhs > rhs) - (lhs < rhs);
}
// Times taosMergeSort against taosSort on a tiny array (repeated) and on one
// large random int array, then checks that both produce the same sorted result.
TEST(utilTest, taosMSort2) {
  clock_t start_time, end_time;
  double  cpu_time_used;

  const int times = 10000;
  start_time = clock();
  for (int i = 0; i < times; i++) {
    TestStruct arr[] = {{4, 2.5}, {3, 6}, {2, 1.5}, {3, 2}, {1, 3.5}, {3, 5}};
    taosMergeSort(arr, 6, sizeof(TestStruct), cmpFunc);
  }
  end_time = clock();
  cpu_time_used = ((double)(end_time - start_time)) / CLOCKS_PER_SEC;
  printf("taosMSort %d times: %f s\n", times, cpu_time_used);

  start_time = clock();
  for (int i = 0; i < times; i++) {
    TestStruct arr[] = {{4, 2.5}, {3, 6}, {2, 1.5}, {3, 2}, {1, 3.5}, {3, 5}};
    taosSort(arr, 6, sizeof(TestStruct), cmpFunc);
  }
  end_time = clock();
  cpu_time_used = ((double)(end_time - start_time)) / CLOCKS_PER_SEC;
  printf("taosSort %d times: %f s\n", times, cpu_time_used);

  // Two million ints (about 8 MB) do not belong on the stack: keep them on the heap.
  const int        arraySize = 1000000;
  std::vector<int> data1(arraySize);
  std::vector<int> data2(arraySize);
  for (int i = 0; i < arraySize; ++i) {
    data1[i] = taosRand();
    data2[i] = data1[i];
  }

  start_time = clock();
  const int32_t code = taosMergeSort(data1.data(), arraySize, sizeof(int), cmpInt);
  end_time = clock();
  ASSERT_EQ(code, 0);  // the merge sort can fail; do not compare results of a failed run
  cpu_time_used = ((double)(end_time - start_time)) / CLOCKS_PER_SEC;
  printf("taosMSort length:%d cost: %f s\n", arraySize, cpu_time_used);

  start_time = clock();
  taosSort(data2.data(), arraySize, sizeof(int), cmpInt);
  end_time = clock();
  cpu_time_used = ((double)(end_time - start_time)) / CLOCKS_PER_SEC;
  printf("taosSort length:%d cost: %f s\n", arraySize, cpu_time_used);

  // Every element, including the last one, must match the reference sort,
  // and the merge-sorted data must be non-decreasing.
  for (int i = 0; i < arraySize; i++) {
    EXPECT_EQ(data1[i], data2[i]);
    if (i + 1 < arraySize) {
      ASSERT_LE(data1[i], data1[i + 1]);
    }
  }
}