diff --git a/include/util/talgo.h b/include/util/talgo.h
index 7c92c0fe87..675bb66431 100644
--- a/include/util/talgo.h
+++ b/include/util/talgo.h
@@ -54,6 +54,16 @@ typedef int32_t (*__ext_compar_fn_t)(const void *p1, const void *p2, const void
  */
 void taosqsort(void *src, int64_t numOfElem, int64_t size, const void *param, __ext_compar_fn_t comparFn);
 
+/**
+ * merge sort driven by a plain two-argument compare function (no extra parameter);
+ * src is left unchanged if the scratch memory cannot be allocated
+ * @param src       array to sort in place
+ * @param numOfElem number of elements in src
+ * @param size      size of one element in bytes
+ * @param comparFn  compare function for two elements
+ */
+void taosMergeSort(void *src, int64_t numOfElem, int64_t size, __compar_fn_t comparFn);
+
 /**
  * binary search, with range support
  *
diff --git a/include/util/tarray.h b/include/util/tarray.h
index 4d9c930521..17fc69cde5 100644
--- a/include/util/tarray.h
+++ b/include/util/tarray.h
@@ -214,12 +214,19 @@ void taosArrayDestroyEx(SArray* pArray, FDelete fp);
 void taosArraySwap(SArray* a, SArray* b);
 
 /**
- * sort the array
+ * sort the array using qsort
  * @param pArray
  * @param compar
  */
 void taosArraySort(SArray* pArray, __compar_fn_t comparFn);
 
+/**
+ * sort the array using merge sort
+ * @param pArray
+ * @param comparFn
+ */
+void taosArrayMSort(SArray* pArray, __compar_fn_t comparFn);
+
 /**
  * search the array
  * @param pArray
diff --git a/source/common/src/tdataformat.c b/source/common/src/tdataformat.c
index 62504139f0..1f754cd4b8 100644
--- a/source/common/src/tdataformat.c
+++ b/source/common/src/tdataformat.c
@@ -612,7 +612,7 @@ _exit:
 
 void tRowSort(SArray *aRowP) {
   if (TARRAY_SIZE(aRowP) <= 1) return;
-  taosArraySort(aRowP, tRowPCmprFn);
+  taosArrayMSort(aRowP, tRowPCmprFn);
 }
 
 int32_t tRowMerge(SArray *aRowP, STSchema *pTSchema, int8_t flag) {
diff --git a/source/util/src/talgo.c b/source/util/src/talgo.c
index e373850b3c..a39dd4cc99 100644
--- a/source/util/src/talgo.c
+++ b/source/util/src/talgo.c
@@ -273,3 +273,101 @@ void taosheapsort(void *base, int32_t size, int32_t len, const void *parcompar,
 
   taosMemoryFree(buf);
 }
+
+/**
+ * Merge the adjacent sorted runs src[start..leftend] and src[leftend + 1..end] back into src.
+ * On ties the element of the left run is taken first.
+ *
+ * @param tmp scratch buffer of at least (end - start + 1) * size bytes
+ */
+static void taosMerge(void *src, int64_t start, int64_t leftend, int64_t end, int64_t size, const void *param,
+                      __ext_compar_fn_t comparFn, void *tmp) {
+  int64_t leftSize = leftend - start + 1;
+  int64_t rightSize = end - leftend;
+
+  void *leftBuf = tmp;
+  void *rightBuf = (char *)tmp + (leftSize * size);
+
+  memcpy(leftBuf, elePtrAt(src, size, start), leftSize * size);
+  memcpy(rightBuf, elePtrAt(src, size, leftend + 1), rightSize * size);
+
+  int64_t i = 0, j = 0, k = start;
+
+  while (i < leftSize && j < rightSize) {
+    int32_t ret = comparFn(elePtrAt(leftBuf, size, i), elePtrAt(rightBuf, size, j), param);
+    if (ret <= 0) {
+      memcpy(elePtrAt(src, size, k), elePtrAt(leftBuf, size, i), size);
+      i++;
+    } else {
+      memcpy(elePtrAt(src, size, k), elePtrAt(rightBuf, size, j), size);
+      j++;
+    }
+    k++;
+  }
+
+  // the loop above exhausts exactly one run; copy the rest of the other one in a single call
+  if (i < leftSize) {
+    memcpy(elePtrAt(src, size, k), elePtrAt(leftBuf, size, i), (leftSize - i) * size);
+  } else if (j < rightSize) {
+    memcpy(elePtrAt(src, size, k), elePtrAt(rightBuf, size, j), (rightSize - j) * size);
+  }
+}
+
+/**
+ * Bottom-up merge sort: runs of THRESHOLD_SIZE elements are insertion-sorted first and then
+ * merged pairwise, doubling the run length on every pass.
+ *
+ * All scratch memory is acquired before src is modified; if an allocation fails the function
+ * returns with src untouched instead of handing a NULL buffer to tInsertSort/memcpy.
+ */
+static void taosMergeSortHelper(void *src, int64_t numOfElem, int64_t size, const void *param,
+                                __ext_compar_fn_t comparFn) {
+  if (src == NULL || numOfElem <= 1 || size <= 0) return;  // nothing to sort
+
+  const int32_t THRESHOLD_SIZE = 6;
+
+  char *buf = taosMemoryCalloc(1, size);  // swap buffer for the insertion sort
+  void *tmp = NULL;                       // merge scratch, needed only when there is more than one run
+  if (numOfElem > THRESHOLD_SIZE) {
+    tmp = taosMemoryMalloc(numOfElem * size);
+  }
+  if (buf == NULL || (numOfElem > THRESHOLD_SIZE && tmp == NULL)) {
+    taosMemoryFreeClear(buf);
+    taosMemoryFreeClear(tmp);
+    return;
+  }
+
+  // short array sort, instead of merge sort process
+  for (int64_t start = 0; start < numOfElem - 1; start += THRESHOLD_SIZE) {
+    int64_t end = (start + THRESHOLD_SIZE - 1) <= numOfElem - 1 ? (start + THRESHOLD_SIZE - 1) : numOfElem - 1;
+    tInsertSort(src, size, start, end, param, comparFn, buf);
+  }
+  taosMemoryFreeClear(buf);
+
+  if (tmp != NULL) {
+    for (int64_t currSize = THRESHOLD_SIZE; currSize <= numOfElem - 1; currSize = 2 * currSize) {
+      for (int64_t leftStart = 0; leftStart < numOfElem - 1; leftStart += 2 * currSize) {
+        int64_t leftend = leftStart + currSize - 1;
+        int64_t rightEnd =
+            (leftStart + 2 * currSize - 1 < numOfElem - 1) ? (leftStart + 2 * currSize - 1) : (numOfElem - 1);
+        if (leftend >= rightEnd) break;
+
+        taosMerge(src, leftStart, leftend, rightEnd, size, param, comparFn, tmp);
+      }
+    }
+
+    taosMemoryFreeClear(tmp);
+  }
+}
+
+// adapts a two-argument comparator to the three-argument form; param points to the comparator
+static int32_t msortHelper(const void *p1, const void *p2, const void *param) {
+  __compar_fn_t comparFn = *(const __compar_fn_t *)param;
+  return comparFn(p1, p2);
+}
+
+void taosMergeSort(void *src, int64_t numOfElem, int64_t size, __compar_fn_t comparFn) {
+  // hand over the address of the function pointer: ISO C defines no conversion between
+  // function pointers and void *
+  taosMergeSortHelper(src, numOfElem, size, &comparFn, msortHelper);
+}
diff --git a/source/util/src/tarray.c b/source/util/src/tarray.c
index 8e7c0f9584..0a71061c52 100644
--- a/source/util/src/tarray.c
+++ b/source/util/src/tarray.c
@@ -417,6 +417,10 @@ void taosArraySort(SArray* pArray, __compar_fn_t compar) {
   taosSort(pArray->pData, pArray->size, pArray->elemSize, compar);
 }
 
+void taosArrayMSort(SArray* pArray, __compar_fn_t compar) {
+  taosMergeSort(pArray->pData, pArray->size, pArray->elemSize, compar);
+}
+
 void* taosArraySearch(const SArray* pArray, const void* key, __compar_fn_t comparFn, int32_t flags) {
   return taosbsearch(key, pArray->pData, pArray->size, pArray->elemSize, comparFn, flags);
 }
diff --git a/source/util/test/CMakeLists.txt b/source/util/test/CMakeLists.txt
index 0bf06e6f44..94f8deee44 100644
--- a/source/util/test/CMakeLists.txt
+++ b/source/util/test/CMakeLists.txt
@@ -84,3 +84,11 @@ add_test(
     NAME pageBufferTest
     COMMAND pageBufferTest
 )
+
+# talgoTest
+add_executable(talgoTest "talgoTest.cpp")
+target_link_libraries(talgoTest os util gtest_main)
+add_test(
+    NAME talgoTest
+    COMMAND talgoTest
+)
diff --git a/source/util/test/talgoTest.cpp b/source/util/test/talgoTest.cpp
new file mode 100644
index 0000000000..b5a8db7378
--- /dev/null
+++ b/source/util/test/talgoTest.cpp
@@ -0,0 +1,126 @@
+#include <gtest/gtest.h>
+#include <stdio.h>
+#include <time.h>
+#include <vector>
+#include "talgo.h"
+
+struct TestStruct {
+  int   a;
+  float b;
+};
+
+// Define a custom comparison function for testing
+int cmpFunc(const void* a, const void* b) {
+  const TestStruct* pa = reinterpret_cast<const TestStruct*>(a);
+  const TestStruct* pb = reinterpret_cast<const TestStruct*>(b);
+  if (pa->a < pb->a) {
+    return -1;
+  } else if (pa->a > pb->a) {
+    return 1;
+  } else {
+    return 0;
+  }
+}
+
+TEST(utilTest, taosMSort) {
+  // Create an array of test data
+  TestStruct arr[] = {{4, 2.5}, {3, 6}, {2, 1.5}, {3, 2}, {1, 3.5}, {3, 5}};
+
+  // Sort the array using taosMergeSort
+  taosMergeSort(arr, 6, sizeof(TestStruct), cmpFunc);
+
+  for (int i = 0; i < sizeof(arr) / sizeof(TestStruct); i++) {
+    printf("%d: %d %f\n", i, arr[i].a, arr[i].b);
+  }
+
+  // Check that the array is sorted correctly
+  EXPECT_EQ(arr[0].a, 1);
+  EXPECT_EQ(arr[1].a, 2);
+  EXPECT_EQ(arr[2].a, 3);
+  EXPECT_EQ(arr[2].b, 6);
+  EXPECT_EQ(arr[3].a, 3);
+  EXPECT_EQ(arr[3].b, 2);
+  EXPECT_EQ(arr[4].a, 3);
+  EXPECT_EQ(arr[4].b, 5);
+  EXPECT_EQ(arr[5].a, 4);
+}
+
+// Enough elements to go through the merge passes, with duplicate keys to check stability
+TEST(utilTest, taosMSortStable) {
+  const int  numOfElem = 50;
+  TestStruct arr[numOfElem];
+  for (int i = 0; i < numOfElem; i++) {
+    arr[i].a = (i * 7) % 5;  // only five distinct keys
+    arr[i].b = (float)i;     // records the input position
+  }
+
+  taosMergeSort(arr, numOfElem, sizeof(TestStruct), cmpFunc);
+
+  for (int i = 0; i < numOfElem - 1; i++) {
+    ASSERT_LE(arr[i].a, arr[i + 1].a);
+    if (arr[i].a == arr[i + 1].a) {
+      ASSERT_LT(arr[i].b, arr[i + 1].b);  // equal keys keep their input order
+    }
+  }
+}
+
+int cmpInt(const void* a, const void* b) {
+  int int_a = *((const int*)a);
+  int int_b = *((const int*)b);
+
+  if (int_a == int_b)
+    return 0;
+  else if (int_a < int_b)
+    return -1;
+  else
+    return 1;
+}
+
+TEST(utilTest, taosMSort2) {
+  clock_t start_time, end_time;
+  double  cpu_time_used;
+
+  int times = 10000;
+  start_time = clock();
+  for (int i = 0; i < times; i++) {
+    TestStruct arr[] = {{4, 2.5}, {3, 6}, {2, 1.5}, {3, 2}, {1, 3.5}, {3, 5}};
+    taosMergeSort(arr, 6, sizeof(TestStruct), cmpFunc);
+  }
+  end_time = clock();
+  cpu_time_used = ((double)(end_time - start_time)) / CLOCKS_PER_SEC;
+  printf("taosMSort %d times: %f s\n", times, cpu_time_used);
+
+  start_time = clock();
+  for (int i = 0; i < times; i++) {
+    TestStruct arr[] = {{4, 2.5}, {3, 6}, {2, 1.5}, {3, 2}, {1, 3.5}, {3, 5}};
+    taosSort(arr, 6, sizeof(TestStruct), cmpFunc);
+  }
+  end_time = clock();
+  cpu_time_used = ((double)(end_time - start_time)) / CLOCKS_PER_SEC;
+  printf("taosSort %d times: %f s\n", times, cpu_time_used);
+
+  const int arraySize = 1000000;
+  // heap storage: two million ints as local arrays would not fit a typical thread stack
+  std::vector<int> data1(arraySize);
+  std::vector<int> data2(arraySize);
+  for (int i = 0; i < arraySize; ++i) {
+    data1[i] = taosRand();
+    data2[i] = data1[i];
+  }
+  start_time = clock();
+  taosMergeSort(data1.data(), arraySize, sizeof(int), cmpInt);
+  end_time = clock();
+  cpu_time_used = ((double)(end_time - start_time)) / CLOCKS_PER_SEC;
+  printf("taosMSort length:%d cost: %f s\n", arraySize, cpu_time_used);
+
+  start_time = clock();
+  taosSort(data2.data(), arraySize, sizeof(int), cmpInt);
+  end_time = clock();
+  cpu_time_used = ((double)(end_time - start_time)) / CLOCKS_PER_SEC;
+  printf("taosSort length:%d cost: %f s\n", arraySize, cpu_time_used);
+
+  for (int i = 0; i < arraySize - 1; i++) {
+    EXPECT_EQ(data1[i], data2[i]);
+    ASSERT_LE(data1[i], data1[i+1]);
+  }
+}