From 6ac6e8f2bd382c2e3b41dd604e43f197135a770c Mon Sep 17 00:00:00 2001 From: kailixu Date: Thu, 22 Feb 2024 17:46:36 +0800 Subject: [PATCH] enh: unit test and optimization for base58 --- include/util/tbase58.h | 3 ++ source/util/src/tbase58.c | 34 +++++++++--- source/util/test/CMakeLists.txt | 8 +++ source/util/test/tbaseCodecTest.cpp | 80 +++++++++++++++++++++++++++++ 4 files changed, 117 insertions(+), 8 deletions(-) create mode 100644 source/util/test/tbaseCodecTest.cpp diff --git a/include/util/tbase58.h b/include/util/tbase58.h index e1b03f8a8f..ab62e1926e 100644 --- a/include/util/tbase58.h +++ b/include/util/tbase58.h @@ -22,6 +22,9 @@ extern "C" { #endif +#define TBASE_MAX_ILEN 4096 +#define TBASE_MAX_OLEN 5653 + uint8_t *base58_decode(const char *value, size_t inlen, int32_t *outlen); char *base58_encode(const uint8_t *value, int32_t vlen); diff --git a/source/util/src/tbase58.c b/source/util/src/tbase58.c index fa3ecadd14..e1cee72b47 100644 --- a/source/util/src/tbase58.c +++ b/source/util/src/tbase58.c @@ -18,24 +18,29 @@ #include #include -#define BASE_BUF_SIZE 256 +#define TBASE_BUF_SIZE 256 static const char *basis_58 = "123456789ABCDEFGHJKLMNPQRSTUVWXYZabcdefghijkmnopqrstuvwxyz"; char *base58_encode(const uint8_t *value, int32_t vlen) { const uint8_t *pb = value; const uint8_t *pe = pb + vlen; - uint8_t buf[BASE_BUF_SIZE] = {0}; + uint8_t buf[TBASE_BUF_SIZE] = {0}; uint8_t *pbuf = &buf[0]; bool bfree = false; int32_t nz = 0, size = 0, len = 0; + if (vlen > TBASE_MAX_ILEN) { + terrno = TSDB_CODE_INVALID_PARA; + return NULL; + } + while (pb != pe && *pb == 0) { ++pb; ++nz; } size = (pe - pb) * 69 / 50 + 1; - if (size > BASE_BUF_SIZE) { + if (size > TBASE_BUF_SIZE) { if (!(pbuf = taosMemoryCalloc(1, size))) { terrno = TSDB_CODE_OUT_OF_MEMORY; return NULL; @@ -47,7 +52,7 @@ char *base58_encode(const uint8_t *value, int32_t vlen) { int32_t num = *pb; int32_t i = 0; for (int32_t j = (int32_t)size - 1; (num != 0 || i < len) && j >= 0; --j, ++i) { - num += ((int32_t)buf[j]) << 8; + num += ((int32_t)pbuf[j]) << 8; pbuf[j] = num % 58; num /= 58; } @@ -57,7 +62,7 @@ char *base58_encode(const uint8_t *value, int32_t vlen) { const uint8_t *pi = pbuf + (size - len); while (pi != pbuf + size && *pi == 0) ++pi; - uint8_t *result = taosMemoryCalloc(1, size + 1); + uint8_t *result = taosMemoryCalloc(1, nz + (pbuf + size - pi) + 1); if (!result) { terrno = TSDB_CODE_OUT_OF_MEMORY; if (bfree) taosMemoryFree(pbuf); @@ -83,11 +88,23 @@ static const signed char index_58[256] = { uint8_t *base58_decode(const char *value, size_t inlen, int32_t *outlen) { const char *pe = value + inlen; - uint8_t buf[BASE_BUF_SIZE] = {0}; + uint8_t buf[TBASE_BUF_SIZE] = {0}; uint8_t *pbuf = &buf[0]; bool bfree = false; int32_t nz = 0, size = 0, len = 0; + if (inlen > TBASE_MAX_OLEN) { + terrno = TSDB_CODE_INVALID_PARA; + return NULL; + } + + for (int32_t i = 0; i < inlen; ++i) { + if (value[i] == 0) { + terrno = TSDB_CODE_INVALID_PARA; + return NULL; + } + } + while (*value && isspace(*value)) ++value; while (*value == '1') { ++nz; @@ -95,7 +112,7 @@ uint8_t *base58_decode(const char *value, size_t inlen, int32_t *outlen) { } size = (int32_t)(pe - value) * 733 / 1000 + 1; - if (size > BASE_BUF_SIZE) { + if (size > TBASE_BUF_SIZE) { if (!(pbuf = taosMemoryCalloc(1, size))) { terrno = TSDB_CODE_OUT_OF_MEMORY; return NULL; @@ -106,6 +123,7 @@ uint8_t *base58_decode(const char *value, size_t inlen, int32_t *outlen) { while (*value && !isspace(*value)) { int32_t num = index_58[(uint8_t)*value]; if (num == -1) { + terrno = TSDB_CODE_INVALID_PARA; if (bfree) taosMemoryFree(pbuf); return NULL; } @@ -127,7 +145,7 @@ uint8_t *base58_decode(const char *value, size_t inlen, int32_t *outlen) { const uint8_t *it = pbuf + (size - len); while (it != pbuf + size && *it == 0) ++it; - uint8_t *result = taosMemoryCalloc(1, size + 1); + uint8_t *result = taosMemoryCalloc(1, inlen + 1); if (!result) { if (bfree) taosMemoryFree(pbuf); terrno = TSDB_CODE_OUT_OF_MEMORY; diff --git a/source/util/test/CMakeLists.txt b/source/util/test/CMakeLists.txt index f4f3880388..5e32bd82b7 100644 --- a/source/util/test/CMakeLists.txt +++ b/source/util/test/CMakeLists.txt @@ -100,3 +100,11 @@ add_test( NAME talgoTest COMMAND talgoTest ) + +# tbaseCodecTest +add_executable(tbaseCodecTest "tbaseCodecTest.cpp") +target_link_libraries(tbaseCodecTest os util common gtest_main) +add_test( + NAME tbaseCodecTest + COMMAND tbaseCodecTest + diff --git a/source/util/test/tbaseCodecTest.cpp b/source/util/test/tbaseCodecTest.cpp new file mode 100644 index 0000000000..70639e7ec9 --- /dev/null +++ b/source/util/test/tbaseCodecTest.cpp @@ -0,0 +1,80 @@ +#include +#include + +#include +#include "os.h" +#include "taos.h" +#include "taoserror.h" +#include "tbase58.h" +#include "tbase64.h" +#include "tglobal.h" + +using namespace std; + +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wwrite-strings" +#pragma GCC diagnostic ignored "-Wunused-function" +#pragma GCC diagnostic ignored "-Wunused-variable" +#pragma GCC diagnostic ignored "-Wsign-compare" + +int main(int argc, char **argv) { + testing::InitGoogleTest(&argc, argv); + return RUN_ALL_TESTS(); +} + +static void checkBase58Codec(uint8_t *pRaw, int32_t rawLen, int32_t index) { + char *pEnc = base58_encode((const uint8_t *)pRaw, rawLen); + ASSERT_NE(nullptr, pEnc); + + int32_t encLen = strlen(pEnc); + std::cout << "index:" << index << ", encLen is " << encLen << std::endl; + int32_t decLen = 0; + char *pDec = (char *)base58_decode((const char *)pEnc, encLen, &decLen); + std::cout << "index:" << index << ", decLen is " << decLen << std::endl; + ASSERT_NE(nullptr, pDec); + ASSERT_EQ(rawLen, decLen); + ASSERT_LE(rawLen, encLen); + ASSERT_EQ(0, strncmp((char *)pRaw, pDec, rawLen)); + taosMemoryFreeClear(pDec); + taosMemoryFreeClear(pEnc); +} + +TEST(TD_BASE_CODEC_TEST, tbase58_test) { + const int32_t TEST_LEN_MAX = TBASE_MAX_ILEN; + const int32_t TEST_LEN_STEP = 10; + int32_t rawLen = 0; + uint8_t *pRaw = NULL; + + pRaw = (uint8_t *)taosMemoryCalloc(1, TEST_LEN_MAX); + ASSERT_NE(nullptr, pRaw); + + // 1. normal case + // string blend with char and '\0' + rawLen = TEST_LEN_MAX; + for (int32_t i = 0; i < TEST_LEN_MAX; i += 1000) { + checkBase58Codec(pRaw, rawLen, i); + pRaw[i] = i & 127; + } + + // string without '\0' + for (int32_t i = 0; i < TEST_LEN_MAX; ++i) { + pRaw[i] = i & 127; + } + checkBase58Codec(pRaw, TEST_LEN_MAX, 0); + for (int32_t i = 0; i < TEST_LEN_MAX; i += 1000) { + rawLen = i; + checkBase58Codec(pRaw, rawLen, i); + } + taosMemoryFreeClear(pRaw); + ASSERT_EQ(nullptr, pRaw); + + // 2. overflow case + char tmp[1]; + char *pEnc = base58_encode((const uint8_t *)tmp, TBASE_MAX_ILEN + 1); + ASSERT_EQ(nullptr, pEnc); + char *pDec = (char *)base58_decode((const char *)tmp, TBASE_MAX_OLEN + 1, NULL); + ASSERT_EQ(nullptr, pDec); + + taosMemoryFreeClear(pRaw); + ASSERT_EQ(nullptr, pRaw); +} \ No newline at end of file