diff --git a/include/libs/function/taosudf.h b/include/libs/function/taosudf.h new file mode 100644 index 0000000000..5e84b87a81 --- /dev/null +++ b/include/libs/function/taosudf.h @@ -0,0 +1,266 @@ +/* +* Copyright (c) 2019 TAOS Data, Inc. +* +* This program is free software: you can use, redistribute, and/or modify +* it under the terms of the GNU Affero General Public License, version 3 +* or later ("AGPL"), as published by the Free Software Foundation. +* +* This program is distributed in the hope that it will be useful, but WITHOUT +* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +* FITNESS FOR A PARTICULAR PURPOSE. +* +* You should have received a copy of the GNU Affero General Public License +* along with this program. If not, see . +*/ + +#ifndef TDENGINE_TAOSUDF_H +#define TDENGINE_TAOSUDF_H + +#include +#include +#include +#include + +#include +#include + +#ifdef __cplusplus +extern "C" { +#endif + +#if defined(__GNUC__) +#define FORCE_INLINE inline __attribute__((always_inline)) +#else +#define FORCE_INLINE +#endif +typedef struct SUdfColumnMeta { + int16_t type; /* TSDB_DATA_TYPE_* code; selects the fixLenCol or varLenCol layout */ + int32_t bytes; /* size in bytes of one cell of a fixed-length column */ + uint8_t precision; + uint8_t scale; +} SUdfColumnMeta; + +typedef struct SUdfColumnData { + int32_t numOfRows; /* highest row index written through udfColDataSet, plus one */ + int32_t rowsAlloc; /* row capacity reserved by udfColEnsureCapacity */ + union { + struct { + int32_t nullBitmapLen; /* size of nullBitmap in bytes */ + char *nullBitmap; /* one bit per row, most significant bit first; a set bit marks NULL */ + int32_t dataLen; /* size of data in bytes (rows allocated * colMeta.bytes) */ + char *data; /* cells stored back to back, colMeta.bytes each */ + } fixLenCol; + + struct { + int32_t varOffsetsLen; /* size of varOffsets in bytes */ + int32_t *varOffsets; /* per-row offset into payload; -1 marks NULL */ + int32_t payloadLen; /* bytes of payload currently in use */ + char *payload; /* variable-length cells appended in write order */ + int32_t payloadAllocLen; /* bytes allocated for payload */ + } varLenCol; + }; +} SUdfColumnData; + + +typedef struct SUdfColumn { + SUdfColumnMeta colMeta; + bool hasNull; /* set to true by udfColDataSetNull */ + SUdfColumnData colData; +} SUdfColumn; + +typedef struct SUdfDataBlock { + int32_t numOfRows; + int32_t numOfCols; + SUdfColumn **udfCols; +} SUdfDataBlock; + +typedef struct SUdfInterBuf { + int32_t bufLen; + char* buf; + int8_t numOfResult; //zero or one +} SUdfInterBuf; +typedef void *UdfcFuncHandle; + +// dynamic lib init and destroy +typedef int32_t (*TUdfInitFunc)();
+typedef int32_t (*TUdfDestroyFunc)(); + +#define UDF_MEMORY_EXP_GROWTH 1.5 /* growth factor applied when column buffers are enlarged */ +#define NBIT (3u) /* shift that maps a row index to its bitmap byte (8 rows per byte) */ +#define BitPos(_n) ((_n) & ((1 << NBIT) - 1)) +#define BMCharPos(bm_, r_) ((bm_)[(r_) >> NBIT]) +#define BitmapLen(_n) (((_n) + ((1 << NBIT) - 1)) >> NBIT) + +#define udfColDataIsNull_var(pColumn, row) (((pColumn)->colData.varLenCol.varOffsets)[(row)] == -1) +#define udfColDataIsNull_f(pColumn, row) ((BMCharPos((pColumn)->colData.fixLenCol.nullBitmap, row) & (1u << (7u - BitPos(row)))) == (1u << (7u - BitPos(row)))) +#define udfColDataSetNull_f(pColumn, row) \ + do { \ + BMCharPos((pColumn)->colData.fixLenCol.nullBitmap, row) |= (1u << (7u - BitPos(row))); \ + } while (0) + +#define udfColDataSetNotNull_f(pColumn, r_) \ + do { \ + BMCharPos((pColumn)->colData.fixLenCol.nullBitmap, r_) &= ~(1u << (7u - BitPos(r_))); \ + } while (0) +#define udfColDataSetNull_var(pColumn, row) (((pColumn)->colData.varLenCol.varOffsets)[(row)] = -1) + +typedef uint16_t VarDataLenT; // maxVarDataLen: 32767 +#define VARSTR_HEADER_SIZE sizeof(VarDataLenT) +#define varDataLen(v) (((VarDataLenT *)(v))[0]) +#define varDataVal(v) ((char *)(v) + VARSTR_HEADER_SIZE) +#define varDataTLen(v) (sizeof(VarDataLenT) + varDataLen(v)) +#define varDataCopy(dst, v) memcpy((dst), (void *)(v), varDataTLen(v)) +#define varDataLenByData(v) (*(VarDataLenT *)(((char *)(v)) - VARSTR_HEADER_SIZE)) +#define varDataSetLen(v, _len) (((VarDataLenT *)(v))[0] = (VarDataLenT)(_len)) +#define IS_VAR_DATA_TYPE(t) \ + (((t) == TSDB_DATA_TYPE_VARCHAR) || ((t) == TSDB_DATA_TYPE_NCHAR) || ((t) == TSDB_DATA_TYPE_JSON)) +#define IS_STR_DATA_TYPE(t) (((t) == TSDB_DATA_TYPE_VARCHAR) || ((t) == TSDB_DATA_TYPE_NCHAR)) + + +static FORCE_INLINE char* udfColDataGetData(const SUdfColumn* pColumn, int32_t row) { /* address of the cell at row: payload + varOffsets[row] for var-length types, data + bytes * row otherwise; does not check for NULL cells */ + if (IS_VAR_DATA_TYPE(pColumn->colMeta.type)) { + return pColumn->colData.varLenCol.payload + pColumn->colData.varLenCol.varOffsets[row]; + } else { + return pColumn->colData.fixLenCol.data + pColumn->colMeta.bytes * row; + } +} + +static
FORCE_INLINE bool udfColDataIsNull(const SUdfColumn* pColumn, int32_t row) { /* true when the cell at row is NULL; a JSON cell is also NULL when its first byte is TSDB_DATA_TYPE_NULL */ + if (IS_VAR_DATA_TYPE(pColumn->colMeta.type)) { + if (pColumn->colMeta.type == TSDB_DATA_TYPE_JSON) { + if (udfColDataIsNull_var(pColumn, row)) { + return true; + } + char* data = udfColDataGetData(pColumn, row); + return (*data == TSDB_DATA_TYPE_NULL); + } else { + return udfColDataIsNull_var(pColumn, row); + } + } else { + return udfColDataIsNull_f(pColumn, row); + } +} + +static FORCE_INLINE int32_t udfColEnsureCapacity(SUdfColumn* pColumn, int32_t newCapacity) { /* grows the per-row storage to hold at least newCapacity rows; returns TSDB_CODE_SUCCESS or TSDB_CODE_OUT_OF_MEMORY */ + SUdfColumnMeta *meta = &pColumn->colMeta; + SUdfColumnData *data = &pColumn->colData; + + if (newCapacity== 0 || newCapacity <= data->rowsAlloc) { + return TSDB_CODE_SUCCESS; + } + + int allocCapacity = (data->rowsAlloc< 8) ? 8 : data->rowsAlloc; + while (allocCapacity < newCapacity) { + allocCapacity *= UDF_MEMORY_EXP_GROWTH; + } + + if (IS_VAR_DATA_TYPE(meta->type)) { + char* tmp = (char*)realloc(data->varLenCol.varOffsets, sizeof(int32_t) * allocCapacity); + if (tmp == NULL) { + return TSDB_CODE_OUT_OF_MEMORY; + } + data->varLenCol.varOffsets = (int32_t*)tmp; + data->varLenCol.varOffsetsLen = sizeof(int32_t) * allocCapacity; + // for payload, add data in udfColDataSet + } else { + char* tmp = (char*)realloc(data->fixLenCol.nullBitmap, BitmapLen(allocCapacity)); + if (tmp == NULL) { + return TSDB_CODE_OUT_OF_MEMORY; + } + data->fixLenCol.nullBitmap = tmp; + data->fixLenCol.nullBitmapLen = BitmapLen(allocCapacity); + if (meta->type == TSDB_DATA_TYPE_NULL) { + return TSDB_CODE_SUCCESS; + } + + tmp = (char*)realloc(data->fixLenCol.data, allocCapacity* meta->bytes); + if (tmp == NULL) { + return TSDB_CODE_OUT_OF_MEMORY; + } + + data->fixLenCol.data = tmp; + data->fixLenCol.dataLen = allocCapacity* meta->bytes; + } + + data->rowsAlloc = allocCapacity; + + return TSDB_CODE_SUCCESS; +} + +static FORCE_INLINE void udfColDataSetNull(SUdfColumn* pColumn, int32_t row) { + if (udfColEnsureCapacity(pColumn, row+1) != TSDB_CODE_SUCCESS) { return; } /* allocation failed: leave the column untouched instead of writing past its buffers */ + if
(IS_VAR_DATA_TYPE(pColumn->colMeta.type)) { + udfColDataSetNull_var(pColumn, row); + } else { + udfColDataSetNull_f(pColumn, row); + } + pColumn->hasNull = true; +} + +static FORCE_INLINE int32_t udfColDataSet(SUdfColumn* pColumn, uint32_t currentRow, const char* pData, bool isNull) { /* stores pData (or NULL when isNull) at currentRow, growing storage as needed; returns 0 or TSDB_CODE_OUT_OF_MEMORY */ + SUdfColumnMeta *meta = &pColumn->colMeta; + SUdfColumnData *data = &pColumn->colData; + if (udfColEnsureCapacity(pColumn, currentRow+1) != TSDB_CODE_SUCCESS) { return TSDB_CODE_OUT_OF_MEMORY; } /* never touch the row buffers after a failed grow */ + bool isVarCol = IS_VAR_DATA_TYPE(meta->type); + if (isNull) { + udfColDataSetNull(pColumn, currentRow); + } else { + if (!isVarCol) { + udfColDataSetNotNull_f(pColumn, currentRow); + memcpy(data->fixLenCol.data + meta->bytes * currentRow, pData, meta->bytes); + } else { + int32_t dataLen = varDataTLen(pData); + if (meta->type == TSDB_DATA_TYPE_JSON) { /* JSON cell: one leading type byte, then a value whose length depends on that type */ + if (*pData == TSDB_DATA_TYPE_NULL) { + dataLen = 0; + } else if (*pData == TSDB_DATA_TYPE_NCHAR) { + dataLen = varDataTLen(pData + sizeof(char)); + } else if (*pData == TSDB_DATA_TYPE_BIGINT || *pData == TSDB_DATA_TYPE_DOUBLE) { + dataLen = sizeof(int64_t); + } else if (*pData == TSDB_DATA_TYPE_BOOL) { + dataLen = sizeof(char); + } + dataLen += sizeof(char); + } + + if (data->varLenCol.payloadAllocLen < data->varLenCol.payloadLen + dataLen) { + uint32_t newSize = data->varLenCol.payloadAllocLen; + if (newSize <= 1) { + newSize = 8; + } + + while (newSize < data->varLenCol.payloadLen + dataLen) { + newSize = newSize * UDF_MEMORY_EXP_GROWTH; + } + + char *buf = (char*)realloc(data->varLenCol.payload, newSize); + if (buf == NULL) { + return TSDB_CODE_OUT_OF_MEMORY; + } + + data->varLenCol.payload = buf; + data->varLenCol.payloadAllocLen = newSize; + } + + uint32_t len = data->varLenCol.payloadLen; + data->varLenCol.varOffsets[currentRow] = len; + + memcpy(data->varLenCol.payload + len, pData, dataLen); + data->varLenCol.payloadLen += dataLen; + } + } + data->numOfRows = (currentRow + 1 > data->numOfRows) ?
(currentRow+1) : data->numOfRows; + return 0; +} + +typedef int32_t (*TUdfScalarProcFunc)(SUdfDataBlock* block, SUdfColumn *resultCol); + +typedef int32_t (*TUdfAggStartFunc)(SUdfInterBuf *buf); +typedef int32_t (*TUdfAggProcessFunc)(SUdfDataBlock* block, SUdfInterBuf *interBuf, SUdfInterBuf *newInterBuf); +typedef int32_t (*TUdfAggFinishFunc)(SUdfInterBuf* buf, SUdfInterBuf *resultData); + +#ifdef __cplusplus +} +#endif + +#endif // TDENGINE_TAOSUDF_H diff --git a/include/libs/function/tudf.h b/include/libs/function/tudf.h index 28b1fbe8ce..b4c05fea87 100644 --- a/include/libs/function/tudf.h +++ b/include/libs/function/tudf.h @@ -16,6 +16,13 @@ #ifndef TDENGINE_TUDF_H #define TDENGINE_TUDF_H +#undef malloc +#define malloc malloc +#undef free +#define free free +#undef realloc +#define realloc realloc +#include #include #include @@ -36,56 +43,6 @@ extern "C" { #endif #define UDF_DNODE_ID_ENV_NAME "DNODE_ID" -//====================================================================================== -//begin API to taosd and qworker - -typedef struct SUdfColumnMeta { - int16_t type; - int32_t bytes; - uint8_t precision; - uint8_t scale; -} SUdfColumnMeta; - -typedef struct SUdfColumnData { - int32_t numOfRows; - int32_t rowsAlloc; - union { - struct { - int32_t nullBitmapLen; - char *nullBitmap; - int32_t dataLen; - char *data; - } fixLenCol; - - struct { - int32_t varOffsetsLen; - int32_t *varOffsets; - int32_t payloadLen; - char *payload; - int32_t payloadAllocLen; - } varLenCol; - }; -} SUdfColumnData; - - -typedef struct SUdfColumn { - SUdfColumnMeta colMeta; - bool hasNull; - SUdfColumnData colData; -} SUdfColumn; - -typedef struct SUdfDataBlock { - int32_t numOfRows; - int32_t numOfCols; - SUdfColumn **udfCols; -} SUdfDataBlock; - -typedef struct SUdfInterBuf { - int32_t bufLen; - char* buf; - int8_t numOfResult; //zero or one -} SUdfInterBuf; -typedef void *UdfcFuncHandle; //low level APIs /** @@ -127,177 +84,6 @@ int32_t udfAggFinalize(struct SqlFunctionCtx *pCtx,
SSDataBlock* pBlock); int32_t callUdfScalarFunc(char *udfName, SScalarParam *input, int32_t numOfCols, SScalarParam *output); int32_t cleanUpUdfs(); -// end API to taosd and qworker -//============================================================================================================================= -// begin API to UDF writer. - -// dynamic lib init and destroy -typedef int32_t (*TUdfInitFunc)(); -typedef int32_t (*TUdfDestroyFunc)(); - -//TODO: add API to check function arguments type, number etc. - -#define UDF_MEMORY_EXP_GROWTH 1.5 - -#define udfColDataIsNull_var(pColumn, row) ((pColumn->colData.varLenCol.varOffsets)[row] == -1) -#define udfColDataIsNull_f(pColumn, row) ((BMCharPos(pColumn->colData.fixLenCol.nullBitmap, row) & (1u << (7u - BitPos(row)))) == (1u << (7u - BitPos(row)))) -#define udfColDataSetNull_f(pColumn, row) \ - do { \ - BMCharPos(pColumn->colData.fixLenCol.nullBitmap, row) |= (1u << (7u - BitPos(row))); \ - } while (0) - -#define udfColDataSetNotNull_f(pColumn, r_) \ - do { \ - BMCharPos(pColumn->colData.fixLenCol.nullBitmap, r_) &= ~(1u << (7u - BitPos(r_))); \ - } while (0) -#define udfColDataSetNull_var(pColumn, row) ((pColumn->colData.varLenCol.varOffsets)[row] = -1) - - -static FORCE_INLINE char* udfColDataGetData(const SUdfColumn* pColumn, int32_t row) { - if (IS_VAR_DATA_TYPE(pColumn->colMeta.type)) { - return pColumn->colData.varLenCol.payload + pColumn->colData.varLenCol.varOffsets[row]; - } else { - return pColumn->colData.fixLenCol.data + pColumn->colMeta.bytes * row; - } -} - -static FORCE_INLINE bool udfColDataIsNull(const SUdfColumn* pColumn, int32_t row) { - if (IS_VAR_DATA_TYPE(pColumn->colMeta.type)) { - if (pColumn->colMeta.type == TSDB_DATA_TYPE_JSON) { - if (udfColDataIsNull_var(pColumn, row)) { - return true; - } - char* data = udfColDataGetData(pColumn, row); - return (*data == TSDB_DATA_TYPE_NULL); - } else { - return udfColDataIsNull_var(pColumn, row); - } - } else { - return udfColDataIsNull_f(pColumn, 
row); - } -} - -static FORCE_INLINE int32_t udfColEnsureCapacity(SUdfColumn* pColumn, int32_t newCapacity) { - SUdfColumnMeta *meta = &pColumn->colMeta; - SUdfColumnData *data = &pColumn->colData; - - if (newCapacity== 0 || newCapacity <= data->rowsAlloc) { - return TSDB_CODE_SUCCESS; - } - - int allocCapacity = TMAX(data->rowsAlloc, 8); - while (allocCapacity < newCapacity) { - allocCapacity *= UDF_MEMORY_EXP_GROWTH; - } - - if (IS_VAR_DATA_TYPE(meta->type)) { - char* tmp = taosMemoryRealloc(data->varLenCol.varOffsets, sizeof(int32_t) * allocCapacity); - if (tmp == NULL) { - return TSDB_CODE_OUT_OF_MEMORY; - } - data->varLenCol.varOffsets = (int32_t*)tmp; - data->varLenCol.varOffsetsLen = sizeof(int32_t) * allocCapacity; - // for payload, add data in udfColDataAppend - } else { - char* tmp = taosMemoryRealloc(data->fixLenCol.nullBitmap, BitmapLen(allocCapacity)); - if (tmp == NULL) { - return TSDB_CODE_OUT_OF_MEMORY; - } - data->fixLenCol.nullBitmap = tmp; - data->fixLenCol.nullBitmapLen = BitmapLen(allocCapacity); - if (meta->type == TSDB_DATA_TYPE_NULL) { - return TSDB_CODE_SUCCESS; - } - - tmp = taosMemoryRealloc(data->fixLenCol.data, allocCapacity* meta->bytes); - if (tmp == NULL) { - return TSDB_CODE_OUT_OF_MEMORY; - } - - data->fixLenCol.data = tmp; - data->fixLenCol.dataLen = allocCapacity* meta->bytes; - } - - data->rowsAlloc = allocCapacity; - - return TSDB_CODE_SUCCESS; -} - -static FORCE_INLINE void udfColDataSetNull(SUdfColumn* pColumn, int32_t row) { - udfColEnsureCapacity(pColumn, row+1); - if (IS_VAR_DATA_TYPE(pColumn->colMeta.type)) { - udfColDataSetNull_var(pColumn, row); - } else { - udfColDataSetNull_f(pColumn, row); - } - pColumn->hasNull = true; -} - -static FORCE_INLINE int32_t udfColDataSet(SUdfColumn* pColumn, uint32_t currentRow, const char* pData, bool isNull) { - SUdfColumnMeta *meta = &pColumn->colMeta; - SUdfColumnData *data = &pColumn->colData; - udfColEnsureCapacity(pColumn, currentRow+1); - bool isVarCol = 
IS_VAR_DATA_TYPE(meta->type); - if (isNull) { - udfColDataSetNull(pColumn, currentRow); - } else { - if (!isVarCol) { - colDataSetNotNull_f(data->fixLenCol.nullBitmap, currentRow); - memcpy(data->fixLenCol.data + meta->bytes * currentRow, pData, meta->bytes); - } else { - int32_t dataLen = varDataTLen(pData); - if (meta->type == TSDB_DATA_TYPE_JSON) { - if (*pData == TSDB_DATA_TYPE_NULL) { - dataLen = 0; - } else if (*pData == TSDB_DATA_TYPE_NCHAR) { - dataLen = varDataTLen(pData + CHAR_BYTES); - } else if (*pData == TSDB_DATA_TYPE_BIGINT || *pData == TSDB_DATA_TYPE_DOUBLE) { - dataLen = LONG_BYTES; - } else if (*pData == TSDB_DATA_TYPE_BOOL) { - dataLen = CHAR_BYTES; - } - dataLen += CHAR_BYTES; - } - - if (data->varLenCol.payloadAllocLen < data->varLenCol.payloadLen + dataLen) { - uint32_t newSize = data->varLenCol.payloadAllocLen; - if (newSize <= 1) { - newSize = 8; - } - - while (newSize < data->varLenCol.payloadLen + dataLen) { - newSize = newSize * UDF_MEMORY_EXP_GROWTH; - } - - char *buf = taosMemoryRealloc(data->varLenCol.payload, newSize); - if (buf == NULL) { - return TSDB_CODE_OUT_OF_MEMORY; - } - - data->varLenCol.payload = buf; - data->varLenCol.payloadAllocLen = newSize; - } - - uint32_t len = data->varLenCol.payloadLen; - data->varLenCol.varOffsets[currentRow] = len; - - memcpy(data->varLenCol.payload + len, pData, dataLen); - data->varLenCol.payloadLen += dataLen; - } - } - data->numOfRows = TMAX(currentRow + 1, data->numOfRows); - return 0; -} - -typedef int32_t (*TUdfScalarProcFunc)(SUdfDataBlock* block, SUdfColumn *resultCol); - -typedef int32_t (*TUdfAggStartFunc)(SUdfInterBuf *buf); -typedef int32_t (*TUdfAggProcessFunc)(SUdfDataBlock* block, SUdfInterBuf *interBuf, SUdfInterBuf *newInterBuf); -typedef int32_t (*TUdfAggFinishFunc)(SUdfInterBuf* buf, SUdfInterBuf *resultData); - - -// end API to UDF writer -//======================================================================================================================= #ifdef 
__cplusplus } diff --git a/source/libs/function/test/udf1.c b/source/libs/function/test/udf1.c index 9443d5cb94..dfbae357ef 100644 --- a/source/libs/function/test/udf1.c +++ b/source/libs/function/test/udf1.c @@ -2,12 +2,8 @@ #include #include -#include "tudf.h" +#include "taosudf.h" -#undef malloc -#define malloc malloc -#undef free -#define free free DLL_EXPORT int32_t udf1_init() { return 0; diff --git a/source/libs/function/test/udf2.c b/source/libs/function/test/udf2.c index 1c270f5cf4..975832209e 100644 --- a/source/libs/function/test/udf2.c +++ b/source/libs/function/test/udf2.c @@ -1,13 +1,9 @@ #include #include #include +#include -#include "tudf.h" - -#undef malloc -#define malloc malloc -#undef free -#define free free +#include "taosudf.h" DLL_EXPORT int32_t udf2_init() { return 0;