opti:json parser

This commit is contained in:
wangmm0220 2022-12-15 11:25:03 +08:00
parent 097a2b928c
commit 1b2a047397
4 changed files with 102 additions and 103 deletions

View File

@ -19,12 +19,12 @@
#include <string.h> #include <string.h>
#include "clientSml.h" #include "clientSml.h"
#define JUMP_JSON_SPACE(start,end) \ #define JUMP_JSON_SPACE(start) \
while(start < end){\ while(*(start)){\
if(unlikely(isspace(*start) == 0))\ if(unlikely(isspace(*(start)) == 0))\
break;\ break;\
else\ else\
start++;\ (start)++;\
} }
static SArray *smlJsonParseTags(char *start, char *end){ static SArray *smlJsonParseTags(char *start, char *end){
@ -213,12 +213,12 @@ static int32_t smlParseTagsFromJSON(SSmlHandle *info, SSmlLineInfo *elements) {
static char* smlJsonGetObj(char *payload){ static char* smlJsonGetObj(char *payload){
int leftBracketCnt = 0; int leftBracketCnt = 0;
while(*payload) { while(*payload) {
if (*payload == '{') { if (unlikely(*payload == '{')) {
leftBracketCnt++; leftBracketCnt++;
payload++; payload++;
continue; continue;
} }
if (*payload == '}') { if (unlikely(*payload == '}')) {
leftBracketCnt--; leftBracketCnt--;
payload++; payload++;
if (leftBracketCnt == 0) { if (leftBracketCnt == 0) {
@ -233,107 +233,113 @@ static char* smlJsonGetObj(char *payload){
return NULL; return NULL;
} }
static void smlJsonParseObj(char *start, char *end, SSmlLineInfo *element){ static void smlJsonParseObj(char **start, SSmlLineInfo *element){
while(start < end){ while(*(*start)){
if(start[0]== '"' && start[1] == 'm' && start[2] == 'e' && start[3] == 't' if((*start)[0]== '"' && (*start)[1] == 'm' && (*start)[2] == 'e' && (*start)[3] == 't'
&& start[4] == 'r' && start[5] == 'i' && start[6] == 'c' && start[7] == '"'){ && (*start)[4] == 'r' && (*start)[5] == 'i' && (*start)[6] == 'c' && (*start)[7] == '"'){
start += 8; (*start) += 8;
bool isInQuote = false; bool isInQuote = false;
while(start < end){ while(*(*start)){
if(!isInQuote && *start == '"'){ if(unlikely(!isInQuote && *(*start) == '"')){
start++; (*start)++;
element->measure = start; element->measure = (*start);
isInQuote = true; isInQuote = true;
continue; continue;
} }
if(isInQuote && *start == '"'){ if(unlikely(isInQuote && *(*start) == '"')){
element->measureLen = start - element->measure; element->measureLen = (*start) - element->measure;
start++;
break; break;
} }
start++; (*start)++;
} }
}else if(start[0] == '"' && start[1] == 't' && start[2] == 'i' && start[3] == 'm' }else if((*start)[0] == '"' && (*start)[1] == 't' && (*start)[2] == 'i' && (*start)[3] == 'm'
&& start[4] == 'e' && start[5] == 's' && start[6] == 't' && (*start)[4] == 'e' && (*start)[5] == 's' && (*start)[6] == 't'
&& start[7] == 'a' && start[8] == 'm' && start[9] == 'p' && start[10] == '"'){ && (*start)[7] == 'a' && (*start)[8] == 'm' && (*start)[9] == 'p' && (*start)[10] == '"'){
start += 11; (*start) += 11;
bool hasColon = false; bool hasColon = false;
while(start < end){ while(*(*start)){
if(!hasColon && *start == ':'){ if(unlikely(!hasColon && *(*start) == ':')){
start++; (*start)++;
JUMP_JSON_SPACE(start,end) JUMP_JSON_SPACE((*start))
element->timestamp = start; element->timestamp = (*start);
hasColon = true; hasColon = true;
continue; continue;
} }
if(hasColon && (*start == ',' || *start == '}' || isspace(*start) != 0)){ if(unlikely(hasColon && (*(*start) == ',' || *(*start) == '}' || isspace(*(*start)) != 0))){
element->timestampLen = start - element->timestamp; element->timestampLen = (*start) - element->timestamp;
start++;
break; break;
} }
start++; (*start)++;
} }
}else if(start[0]== '"' && start[1] == 'v' && start[2] == 'a' && start[3] == 'l' }else if((*start)[0]== '"' && (*start)[1] == 'v' && (*start)[2] == 'a' && (*start)[3] == 'l'
&& start[4] == 'u' && start[5] == 'e' && start[6] == '"'){ && (*start)[4] == 'u' && (*start)[5] == 'e' && (*start)[6] == '"'){
start += 7; (*start) += 7;
bool hasColon = false; bool hasColon = false;
while(start < end){ while(*(*start)){
if(!hasColon && *start == ':'){ if(unlikely(!hasColon && *(*start) == ':')){
start++; (*start)++;
JUMP_JSON_SPACE(start,end) JUMP_JSON_SPACE((*start))
element->cols = start; element->cols = (*start);
hasColon = true; hasColon = true;
continue; continue;
} }
if(hasColon && (*start == ',' || *start == '}' || isspace(*start) != 0)){ if(unlikely(hasColon && (*(*start) == ',' || *(*start) == '}' || isspace(*(*start)) != 0))){
element->colsLen = start - element->cols; element->colsLen = (*start) - element->cols;
start++;
break; break;
} }
start++; (*start)++;
} }
}else if(start[0] == '"' && start[1] == 't' && start[2] == 'a' && start[3] == 'g' }else if((*start)[0] == '"' && (*start)[1] == 't' && (*start)[2] == 'a' && (*start)[3] == 'g'
&& start[4] == 's' && start[5] == '"'){ && (*start)[4] == 's' && (*start)[5] == '"'){
start += 6; (*start) += 6;
while(start < end){ while(*(*start)){
if(*start == ':'){ if(unlikely(*(*start) == ':')){
start++; (*start)++;
JUMP_JSON_SPACE(start,end) JUMP_JSON_SPACE((*start))
element->tags = start; element->tags = (*start);
element->tagsLen = smlJsonGetObj(start) - start; char* tmp = smlJsonGetObj((*start));
if(tmp){
element->tagsLen = tmp - (*start);
*start = tmp;
}
break; break;
} }
start++; (*start)++;
} }
}else{
start++;
} }
if(*(*start) == '}'){
(*start)++;
break;
}
(*start)++;
} }
} }
static int32_t smlParseJSONString(SSmlHandle *info, char *start, char *end, SSmlLineInfo *elements) { static int32_t smlParseJSONString(SSmlHandle *info, char **start, SSmlLineInfo *elements) {
int32_t ret = TSDB_CODE_SUCCESS; int32_t ret = TSDB_CODE_SUCCESS;
smlJsonParseObj(start, end, elements); smlJsonParseObj(start, elements);
if(**start == '\0') return TSDB_CODE_SUCCESS;
if(unlikely(elements->measure == NULL || elements->measureLen == 0)) { if(unlikely(elements->measure == NULL || elements->measureLen == 0)) {
smlBuildInvalidDataMsg(&info->msgBuf, "invalid measure data", start); smlBuildInvalidDataMsg(&info->msgBuf, "invalid measure data", *start);
return TSDB_CODE_SML_INVALID_DATA; return TSDB_CODE_SML_INVALID_DATA;
} }
if(unlikely(elements->tags == NULL || elements->tagsLen == 0)) { if(unlikely(elements->tags == NULL || elements->tagsLen == 0)) {
smlBuildInvalidDataMsg(&info->msgBuf, "invalid tags data", start); smlBuildInvalidDataMsg(&info->msgBuf, "invalid tags data", *start);
return TSDB_CODE_SML_INVALID_DATA; return TSDB_CODE_SML_INVALID_DATA;
} }
if(unlikely(elements->cols == NULL || elements->colsLen == 0)) { if(unlikely(elements->cols == NULL || elements->colsLen == 0)) {
smlBuildInvalidDataMsg(&info->msgBuf, "invalid cols data", start); smlBuildInvalidDataMsg(&info->msgBuf, "invalid cols data", *start);
return TSDB_CODE_SML_INVALID_DATA; return TSDB_CODE_SML_INVALID_DATA;
} }
if(unlikely(elements->timestamp == NULL || elements->timestampLen == 0)) { if(unlikely(elements->timestamp == NULL || elements->timestampLen == 0)) {
smlBuildInvalidDataMsg(&info->msgBuf, "invalid timestamp data", start); smlBuildInvalidDataMsg(&info->msgBuf, "invalid timestamp data", *start);
return TSDB_CODE_SML_INVALID_DATA; return TSDB_CODE_SML_INVALID_DATA;
} }
@ -404,14 +410,10 @@ int32_t smlParseJSON(SSmlHandle *info, char *payload) {
int cnt = 0; int cnt = 0;
char *dataPointStart = payload; char *dataPointStart = payload;
char *dataPointEnd = NULL;
while (1) { while (1) {
dataPointEnd = smlJsonGetObj(dataPointStart);
if(dataPointEnd == NULL) break;
if(info->dataFormat) { if(info->dataFormat) {
SSmlLineInfo element = {0}; SSmlLineInfo element = {0};
ret = smlParseJSONString(info, dataPointStart, dataPointEnd, &element); ret = smlParseJSONString(info, &dataPointStart, &element);
}else{ }else{
if(cnt >= payloadNum){ if(cnt >= payloadNum){
payloadNum = payloadNum << 1; payloadNum = payloadNum << 1;
@ -420,13 +422,15 @@ int32_t smlParseJSON(SSmlHandle *info, char *payload) {
info->lines = (SSmlLineInfo*)tmp; info->lines = (SSmlLineInfo*)tmp;
} }
} }
ret = smlParseJSONString(info, dataPointStart, dataPointEnd, info->lines + cnt); ret = smlParseJSONString(info, &dataPointStart, info->lines + cnt);
} }
if (unlikely(ret != TSDB_CODE_SUCCESS)) { if (unlikely(ret != TSDB_CODE_SUCCESS)) {
uError("SML:0x%" PRIx64 " Invalid JSON Payload", info->id); uError("SML:0x%" PRIx64 " Invalid JSON Payload", info->id);
return ret; return ret;
} }
if(*dataPointStart == '\0') break;
if(unlikely(info->reRun)){ if(unlikely(info->reRun)){
cnt = 0; cnt = 0;
dataPointStart = payload; dataPointStart = payload;
@ -438,7 +442,6 @@ int32_t smlParseJSON(SSmlHandle *info, char *payload) {
continue; continue;
} }
cnt++; cnt++;
dataPointStart = dataPointEnd;
} }
info->lineNum = cnt; info->lineNum = cnt;

View File

@ -513,32 +513,32 @@ static int32_t smlParseColKv(SSmlHandle *info, char **sql, char *sqlEnd,
} }
if(isSameMeasure){ if(isSameMeasure){
// if(cnt >= taosArrayGetSize(preLineKV)) { if(cnt >= taosArrayGetSize(preLineKV)) {
// info->dataFormat = false; info->dataFormat = false;
// info->reRun = true; info->reRun = true;
// return TSDB_CODE_SUCCESS; return TSDB_CODE_SUCCESS;
// } }
// SSmlKv *preKV = (SSmlKv *)taosArrayGet(preLineKV, cnt); SSmlKv *preKV = (SSmlKv *)taosArrayGet(preLineKV, cnt);
// if(kv.type != preKV->type){ if(kv.type != preKV->type){
// info->dataFormat = false; info->dataFormat = false;
// info->reRun = true; info->reRun = true;
// return TSDB_CODE_SUCCESS; return TSDB_CODE_SUCCESS;
// } }
//
// if(unlikely(IS_VAR_DATA_TYPE(kv.type) && kv.length > preKV->length)){ if(unlikely(IS_VAR_DATA_TYPE(kv.type) && kv.length > preKV->length)){
// preKV->length = kv.length; preKV->length = kv.length;
// SSmlSTableMeta *tableMeta = (SSmlSTableMeta *)nodeListGet(info->superTables, currElement->measure, currElement->measureLen, NULL); SSmlSTableMeta *tableMeta = (SSmlSTableMeta *)nodeListGet(info->superTables, currElement->measure, currElement->measureLen, NULL);
// ASSERT(tableMeta != NULL); ASSERT(tableMeta != NULL);
//
// SSmlKv *oldKV = (SSmlKv *)taosArrayGet(tableMeta->cols, cnt); SSmlKv *oldKV = (SSmlKv *)taosArrayGet(tableMeta->cols, cnt);
// oldKV->length = kv.length; oldKV->length = kv.length;
// info->needModifySchema = true; info->needModifySchema = true;
// } }
// if(unlikely(!IS_SAME_KEY)){ if(unlikely(!IS_SAME_KEY)){
// info->dataFormat = false; info->dataFormat = false;
// info->reRun = true; info->reRun = true;
// return TSDB_CODE_SUCCESS; return TSDB_CODE_SUCCESS;
// } }
}else{ }else{
if(isSuperKVInit){ if(isSuperKVInit){
if(unlikely(cnt >= taosArrayGetSize(superKV))) { if(unlikely(cnt >= taosArrayGetSize(superKV))) {

View File

@ -497,20 +497,16 @@ TEST(testCase, smlParseTelnetLine_diff_json_type2_Test) {
}; };
for (int i = 0; i < sizeof(sql) / sizeof(sql[0]); i++) { for (int i = 0; i < sizeof(sql) / sizeof(sql[0]); i++) {
char *dataPointStart = (char *)sql[i]; char *dataPointStart = (char *)sql[i];
char *dataPointEnd = NULL;
while (1) { while (1) {
dataPointEnd = smlJsonGetObj(dataPointStart);
if (dataPointEnd == NULL) break;
SSmlLineInfo elements = {0}; SSmlLineInfo elements = {0};
smlJsonParseObj(dataPointStart, dataPointEnd, &elements); smlJsonParseObj(&dataPointStart, &elements);
if(*dataPointStart == '\0') break;
SArray *tags = smlJsonParseTags(elements.tags, elements.tags + elements.tagsLen); SArray *tags = smlJsonParseTags(elements.tags, elements.tags + elements.tagsLen);
size_t num = taosArrayGetSize(tags); size_t num = taosArrayGetSize(tags);
ASSERT_EQ(num, 1); ASSERT_EQ(num, 1);
taosArrayDestroy(tags); taosArrayDestroy(tags);
dataPointStart = dataPointEnd;
} }
} }
smlDestroyInfo(info); smlDestroyInfo(info);

View File

@ -738,10 +738,10 @@ int sml_add_tag_col_Test() {
int smlProcess_18784_Test() { int smlProcess_18784_Test() {
TAOS *taos = taos_connect("localhost", "root", "taosdata", NULL, 0); TAOS *taos = taos_connect("localhost", "root", "taosdata", NULL, 0);
TAOS_RES *pRes = taos_query(taos, "create database if not exists sml_db schemaless 1"); TAOS_RES *pRes = taos_query(taos, "create database if not exists db_18784 schemaless 1");
taos_free_result(pRes); taos_free_result(pRes);
pRes = taos_query(taos, "use sml_db"); pRes = taos_query(taos, "use db_18784");
taos_free_result(pRes); taos_free_result(pRes);
const char *sql[] = { const char *sql[] = {