opti:json parser

This commit is contained in:
wangmm0220 2022-12-15 11:25:03 +08:00
parent 097a2b928c
commit 1b2a047397
4 changed files with 102 additions and 103 deletions

View File

@ -19,12 +19,12 @@
#include <string.h>
#include "clientSml.h"
#define JUMP_JSON_SPACE(start,end) \
while(start < end){\
if(unlikely(isspace(*start) == 0))\
#define JUMP_JSON_SPACE(start) \
while(*(start)){\
if(unlikely(isspace(*(start)) == 0))\
break;\
else\
start++;\
(start)++;\
}
static SArray *smlJsonParseTags(char *start, char *end){
@ -213,12 +213,12 @@ static int32_t smlParseTagsFromJSON(SSmlHandle *info, SSmlLineInfo *elements) {
static char* smlJsonGetObj(char *payload){
int leftBracketCnt = 0;
while(*payload) {
if (*payload == '{') {
if (unlikely(*payload == '{')) {
leftBracketCnt++;
payload++;
continue;
}
if (*payload == '}') {
if (unlikely(*payload == '}')) {
leftBracketCnt--;
payload++;
if (leftBracketCnt == 0) {
@ -233,107 +233,113 @@ static char* smlJsonGetObj(char *payload){
return NULL;
}
static void smlJsonParseObj(char *start, char *end, SSmlLineInfo *element){
while(start < end){
if(start[0]== '"' && start[1] == 'm' && start[2] == 'e' && start[3] == 't'
&& start[4] == 'r' && start[5] == 'i' && start[6] == 'c' && start[7] == '"'){
static void smlJsonParseObj(char **start, SSmlLineInfo *element){
while(*(*start)){
if((*start)[0]== '"' && (*start)[1] == 'm' && (*start)[2] == 'e' && (*start)[3] == 't'
&& (*start)[4] == 'r' && (*start)[5] == 'i' && (*start)[6] == 'c' && (*start)[7] == '"'){
start += 8;
(*start) += 8;
bool isInQuote = false;
while(start < end){
if(!isInQuote && *start == '"'){
start++;
element->measure = start;
while(*(*start)){
if(unlikely(!isInQuote && *(*start) == '"')){
(*start)++;
element->measure = (*start);
isInQuote = true;
continue;
}
if(isInQuote && *start == '"'){
element->measureLen = start - element->measure;
start++;
if(unlikely(isInQuote && *(*start) == '"')){
element->measureLen = (*start) - element->measure;
break;
}
start++;
(*start)++;
}
}else if(start[0] == '"' && start[1] == 't' && start[2] == 'i' && start[3] == 'm'
&& start[4] == 'e' && start[5] == 's' && start[6] == 't'
&& start[7] == 'a' && start[8] == 'm' && start[9] == 'p' && start[10] == '"'){
}else if((*start)[0] == '"' && (*start)[1] == 't' && (*start)[2] == 'i' && (*start)[3] == 'm'
&& (*start)[4] == 'e' && (*start)[5] == 's' && (*start)[6] == 't'
&& (*start)[7] == 'a' && (*start)[8] == 'm' && (*start)[9] == 'p' && (*start)[10] == '"'){
start += 11;
(*start) += 11;
bool hasColon = false;
while(start < end){
if(!hasColon && *start == ':'){
start++;
JUMP_JSON_SPACE(start,end)
element->timestamp = start;
while(*(*start)){
if(unlikely(!hasColon && *(*start) == ':')){
(*start)++;
JUMP_JSON_SPACE((*start))
element->timestamp = (*start);
hasColon = true;
continue;
}
if(hasColon && (*start == ',' || *start == '}' || isspace(*start) != 0)){
element->timestampLen = start - element->timestamp;
start++;
if(unlikely(hasColon && (*(*start) == ',' || *(*start) == '}' || isspace(*(*start)) != 0))){
element->timestampLen = (*start) - element->timestamp;
break;
}
start++;
(*start)++;
}
}else if(start[0]== '"' && start[1] == 'v' && start[2] == 'a' && start[3] == 'l'
&& start[4] == 'u' && start[5] == 'e' && start[6] == '"'){
}else if((*start)[0]== '"' && (*start)[1] == 'v' && (*start)[2] == 'a' && (*start)[3] == 'l'
&& (*start)[4] == 'u' && (*start)[5] == 'e' && (*start)[6] == '"'){
start += 7;
(*start) += 7;
bool hasColon = false;
while(start < end){
if(!hasColon && *start == ':'){
start++;
JUMP_JSON_SPACE(start,end)
element->cols = start;
while(*(*start)){
if(unlikely(!hasColon && *(*start) == ':')){
(*start)++;
JUMP_JSON_SPACE((*start))
element->cols = (*start);
hasColon = true;
continue;
}
if(hasColon && (*start == ',' || *start == '}' || isspace(*start) != 0)){
element->colsLen = start - element->cols;
start++;
if(unlikely(hasColon && (*(*start) == ',' || *(*start) == '}' || isspace(*(*start)) != 0))){
element->colsLen = (*start) - element->cols;
break;
}
start++;
(*start)++;
}
}else if(start[0] == '"' && start[1] == 't' && start[2] == 'a' && start[3] == 'g'
&& start[4] == 's' && start[5] == '"'){
start += 6;
}else if((*start)[0] == '"' && (*start)[1] == 't' && (*start)[2] == 'a' && (*start)[3] == 'g'
&& (*start)[4] == 's' && (*start)[5] == '"'){
(*start) += 6;
while(start < end){
if(*start == ':'){
start++;
JUMP_JSON_SPACE(start,end)
element->tags = start;
element->tagsLen = smlJsonGetObj(start) - start;
while(*(*start)){
if(unlikely(*(*start) == ':')){
(*start)++;
JUMP_JSON_SPACE((*start))
element->tags = (*start);
char* tmp = smlJsonGetObj((*start));
if(tmp){
element->tagsLen = tmp - (*start);
*start = tmp;
}
break;
}
start++;
(*start)++;
}
}else{
start++;
}
if(*(*start) == '}'){
(*start)++;
break;
}
(*start)++;
}
}
static int32_t smlParseJSONString(SSmlHandle *info, char *start, char *end, SSmlLineInfo *elements) {
static int32_t smlParseJSONString(SSmlHandle *info, char **start, SSmlLineInfo *elements) {
int32_t ret = TSDB_CODE_SUCCESS;
smlJsonParseObj(start, end, elements);
smlJsonParseObj(start, elements);
if(**start == '\0') return TSDB_CODE_SUCCESS;
if(unlikely(elements->measure == NULL || elements->measureLen == 0)) {
smlBuildInvalidDataMsg(&info->msgBuf, "invalid measure data", start);
smlBuildInvalidDataMsg(&info->msgBuf, "invalid measure data", *start);
return TSDB_CODE_SML_INVALID_DATA;
}
if(unlikely(elements->tags == NULL || elements->tagsLen == 0)) {
smlBuildInvalidDataMsg(&info->msgBuf, "invalid tags data", start);
smlBuildInvalidDataMsg(&info->msgBuf, "invalid tags data", *start);
return TSDB_CODE_SML_INVALID_DATA;
}
if(unlikely(elements->cols == NULL || elements->colsLen == 0)) {
smlBuildInvalidDataMsg(&info->msgBuf, "invalid cols data", start);
smlBuildInvalidDataMsg(&info->msgBuf, "invalid cols data", *start);
return TSDB_CODE_SML_INVALID_DATA;
}
if(unlikely(elements->timestamp == NULL || elements->timestampLen == 0)) {
smlBuildInvalidDataMsg(&info->msgBuf, "invalid timestamp data", start);
smlBuildInvalidDataMsg(&info->msgBuf, "invalid timestamp data", *start);
return TSDB_CODE_SML_INVALID_DATA;
}
@ -404,14 +410,10 @@ int32_t smlParseJSON(SSmlHandle *info, char *payload) {
int cnt = 0;
char *dataPointStart = payload;
char *dataPointEnd = NULL;
while (1) {
dataPointEnd = smlJsonGetObj(dataPointStart);
if(dataPointEnd == NULL) break;
if(info->dataFormat) {
SSmlLineInfo element = {0};
ret = smlParseJSONString(info, dataPointStart, dataPointEnd, &element);
ret = smlParseJSONString(info, &dataPointStart, &element);
}else{
if(cnt >= payloadNum){
payloadNum = payloadNum << 1;
@ -420,13 +422,15 @@ int32_t smlParseJSON(SSmlHandle *info, char *payload) {
info->lines = (SSmlLineInfo*)tmp;
}
}
ret = smlParseJSONString(info, dataPointStart, dataPointEnd, info->lines + cnt);
ret = smlParseJSONString(info, &dataPointStart, info->lines + cnt);
}
if (unlikely(ret != TSDB_CODE_SUCCESS)) {
uError("SML:0x%" PRIx64 " Invalid JSON Payload", info->id);
return ret;
}
if(*dataPointStart == '\0') break;
if(unlikely(info->reRun)){
cnt = 0;
dataPointStart = payload;
@ -438,7 +442,6 @@ int32_t smlParseJSON(SSmlHandle *info, char *payload) {
continue;
}
cnt++;
dataPointStart = dataPointEnd;
}
info->lineNum = cnt;

View File

@ -513,32 +513,32 @@ static int32_t smlParseColKv(SSmlHandle *info, char **sql, char *sqlEnd,
}
if(isSameMeasure){
// if(cnt >= taosArrayGetSize(preLineKV)) {
// info->dataFormat = false;
// info->reRun = true;
// return TSDB_CODE_SUCCESS;
// }
// SSmlKv *preKV = (SSmlKv *)taosArrayGet(preLineKV, cnt);
// if(kv.type != preKV->type){
// info->dataFormat = false;
// info->reRun = true;
// return TSDB_CODE_SUCCESS;
// }
//
// if(unlikely(IS_VAR_DATA_TYPE(kv.type) && kv.length > preKV->length)){
// preKV->length = kv.length;
// SSmlSTableMeta *tableMeta = (SSmlSTableMeta *)nodeListGet(info->superTables, currElement->measure, currElement->measureLen, NULL);
// ASSERT(tableMeta != NULL);
//
// SSmlKv *oldKV = (SSmlKv *)taosArrayGet(tableMeta->cols, cnt);
// oldKV->length = kv.length;
// info->needModifySchema = true;
// }
// if(unlikely(!IS_SAME_KEY)){
// info->dataFormat = false;
// info->reRun = true;
// return TSDB_CODE_SUCCESS;
// }
if(cnt >= taosArrayGetSize(preLineKV)) {
info->dataFormat = false;
info->reRun = true;
return TSDB_CODE_SUCCESS;
}
SSmlKv *preKV = (SSmlKv *)taosArrayGet(preLineKV, cnt);
if(kv.type != preKV->type){
info->dataFormat = false;
info->reRun = true;
return TSDB_CODE_SUCCESS;
}
if(unlikely(IS_VAR_DATA_TYPE(kv.type) && kv.length > preKV->length)){
preKV->length = kv.length;
SSmlSTableMeta *tableMeta = (SSmlSTableMeta *)nodeListGet(info->superTables, currElement->measure, currElement->measureLen, NULL);
ASSERT(tableMeta != NULL);
SSmlKv *oldKV = (SSmlKv *)taosArrayGet(tableMeta->cols, cnt);
oldKV->length = kv.length;
info->needModifySchema = true;
}
if(unlikely(!IS_SAME_KEY)){
info->dataFormat = false;
info->reRun = true;
return TSDB_CODE_SUCCESS;
}
}else{
if(isSuperKVInit){
if(unlikely(cnt >= taosArrayGetSize(superKV))) {

View File

@ -497,20 +497,16 @@ TEST(testCase, smlParseTelnetLine_diff_json_type2_Test) {
};
for (int i = 0; i < sizeof(sql) / sizeof(sql[0]); i++) {
char *dataPointStart = (char *)sql[i];
char *dataPointEnd = NULL;
while (1) {
dataPointEnd = smlJsonGetObj(dataPointStart);
if (dataPointEnd == NULL) break;
SSmlLineInfo elements = {0};
smlJsonParseObj(dataPointStart, dataPointEnd, &elements);
smlJsonParseObj(&dataPointStart, &elements);
if(*dataPointStart == '\0') break;
SArray *tags = smlJsonParseTags(elements.tags, elements.tags + elements.tagsLen);
size_t num = taosArrayGetSize(tags);
ASSERT_EQ(num, 1);
taosArrayDestroy(tags);
dataPointStart = dataPointEnd;
}
}
smlDestroyInfo(info);

View File

@ -738,10 +738,10 @@ int sml_add_tag_col_Test() {
int smlProcess_18784_Test() {
TAOS *taos = taos_connect("localhost", "root", "taosdata", NULL, 0);
TAOS_RES *pRes = taos_query(taos, "create database if not exists sml_db schemaless 1");
TAOS_RES *pRes = taos_query(taos, "create database if not exists db_18784 schemaless 1");
taos_free_result(pRes);
pRes = taos_query(taos, "use sml_db");
pRes = taos_query(taos, "use db_18784");
taos_free_result(pRes);
const char *sql[] = {