opti:json parser
This commit is contained in:
parent
097a2b928c
commit
1b2a047397
|
@ -19,12 +19,12 @@
|
||||||
#include <string.h>
|
#include <string.h>
|
||||||
#include "clientSml.h"
|
#include "clientSml.h"
|
||||||
|
|
||||||
#define JUMP_JSON_SPACE(start,end) \
|
#define JUMP_JSON_SPACE(start) \
|
||||||
while(start < end){\
|
while(*(start)){\
|
||||||
if(unlikely(isspace(*start) == 0))\
|
if(unlikely(isspace(*(start)) == 0))\
|
||||||
break;\
|
break;\
|
||||||
else\
|
else\
|
||||||
start++;\
|
(start)++;\
|
||||||
}
|
}
|
||||||
|
|
||||||
static SArray *smlJsonParseTags(char *start, char *end){
|
static SArray *smlJsonParseTags(char *start, char *end){
|
||||||
|
@ -213,12 +213,12 @@ static int32_t smlParseTagsFromJSON(SSmlHandle *info, SSmlLineInfo *elements) {
|
||||||
static char* smlJsonGetObj(char *payload){
|
static char* smlJsonGetObj(char *payload){
|
||||||
int leftBracketCnt = 0;
|
int leftBracketCnt = 0;
|
||||||
while(*payload) {
|
while(*payload) {
|
||||||
if (*payload == '{') {
|
if (unlikely(*payload == '{')) {
|
||||||
leftBracketCnt++;
|
leftBracketCnt++;
|
||||||
payload++;
|
payload++;
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
if (*payload == '}') {
|
if (unlikely(*payload == '}')) {
|
||||||
leftBracketCnt--;
|
leftBracketCnt--;
|
||||||
payload++;
|
payload++;
|
||||||
if (leftBracketCnt == 0) {
|
if (leftBracketCnt == 0) {
|
||||||
|
@ -233,107 +233,113 @@ static char* smlJsonGetObj(char *payload){
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
static void smlJsonParseObj(char *start, char *end, SSmlLineInfo *element){
|
static void smlJsonParseObj(char **start, SSmlLineInfo *element){
|
||||||
while(start < end){
|
while(*(*start)){
|
||||||
if(start[0]== '"' && start[1] == 'm' && start[2] == 'e' && start[3] == 't'
|
if((*start)[0]== '"' && (*start)[1] == 'm' && (*start)[2] == 'e' && (*start)[3] == 't'
|
||||||
&& start[4] == 'r' && start[5] == 'i' && start[6] == 'c' && start[7] == '"'){
|
&& (*start)[4] == 'r' && (*start)[5] == 'i' && (*start)[6] == 'c' && (*start)[7] == '"'){
|
||||||
|
|
||||||
start += 8;
|
(*start) += 8;
|
||||||
bool isInQuote = false;
|
bool isInQuote = false;
|
||||||
while(start < end){
|
while(*(*start)){
|
||||||
if(!isInQuote && *start == '"'){
|
if(unlikely(!isInQuote && *(*start) == '"')){
|
||||||
start++;
|
(*start)++;
|
||||||
element->measure = start;
|
element->measure = (*start);
|
||||||
isInQuote = true;
|
isInQuote = true;
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
if(isInQuote && *start == '"'){
|
if(unlikely(isInQuote && *(*start) == '"')){
|
||||||
element->measureLen = start - element->measure;
|
element->measureLen = (*start) - element->measure;
|
||||||
start++;
|
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
start++;
|
(*start)++;
|
||||||
}
|
}
|
||||||
}else if(start[0] == '"' && start[1] == 't' && start[2] == 'i' && start[3] == 'm'
|
}else if((*start)[0] == '"' && (*start)[1] == 't' && (*start)[2] == 'i' && (*start)[3] == 'm'
|
||||||
&& start[4] == 'e' && start[5] == 's' && start[6] == 't'
|
&& (*start)[4] == 'e' && (*start)[5] == 's' && (*start)[6] == 't'
|
||||||
&& start[7] == 'a' && start[8] == 'm' && start[9] == 'p' && start[10] == '"'){
|
&& (*start)[7] == 'a' && (*start)[8] == 'm' && (*start)[9] == 'p' && (*start)[10] == '"'){
|
||||||
|
|
||||||
start += 11;
|
(*start) += 11;
|
||||||
bool hasColon = false;
|
bool hasColon = false;
|
||||||
while(start < end){
|
while(*(*start)){
|
||||||
if(!hasColon && *start == ':'){
|
if(unlikely(!hasColon && *(*start) == ':')){
|
||||||
start++;
|
(*start)++;
|
||||||
JUMP_JSON_SPACE(start,end)
|
JUMP_JSON_SPACE((*start))
|
||||||
element->timestamp = start;
|
element->timestamp = (*start);
|
||||||
hasColon = true;
|
hasColon = true;
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
if(hasColon && (*start == ',' || *start == '}' || isspace(*start) != 0)){
|
if(unlikely(hasColon && (*(*start) == ',' || *(*start) == '}' || isspace(*(*start)) != 0))){
|
||||||
element->timestampLen = start - element->timestamp;
|
element->timestampLen = (*start) - element->timestamp;
|
||||||
start++;
|
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
start++;
|
(*start)++;
|
||||||
}
|
}
|
||||||
}else if(start[0]== '"' && start[1] == 'v' && start[2] == 'a' && start[3] == 'l'
|
}else if((*start)[0]== '"' && (*start)[1] == 'v' && (*start)[2] == 'a' && (*start)[3] == 'l'
|
||||||
&& start[4] == 'u' && start[5] == 'e' && start[6] == '"'){
|
&& (*start)[4] == 'u' && (*start)[5] == 'e' && (*start)[6] == '"'){
|
||||||
|
|
||||||
start += 7;
|
(*start) += 7;
|
||||||
|
|
||||||
bool hasColon = false;
|
bool hasColon = false;
|
||||||
while(start < end){
|
while(*(*start)){
|
||||||
if(!hasColon && *start == ':'){
|
if(unlikely(!hasColon && *(*start) == ':')){
|
||||||
start++;
|
(*start)++;
|
||||||
JUMP_JSON_SPACE(start,end)
|
JUMP_JSON_SPACE((*start))
|
||||||
element->cols = start;
|
element->cols = (*start);
|
||||||
hasColon = true;
|
hasColon = true;
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
if(hasColon && (*start == ',' || *start == '}' || isspace(*start) != 0)){
|
if(unlikely(hasColon && (*(*start) == ',' || *(*start) == '}' || isspace(*(*start)) != 0))){
|
||||||
element->colsLen = start - element->cols;
|
element->colsLen = (*start) - element->cols;
|
||||||
start++;
|
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
start++;
|
(*start)++;
|
||||||
}
|
}
|
||||||
}else if(start[0] == '"' && start[1] == 't' && start[2] == 'a' && start[3] == 'g'
|
}else if((*start)[0] == '"' && (*start)[1] == 't' && (*start)[2] == 'a' && (*start)[3] == 'g'
|
||||||
&& start[4] == 's' && start[5] == '"'){
|
&& (*start)[4] == 's' && (*start)[5] == '"'){
|
||||||
start += 6;
|
(*start) += 6;
|
||||||
|
|
||||||
while(start < end){
|
while(*(*start)){
|
||||||
if(*start == ':'){
|
if(unlikely(*(*start) == ':')){
|
||||||
start++;
|
(*start)++;
|
||||||
JUMP_JSON_SPACE(start,end)
|
JUMP_JSON_SPACE((*start))
|
||||||
element->tags = start;
|
element->tags = (*start);
|
||||||
element->tagsLen = smlJsonGetObj(start) - start;
|
char* tmp = smlJsonGetObj((*start));
|
||||||
|
if(tmp){
|
||||||
|
element->tagsLen = tmp - (*start);
|
||||||
|
*start = tmp;
|
||||||
|
}
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
start++;
|
(*start)++;
|
||||||
}
|
}
|
||||||
}else{
|
|
||||||
start++;
|
|
||||||
}
|
}
|
||||||
|
if(*(*start) == '}'){
|
||||||
|
(*start)++;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
(*start)++;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
static int32_t smlParseJSONString(SSmlHandle *info, char *start, char *end, SSmlLineInfo *elements) {
|
static int32_t smlParseJSONString(SSmlHandle *info, char **start, SSmlLineInfo *elements) {
|
||||||
int32_t ret = TSDB_CODE_SUCCESS;
|
int32_t ret = TSDB_CODE_SUCCESS;
|
||||||
|
|
||||||
smlJsonParseObj(start, end, elements);
|
smlJsonParseObj(start, elements);
|
||||||
|
if(**start == '\0') return TSDB_CODE_SUCCESS;
|
||||||
|
|
||||||
if(unlikely(elements->measure == NULL || elements->measureLen == 0)) {
|
if(unlikely(elements->measure == NULL || elements->measureLen == 0)) {
|
||||||
smlBuildInvalidDataMsg(&info->msgBuf, "invalid measure data", start);
|
smlBuildInvalidDataMsg(&info->msgBuf, "invalid measure data", *start);
|
||||||
return TSDB_CODE_SML_INVALID_DATA;
|
return TSDB_CODE_SML_INVALID_DATA;
|
||||||
}
|
}
|
||||||
if(unlikely(elements->tags == NULL || elements->tagsLen == 0)) {
|
if(unlikely(elements->tags == NULL || elements->tagsLen == 0)) {
|
||||||
smlBuildInvalidDataMsg(&info->msgBuf, "invalid tags data", start);
|
smlBuildInvalidDataMsg(&info->msgBuf, "invalid tags data", *start);
|
||||||
return TSDB_CODE_SML_INVALID_DATA;
|
return TSDB_CODE_SML_INVALID_DATA;
|
||||||
}
|
}
|
||||||
if(unlikely(elements->cols == NULL || elements->colsLen == 0)) {
|
if(unlikely(elements->cols == NULL || elements->colsLen == 0)) {
|
||||||
smlBuildInvalidDataMsg(&info->msgBuf, "invalid cols data", start);
|
smlBuildInvalidDataMsg(&info->msgBuf, "invalid cols data", *start);
|
||||||
return TSDB_CODE_SML_INVALID_DATA;
|
return TSDB_CODE_SML_INVALID_DATA;
|
||||||
}
|
}
|
||||||
if(unlikely(elements->timestamp == NULL || elements->timestampLen == 0)) {
|
if(unlikely(elements->timestamp == NULL || elements->timestampLen == 0)) {
|
||||||
smlBuildInvalidDataMsg(&info->msgBuf, "invalid timestamp data", start);
|
smlBuildInvalidDataMsg(&info->msgBuf, "invalid timestamp data", *start);
|
||||||
return TSDB_CODE_SML_INVALID_DATA;
|
return TSDB_CODE_SML_INVALID_DATA;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -404,14 +410,10 @@ int32_t smlParseJSON(SSmlHandle *info, char *payload) {
|
||||||
|
|
||||||
int cnt = 0;
|
int cnt = 0;
|
||||||
char *dataPointStart = payload;
|
char *dataPointStart = payload;
|
||||||
char *dataPointEnd = NULL;
|
|
||||||
while (1) {
|
while (1) {
|
||||||
dataPointEnd = smlJsonGetObj(dataPointStart);
|
|
||||||
if(dataPointEnd == NULL) break;
|
|
||||||
|
|
||||||
if(info->dataFormat) {
|
if(info->dataFormat) {
|
||||||
SSmlLineInfo element = {0};
|
SSmlLineInfo element = {0};
|
||||||
ret = smlParseJSONString(info, dataPointStart, dataPointEnd, &element);
|
ret = smlParseJSONString(info, &dataPointStart, &element);
|
||||||
}else{
|
}else{
|
||||||
if(cnt >= payloadNum){
|
if(cnt >= payloadNum){
|
||||||
payloadNum = payloadNum << 1;
|
payloadNum = payloadNum << 1;
|
||||||
|
@ -420,13 +422,15 @@ int32_t smlParseJSON(SSmlHandle *info, char *payload) {
|
||||||
info->lines = (SSmlLineInfo*)tmp;
|
info->lines = (SSmlLineInfo*)tmp;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
ret = smlParseJSONString(info, dataPointStart, dataPointEnd, info->lines + cnt);
|
ret = smlParseJSONString(info, &dataPointStart, info->lines + cnt);
|
||||||
}
|
}
|
||||||
if (unlikely(ret != TSDB_CODE_SUCCESS)) {
|
if (unlikely(ret != TSDB_CODE_SUCCESS)) {
|
||||||
uError("SML:0x%" PRIx64 " Invalid JSON Payload", info->id);
|
uError("SML:0x%" PRIx64 " Invalid JSON Payload", info->id);
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if(*dataPointStart == '\0') break;
|
||||||
|
|
||||||
if(unlikely(info->reRun)){
|
if(unlikely(info->reRun)){
|
||||||
cnt = 0;
|
cnt = 0;
|
||||||
dataPointStart = payload;
|
dataPointStart = payload;
|
||||||
|
@ -438,7 +442,6 @@ int32_t smlParseJSON(SSmlHandle *info, char *payload) {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
cnt++;
|
cnt++;
|
||||||
dataPointStart = dataPointEnd;
|
|
||||||
}
|
}
|
||||||
info->lineNum = cnt;
|
info->lineNum = cnt;
|
||||||
|
|
||||||
|
|
|
@ -513,32 +513,32 @@ static int32_t smlParseColKv(SSmlHandle *info, char **sql, char *sqlEnd,
|
||||||
}
|
}
|
||||||
|
|
||||||
if(isSameMeasure){
|
if(isSameMeasure){
|
||||||
// if(cnt >= taosArrayGetSize(preLineKV)) {
|
if(cnt >= taosArrayGetSize(preLineKV)) {
|
||||||
// info->dataFormat = false;
|
info->dataFormat = false;
|
||||||
// info->reRun = true;
|
info->reRun = true;
|
||||||
// return TSDB_CODE_SUCCESS;
|
return TSDB_CODE_SUCCESS;
|
||||||
// }
|
}
|
||||||
// SSmlKv *preKV = (SSmlKv *)taosArrayGet(preLineKV, cnt);
|
SSmlKv *preKV = (SSmlKv *)taosArrayGet(preLineKV, cnt);
|
||||||
// if(kv.type != preKV->type){
|
if(kv.type != preKV->type){
|
||||||
// info->dataFormat = false;
|
info->dataFormat = false;
|
||||||
// info->reRun = true;
|
info->reRun = true;
|
||||||
// return TSDB_CODE_SUCCESS;
|
return TSDB_CODE_SUCCESS;
|
||||||
// }
|
}
|
||||||
//
|
|
||||||
// if(unlikely(IS_VAR_DATA_TYPE(kv.type) && kv.length > preKV->length)){
|
if(unlikely(IS_VAR_DATA_TYPE(kv.type) && kv.length > preKV->length)){
|
||||||
// preKV->length = kv.length;
|
preKV->length = kv.length;
|
||||||
// SSmlSTableMeta *tableMeta = (SSmlSTableMeta *)nodeListGet(info->superTables, currElement->measure, currElement->measureLen, NULL);
|
SSmlSTableMeta *tableMeta = (SSmlSTableMeta *)nodeListGet(info->superTables, currElement->measure, currElement->measureLen, NULL);
|
||||||
// ASSERT(tableMeta != NULL);
|
ASSERT(tableMeta != NULL);
|
||||||
//
|
|
||||||
// SSmlKv *oldKV = (SSmlKv *)taosArrayGet(tableMeta->cols, cnt);
|
SSmlKv *oldKV = (SSmlKv *)taosArrayGet(tableMeta->cols, cnt);
|
||||||
// oldKV->length = kv.length;
|
oldKV->length = kv.length;
|
||||||
// info->needModifySchema = true;
|
info->needModifySchema = true;
|
||||||
// }
|
}
|
||||||
// if(unlikely(!IS_SAME_KEY)){
|
if(unlikely(!IS_SAME_KEY)){
|
||||||
// info->dataFormat = false;
|
info->dataFormat = false;
|
||||||
// info->reRun = true;
|
info->reRun = true;
|
||||||
// return TSDB_CODE_SUCCESS;
|
return TSDB_CODE_SUCCESS;
|
||||||
// }
|
}
|
||||||
}else{
|
}else{
|
||||||
if(isSuperKVInit){
|
if(isSuperKVInit){
|
||||||
if(unlikely(cnt >= taosArrayGetSize(superKV))) {
|
if(unlikely(cnt >= taosArrayGetSize(superKV))) {
|
||||||
|
|
|
@ -497,20 +497,16 @@ TEST(testCase, smlParseTelnetLine_diff_json_type2_Test) {
|
||||||
};
|
};
|
||||||
for (int i = 0; i < sizeof(sql) / sizeof(sql[0]); i++) {
|
for (int i = 0; i < sizeof(sql) / sizeof(sql[0]); i++) {
|
||||||
char *dataPointStart = (char *)sql[i];
|
char *dataPointStart = (char *)sql[i];
|
||||||
char *dataPointEnd = NULL;
|
|
||||||
while (1) {
|
while (1) {
|
||||||
dataPointEnd = smlJsonGetObj(dataPointStart);
|
|
||||||
if (dataPointEnd == NULL) break;
|
|
||||||
|
|
||||||
SSmlLineInfo elements = {0};
|
SSmlLineInfo elements = {0};
|
||||||
smlJsonParseObj(dataPointStart, dataPointEnd, &elements);
|
smlJsonParseObj(&dataPointStart, &elements);
|
||||||
|
if(*dataPointStart == '\0') break;
|
||||||
|
|
||||||
SArray *tags = smlJsonParseTags(elements.tags, elements.tags + elements.tagsLen);
|
SArray *tags = smlJsonParseTags(elements.tags, elements.tags + elements.tagsLen);
|
||||||
size_t num = taosArrayGetSize(tags);
|
size_t num = taosArrayGetSize(tags);
|
||||||
ASSERT_EQ(num, 1);
|
ASSERT_EQ(num, 1);
|
||||||
|
|
||||||
taosArrayDestroy(tags);
|
taosArrayDestroy(tags);
|
||||||
dataPointStart = dataPointEnd;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
smlDestroyInfo(info);
|
smlDestroyInfo(info);
|
||||||
|
|
|
@ -738,10 +738,10 @@ int sml_add_tag_col_Test() {
|
||||||
int smlProcess_18784_Test() {
|
int smlProcess_18784_Test() {
|
||||||
TAOS *taos = taos_connect("localhost", "root", "taosdata", NULL, 0);
|
TAOS *taos = taos_connect("localhost", "root", "taosdata", NULL, 0);
|
||||||
|
|
||||||
TAOS_RES *pRes = taos_query(taos, "create database if not exists sml_db schemaless 1");
|
TAOS_RES *pRes = taos_query(taos, "create database if not exists db_18784 schemaless 1");
|
||||||
taos_free_result(pRes);
|
taos_free_result(pRes);
|
||||||
|
|
||||||
pRes = taos_query(taos, "use sml_db");
|
pRes = taos_query(taos, "use db_18784");
|
||||||
taos_free_result(pRes);
|
taos_free_result(pRes);
|
||||||
|
|
||||||
const char *sql[] = {
|
const char *sql[] = {
|
||||||
|
|
Loading…
Reference in New Issue