Merge branch '3.0' into feature/tq

This commit is contained in:
Liu Jicong 2022-01-06 18:23:15 +08:00
commit 5d3f439a5b
48 changed files with 1649 additions and 425 deletions

View File

@ -48,10 +48,11 @@ int32_t raftServerInit(SRaftServer *pRaftServer, const SRaftServerConfig *pConf,
int32_t raftServerStart(SRaftServer *pRaftServer);
void raftServerClose(SRaftServer *pRaftServer);
int initFsm(struct raft_fsm *fsm);
const char* state2String(unsigned short state);
void printRaftConfiguration(struct raft_configuration *c);
void printRaftState(struct raft *r);
#ifdef __cplusplus

View File

@ -1,7 +1,3 @@
add_executable(raftMain "")
target_sources(raftMain
PRIVATE
"raftMain.c"
"raftServer.c"
)
target_link_libraries(raftMain PUBLIC traft lz4 uv_a)
add_subdirectory(rebalance_leader)
add_subdirectory(make_cluster)

View File

@ -0,0 +1,11 @@
# Build the makeCluster demo executable from the sources in this directory.
add_executable(makeCluster "")
target_sources(makeCluster
PRIVATE
"raftMain.c"
"raftServer.c"
"config.c"
"console.c"
"simpleHash.c"
"util.c"
)
# Link against the traft, lz4 and uv_a libraries.
target_link_libraries(makeCluster PUBLIC traft lz4 uv_a)

View File

@ -0,0 +1,23 @@
#ifndef TRAFT_COMMON_H
#define TRAFT_COMMON_H
#ifdef __cplusplus
extern "C" {
#endif
#include <stdint.h>
#define COMMAND_LEN 512
#define MAX_CMD_COUNT 10
#define TOKEN_LEN 128
#define MAX_PEERS_COUNT 19
#define HOST_LEN 64
#define ADDRESS_LEN (HOST_LEN * 2)
#define BASE_DIR_LEN 128
#ifdef __cplusplus
}
#endif
#endif

View File

@ -0,0 +1,64 @@
#include "config.h"
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
/**
 * Formats a host and a port as "host:port".
 *
 * @param host NUL-terminated host string.
 * @param port Port number.
 * @param addr Output buffer; the result is truncated to fit.
 * @param len  Size of addr in bytes.
 */
void addrToString(const char *host, uint16_t port, char *addr, int len) {
  const size_t capacity = (size_t)len;
  (void)snprintf(addr, capacity, "%s:%hu", host, port);
}
/**
 * Splits an address of the form "host:port" into its host and port parts.
 *
 * Tokens are delimited by runs of ':' (leading ':' characters are skipped),
 * which matches the tokenization the previous strtok_r-based code performed.
 * The input is parsed in place, so no memory is allocated.
 *
 * @param addr NUL-terminated address, e.g. "127.0.0.1:10000". NULL is treated
 *             as an empty address.
 * @param host Output buffer for the host token. When len > 0 it is always
 *             NUL-terminated; it is the empty string if addr has no host token.
 *             The host is truncated to fit.
 * @param len  Size of host in bytes.
 * @param port Output. Receives the parsed port, or 0 when the port token is
 *             missing, is not a number, or does not fit in 16 bits.
 */
void parseAddr(const char *addr, char *host, int len, uint16_t *port) {
  static const char separator[] = ":";

  /* Give both outputs a defined value first: callers pass uninitialized
   * locals and read them back even when the address is malformed. */
  if (host != NULL && len > 0) {
    host[0] = '\0';
  }
  if (port != NULL) {
    *port = 0;
  }
  if (addr == NULL) {
    return;
  }

  /* First token: host. */
  const char *token = addr + strspn(addr, separator);
  size_t      tokenLen = strcspn(token, separator);
  if (tokenLen == 0) {
    return;
  }
  if (host != NULL && len > 0) {
    size_t room = (size_t)len - 1;
    size_t copyLen = tokenLen < room ? tokenLen : room;
    memcpy(host, token, copyLen);
    host[copyLen] = '\0';
  }

  /* Second token: port. strtoul stops at the next ':' by itself. */
  token += tokenLen;
  token += strspn(token, separator);
  if (*token == '\0' || port == NULL) {
    return;
  }
  char         *end = NULL;
  unsigned long value = strtoul(token, &end, 10);
  if (end != token && value <= UINT16_MAX) {
    *port = (uint16_t)value;
  }
}
/**
 * Fills *pConf from the command line.
 *
 * Recognized options: --addr/-a <host:port>, --dir/-d <path>, --help/-h.
 * pConf is zeroed before parsing.
 *
 * @return 0 on success, -2 when help was requested or an option is not
 *         recognized.
 */
int parseConf(int argc, char **argv, RaftServerConfig *pConf) {
  static struct option long_options[] = {{"help", no_argument, NULL, 'h'},
                                         {"addr", required_argument, NULL, 'a'},
                                         {"dir", required_argument, NULL, 'd'},
                                         {NULL, 0, NULL, 0}};
  int option_index = 0;
  int option_value;

  memset(pConf, 0, sizeof(*pConf));

  for (;;) {
    option_value = getopt_long(argc, argv, "ha:d:", long_options, &option_index);
    if (option_value == -1) {
      break;
    }

    if (option_value == 'a') {
      parseAddr(optarg, pConf->me.host, sizeof(pConf->me.host), &pConf->me.port);
    } else if (option_value == 'd') {
      snprintf(pConf->baseDir, sizeof(pConf->baseDir), "%s", optarg);
    } else {
      /* 'h' and every unrecognized option end parsing the same way. */
      return -2;
    }
  }

  return 0;
}
/** Prints the local address and the data directory of *pConf to stdout. */
void printConf(RaftServerConfig *pConf) {
  const char *host = pConf->me.host;
  const char *dataDir = pConf->baseDir;
  uint16_t    port = pConf->me.port;

  printf("\n---printConf: \n");
  printf("me: [%s:%hu] \n", host, port);
  printf("dataDir: [%s] \n\n", dataDir);
}

View File

@ -0,0 +1,31 @@
#ifndef TRAFT_CONFIG_H
#define TRAFT_CONFIG_H
#ifdef __cplusplus
extern "C" {
#endif
#include <getopt.h>
#include <stdint.h>
#include "common.h"
/* A network endpoint: host string plus port. */
typedef struct {
char host[HOST_LEN]; /* NUL-terminated host */
uint16_t port; /* port number */
} Addr;
/* Settings filled in by parseConf() from the command line. */
typedef struct {
Addr me; /* this server's own address (--addr) */
char baseDir[BASE_DIR_LEN]; /* data directory (--dir) */
} RaftServerConfig;
void addrToString(const char *host, uint16_t port, char *addr, int len);
void parseAddr(const char *addr, char *host, int len, uint16_t *port);
int parseConf(int argc, char **argv, RaftServerConfig *pConf);
void printConf(RaftServerConfig *pConf);
#ifdef __cplusplus
}
#endif
#endif

View File

@ -0,0 +1,202 @@
#include "console.h"
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "raftServer.h"
#include "util.h"
/** Prints the list of console commands, with examples, to stdout. */
void printHelp() {
  static const char *const helpLines[] = {
      "---------------------\n",
      "help: \n\n",
      "create a vgroup with 3 replicas: \n",
      "create vnode voter vid 100 peers 127.0.0.1:10001 127.0.0.1:10002 \n",
      "create vnode voter vid 100 peers 127.0.0.1:10000 127.0.0.1:10002 \n",
      "create vnode voter vid 100 peers 127.0.0.1:10000 127.0.0.1:10001 \n",
      "\n",
      "create a vgroup with only one replica: \n",
      "create vnode voter vid 200 \n",
      "\n",
      "add vnode into vgroup: \n",
      "create vnode spare vid 100 ---- run at 127.0.0.1:10003\n",
      "join vnode vid 100 addr 127.0.0.1:10003 ---- run at leader of vgroup 100\n",
      "\n",
      "run \n",
      "put 0 key value \n",
      "get 0 key \n",
      "---------------------\n",
  };

  /* None of the lines contains a conversion, so fputs emits exactly what
   * printf would. */
  for (size_t i = 0; i < sizeof(helpLines) / sizeof(helpLines[0]); ++i) {
    fputs(helpLines[i], stdout);
  }
}
void console(RaftServer *pRaftServer) {
while (1) {
int ret;
char cmdBuf[COMMAND_LEN];
memset(cmdBuf, 0, sizeof(cmdBuf));
printf("(console)> ");
char *retp = fgets(cmdBuf, COMMAND_LEN, stdin);
if (!retp) {
exit(-1);
}
int pos = strlen(cmdBuf);
if (cmdBuf[pos - 1] == '\n') {
cmdBuf[pos - 1] = '\0';
}
if (strncmp(cmdBuf, "", COMMAND_LEN) == 0) {
continue;
}
char cmds[MAX_CMD_COUNT][TOKEN_LEN];
memset(cmds, 0, sizeof(cmds));
int cmdCount;
cmdCount = splitString(cmdBuf, " ", cmds, MAX_CMD_COUNT);
if (strcmp(cmds[0], "create") == 0 && strcmp(cmds[1], "vnode") == 0 && strcmp(cmds[3], "vid") == 0) {
uint16_t vid;
sscanf(cmds[4], "%hu", &vid);
if (strcmp(cmds[2], "voter") == 0) {
char peers[MAX_PEERS_COUNT][ADDRESS_LEN];
memset(peers, 0, sizeof(peers));
uint32_t peersCount = 0;
if (strcmp(cmds[5], "peers") == 0 && cmdCount > 6) {
// create vnode voter vid 100 peers 127.0.0.1:10001 127.0.0.1:10002
for (int i = 6; i < cmdCount; ++i) {
snprintf(peers[i - 6], ADDRESS_LEN, "%s", cmds[i]);
peersCount++;
}
} else {
// create vnode voter vid 200
}
ret = addRaftVoter(pRaftServer, peers, peersCount, vid);
if (ret == 0) {
printf("create vnode voter ok \n");
} else {
printf("create vnode voter error \n");
}
} else if (strcmp(cmds[2], "spare") == 0) {
ret = addRaftSpare(pRaftServer, vid);
if (ret == 0) {
printf("create vnode spare ok \n");
} else {
printf("create vnode spare error \n");
}
} else {
printHelp();
}
} else if (strcmp(cmds[0], "join") == 0 && strcmp(cmds[1], "vnode") == 0 && strcmp(cmds[2], "vid") == 0 &&
strcmp(cmds[4], "addr") == 0 && cmdCount == 6) {
// join vnode vid 100 addr 127.0.0.1:10004
char * address = cmds[5];
char host[64];
uint16_t port;
parseAddr(address, host, sizeof(host), &port);
uint16_t vid;
sscanf(cmds[3], "%hu", &vid);
HashNode **pp = pRaftServer->raftInstances.find(&pRaftServer->raftInstances, vid);
if (*pp == NULL) {
printf("vid:%hu not found \n", vid);
break;
}
RaftInstance *pRaftInstance = (*pp)->data;
uint64_t destRaftId = encodeRaftId(host, port, vid);
struct raft_change *req = raft_malloc(sizeof(*req));
RaftJoin * pRaftJoin = raft_malloc(sizeof(*pRaftJoin));
pRaftJoin->r = &pRaftInstance->raft;
pRaftJoin->joinId = destRaftId;
req->data = pRaftJoin;
ret = raft_add(&pRaftInstance->raft, req, destRaftId, address, raftChangeAddCb);
if (ret != 0) {
printf("raft_add error: %s \n", raft_errmsg(&pRaftInstance->raft));
}
} else if (strcmp(cmds[0], "dropnode") == 0) {
} else if (strcmp(cmds[0], "state") == 0) {
pRaftServer->raftInstances.print(&pRaftServer->raftInstances);
for (size_t i = 0; i < pRaftServer->raftInstances.length; ++i) {
HashNode *ptr = pRaftServer->raftInstances.table[i];
if (ptr != NULL) {
while (ptr != NULL) {
RaftInstance *pRaftInstance = ptr->data;
printf("instance vid:%hu raftId:%llu \n", ptr->vgroupId, pRaftInstance->raftId);
printRaftState(&pRaftInstance->raft);
printf("\n");
ptr = ptr->next;
}
printf("\n");
}
}
} else if (strcmp(cmds[0], "put") == 0 && cmdCount == 4) {
uint16_t vid;
sscanf(cmds[1], "%hu", &vid);
char * key = cmds[2];
char * value = cmds[3];
HashNode **pp = pRaftServer->raftInstances.find(&pRaftServer->raftInstances, vid);
if (*pp == NULL) {
printf("vid:%hu not found \n", vid);
break;
}
RaftInstance *pRaftInstance = (*pp)->data;
char *raftValue = malloc(TOKEN_LEN * 2 + 3);
snprintf(raftValue, TOKEN_LEN * 2 + 3, "%s--%s", key, value);
putValue(&pRaftInstance->raft, raftValue);
free(raftValue);
} else if (strcmp(cmds[0], "run") == 0) {
pthread_t tidRaftServer;
pthread_create(&tidRaftServer, NULL, startServerFunc, pRaftServer);
} else if (strcmp(cmds[0], "get") == 0 && cmdCount == 3) {
uint16_t vid;
sscanf(cmds[1], "%hu", &vid);
char * key = cmds[2];
HashNode **pp = pRaftServer->raftInstances.find(&pRaftServer->raftInstances, vid);
if (*pp == NULL) {
printf("vid:%hu not found \n", vid);
break;
}
RaftInstance * pRaftInstance = (*pp)->data;
SimpleHash * pKV = pRaftInstance->fsm.data;
SimpleHashNode **ppNode = pKV->find_cstr(pKV, key);
if (*ppNode == NULL) {
printf("key:%s not found \n", key);
} else {
printf("find key:%s value:%s \n", key, (char *)((*ppNode)->data));
}
} else if (strcmp(cmds[0], "transfer") == 0) {
} else if (strcmp(cmds[0], "state") == 0) {
} else if (strcmp(cmds[0], "snapshot") == 0) {
} else if (strcmp(cmds[0], "exit") == 0) {
exit(0);
} else if (strcmp(cmds[0], "quit") == 0) {
exit(0);
} else if (strcmp(cmds[0], "help") == 0) {
printHelp();
} else {
printf("unknown command: %s \n", cmdBuf);
printHelp();
}
/*
printf("cmdBuf: [%s] \n", cmdBuf);
printf("cmdCount : %d \n", cmdCount);
for (int i = 0; i < MAX_CMD_COUNT; ++i) {
printf("cmd%d : %s \n", i, cmds[i]);
}
*/
}
}

View File

@ -0,0 +1,19 @@
#ifndef TRAFT_CONSOLE_H
#define TRAFT_CONSOLE_H
#ifdef __cplusplus
extern "C" {
#endif
#include <getopt.h>
#include <stdint.h>
#include "common.h"
#include "raftServer.h"
void console(RaftServer *pRaftServer);
#ifdef __cplusplus
}
#endif
#endif

View File

@ -0,0 +1,81 @@
#include <assert.h>
#include <getopt.h>
#include <pthread.h>
#include <raft.h>
#include <raft/uv.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <time.h>
#include <unistd.h>
#include "common.h"
#include "config.h"
#include "console.h"
#include "raftServer.h"
#include "simpleHash.h"
#include "util.h"
const char *exe_name;
void *startConsoleFunc(void *param) {
RaftServer *pRaftServer = (RaftServer *)param;
console(pRaftServer);
return NULL;
}
/** Prints a one-line invocation example, using the program name in exe_name. */
void usage() {
  fputs("\nusage: \n", stdout);
  printf("%s --addr=127.0.0.1:10000 --dir=./data \n", exe_name);
  fputs("\n", stdout);
}
RaftServerConfig gConfig;
RaftServer gRaftServer;
/**
 * Entry point: parses the command line, makes sure the data directory exists,
 * initializes the raft server and starts the console thread. The raft event
 * loop itself is started later from the console ("run" command).
 *
 * Exits with -1 on a usage error or when any setup step fails.
 */
int main(int argc, char **argv) {
  srand((unsigned)time(NULL));
  exe_name = argv[0];

  if (argc < 3) {
    usage();
    exit(-1);
  }

  int32_t ret = parseConf(argc, argv, &gConfig);
  if (ret != 0) {
    usage();
    exit(-1);
  }
  printConf(&gConfig);

  if (!dirOK(gConfig.baseDir)) {
    ret = mkdir(gConfig.baseDir, 0775);
    if (ret != 0) {
      fprintf(stderr, "mkdir error, %s \n", gConfig.baseDir);
      exit(-1);
    }
  }

  ret = raftServerInit(&gRaftServer, &gConfig);
  if (ret != 0) {
    fprintf(stderr, "raftServerInit error \n");
    exit(-1);
  }

  pthread_t tidConsole;
  ret = pthread_create(&tidConsole, NULL, startConsoleFunc, &gRaftServer);
  if (ret != 0) {
    /* Without the console there is no way to drive the server. */
    fprintf(stderr, "pthread_create error, %s \n", strerror(ret));
    exit(-1);
  }

  /* Keep the process alive; the console thread ends it through exit(). */
  while (1) {
    sleep(10);
  }

  return 0;
}

View File

@ -0,0 +1,286 @@
#include "raftServer.h"
#include <stdlib.h>
#include <unistd.h>
#include "common.h"
#include "simpleHash.h"
#include "util.h"
void *startServerFunc(void *param) {
RaftServer *pRaftServer = (RaftServer *)param;
int32_t r = raftServerStart(pRaftServer);
assert(r == 0);
return NULL;
}
void raftChangeAssignCb(struct raft_change *req, int status) {
struct raft *r = req->data;
if (status != 0) {
printf("raftChangeAssignCb error: %s \n", raft_errmsg(r));
} else {
printf("raftChangeAssignCb ok \n");
}
raft_free(req);
}
/**
 * Completion callback of a raft_add request.
 *
 * On success it issues a follow-up raft_assign that promotes the newly added
 * server (pRaftJoin->joinId) to RAFT_VOTER; raftChangeAssignCb finishes that
 * request. In every case the RaftJoin context and the add request are freed.
 *
 * @param req    The finished request; req->data is a RaftJoin allocated with raft_malloc.
 * @param status 0 on success, otherwise an error.
 */
void raftChangeAddCb(struct raft_change *req, int status) {
  RaftJoin *pRaftJoin = req->data;

  if (status != 0) {
    printf("raftChangeAddCb error: %s \n", raft_errmsg(pRaftJoin->r));
  } else {
    struct raft_change *req2 = raft_malloc(sizeof(*req2));
    if (req2 == NULL) {
      printf("raftChangeAddCb error: out of memory \n");
    } else {
      req2->data = pRaftJoin->r;
      int ret = raft_assign(pRaftJoin->r, req2, pRaftJoin->joinId, RAFT_VOTER, raftChangeAssignCb);
      if (ret != 0) {
        printf("raftChangeAddCb error: %s \n", raft_errmsg(pRaftJoin->r));
        /* The request was rejected, so raftChangeAssignCb will not run to
         * release it. */
        raft_free(req2);
      }
    }
  }

  raft_free(pRaftJoin);
  raft_free(req);
}
/**
 * State machine apply callback: stores one committed "key--value" entry in the
 * key/value table held in pFsm->data.
 *
 * @param pFsm   State machine; pFsm->data is the SimpleHash table.
 * @param buf    Committed entry; buf->base is read as a NUL-terminated string.
 * @param result Not written by this function.
 * @return Always 0.
 *
 * NOTE(review): buf->base is freed here. Confirm that this raft build hands
 * ownership of the entry buffer to the apply callback, otherwise this is a
 * double free.
 * NOTE(review): the separator string is used as a set of delimiter characters
 * by splitString, so a single '-' inside a key or value also splits.
 */
int fsmApplyCb(struct raft_fsm *pFsm, const struct raft_buffer *buf, void **result) {
  SimpleHash *pStore = pFsm->data;
  char       *payload = (char *)buf->base;
  char        kv[2][TOKEN_LEN];

  printf("fsm apply: [%s] \n", payload);

  int tokenCount = splitString(payload, "--", kv, 2);
  assert(tokenCount == 2);

  /* kv[0] is the key, kv[1] the value. */
  pStore->insert_cstr(pStore, kv[0], kv[1]);

  raft_free(buf->base);
  return 0;
}
void putValueCb(struct raft_apply *req, int status, void *result) {
struct raft *r = req->data;
if (status != 0) {
printf("putValueCb error: %s \n", raft_errmsg(r));
} else {
printf("putValueCb: %s \n", "ok");
}
raft_free(req);
}
/**
 * Submits value as a new command to the raft group.
 *
 * The string (including its NUL) is copied into a raft_malloc'ed buffer that is
 * handed to raft_apply; putValueCb reports the final outcome. If the request
 * cannot be submitted, the buffer and the request are released here.
 *
 * @param r     Raft instance to submit to.
 * @param value NUL-terminated command, "key--value" as produced by the console.
 */
void putValue(struct raft *r, const char *value) {
  struct raft_buffer buf;
  buf.len = strlen(value) + 1;
  buf.base = raft_malloc(buf.len);
  if (buf.base == NULL) {
    printf("put error: out of memory \n");
    return;
  }
  memcpy(buf.base, value, buf.len);

  struct raft_apply *req = raft_malloc(sizeof(*req));
  if (req == NULL) {
    printf("put error: out of memory \n");
    raft_free(buf.base);
    return;
  }
  req->data = r;

  int ret = raft_apply(r, req, &buf, 1, putValueCb);
  if (ret == 0) {
    printf("put %s \n", (char *)buf.base);
  } else {
    printf("put error: %s \n", raft_errmsg(r));
    /* The request was rejected, so putValueCb will not run; nothing else
     * releases these. */
    raft_free(buf.base);
    raft_free(req);
  }
}
/**
 * Maps a raft state code to its name.
 *
 * @param state One of RAFT_UNAVAILABLE, RAFT_FOLLOWER, RAFT_CANDIDATE, RAFT_LEADER.
 * @return The matching name, or "UNKNOWN_RAFT_STATE" for any other value.
 */
const char *state2String(unsigned short state) {
  const struct {
    unsigned short code;
    const char    *name;
  } names[] = {
      {RAFT_UNAVAILABLE, "RAFT_UNAVAILABLE"},
      {RAFT_FOLLOWER, "RAFT_FOLLOWER"},
      {RAFT_CANDIDATE, "RAFT_CANDIDATE"},
      {RAFT_LEADER, "RAFT_LEADER"},
  };

  for (size_t i = 0; i < sizeof(names) / sizeof(names[0]); ++i) {
    if (names[i].code == state) {
      return names[i].name;
    }
  }
  return "UNKNOWN_RAFT_STATE";
}
/** Prints one "id -- role -- address" line per server in the configuration. */
void printRaftConfiguration(struct raft_configuration *c) {
  printf("configuration: \n");

  int idx = 0;
  while (idx < c->n) {
    printf("%llu -- %d -- %s\n", c->servers[idx].id, c->servers[idx].role, c->servers[idx].address);
    ++idx;
  }
}
/**
 * Prints a snapshot of a raft instance to stdout: identity, term, vote, role,
 * log indexes and the current configuration.
 *
 * @param r Raft instance to describe.
 */
void printRaftState(struct raft *r) {
  printf("----Raft State: -----------\n");
  /* %p requires a void pointer argument. */
  printf("mem_addr: %p \n", (void *)r);
  printf("my_id: %llu \n", r->id);
  printf("address: %s \n", r->address);
  printf("current_term: %llu \n", r->current_term);
  printf("voted_for: %llu \n", r->voted_for);
  printf("role: %s \n", state2String(r->state));
  printf("commit_index: %llu \n", r->commit_index);
  printf("last_applied: %llu \n", r->last_applied);
  printf("last_stored: %llu \n", r->last_stored);
  printf("configuration_index: %llu \n", r->configuration_index);
  printf("configuration_uncommitted_index: %llu \n", r->configuration_uncommitted_index);
  printRaftConfiguration(&r->configuration);
  printf("----------------------------\n");
}
int32_t addRaftVoter(RaftServer *pRaftServer, char peers[][ADDRESS_LEN], uint32_t peersCount, uint16_t vid) {
int ret;
RaftInstance *pRaftInstance = malloc(sizeof(*pRaftInstance));
assert(pRaftInstance != NULL);
// init raftId
pRaftInstance->raftId = encodeRaftId(pRaftServer->host, pRaftServer->port, vid);
// init dir
snprintf(pRaftInstance->dir, sizeof(pRaftInstance->dir), "%s/%s_%hu_%hu_%llu", pRaftServer->baseDir,
pRaftServer->host, pRaftServer->port, vid, pRaftInstance->raftId);
if (!dirOK(pRaftInstance->dir)) {
ret = mkdir(pRaftInstance->dir, 0775);
if (ret != 0) {
fprintf(stderr, "mkdir error, %s \n", pRaftInstance->dir);
assert(0);
}
}
// init fsm
pRaftInstance->fsm.data = newSimpleHash(2);
pRaftInstance->fsm.apply = fsmApplyCb;
// init io
ret = raft_uv_init(&pRaftInstance->io, &pRaftServer->loop, pRaftInstance->dir, &pRaftServer->transport);
if (ret != 0) {
fprintf(stderr, "raft_uv_init error, %s \n", raft_errmsg(&pRaftInstance->raft));
assert(0);
}
// init raft
ret = raft_init(&pRaftInstance->raft, &pRaftInstance->io, &pRaftInstance->fsm, pRaftInstance->raftId,
pRaftServer->address);
if (ret != 0) {
fprintf(stderr, "raft_init error, %s \n", raft_errmsg(&pRaftInstance->raft));
assert(0);
}
// init raft_configuration
struct raft_configuration conf;
raft_configuration_init(&conf);
raft_configuration_add(&conf, pRaftInstance->raftId, pRaftServer->address, RAFT_VOTER);
for (int i = 0; i < peersCount; ++i) {
char * peerAddress = peers[i];
char host[64];
uint16_t port;
parseAddr(peerAddress, host, sizeof(host), &port);
uint64_t raftId = encodeRaftId(host, port, vid);
raft_configuration_add(&conf, raftId, peers[i], RAFT_VOTER);
}
raft_bootstrap(&pRaftInstance->raft, &conf);
// start raft
ret = raft_start(&pRaftInstance->raft);
if (ret != 0) {
fprintf(stderr, "raft_start error, %s \n", raft_errmsg(&pRaftInstance->raft));
assert(0);
}
// add raft instance into raft server
pRaftServer->raftInstances.insert(&pRaftServer->raftInstances, vid, pRaftInstance);
return 0;
}
int32_t addRaftSpare(RaftServer *pRaftServer, uint16_t vid) {
int ret;
RaftInstance *pRaftInstance = malloc(sizeof(*pRaftInstance));
assert(pRaftInstance != NULL);
// init raftId
pRaftInstance->raftId = encodeRaftId(pRaftServer->host, pRaftServer->port, vid);
// init dir
snprintf(pRaftInstance->dir, sizeof(pRaftInstance->dir), "%s/%s_%hu_%hu_%llu", pRaftServer->baseDir,
pRaftServer->host, pRaftServer->port, vid, pRaftInstance->raftId);
ret = mkdir(pRaftInstance->dir, 0775);
if (ret != 0) {
fprintf(stderr, "mkdir error, %s \n", pRaftInstance->dir);
assert(0);
}
// init fsm
pRaftInstance->fsm.data = newSimpleHash(2);
pRaftInstance->fsm.apply = fsmApplyCb;
// init io
ret = raft_uv_init(&pRaftInstance->io, &pRaftServer->loop, pRaftInstance->dir, &pRaftServer->transport);
if (ret != 0) {
fprintf(stderr, "raft_uv_init error, %s \n", raft_errmsg(&pRaftInstance->raft));
assert(0);
}
// init raft
ret = raft_init(&pRaftInstance->raft, &pRaftInstance->io, &pRaftInstance->fsm, pRaftInstance->raftId,
pRaftServer->address);
if (ret != 0) {
fprintf(stderr, "raft_init error, %s \n", raft_errmsg(&pRaftInstance->raft));
assert(0);
}
// init raft_configuration
struct raft_configuration conf;
raft_configuration_init(&conf);
raft_configuration_add(&conf, pRaftInstance->raftId, pRaftServer->address, RAFT_SPARE);
raft_bootstrap(&pRaftInstance->raft, &conf);
// start raft
ret = raft_start(&pRaftInstance->raft);
if (ret != 0) {
fprintf(stderr, "raft_start error, %s \n", raft_errmsg(&pRaftInstance->raft));
assert(0);
}
// add raft instance into raft server
pRaftServer->raftInstances.insert(&pRaftServer->raftInstances, vid, pRaftInstance);
return 0;
}
/**
 * Initializes a RaftServer from its configuration: copies host, port and base
 * directory, builds the "host:port" address, then sets up the UV loop, the TCP
 * transport and the (initially empty) raft instance table.
 *
 * @return 0. Loop or transport setup failures abort through assert.
 */
int32_t raftServerInit(RaftServer *pRaftServer, const RaftServerConfig *pConf) {
  int rc;

  // identity: host, port, base directory, then the derived address
  snprintf(pRaftServer->host, sizeof(pRaftServer->host), "%s", pConf->me.host);
  pRaftServer->port = pConf->me.port;
  snprintf(pRaftServer->baseDir, sizeof(pRaftServer->baseDir), "%s", pConf->baseDir);
  snprintf(pRaftServer->address, sizeof(pRaftServer->address), "%s:%u", pRaftServer->host, pRaftServer->port);

  // event loop
  rc = uv_loop_init(&pRaftServer->loop);
  if (rc != 0) {
    fprintf(stderr, "uv_loop_init error: %s \n", uv_strerror(rc));
    assert(0);
  }

  // network transport
  rc = raft_uv_tcp_init(&pRaftServer->transport, &pRaftServer->loop);
  if (rc != 0) {
    fprintf(stderr, "raft_uv_tcp_init: error %d \n", rc);
    assert(0);
  }

  // raft instance container
  initIdHash(&pRaftServer->raftInstances, 2);

  return 0;
}
/**
 * Runs the server's UV loop in UV_RUN_DEFAULT mode; returns once uv_run does.
 *
 * @return Always 0.
 */
int32_t raftServerStart(RaftServer *pRaftServer) {
  struct uv_loop_s *pLoop = &pRaftServer->loop;
  uv_run(pLoop, UV_RUN_DEFAULT);
  return 0;
}
/** Placeholder: stopping the server is not implemented; this does nothing. */
void raftServerStop(RaftServer *pRaftServer) {
  (void)pRaftServer;
}

View File

@ -0,0 +1,66 @@
#ifndef TDENGINE_RAFT_SERVER_H
#define TDENGINE_RAFT_SERVER_H
#ifdef __cplusplus
extern "C" {
#endif
#include <arpa/inet.h>
#include <assert.h>
#include <netinet/in.h>
#include <string.h>
#include "common.h"
#include "config.h"
#include "raft.h"
#include "raft/uv.h"
#include "simpleHash.h"
/* Context carried by a raft_add request (in req->data) to raftChangeAddCb. */
typedef struct RaftJoin {
struct raft *r; /* raft instance the request was issued on */
raft_id joinId; /* id of the server being added */
} RaftJoin;
/* One raft group member hosted by this server (one per vgroup id). */
typedef struct {
raft_id raftId; /* id built by encodeRaftId() from host, port and vgroup id */
char dir[BASE_DIR_LEN * 2]; /* data directory of this instance, below the server's baseDir */
struct raft_fsm fsm; /* state machine; fsm.data is a SimpleHash key/value table */
struct raft_io io; /* I/O backend, set up by raft_uv_init() */
struct raft raft; /* the raft object itself */
} RaftInstance;
/* Process-wide server state shared by all raft instances. */
typedef struct {
char host[HOST_LEN]; /* Host this server listens on */
uint16_t port; /* Port this server listens on */
char address[ADDRESS_LEN]; /* Server address, "host:port" */
char baseDir[BASE_DIR_LEN]; /* Base data directory; each instance gets a sub-directory */
struct uv_loop_s loop; /* UV loop */
struct raft_uv_transport transport; /* UV I/O backend transport */
IdHash raftInstances; /* All raft instances of this server, keyed by vgroup id. traft uses IdHash internally to
manage multiple vgroups, so IdHash is used here as well. */
} RaftServer;
void * startServerFunc(void *param);
int32_t addRaftVoter(RaftServer *pRaftServer, char peers[][ADDRESS_LEN], uint32_t peersCount, uint16_t vid);
int32_t addRaftSpare(RaftServer *pRaftServer, uint16_t vid);
int32_t raftServerInit(RaftServer *pRaftServer, const RaftServerConfig *pConf);
int32_t raftServerStart(RaftServer *pRaftServer);
void raftServerStop(RaftServer *pRaftServer);
int fsmApplyCb(struct raft_fsm *pFsm, const struct raft_buffer *buf, void **result);
void putValueCb(struct raft_apply *req, int status, void *result);
void putValue(struct raft *r, const char *value);
void raftChangeAddCb(struct raft_change *req, int status);
const char *state2String(unsigned short state);
void printRaftConfiguration(struct raft_configuration *c);
void printRaftState(struct raft *r);
#ifdef __cplusplus
}
#endif
#endif // TDENGINE_RAFT_SERVER_H

View File

@ -0,0 +1,218 @@
#include "simpleHash.h"
/**
 * Hashes n bytes of data into 32 bits (similar to murmur hash).
 *
 * The input is consumed four bytes at a time; the remaining one to three bytes
 * are folded in by a final mixing step.
 *
 * @param data Bytes to hash.
 * @param n    Number of bytes.
 * @param seed Initial value mixed into the hash.
 * @return The hash; equals seed when n is 0.
 */
uint32_t mySimpleHash(const char* data, size_t n, uint32_t seed) {
  const uint32_t mult = 0xc6a4a793;
  const uint32_t tailShift = 24;
  uint32_t       hash = seed ^ (n * mult);

  // Whole four-byte words
  size_t remaining = n;
  for (; remaining >= 4; remaining -= 4, data += 4) {
    uint32_t word;
    memcpy(&word, data, sizeof(word));
    hash += word;
    hash *= mult;
    hash ^= (hash >> 16);
  }

  // Trailing one to three bytes, highest offset first
  if (remaining > 0) {
    if (remaining == 3) {
      hash += (unsigned char)(data[2]) << 16;
    }
    if (remaining >= 2) {
      hash += (unsigned char)(data[1]) << 8;
    }
    hash += (unsigned char)(data[0]);
    hash *= mult;
    hash ^= (hash >> tailShift);
  }

  return hash;
}
/**
 * Inserts a C-string key/value pair; both are stored with their terminating NUL.
 *
 * @return The result of insertSimpleHash.
 */
int insertCStrSimpleHash(struct SimpleHash* ths, char* key, char* data) {
  size_t keyBytes = strlen(key) + 1;
  size_t dataBytes = strlen(data) + 1;
  return insertSimpleHash(ths, key, keyBytes, data, dataBytes);
}
/**
 * Removes the entry stored under a C-string key (key length includes the NUL).
 *
 * @return The result of removeSimpleHash.
 */
int removeCStrSimpleHash(struct SimpleHash* ths, char* key) {
  size_t keyBytes = strlen(key) + 1;
  return removeSimpleHash(ths, key, keyBytes);
}
/**
 * Looks up a C-string key (key length includes the NUL).
 *
 * @return The result of findSimpleHash.
 */
SimpleHashNode** findCStrSimpleHash(struct SimpleHash* ths, char* key) {
  size_t keyBytes = strlen(key) + 1;
  return findSimpleHash(ths, key, keyBytes);
}
/**
 * Inserts a copy of (key, data) into the table.
 *
 * The new node is linked at the head of its bucket. When the number of
 * elements exceeds twice the number of buckets, the table is resized.
 *
 * @param ths     Table to insert into.
 * @param key     Key bytes; copied.
 * @param keyLen  Number of key bytes.
 * @param data    Value bytes; copied.
 * @param dataLen Number of value bytes.
 * @return 0 on success; -1 if the key is already present or memory could not
 *         be allocated (the table is left unchanged in both cases).
 */
int insertSimpleHash(struct SimpleHash* ths, char* key, size_t keyLen, char* data, size_t dataLen) {
  SimpleHashNode** pp = ths->find(ths, key, keyLen);
  if (*pp != NULL) {
    fprintf(stderr, "insertSimpleHash, already has key \n");
    return -1;
  }

  /* Allocate everything first so a failure leaves the table untouched.
   * A zero length still gets one byte: malloc(0) may return NULL. */
  SimpleHashNode* node = malloc(sizeof(*node));
  void*           keyCopy = malloc(keyLen > 0 ? keyLen : 1);
  void*           dataCopy = malloc(dataLen > 0 ? dataLen : 1);
  if (node == NULL || keyCopy == NULL || dataCopy == NULL) {
    fprintf(stderr, "insertSimpleHash, out of memory \n");
    free(dataCopy);
    free(keyCopy);
    free(node);
    return -1;
  }

  memcpy(keyCopy, key, keyLen);
  memcpy(dataCopy, data, dataLen);
  node->hashCode = ths->hashFunc(key, keyLen);
  node->key = keyCopy;
  node->keyLen = keyLen;
  node->data = dataCopy;
  node->dataLen = dataLen;

  /* Push at the head of the bucket (works for an empty bucket too). */
  size_t index = node->hashCode & (ths->length - 1);
  node->next = ths->table[index];
  ths->table[index] = node;

  ths->elems++;
  if (ths->elems > 2 * ths->length) {
    ths->resize(ths);
  }

  return 0;
}
/**
 * Removes the entry stored under key and frees its node, key and data.
 *
 * @return 0 on success, -1 if the key is not present.
 */
int removeSimpleHash(struct SimpleHash* ths, char* key, size_t keyLen) {
  SimpleHashNode** link = ths->find(ths, key, keyLen);
  SimpleHashNode*  victim = *link;

  if (victim == NULL) {
    fprintf(stderr, "removeSimpleHash, key not exist \n");
    return -1;
  }

  /* Unlink by redirecting the slot that pointed at the node. */
  *link = victim->next;

  free(victim->key);
  free(victim->data);
  free(victim);
  ths->elems -= 1;

  return 0;
}
SimpleHashNode** findSimpleHash(struct SimpleHash* ths, char* key, size_t keyLen) {
uint32_t hashCode = ths->hashFunc(key, keyLen);
// size_t index = hashCode % ths->length;
size_t index = hashCode & (ths->length - 1);
// printf("findSimpleHash: %s %ld %u \n", key, keyLen, hashCode);
SimpleHashNode** pp = &(ths->table[index]);
while (*pp != NULL && ((*pp)->hashCode != hashCode || memcmp(key, (*pp)->key, keyLen) != 0)) {
pp = &((*pp)->next);
}
return pp;
}
/**
 * Dumps the table to stdout, one line per non-empty bucket. Keys and values
 * are printed as C strings, so this is only meaningful for entries stored
 * through the *_cstr functions.
 */
void printCStrSimpleHash(struct SimpleHash* ths) {
  /* elems/length are uint32_t and keyLen/dataLen are size_t: use the
   * matching conversions (%u, %zu). */
  printf("\n--- printCStrSimpleHash: elems:%u length:%u \n", ths->elems, ths->length);
  for (size_t i = 0; i < ths->length; ++i) {
    SimpleHashNode* ptr = ths->table[i];
    if (ptr != NULL) {
      printf("%zu: ", i);
      while (ptr != NULL) {
        printf("<%u, %s, %zu, %s, %zu> ", ptr->hashCode, (char*)ptr->key, ptr->keyLen, (char*)ptr->data, ptr->dataLen);
        ptr = ptr->next;
      }
      printf("\n");
    }
  }
  printf("---------------\n");
}
/** Frees every node (with its key and data), the bucket array and the table itself. */
void destroySimpleHash(struct SimpleHash* ths) {
  for (size_t bucket = 0; bucket < ths->length; ++bucket) {
    SimpleHashNode* next;
    for (SimpleHashNode* node = ths->table[bucket]; node != NULL; node = next) {
      /* Remember the successor before the node is released. */
      next = node->next;
      free(node->key);
      free(node->data);
      free(node);
    }
  }

  ths->length = 0;
  ths->elems = 0;
  free(ths->table);
  free(ths);
}
/**
 * Grows the bucket array by doubling until it has at least as many buckets as
 * elements, then re-links every node into its new bucket. Nodes are moved, not
 * copied.
 *
 * If the new bucket array cannot be allocated, the table is left as it was.
 */
void resizeSimpleHash(struct SimpleHash* ths) {
  /* Start from at least 1 and stop before the doubling would wrap around;
   * either case would otherwise loop forever. */
  uint32_t new_length = ths->length > 0 ? ths->length : 1;
  while (new_length < ths->elems && new_length <= UINT32_MAX / 2) {
    new_length *= 2;
  }
  printf("resizeSimpleHash: %p from %u to %u \n", (void*)ths, ths->length, new_length);

  SimpleHashNode** new_table = calloc(new_length, sizeof(SimpleHashNode*));
  if (new_table == NULL) {
    fprintf(stderr, "resizeSimpleHash, out of memory \n");
    return;
  }

  uint32_t count = 0;
  for (uint32_t i = 0; i < ths->length; i++) {
    SimpleHashNode* it = ths->table[i];
    while (it != NULL) {
      SimpleHashNode* move_node = it;
      it = it->next;

      // push move_node at the head of its new bucket
      size_t index = move_node->hashCode & (new_length - 1);
      move_node->next = new_table[index];
      new_table[index] = move_node;
      count++;
    }
  }

  assert(ths->elems == count);
  free(ths->table);
  ths->table = new_table;
  ths->length = new_length;
}
/** Default hash function of SimpleHash: mySimpleHash with seed 1. */
uint32_t simpleHashFunc(const char* key, size_t keyLen) {
  const uint32_t seed = 1;
  return mySimpleHash(key, keyLen, seed);
}
/**
 * Creates an empty hash table.
 *
 * @param length Initial number of buckets. Bucket indexes are computed as
 *               hash & (length - 1), so this should be a power of two;
 *               0 is treated as 1.
 * @return The new table, to be released with its destroy function, or NULL if
 *         memory could not be allocated.
 */
struct SimpleHash* newSimpleHash(size_t length) {
  if (length == 0) {
    length = 1;
  }

  struct SimpleHash* ths = malloc(sizeof(*ths));
  if (ths == NULL) {
    return NULL;
  }

  ths->table = calloc(length, sizeof(SimpleHashNode*));
  if (ths->table == NULL) {
    free(ths);
    return NULL;
  }
  ths->length = length;
  ths->elems = 0;

  ths->insert = insertSimpleHash;
  ths->remove = removeSimpleHash;
  ths->find = findSimpleHash;
  ths->insert_cstr = insertCStrSimpleHash;
  ths->remove_cstr = removeCStrSimpleHash;
  ths->find_cstr = findCStrSimpleHash;
  ths->print_cstr = printCStrSimpleHash;
  ths->destroy = destroySimpleHash;
  ths->resize = resizeSimpleHash;
  ths->hashFunc = simpleHashFunc;

  /* The original fell off the end here without returning the table. */
  return ths;
}

View File

@ -0,0 +1,61 @@
#ifndef __SIMPLE_HASH_H__
#define __SIMPLE_HASH_H__
#include <assert.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
uint32_t mySimpleHash(const char* data, size_t n, uint32_t seed);
/* One entry of a SimpleHash bucket chain. The node owns its key and data copies. */
typedef struct SimpleHashNode {
uint32_t hashCode; /* hash of the key, as returned by the table's hashFunc */
void* key; /* heap copy of the key bytes */
size_t keyLen; /* number of key bytes */
void* data; /* heap copy of the value bytes */
size_t dataLen; /* number of value bytes */
struct SimpleHashNode* next; /* next node in the same bucket, or NULL */
} SimpleHashNode;
/*
 * Chained hash table with its operations exposed as function pointers;
 * newSimpleHash() fills them in with the functions declared below.
 */
typedef struct SimpleHash {
// public:
/* Byte-string interface: keys and values are (pointer, length) pairs. */
int (*insert)(struct SimpleHash* ths, char* key, size_t keyLen, char* data, size_t dataLen);
int (*remove)(struct SimpleHash* ths, char* key, size_t keyLen);
SimpleHashNode** (*find)(struct SimpleHash* ths, char* key, size_t keyLen);
// wrapper
/* C-string interface: lengths are strlen() + 1, so the NUL is stored too. */
int (*insert_cstr)(struct SimpleHash* ths, char* key, char* data);
int (*remove_cstr)(struct SimpleHash* ths, char* key);
SimpleHashNode** (*find_cstr)(struct SimpleHash* ths, char* key);
void (*print_cstr)(struct SimpleHash* ths);
void (*destroy)(struct SimpleHash* ths);
uint32_t length; /* number of buckets in table */
uint32_t elems; /* number of stored entries */
// private:
void (*resize)(struct SimpleHash* ths);
uint32_t (*hashFunc)(const char* key, size_t keyLen);
SimpleHashNode** table; /* bucket array; each slot heads a chain of nodes */
} SimpleHash;
int insertCStrSimpleHash(struct SimpleHash* ths, char* key, char* data);
int removeCStrSimpleHash(struct SimpleHash* ths, char* key);
SimpleHashNode** findCStrSimpleHash(struct SimpleHash* ths, char* key);
void printCStrSimpleHash(struct SimpleHash* ths);
int insertSimpleHash(struct SimpleHash* ths, char* key, size_t keyLen, char* data, size_t dataLen);
int removeSimpleHash(struct SimpleHash* ths, char* key, size_t keyLen);
SimpleHashNode** findSimpleHash(struct SimpleHash* ths, char* key, size_t keyLen);
void destroySimpleHash(struct SimpleHash* ths);
void resizeSimpleHash(struct SimpleHash* ths);
uint32_t simpleHashFunc(const char* key, size_t keyLen);
struct SimpleHash* newSimpleHash(size_t length);
#endif

View File

@ -0,0 +1,45 @@
#include "util.h"
#include <dirent.h>
#include <stdlib.h>
#include <string.h>
/**
 * Tells whether path can be opened as a directory.
 *
 * @return 1 if opendir() succeeds on path, 0 otherwise.
 */
int dirOK(const char *path) {
  DIR *handle = opendir(path);
  if (handle == NULL) {
    return 0;
  }
  closedir(handle);
  return 1;
}
/**
 * Splits str into tokens and copies them into arr.
 *
 * Every character of separator is a delimiter and runs of delimiters count as
 * one, so empty tokens are never produced (same rules as strtok). The input is
 * scanned in place; no memory is allocated.
 *
 * @param str       NUL-terminated string to split; not modified.
 * @param separator Set of delimiter characters.
 * @param arr       Output rows of TOKEN_LEN bytes each. Every stored token is
 *                  NUL-terminated; a token longer than TOKEN_LEN - 1 bytes is
 *                  truncated.
 * @param n_arr     Number of rows in arr; at most this many tokens are stored.
 * @return The number of tokens stored, or -1 if n_arr <= 0 or a pointer
 *         argument is NULL.
 */
int splitString(const char *str, char *separator, char (*arr)[TOKEN_LEN], int n_arr) {
  if (n_arr <= 0 || str == NULL || separator == NULL || arr == NULL) {
    return -1;
  }

  int         n = 0;
  const char *cursor = str;
  while (n < n_arr) {
    cursor += strspn(cursor, separator); /* skip the delimiters before the token */
    if (*cursor == '\0') {
      break;
    }

    size_t tokenLen = strcspn(cursor, separator);
    size_t copyLen = tokenLen < (size_t)TOKEN_LEN - 1 ? tokenLen : (size_t)TOKEN_LEN - 1;
    memcpy(arr[n], cursor, copyLen);
    arr[n][copyLen] = '\0';
    n++;

    cursor += tokenLen;
  }

  return n;
}

View File

@ -0,0 +1,17 @@
#ifndef TRAFT_UTIL_H
#define TRAFT_UTIL_H
#ifdef __cplusplus
extern "C" {
#endif
#include "common.h"
int dirOK(const char *path);
int splitString(const char *str, char *separator, char (*arr)[TOKEN_LEN], int n_arr);
#ifdef __cplusplus
}
#endif
#endif

View File

@ -0,0 +1,7 @@
# Build the rebalanceLeader demo executable from the sources in this directory.
add_executable(rebalanceLeader "")
target_sources(rebalanceLeader
PRIVATE
"raftMain.c"
"raftServer.c"
)
# Link against the traft, lz4 and uv_a libraries.
target_link_libraries(rebalanceLeader PUBLIC traft lz4 uv_a)

View File

@ -0,0 +1,4 @@
#!/bin/bash
# Remove everything in the current directory whose name starts with
# "127.0.0.1", and the ./data directory.
rm -rf 127.0.0.1*
rm -rf ./data

View File

@ -60,9 +60,9 @@ void raftTransferCb(struct raft_transfer *req) {
SRaftServer *pRaftServer = req->data;
raft_free(req);
printf("raftTransferCb: \n");
//printf("raftTransferCb: \n");
updateLeaderStates(pRaftServer);
printLeaderCount();
//printLeaderCount();
int myLeaderCount;
for (int i = 0; i < NODE_COUNT; ++i) {
@ -71,12 +71,13 @@ void raftTransferCb(struct raft_transfer *req) {
}
}
printf("myLeaderCount:%d waterLevel:%d \n", myLeaderCount, pRaftServer->instanceCount / NODE_COUNT);
//printf("myLeaderCount:%d waterLevel:%d \n", myLeaderCount, pRaftServer->instanceCount / NODE_COUNT);
if (myLeaderCount > pRaftServer->instanceCount / NODE_COUNT) {
struct raft *r;
for (int j = 0; j < pRaftServer->instanceCount; ++j) {
if (pRaftServer->instance[j].raft.state == RAFT_LEADER) {
r = &pRaftServer->instance[j].raft;
break;
}
}
@ -87,17 +88,25 @@ void raftTransferCb(struct raft_transfer *req) {
int minIndex = -1;
int minLeaderCount = myLeaderCount;
for (int j = 0; j < NODE_COUNT; ++j) {
if (strcmp(leaderStates[j].address, pRaftServer->address) == 0) continue;
if (strcmp(leaderStates[j].address, pRaftServer->address) == 0) {
continue;
}
if (leaderStates[j].leaderCount <= minLeaderCount) {
minLeaderCount = leaderStates[j].leaderCount;
minIndex = j;
}
}
char myHost[48];
uint16_t myPort;
uint16_t myVid;
decodeRaftId(r->id, myHost, sizeof(myHost), &myPort, &myVid);
//printf("raftTransferCb transfer leader: vid[%u] choose: index:%d, leaderStates[%d].address:%s, leaderStates[%d].leaderCount:%d \n", minIndex, minIndex, leaderStates[minIndex].address, minIndex, leaderStates[minIndex].leaderCount);
char *destAddress = leaderStates[minIndex].address;
char tokens[MAX_PEERS][MAX_TOKEN_LEN];
@ -106,6 +115,9 @@ void raftTransferCb(struct raft_transfer *req) {
uint16_t destPort = atoi(tokens[1]);
destRaftId = encodeRaftId(destHost, destPort, myVid);
printf("\nraftTransferCb transfer leader: vgroupId:%u from:%s:%u --> to:%s:%u ", myVid, myHost, myPort, destHost, destPort);
fflush(stdout);
raft_transfer(r, transfer, destRaftId, raftTransferCb);
}
@ -252,7 +264,6 @@ const char* state2String(unsigned short state) {
void printRaftState2(struct raft *r) {
char leaderAddress[128];
memset(leaderAddress, 0, sizeof(leaderAddress));
@ -350,6 +361,7 @@ void console(SRaftServer *pRaftServer) {
while (1) {
char cmd_buf[COMMAND_LEN];
memset(cmd_buf, 0, sizeof(cmd_buf));
printf("(console)> ");
char *ret = fgets(cmd_buf, COMMAND_LEN, stdin);
if (!ret) {
exit(-1);
@ -403,7 +415,10 @@ void console(SRaftServer *pRaftServer) {
} else if (strcmp(cmd, "dropnode") == 0) {
printf("not support \n");
} else if (strcmp(cmd, "rebalance") == 0) {
} else if (strcmp(cmd, "quit") == 0 || strcmp(cmd, "exit") == 0) {
exit(0);
} else if (strcmp(cmd, "rebalance") == 0 && strcmp(param1, "leader") == 0) {
/*
updateLeaderStates(pRaftServer);
@ -511,10 +526,14 @@ void console(SRaftServer *pRaftServer) {
printRaftState(&pRaftServer->instance[i].raft);
}
} else if (strcmp(cmd, "state2") == 0) {
} else if (strcmp(cmd, "leader") == 0 && strcmp(param1, "state") == 0) {
updateLeaderStates(pRaftServer);
printf("\n--------------------------------------------\n");
printLeaderCount();
for (int i = 0; i < pRaftServer->instanceCount; ++i) {
printRaftState2(&pRaftServer->instance[i].raft);
}
printf("--------------------------------------------\n");
} else if (strcmp(cmd, "snapshot") == 0) {
printf("not support \n");

View File

@ -3,32 +3,34 @@
#include "common.h"
#include "raftServer.h"
char *keys;
char *values;
//char *keys = malloc(MAX_RECORD_COUNT * MAX_KV_LEN);;
//char *values = malloc(MAX_RECORD_COUNT * MAX_KV_LEN);
char keys[MAX_KV_LEN][MAX_RECORD_COUNT];
char values[MAX_KV_LEN][MAX_RECORD_COUNT];
int writeIndex = 0;
void initStore() {
keys = malloc(MAX_RECORD_COUNT * MAX_KV_LEN);
values = malloc(MAX_RECORD_COUNT * MAX_KV_LEN);
writeIndex = 0;
}
void destroyStore() {
free(keys);
free(values);
//free(keys);
//free(values);
}
void putKV(const char *key, const char *value) {
if (writeIndex < MAX_RECORD_COUNT) {
strncpy(&keys[writeIndex], key, MAX_KV_LEN);
strncpy(&values[writeIndex], value, MAX_KV_LEN);
strncpy(keys[writeIndex], key, MAX_KV_LEN);
strncpy(values[writeIndex], value, MAX_KV_LEN);
writeIndex++;
}
}
char *getKV(const char *key) {
for (int i = 0; i < MAX_RECORD_COUNT; ++i) {
if (strcmp(&keys[i], key) == 0) {
return &values[i];
if (strcmp(keys[i], key) == 0) {
return values[i];
}
}
return NULL;

View File

@ -15,11 +15,13 @@ extern "C" {
// simulate a db store, just for test
#define MAX_KV_LEN 100
#define MAX_RECORD_COUNT 500
char *keys;
char *values;
int writeIndex;
#define MAX_KV_LEN 20
#define MAX_RECORD_COUNT 16
//char *keys;
//char *values;
//int writeIndex;
void initStore();
void destroyStore();

View File

@ -50,7 +50,6 @@ typedef struct SCatalogCfg {
uint32_t maxDBCacheNum;
} SCatalogCfg;
int32_t catalogInit(SCatalogCfg *cfg);
/**
@ -88,28 +87,15 @@ int32_t catalogUpdateDBVgroup(struct SCatalog* pCatalog, const char* dbName, SDB
*/
int32_t catalogGetTableMeta(struct SCatalog* pCatalog, void * pTransporter, const SEpSet* pMgmtEps, const SName* pTableName, STableMeta** pTableMeta);
/**
* Get a super table's meta data.
* @param pCatalog (input, got with catalogGetHandle)
* @param pTransporter (input, rpc object)
* @param pMgmtEps (input, mnode EPs)
* @param pTableName (input, table name, NOT including db name)
* @param pTableMeta (output, table meta data, NEED to be freed by the caller)
* @return error code
*/
int32_t catalogGetSTableMeta(struct SCatalog* pCatalog, void * pTransporter, const SEpSet* pMgmtEps, const SName* pTableName, STableMeta** pTableMeta);
/**
* Force renew a table's local cached meta data.
* @param pCatalog (input, got with catalogGetHandle)
* @param pTransporter (input, rpc object)
* @param pMgmtEps (input, mnode EPs)
* @param pTableName (input, table name, NOT including db name)
* @param isSTable (input, is super table or not, 1:supposed to be stable, 0: supposed not to be stable, -1:not sure)
* @return error code
*/
int32_t catalogRenewTableMeta(struct SCatalog* pCatalog, void * pTransporter, const SEpSet* pMgmtEps, const SName* pTableName, int32_t isSTable);
int32_t catalogRenewTableMeta(struct SCatalog* pCatalog, void * pTransporter, const SEpSet* pMgmtEps, const SName* pTableName);
/**
* Force renew a table's local cached meta data and get the new one.
@ -118,10 +104,9 @@ int32_t catalogRenewTableMeta(struct SCatalog* pCatalog, void * pTransporter, co
* @param pMgmtEps (input, mnode EPs)
* @param pTableName (input, table name, NOT including db name)
* @param pTableMeta (output, table meta data, NEED to be freed by the caller)
* @param isSTable (input, is super table or not, 1:supposed to be stable, 0: supposed not to be stable, -1:not sure)
* @return error code
*/
int32_t catalogRenewAndGetTableMeta(struct SCatalog* pCatalog, void *pTransporter, const SEpSet* pMgmtEps, const SName* pTableName, STableMeta** pTableMeta, int32_t isSTable);
int32_t catalogRenewAndGetTableMeta(struct SCatalog* pCatalog, void *pTransporter, const SEpSet* pMgmtEps, const SName* pTableName, STableMeta** pTableMeta);
/**

View File

@ -203,12 +203,21 @@ int32_t getPlan(SRequestObj* pRequest, SQueryNode* pQueryNode, SQueryDag** pDag)
int32_t scheduleQuery(SRequestObj* pRequest, SQueryDag* pDag, void** pJob) {
if (TSDB_SQL_INSERT == pRequest->type || TSDB_SQL_CREATE_TABLE == pRequest->type) {
SQueryResult res = {.code = 0, .numOfRows = 0, .msgSize = ERROR_MSG_BUF_DEFAULT_SIZE, .msg = pRequest->msgBuf};
int32_t code = scheduleExecJob(pRequest->pTscObj->pTransporter, NULL, pDag, pJob, &res);
if (code != TSDB_CODE_SUCCESS) {
// handle error and retry
} else {
if (*pJob != NULL) {
scheduleFreeJob(*pJob);
}
}
pRequest->affectedRows = res.numOfRows;
return res.code;
}
return scheduleAsyncExecJob(pRequest->pTscObj->pTransporter, NULL/*todo appInfo.xxx*/, pDag, pJob);
return scheduleAsyncExecJob(pRequest->pTscObj->pTransporter, NULL /*todo appInfo.xxx*/, pDag, pJob);
}
TAOS_RES *tmq_create_topic(TAOS* taos, const char* name, const char* sql, int sqlLen) {
@ -526,9 +535,7 @@ void* doFetchRow(SRequestObj* pRequest) {
int64_t transporterId = 0;
STscObj *pTscObj = pRequest->pTscObj;
asyncSendMsgToServer(pTscObj->pTransporter, &pTscObj->pAppInfo->mgmtEp.epSet, &transporterId, body);
tsem_wait(&pRequest->body.rspSem);
destroySendMsgInfo(body);
pRequest->type = TDMT_VND_SHOW_TABLES_FETCH;
}

View File

@ -152,7 +152,7 @@ TEST(testCase, create_db_Test) {
TAOS* pConn = taos_connect("localhost", "root", "taosdata", NULL, 0);
assert(pConn != NULL);
TAOS_RES* pRes = taos_query(pConn, "create database abc1");
TAOS_RES* pRes = taos_query(pConn, "create database abc1 vgroups 2");
if (taos_errno(pRes) != 0) {
printf("error in create db, reason:%s\n", taos_errstr(pRes));
}
@ -254,7 +254,7 @@ TEST(testCase, use_db_test) {
TAOS* pConn = taos_connect("localhost", "root", "taosdata", NULL, 0);
assert(pConn != NULL);
TAOS_RES* pRes = taos_query(pConn, "create database abc1");
TAOS_RES* pRes = taos_query(pConn, "create database abc1 vgroups 2");
if (taos_errno(pRes) != 0) {
printf("error in create db, reason:%s\n", taos_errstr(pRes));
}
@ -281,18 +281,18 @@ TEST(testCase, use_db_test) {
taos_close(pConn);
}
// Connects to the local server, switches to database abc1 and issues a
// "create table" statement for tm0. Both query results are released without
// inspecting their error codes; only the connection itself is asserted.
TEST(testCase, create_table_Test) {
  TAOS* conn = taos_connect("localhost", "root", "taosdata", NULL, 0);
  assert(conn != NULL);

  // Select the database the table is created in.
  TAOS_RES* useRes = taos_query(conn, "use abc1");
  taos_free_result(useRes);

  // Create the table itself.
  TAOS_RES* createRes = taos_query(conn, "create table tm0(ts timestamp, k int)");
  taos_free_result(createRes);

  taos_close(conn);
}
//TEST(testCase, create_table_Test) {
// TAOS* pConn = taos_connect("localhost", "root", "taosdata", NULL, 0);
// assert(pConn != NULL);
//
// TAOS_RES* pRes = taos_query(pConn, "use abc1");
// taos_free_result(pRes);
//
// pRes = taos_query(pConn, "create table tm0(ts timestamp, k int)");
// taos_free_result(pRes);
//
// taos_close(pConn);
//}
//TEST(testCase, create_ctable_Test) {
// TAOS* pConn = taos_connect("localhost", "root", "taosdata", NULL, 0);
@ -505,15 +505,17 @@ TEST(testCase, create_multiple_tables) {
taos_free_result(pRes);
// for(int32_t i = 0; i < 10000; ++i) {
// char sql[512] = {0};
// snprintf(sql, tListLen(sql), "create table t_x_%d using st1 tags(2)", i);
// TAOS_RES* pres = taos_query(pConn, sql);
// if (taos_errno(pres) != 0) {
// printf("failed to create table %d\n, reason:%s", i, taos_errstr(pres));
// }
// taos_free_result(pres);
// }
for(int32_t i = 0; i < 200000; ++i) {
char sql[512] = {0};
snprintf(sql, tListLen(sql), "create table t_x_%d using st1 tags(2)", i);
TAOS_RES* pres = taos_query(pConn, sql);
if (taos_errno(pres) != 0) {
printf("failed to create table %d\n, reason:%s", i, taos_errstr(pres));
}
printf("%d\n", i);
taos_free_result(pres);
}
taos_close(pConn);
}
@ -521,11 +523,11 @@ TEST(testCase, create_multiple_tables) {
TEST(testCase, generated_request_id_test) {
SHashObj *phash = taosHashInit(10000, taosGetDefaultHashFunction(TSDB_DATA_TYPE_BIGINT), false, HASH_ENTRY_LOCK);
for(int32_t i = 0; i < 50000000; ++i) {
for(int32_t i = 0; i < 50000; ++i) {
uint64_t v = generateRequestId();
void* result = taosHashGet(phash, &v, sizeof(v));
if (result != nullptr) {
printf("0x%"PRIx64", index:%d\n", v, i);
printf("0x%lx, index:%d\n", v, i);
}
assert(result == nullptr);
taosHashPut(phash, &v, sizeof(v), NULL, 0);

View File

@ -73,7 +73,7 @@ TEST_F(DndTestQnode, 01_Create_Qnode) {
}
}
TEST_F(DndTestQnode, 01_Drop_Qnode) {
TEST_F(DndTestQnode, 02_Drop_Qnode) {
{
int32_t contLen = sizeof(SDDropQnodeReq);

View File

@ -65,8 +65,8 @@ typedef enum {
TRN_STAGE_PREPARE = 0,
TRN_STAGE_REDO_LOG = 1,
TRN_STAGE_REDO_ACTION = 2,
TRN_STAGE_COMMIT_LOG = 3,
TRN_STAGE_COMMIT = 4,
TRN_STAGE_COMMIT = 3,
TRN_STAGE_COMMIT_LOG = 4,
TRN_STAGE_UNDO_ACTION = 5,
TRN_STAGE_UNDO_LOG = 6,
TRN_STAGE_ROLLBACK = 7,

View File

@ -54,7 +54,7 @@ static int32_t mndRestoreWal(SMnode *pMnode) {
int64_t first = walGetFirstVer(pWal);
int64_t last = walGetLastVer(pWal);
mDebug("restore sdb wal start, sdb ver:%" PRId64 ", wal first:%" PRId64 " last:%" PRId64, lastSdbVer, first, last);
mDebug("start to restore sdb wal, sdb ver:%" PRId64 ", wal first:%" PRId64 " last:%" PRId64, lastSdbVer, first, last);
first = MAX(lastSdbVer + 1, first);
for (int64_t ver = first; ver >= 0 && ver <= last; ++ver) {
@ -71,6 +71,7 @@ static int32_t mndRestoreWal(SMnode *pMnode) {
goto WAL_RESTORE_OVER;
}
mTrace("wal:%" PRId64 ", will be restored, content:%p", ver, pHead->head.body);
if (sdbWriteNotFree(pSdb, (void *)pHead->head.body) < 0) {
mError("failed to read wal from sdb since %s, ver:%" PRId64, terrstr(), ver);
goto WAL_RESTORE_OVER;

View File

@ -24,7 +24,7 @@
static SSdbRaw *mndTransActionEncode(STrans *pTrans);
static SSdbRow *mndTransActionDecode(SSdbRaw *pRaw);
static int32_t mndTransActionInsert(SSdb *pSdb, STrans *pTrans);
static int32_t mndTransActionUpdate(SSdb *pSdb, STrans *OldTrans, STrans *pOldTrans);
static int32_t mndTransActionUpdate(SSdb *pSdb, STrans *OldTrans, STrans *pOld);
static int32_t mndTransActionDelete(SSdb *pSdb, STrans *pTrans);
static int32_t mndTransAppendLog(SArray *pArray, SSdbRaw *pRaw);
@ -112,6 +112,7 @@ static SSdbRaw *mndTransActionEncode(STrans *pTrans) {
int32_t dataPos = 0;
SDB_SET_INT32(pRaw, dataPos, pTrans->id, TRANS_ENCODE_OVER)
SDB_SET_INT8(pRaw, dataPos, pTrans->policy, TRANS_ENCODE_OVER)
SDB_SET_INT8(pRaw, dataPos, pTrans->stage, TRANS_ENCODE_OVER)
SDB_SET_INT32(pRaw, dataPos, redoLogNum, TRANS_ENCODE_OVER)
SDB_SET_INT32(pRaw, dataPos, undoLogNum, TRANS_ENCODE_OVER)
SDB_SET_INT32(pRaw, dataPos, commitLogNum, TRANS_ENCODE_OVER)
@ -216,6 +217,7 @@ static SSdbRow *mndTransActionDecode(SSdbRaw *pRaw) {
SDB_GET_INT32(pRaw, dataPos, &pTrans->id, TRANS_DECODE_OVER)
SDB_GET_INT8(pRaw, dataPos, (int8_t *)&pTrans->policy, TRANS_DECODE_OVER)
SDB_GET_INT8(pRaw, dataPos, (int8_t *)&pTrans->stage, TRANS_DECODE_OVER)
SDB_GET_INT32(pRaw, dataPos, &redoLogNum, TRANS_DECODE_OVER)
SDB_GET_INT32(pRaw, dataPos, &undoLogNum, TRANS_DECODE_OVER)
SDB_GET_INT32(pRaw, dataPos, &commitLogNum, TRANS_DECODE_OVER)
@ -314,9 +316,12 @@ static int32_t mndTransActionDelete(SSdb *pSdb, STrans *pTrans) {
return 0;
}
static int32_t mndTransActionUpdate(SSdb *pSdb, STrans *pOldTrans, STrans *pNewTrans) {
mTrace("trans:%d, perform update action, old_row:%p new_row:%p", pOldTrans->id, pOldTrans, pNewTrans);
pOldTrans->stage = pNewTrans->stage;
static int32_t mndTransActionUpdate(SSdb *pSdb, STrans *pOld, STrans *pNew) {
if (pNew->stage == TRN_STAGE_COMMIT) pNew->stage = TRN_STAGE_COMMIT_LOG;
mTrace("trans:%d, perform update action, old row:%p stage:%d, new row:%p stage:%d", pOld->id, pOld, pOld->stage, pNew,
pNew->stage);
pOld->stage = pNew->stage;
return 0;
}
@ -464,16 +469,16 @@ int32_t mndTransPrepare(SMnode *pMnode, STrans *pTrans) {
}
mDebug("trans:%d, prepare finished", pTrans->id);
STrans *pNewTrans = mndAcquireTrans(pMnode, pTrans->id);
if (pNewTrans == NULL) {
STrans *pNew = mndAcquireTrans(pMnode, pTrans->id);
if (pNew == NULL) {
mError("trans:%d, failed to read from sdb since %s", pTrans->id, terrstr());
return -1;
}
pNewTrans->rpcHandle = pTrans->rpcHandle;
pNewTrans->rpcAHandle = pTrans->rpcAHandle;
mndTransExecute(pMnode, pNewTrans);
mndReleaseTrans(pMnode, pNewTrans);
pNew->rpcHandle = pTrans->rpcHandle;
pNew->rpcAHandle = pTrans->rpcAHandle;
mndTransExecute(pMnode, pNew);
mndReleaseTrans(pMnode, pNew);
return 0;
}
@ -645,7 +650,7 @@ static int32_t mndTransSendActionMsg(SMnode *pMnode, STrans *pTrans, SArray *pAr
pAction->msgReceived = 0;
pAction->errCode = 0;
} else {
mDebug("trans:%d, action:%d not sent since %s", pTrans->id, action, terrstr());
mDebug("trans:%d, action:%d not send since %s", pTrans->id, action, terrstr());
return -1;
}
}

View File

@ -186,7 +186,6 @@ TEST_F(MndTestBnode, 03_Create_Bnode_Rollback) {
SMCreateBnodeReq* pReq = (SMCreateBnodeReq*)rpcMallocCont(contLen);
pReq->dnodeId = htonl(2);
server2.Stop();
SRpcMsg* pRsp = test.SendReq(TDMT_MND_CREATE_BNODE, pReq, contLen);
ASSERT_NE(pRsp, nullptr);
ASSERT_EQ(pRsp->code, TSDB_CODE_SDB_OBJ_CREATING);
@ -199,7 +198,6 @@ TEST_F(MndTestBnode, 03_Create_Bnode_Rollback) {
SMDropBnodeReq* pReq = (SMDropBnodeReq*)rpcMallocCont(contLen);
pReq->dnodeId = htonl(2);
server2.Stop();
SRpcMsg* pRsp = test.SendReq(TDMT_MND_DROP_BNODE, pReq, contLen);
ASSERT_NE(pRsp, nullptr);
ASSERT_EQ(pRsp->code, TSDB_CODE_SDB_OBJ_CREATING);
@ -211,7 +209,7 @@ TEST_F(MndTestBnode, 03_Create_Bnode_Rollback) {
taosMsleep(1000);
int32_t retry = 0;
int32_t retryMax = 10;
int32_t retryMax = 20;
for (retry = 0; retry < retryMax; retry++) {
int32_t contLen = sizeof(SMCreateBnodeReq);
@ -250,7 +248,6 @@ TEST_F(MndTestBnode, 04_Drop_Bnode_Rollback) {
SMCreateBnodeReq* pReq = (SMCreateBnodeReq*)rpcMallocCont(contLen);
pReq->dnodeId = htonl(2);
server2.Stop();
SRpcMsg* pRsp = test.SendReq(TDMT_MND_CREATE_BNODE, pReq, contLen);
ASSERT_NE(pRsp, nullptr);
ASSERT_EQ(pRsp->code, TSDB_CODE_SDB_OBJ_DROPPING);
@ -263,7 +260,6 @@ TEST_F(MndTestBnode, 04_Drop_Bnode_Rollback) {
SMDropBnodeReq* pReq = (SMDropBnodeReq*)rpcMallocCont(contLen);
pReq->dnodeId = htonl(2);
server2.Stop();
SRpcMsg* pRsp = test.SendReq(TDMT_MND_DROP_BNODE, pReq, contLen);
ASSERT_NE(pRsp, nullptr);
ASSERT_EQ(pRsp->code, TSDB_CODE_SDB_OBJ_DROPPING);
@ -275,7 +271,7 @@ TEST_F(MndTestBnode, 04_Drop_Bnode_Rollback) {
taosMsleep(1000);
int32_t retry = 0;
int32_t retryMax = 10;
int32_t retryMax = 20;
for (retry = 0; retry < retryMax; retry++) {
int32_t contLen = sizeof(SMCreateBnodeReq);

View File

@ -186,7 +186,6 @@ TEST_F(MndTestQnode, 03_Create_Qnode_Rollback) {
SMCreateQnodeReq* pReq = (SMCreateQnodeReq*)rpcMallocCont(contLen);
pReq->dnodeId = htonl(2);
server2.Stop();
SRpcMsg* pRsp = test.SendReq(TDMT_MND_CREATE_QNODE, pReq, contLen);
ASSERT_NE(pRsp, nullptr);
ASSERT_EQ(pRsp->code, TSDB_CODE_SDB_OBJ_CREATING);
@ -199,7 +198,6 @@ TEST_F(MndTestQnode, 03_Create_Qnode_Rollback) {
SMDropQnodeReq* pReq = (SMDropQnodeReq*)rpcMallocCont(contLen);
pReq->dnodeId = htonl(2);
server2.Stop();
SRpcMsg* pRsp = test.SendReq(TDMT_MND_DROP_QNODE, pReq, contLen);
ASSERT_NE(pRsp, nullptr);
ASSERT_EQ(pRsp->code, TSDB_CODE_SDB_OBJ_CREATING);
@ -211,7 +209,7 @@ TEST_F(MndTestQnode, 03_Create_Qnode_Rollback) {
taosMsleep(1000);
int32_t retry = 0;
int32_t retryMax = 10;
int32_t retryMax = 20;
for (retry = 0; retry < retryMax; retry++) {
int32_t contLen = sizeof(SMCreateQnodeReq);
@ -250,7 +248,6 @@ TEST_F(MndTestQnode, 04_Drop_Qnode_Rollback) {
SMCreateQnodeReq* pReq = (SMCreateQnodeReq*)rpcMallocCont(contLen);
pReq->dnodeId = htonl(2);
server2.Stop();
SRpcMsg* pRsp = test.SendReq(TDMT_MND_CREATE_QNODE, pReq, contLen);
ASSERT_NE(pRsp, nullptr);
ASSERT_EQ(pRsp->code, TSDB_CODE_SDB_OBJ_DROPPING);
@ -263,7 +260,6 @@ TEST_F(MndTestQnode, 04_Drop_Qnode_Rollback) {
SMDropQnodeReq* pReq = (SMDropQnodeReq*)rpcMallocCont(contLen);
pReq->dnodeId = htonl(2);
server2.Stop();
SRpcMsg* pRsp = test.SendReq(TDMT_MND_DROP_QNODE, pReq, contLen);
ASSERT_NE(pRsp, nullptr);
ASSERT_EQ(pRsp->code, TSDB_CODE_SDB_OBJ_DROPPING);
@ -275,7 +271,7 @@ TEST_F(MndTestQnode, 04_Drop_Qnode_Rollback) {
taosMsleep(1000);
int32_t retry = 0;
int32_t retryMax = 10;
int32_t retryMax = 20;
for (retry = 0; retry < retryMax; retry++) {
int32_t contLen = sizeof(SMCreateQnodeReq);

View File

@ -186,7 +186,6 @@ TEST_F(MndTestSnode, 03_Create_Snode_Rollback) {
SMCreateSnodeReq* pReq = (SMCreateSnodeReq*)rpcMallocCont(contLen);
pReq->dnodeId = htonl(2);
server2.Stop();
SRpcMsg* pRsp = test.SendReq(TDMT_MND_CREATE_SNODE, pReq, contLen);
ASSERT_NE(pRsp, nullptr);
ASSERT_EQ(pRsp->code, TSDB_CODE_SDB_OBJ_CREATING);
@ -199,7 +198,6 @@ TEST_F(MndTestSnode, 03_Create_Snode_Rollback) {
SMDropSnodeReq* pReq = (SMDropSnodeReq*)rpcMallocCont(contLen);
pReq->dnodeId = htonl(2);
server2.Stop();
SRpcMsg* pRsp = test.SendReq(TDMT_MND_DROP_SNODE, pReq, contLen);
ASSERT_NE(pRsp, nullptr);
ASSERT_EQ(pRsp->code, TSDB_CODE_SDB_OBJ_CREATING);
@ -211,7 +209,7 @@ TEST_F(MndTestSnode, 03_Create_Snode_Rollback) {
taosMsleep(1000);
int32_t retry = 0;
int32_t retryMax = 10;
int32_t retryMax = 20;
for (retry = 0; retry < retryMax; retry++) {
int32_t contLen = sizeof(SMCreateSnodeReq);
@ -250,7 +248,6 @@ TEST_F(MndTestSnode, 04_Drop_Snode_Rollback) {
SMCreateSnodeReq* pReq = (SMCreateSnodeReq*)rpcMallocCont(contLen);
pReq->dnodeId = htonl(2);
server2.Stop();
SRpcMsg* pRsp = test.SendReq(TDMT_MND_CREATE_SNODE, pReq, contLen);
ASSERT_NE(pRsp, nullptr);
ASSERT_EQ(pRsp->code, TSDB_CODE_SDB_OBJ_DROPPING);
@ -263,7 +260,6 @@ TEST_F(MndTestSnode, 04_Drop_Snode_Rollback) {
SMDropSnodeReq* pReq = (SMDropSnodeReq*)rpcMallocCont(contLen);
pReq->dnodeId = htonl(2);
server2.Stop();
SRpcMsg* pRsp = test.SendReq(TDMT_MND_DROP_SNODE, pReq, contLen);
ASSERT_NE(pRsp, nullptr);
ASSERT_EQ(pRsp->code, TSDB_CODE_SDB_OBJ_DROPPING);
@ -275,7 +271,7 @@ TEST_F(MndTestSnode, 04_Drop_Snode_Rollback) {
taosMsleep(1000);
int32_t retry = 0;
int32_t retryMax = 10;
int32_t retryMax = 20;
for (retry = 0; retry < retryMax; retry++) {
int32_t contLen = sizeof(SMCreateSnodeReq);

View File

@ -10,12 +10,21 @@
*/
#include "sut.h"
#include "os.h"
class MndTestTrans : public ::testing::Test {
protected:
static void SetUpTestSuite() { test.Init("/tmp/mnode_test_trans", 9013); }
static void TearDownTestSuite() { test.Cleanup(); }
static void SetUpTestSuite() {
test.Init("/tmp/mnode_test_trans", 9013);
const char* fqdn = "localhost";
const char* firstEp = "localhost:9013";
server2.Start("/tmp/mnode_test_trans2", fqdn, 9020, firstEp);
}
static void TearDownTestSuite() {
server2.Stop();
test.Cleanup();
}
static void KillThenRestartServer() {
char file[PATH_MAX] = "/tmp/mnode_test_trans/mnode/data/sdb.data";
FileFd fd = taosOpenFileRead(file);
@ -41,14 +50,16 @@ class MndTestTrans : public ::testing::Test {
test.ServerStart();
}
static Testbase test;
static Testbase test;
static TestServer server2;
public:
void SetUp() override {}
void TearDown() override {}
};
Testbase MndTestTrans::test;
Testbase MndTestTrans::test;
TestServer MndTestTrans::server2;
TEST_F(MndTestTrans, 01_Create_User_Crash) {
{
@ -84,3 +95,97 @@ TEST_F(MndTestTrans, 01_Create_User_Crash) {
CheckBinary("root", TSDB_USER_LEN);
CheckBinary("root", TSDB_USER_LEN);
}
// Checks that a qnode created on dnode 1 is still known to the mnode after
// KillThenRestartServer(): the create request succeeds before the restart and
// is rejected as "already exist" afterwards, with one qnode row shown each time.
TEST_F(MndTestTrans, 02_Create_Qnode1_Crash) {
{
// Step 1: create the qnode on dnode 1 and expect success.
int32_t contLen = sizeof(SMCreateQnodeReq);
SMCreateQnodeReq* pReq = (SMCreateQnodeReq*)rpcMallocCont(contLen);
pReq->dnodeId = htonl(1);
SRpcMsg* pRsp = test.SendReq(TDMT_MND_CREATE_QNODE, pReq, contLen);
ASSERT_NE(pRsp, nullptr);
ASSERT_EQ(pRsp->code, 0);
// "show qnodes" must now report exactly one row.
test.SendShowMetaReq(TSDB_MGMT_TABLE_QNODE, "");
CHECK_META("show qnodes", 3);
test.SendShowRetrieveReq();
EXPECT_EQ(test.GetShowRows(), 1);
}
// Step 2: kill and restart the server under test.
KillThenRestartServer();
{
// Step 3: the same create request must now fail because the qnode exists.
int32_t contLen = sizeof(SMCreateQnodeReq);
SMCreateQnodeReq* pReq = (SMCreateQnodeReq*)rpcMallocCont(contLen);
pReq->dnodeId = htonl(1);
SRpcMsg* pRsp = test.SendReq(TDMT_MND_CREATE_QNODE, pReq, contLen);
ASSERT_NE(pRsp, nullptr);
ASSERT_EQ(pRsp->code, TSDB_CODE_MND_QNODE_ALREADY_EXIST);
// Still exactly one qnode row after the restart.
test.SendShowMetaReq(TSDB_MGMT_TABLE_QNODE, "");
CHECK_META("show qnodes", 3);
test.SendShowRetrieveReq();
EXPECT_EQ(test.GetShowRows(), 1);
}
}
// Exercises qnode creation on a second dnode whose server is stopped while the
// request is sent, followed by a restart of both servers and a retried create.
TEST_F(MndTestTrans, 03_Create_Qnode2_Crash) {
{
// Step 1: register a second dnode at localhost:9020 and expect two dnode rows.
int32_t contLen = sizeof(SCreateDnodeMsg);
SCreateDnodeMsg* pReq = (SCreateDnodeMsg*)rpcMallocCont(contLen);
strcpy(pReq->fqdn, "localhost");
pReq->port = htonl(9020);
SRpcMsg* pRsp = test.SendReq(TDMT_MND_CREATE_DNODE, pReq, contLen);
ASSERT_NE(pRsp, nullptr);
ASSERT_EQ(pRsp->code, 0);
// Wait before querying the dnode list (presumably to let the new dnode register).
taosMsleep(1300);
test.SendShowMetaReq(TSDB_MGMT_TABLE_DNODE, "");
test.SendShowRetrieveReq();
EXPECT_EQ(test.GetShowRows(), 2);
}
{
// Step 2: stop server2 first, then ask for a qnode on dnode 2; the request is
// expected to fail with a network-unavailable code.
int32_t contLen = sizeof(SMCreateQnodeReq);
SMCreateQnodeReq* pReq = (SMCreateQnodeReq*)rpcMallocCont(contLen);
pReq->dnodeId = htonl(2);
server2.Stop();
SRpcMsg* pRsp = test.SendReq(TDMT_MND_CREATE_QNODE, pReq, contLen);
ASSERT_NE(pRsp, nullptr);
ASSERT_EQ(pRsp->code, TSDB_CODE_RPC_NETWORK_UNAVAIL);
}
// Step 3: restart the server under test, then bring server2 back up.
KillThenRestartServer();
server2.DoStart();
{
// Step 4: retry the create request, up to retryMax times with a one-second
// pause after each failure, until it returns code 0.
int32_t retry = 0;
int32_t retryMax = 20;
for (retry = 0; retry < retryMax; retry++) {
int32_t contLen = sizeof(SMCreateQnodeReq);
SMCreateQnodeReq* pReq = (SMCreateQnodeReq*)rpcMallocCont(contLen);
pReq->dnodeId = htonl(2);
SRpcMsg* pRsp = test.SendReq(TDMT_MND_CREATE_QNODE, pReq, contLen);
ASSERT_NE(pRsp, nullptr);
if (pRsp->code == 0) break;
taosMsleep(1000);
}
// The loop must have exited via break, i.e. one attempt succeeded.
ASSERT_NE(retry, retryMax);
// Two qnode rows are expected now.
test.SendShowMetaReq(TSDB_MGMT_TABLE_QNODE, "");
CHECK_META("show qnodes", 3);
test.SendShowRetrieveReq();
EXPECT_EQ(test.GetShowRows(), 2);
}
}

View File

@ -22,12 +22,12 @@ static int vnodeGetTableMeta(SVnode *pVnode, SRpcMsg *pMsg, SRpcMsg **pRsp);
/*
 * Set up query handling for a vnode by initialising a query worker into
 * pVnode->pQuery. Returns whatever qWorkerInit() returns.
 */
int vnodeQueryOpen(SVnode *pVnode) {
  int code = qWorkerInit(NULL, &pVnode->pQuery);
  return code;
}
int vnodeProcessQueryReq(SVnode *pVnode, SRpcMsg *pMsg, SRpcMsg **pRsp) {
vInfo("query message is processed");
vTrace("query message is processed");
return qWorkerProcessQueryMsg(pVnode, pVnode->pQuery, pMsg);
}
int vnodeProcessFetchReq(SVnode *pVnode, SRpcMsg *pMsg, SRpcMsg **pRsp) {
vInfo("fetch message is processed");
vTrace("fetch message is processed");
switch (pMsg->msgType) {
case TDMT_VND_FETCH:
return qWorkerProcessFetchMsg(pVnode, pVnode->pQuery, pMsg);

View File

@ -64,14 +64,6 @@ typedef struct SCatalogMgmt {
typedef uint32_t (*tableNameHashFp)(const char *, uint32_t);
#define CTG_IS_STABLE(isSTable) (1 == (isSTable))
#define CTG_IS_NOT_STABLE(isSTable) (0 == (isSTable))
#define CTG_IS_UNKNOWN_STABLE(isSTable) ((isSTable) < 0)
#define CTG_SET_STABLE(isSTable, tbType) do { (isSTable) = ((tbType) == TSDB_SUPER_TABLE) ? 1 : ((tbType) > TSDB_SUPER_TABLE ? 0 : -1); } while (0)
#define CTG_TBTYPE_MATCH(isSTable, tbType) (CTG_IS_UNKNOWN_STABLE(isSTable) || (CTG_IS_STABLE(isSTable) && (tbType) == TSDB_SUPER_TABLE) || (CTG_IS_NOT_STABLE(isSTable) && (tbType) != TSDB_SUPER_TABLE))
#define CTG_TABLE_NOT_EXIST(code) (code == TSDB_CODE_TDB_INVALID_TABLE_ID)
#define ctgFatal(...) do { if (ctgDebugFlag & DEBUG_FATAL) { taosPrintLog("CTG FATAL ", ctgDebugFlag, __VA_ARGS__); }} while(0)
#define ctgError(...) do { if (ctgDebugFlag & DEBUG_ERROR) { taosPrintLog("CTG ERROR ", ctgDebugFlag, __VA_ARGS__); }} while(0)
#define ctgWarn(...) do { if (ctgDebugFlag & DEBUG_WARN) { taosPrintLog("CTG WARN ", ctgDebugFlag, __VA_ARGS__); }} while(0)

View File

@ -105,8 +105,6 @@ int32_t ctgGetTableMetaFromCache(struct SCatalog* pCatalog, const SName* pTableN
}
*exist = 1;
tbMeta = *pTableMeta;
if (tbMeta->tableType != TSDB_CHILD_TABLE) {
return TSDB_CODE_SUCCESS;
@ -145,29 +143,6 @@ int32_t ctgGetTableMetaFromCache(struct SCatalog* pCatalog, const SName* pTableN
return TSDB_CODE_SUCCESS;
}
/**
 * Look up a table's type in the catalog's table meta cache.
 *
 * @param pCatalog   (input) catalog handle whose tableCache is consulted
 * @param pTableName (input) table name; its full name is used as the cache key
 * @param tbType     (output) set to the cached table type on a cache hit;
 *                   left untouched when the cache is missing or has no entry
 * @return always TSDB_CODE_SUCCESS (a cache miss is not an error)
 */
int32_t ctgGetTableTypeFromCache(struct SCatalog* pCatalog, const SName* pTableName, int32_t *tbType) {
  if (NULL == pCatalog->tableCache.cache) {
    return TSDB_CODE_SUCCESS;
  }

  char tbFullName[TSDB_TABLE_FNAME_LEN];
  tNameExtractFullName(pTableName, tbFullName);

  size_t sz = 0;
  STableMeta *pTableMeta = NULL;

  /* On a hit the hash hands back a clone of the cached entry in pTableMeta. */
  taosHashGetCloneExt(pCatalog->tableCache.cache, tbFullName, strlen(tbFullName), NULL, (void **)&pTableMeta, &sz);

  if (NULL == pTableMeta) {
    return TSDB_CODE_SUCCESS;
  }

  *tbType = pTableMeta->tableType;

  /* Only the type is needed; release the clone so it is not leaked on every hit. */
  tfree(pTableMeta);

  return TSDB_CODE_SUCCESS;
}
void ctgGenEpSet(SEpSet *epSet, SVgroupInfo *vgroupInfo) {
epSet->inUse = 0;
epSet->numOfEps = vgroupInfo->numOfEps;
@ -178,7 +153,14 @@ void ctgGenEpSet(SEpSet *epSet, SVgroupInfo *vgroupInfo) {
}
}
int32_t ctgGetTableMetaFromMnodeImpl(struct SCatalog* pCatalog, void *pRpc, const SEpSet* pMgmtEps, char* tbFullName, STableMetaOutput* output) {
int32_t ctgGetTableMetaFromMnode(struct SCatalog* pCatalog, void *pRpc, const SEpSet* pMgmtEps, const SName* pTableName, STableMetaOutput* output) {
if (NULL == pCatalog || NULL == pRpc || NULL == pMgmtEps || NULL == pTableName || NULL == output) {
CTG_ERR_RET(TSDB_CODE_CTG_INVALID_INPUT);
}
char tbFullName[TSDB_TABLE_FNAME_LEN];
tNameExtractFullName(pTableName, tbFullName);
SBuildTableMetaInput bInput = {.vgId = 0, .dbName = NULL, .tableFullName = tbFullName};
char *msg = NULL;
SEpSet *pVnodeEpSet = NULL;
@ -197,12 +179,6 @@ int32_t ctgGetTableMetaFromMnodeImpl(struct SCatalog* pCatalog, void *pRpc, cons
rpcSendRecv(pRpc, (SEpSet*)pMgmtEps, &rpcMsg, &rpcRsp);
if (TSDB_CODE_SUCCESS != rpcRsp.code) {
if (CTG_TABLE_NOT_EXIST(rpcRsp.code)) {
output->metaNum = 0;
ctgDebug("tbmeta:%s not exist in mnode", tbFullName);
return TSDB_CODE_SUCCESS;
}
ctgError("error rsp for table meta, code:%x", rpcRsp.code);
CTG_ERR_RET(rpcRsp.code);
}
@ -212,13 +188,6 @@ int32_t ctgGetTableMetaFromMnodeImpl(struct SCatalog* pCatalog, void *pRpc, cons
return TSDB_CODE_SUCCESS;
}
/*
 * Convenience wrapper: expands pTableName into its full table name and
 * forwards the request to ctgGetTableMetaFromMnodeImpl(), returning its result.
 */
int32_t ctgGetTableMetaFromMnode(struct SCatalog* pCatalog, void *pRpc, const SEpSet* pMgmtEps, const SName* pTableName, STableMetaOutput* output) {
  char fullName[TSDB_TABLE_FNAME_LEN];

  tNameExtractFullName(pTableName, fullName);

  int32_t code = ctgGetTableMetaFromMnodeImpl(pCatalog, pRpc, pMgmtEps, fullName, output);
  return code;
}
int32_t ctgGetTableMetaFromVnode(struct SCatalog* pCatalog, void *pRpc, const SEpSet* pMgmtEps, const SName* pTableName, SVgroupInfo *vgroupInfo, STableMetaOutput* output) {
if (NULL == pCatalog || NULL == pRpc || NULL == pMgmtEps || NULL == pTableName || NULL == vgroupInfo || NULL == output) {
@ -228,7 +197,7 @@ int32_t ctgGetTableMetaFromVnode(struct SCatalog* pCatalog, void *pRpc, const SE
char dbFullName[TSDB_DB_FNAME_LEN];
tNameGetFullDbName(pTableName, dbFullName);
SBuildTableMetaInput bInput = {.vgId = vgroupInfo->vgId, .dbName = dbFullName, .tableFullName = (char *)pTableName->tname};
SBuildTableMetaInput bInput = {.vgId = vgroupInfo->vgId, .dbName = dbFullName, .tableFullName = pTableName->tname};
char *msg = NULL;
SEpSet *pVnodeEpSet = NULL;
int32_t msgLen = 0;
@ -248,12 +217,6 @@ int32_t ctgGetTableMetaFromVnode(struct SCatalog* pCatalog, void *pRpc, const SE
rpcSendRecv(pRpc, &epSet, &rpcMsg, &rpcRsp);
if (TSDB_CODE_SUCCESS != rpcRsp.code) {
if (CTG_TABLE_NOT_EXIST(rpcRsp.code)) {
output->metaNum = 0;
ctgDebug("tbmeta:%s not exist in vnode", pTableName->tname);
return TSDB_CODE_SUCCESS;
}
ctgError("error rsp for table meta, code:%x", rpcRsp.code);
CTG_ERR_RET(rpcRsp.code);
}
@ -347,39 +310,40 @@ int32_t ctgGetVgInfoFromHashValue(SDBVgroupInfo *dbInfo, const SName *pTableName
}
if (NULL == vgInfo) {
ctgError("no hash range found for hashvalue[%u]", hashValue);
ctgError("no hash range found for hash value [%u], numOfVgId:%d", hashValue, taosHashGetSize(dbInfo->vgInfo));
void *pIter1 = taosHashIterate(dbInfo->vgInfo, NULL);
while (pIter1) {
vgInfo = pIter1;
ctgError("valid range:[%u, %u], vgId:%d", vgInfo->hashBegin, vgInfo->hashEnd, vgInfo->vgId);
pIter1 = taosHashIterate(dbInfo->vgInfo, pIter1);
}
CTG_ERR_JRET(TSDB_CODE_CTG_INTERNAL_ERROR);
}
*pVgroup = *vgInfo;
_return:
CTG_RET(TSDB_CODE_SUCCESS);
}
int32_t ctgGetTableMetaImpl(struct SCatalog* pCatalog, void *pRpc, const SEpSet* pMgmtEps, const SName* pTableName, bool forceUpdate, STableMeta** pTableMeta, int32_t isSTable) {
int32_t ctgGetTableMetaImpl(struct SCatalog* pCatalog, void *pRpc, const SEpSet* pMgmtEps, const SName* pTableName, bool forceUpdate, STableMeta** pTableMeta) {
if (NULL == pCatalog || NULL == pRpc || NULL == pMgmtEps || NULL == pTableName || NULL == pTableMeta) {
CTG_ERR_RET(TSDB_CODE_CTG_INVALID_INPUT);
}
int32_t exist = 0;
if (!forceUpdate) {
if (!forceUpdate) {
CTG_ERR_RET(ctgGetTableMetaFromCache(pCatalog, pTableName, pTableMeta, &exist));
if (exist && CTG_TBTYPE_MATCH(isSTable, (*pTableMeta)->tableType)) {
if (exist) {
return TSDB_CODE_SUCCESS;
}
} else if (CTG_IS_UNKNOWN_STABLE(isSTable)) {
int32_t tbType = 0;
CTG_ERR_RET(ctgGetTableTypeFromCache(pCatalog, pTableName, &tbType));
CTG_SET_STABLE(isSTable, tbType);
}
CTG_ERR_RET(ctgRenewTableMetaImpl(pCatalog, pRpc, pMgmtEps, pTableName, isSTable));
CTG_ERR_RET(catalogRenewTableMeta(pCatalog, pRpc, pMgmtEps, pTableName));
CTG_ERR_RET(ctgGetTableMetaFromCache(pCatalog, pTableName, pTableMeta, &exist));
@ -406,27 +370,19 @@ int32_t ctgUpdateTableMetaCache(struct SCatalog *pCatalog, STableMetaOutput *out
}
if (NULL == pCatalog->tableCache.cache) {
SHashObj *cache = taosHashInit(ctgMgmt.cfg.maxTblCacheNum, taosGetDefaultHashFunction(TSDB_DATA_TYPE_BINARY), true, HASH_ENTRY_LOCK);
if (NULL == cache) {
pCatalog->tableCache.cache = taosHashInit(ctgMgmt.cfg.maxTblCacheNum, taosGetDefaultHashFunction(TSDB_DATA_TYPE_BINARY), true, HASH_ENTRY_LOCK);
if (NULL == pCatalog->tableCache.cache) {
ctgError("init hash[%d] for tablemeta cache failed", ctgMgmt.cfg.maxTblCacheNum);
CTG_ERR_RET(TSDB_CODE_CTG_MEM_ERROR);
}
if (NULL != atomic_val_compare_exchange_ptr(&pCatalog->tableCache.cache, NULL, cache)) {
taosHashCleanup(cache);
}
}
if (NULL == pCatalog->tableCache.stableCache) {
SHashObj *cache = taosHashInit(ctgMgmt.cfg.maxTblCacheNum, taosGetDefaultHashFunction(TSDB_DATA_TYPE_UBIGINT), true, HASH_ENTRY_LOCK);
if (NULL == cache) {
pCatalog->tableCache.stableCache = taosHashInit(ctgMgmt.cfg.maxTblCacheNum, taosGetDefaultHashFunction(TSDB_DATA_TYPE_UBIGINT), true, HASH_ENTRY_LOCK);
if (NULL == pCatalog->tableCache.stableCache) {
ctgError("init hash[%d] for stablemeta cache failed", ctgMgmt.cfg.maxTblCacheNum);
CTG_ERR_RET(TSDB_CODE_CTG_MEM_ERROR);
}
if (NULL != atomic_val_compare_exchange_ptr(&pCatalog->tableCache.stableCache, NULL, cache)) {
taosHashCleanup(cache);
}
}
if (output->metaNum == 2) {
@ -531,50 +487,6 @@ int32_t ctgValidateAndRemoveDb(struct SCatalog* pCatalog, const char* dbName, SD
return TSDB_CODE_SUCCESS;
}
/*
 * Refresh the cached meta data of one table.
 *
 * Fetches the table meta from the mnode and/or the vnode that owns the table's
 * hash range (the order depends on isSTable) and stores the result through
 * ctgUpdateTableMetaCache().
 *
 * pCatalog     catalog handle
 * pTransporter rpc object used for the requests
 * pMgmtEps     mnode end points
 * pTableName   table to refresh
 * isSTable     hint tested with CTG_IS_STABLE(); when it holds, the mnode is asked first
 *
 * Returns TSDB_CODE_CTG_INVALID_INPUT for NULL arguments, otherwise the value
 * left in `code` (CTG_ERR_JRET is defined elsewhere; presumably it records the
 * error in `code` and jumps to _return).
 */
int32_t ctgRenewTableMetaImpl(struct SCatalog* pCatalog, void *pTransporter, const SEpSet* pMgmtEps, const SName* pTableName, int32_t isSTable) {
if (NULL == pCatalog || NULL == pTransporter || NULL == pMgmtEps || NULL == pTableName) {
CTG_ERR_RET(TSDB_CODE_CTG_INVALID_INPUT);
}
SVgroupInfo vgroupInfo = {0};
int32_t code = 0;
// Resolve the vgroup responsible for this table before anything is allocated,
// so a failure here can return directly without cleanup.
CTG_ERR_RET(catalogGetTableHashVgroup(pCatalog, pTransporter, pMgmtEps, pTableName, &vgroupInfo));
// voutput holds the vnode answer, moutput the mnode answer; `output` selects
// which of the two is written to the cache.
STableMetaOutput voutput = {0};
STableMetaOutput moutput = {0};
STableMetaOutput *output = &voutput;
if (CTG_IS_STABLE(isSTable)) {
// Hinted as a super table: ask the mnode first and fall back to the vnode
// when the mnode reports no meta (metaNum == 0).
CTG_ERR_JRET(ctgGetTableMetaFromMnode(pCatalog, pTransporter, pMgmtEps, pTableName, &moutput));
if (0 == moutput.metaNum) {
CTG_ERR_JRET(ctgGetTableMetaFromVnode(pCatalog, pTransporter, pMgmtEps, pTableName, &vgroupInfo, &voutput));
} else {
output = &moutput;
}
} else {
// Otherwise ask the vnode first; if the answer turns out to be a super table,
// re-fetch its meta from the mnode using the full name the vnode returned.
CTG_ERR_JRET(ctgGetTableMetaFromVnode(pCatalog, pTransporter, pMgmtEps, pTableName, &vgroupInfo, &voutput));
if (voutput.metaNum > 0 && TSDB_SUPER_TABLE == voutput.tbMeta->tableType) {
CTG_ERR_JRET(ctgGetTableMetaFromMnodeImpl(pCatalog, pTransporter, pMgmtEps, voutput.tbFname, &moutput));
// Replace the vnode meta with the mnode meta and transfer ownership to
// voutput, so the cleanup below frees the buffer only once.
// NOTE(review): if the mnode reports "not exist", moutput.tbMeta may be NULL
// while voutput.metaNum stays > 0 - confirm ctgUpdateTableMetaCache copes.
tfree(voutput.tbMeta);
voutput.tbMeta = moutput.tbMeta;
moutput.tbMeta = NULL;
}
}
CTG_ERR_JRET(ctgUpdateTableMetaCache(pCatalog, output));
_return:
// Both outputs are released here on success and on every CTG_ERR_JRET path.
tfree(voutput.tbMeta);
tfree(moutput.tbMeta);
CTG_RET(code);
}
int32_t catalogInit(SCatalogCfg *cfg) {
if (ctgMgmt.pCluster) {
@ -737,15 +649,11 @@ int32_t catalogUpdateDBVgroup(struct SCatalog* pCatalog, const char* dbName, SDB
}
if (NULL == pCatalog->dbCache.cache) {
SHashObj *cache = taosHashInit(ctgMgmt.cfg.maxDBCacheNum, taosGetDefaultHashFunction(TSDB_DATA_TYPE_BINARY), true, HASH_ENTRY_LOCK);
if (NULL == cache) {
pCatalog->dbCache.cache = taosHashInit(ctgMgmt.cfg.maxDBCacheNum, taosGetDefaultHashFunction(TSDB_DATA_TYPE_BINARY), true, HASH_ENTRY_LOCK);
if (NULL == pCatalog->dbCache.cache) {
ctgError("init hash[%d] for db cache failed", CTG_DEFAULT_CACHE_DB_NUMBER);
CTG_ERR_JRET(TSDB_CODE_CTG_MEM_ERROR);
}
if (NULL != atomic_val_compare_exchange_ptr(&pCatalog->dbCache.cache, NULL, cache)) {
taosHashCleanup(cache);
}
} else {
CTG_ERR_JRET(ctgValidateAndRemoveDb(pCatalog, dbName, dbInfo));
}
@ -770,23 +678,34 @@ _return:
}
int32_t catalogGetTableMeta(struct SCatalog* pCatalog, void *pTransporter, const SEpSet* pMgmtEps, const SName* pTableName, STableMeta** pTableMeta) {
return ctgGetTableMetaImpl(pCatalog, pTransporter, pMgmtEps, pTableName, false, pTableMeta, -1);
return ctgGetTableMetaImpl(pCatalog, pTransporter, pMgmtEps, pTableName, false, pTableMeta);
}
int32_t catalogGetSTableMeta(struct SCatalog* pCatalog, void * pTransporter, const SEpSet* pMgmtEps, const SName* pTableName, STableMeta** pTableMeta) {
return ctgGetTableMetaImpl(pCatalog, pTransporter, pMgmtEps, pTableName, false, pTableMeta, 1);
}
int32_t catalogRenewTableMeta(struct SCatalog* pCatalog, void *pTransporter, const SEpSet* pMgmtEps, const SName* pTableName, int32_t isSTable) {
int32_t catalogRenewTableMeta(struct SCatalog* pCatalog, void *pTransporter, const SEpSet* pMgmtEps, const SName* pTableName) {
if (NULL == pCatalog || NULL == pTransporter || NULL == pMgmtEps || NULL == pTableName) {
CTG_ERR_RET(TSDB_CODE_CTG_INVALID_INPUT);
}
return ctgRenewTableMetaImpl(pCatalog, pTransporter, pMgmtEps, pTableName, isSTable);
SVgroupInfo vgroupInfo = {0};
int32_t code = 0;
CTG_ERR_RET(catalogGetTableHashVgroup(pCatalog, pTransporter, pMgmtEps, pTableName, &vgroupInfo));
STableMetaOutput output = {0};
CTG_ERR_RET(ctgGetTableMetaFromVnode(pCatalog, pTransporter, pMgmtEps, pTableName, &vgroupInfo, &output));
//CTG_ERR_RET(ctgGetTableMetaFromMnode(pCatalog, pRpc, pMgmtEps, pTableName, &output));
CTG_ERR_JRET(ctgUpdateTableMetaCache(pCatalog, &output));
_return:
tfree(output.tbMeta);
CTG_RET(code);
}
int32_t catalogRenewAndGetTableMeta(struct SCatalog* pCatalog, void *pTransporter, const SEpSet* pMgmtEps, const SName* pTableName, STableMeta** pTableMeta, int32_t isSTable) {
return ctgGetTableMetaImpl(pCatalog, pTransporter, pMgmtEps, pTableName, true, pTableMeta, isSTable);
int32_t catalogRenewAndGetTableMeta(struct SCatalog* pCatalog, void *pTransporter, const SEpSet* pMgmtEps, const SName* pTableName, STableMeta** pTableMeta) {
return ctgGetTableMetaImpl(pCatalog, pTransporter, pMgmtEps, pTableName, true, pTableMeta);
}
int32_t catalogGetTableDistVgroup(struct SCatalog* pCatalog, void *pRpc, const SEpSet* pMgmtEps, const SName* pTableName, SArray** pVgroupList) {
@ -861,7 +780,6 @@ int32_t catalogGetTableHashVgroup(struct SCatalog *pCatalog, void *pTransporter,
CTG_ERR_JRET(ctgGetVgInfoFromHashValue(dbInfo, pTableName, pVgroup));
_return:
if (dbInfo) {
CTG_UNLOCK(CTG_READ, &dbInfo->lock);
taosHashRelease(pCatalog->dbCache.cache, dbInfo);

View File

@ -45,7 +45,7 @@ void ctgTestSetPrepareSTableMeta();
bool ctgTestStop = false;
bool ctgTestEnableSleep = false;
bool ctgTestDeadLoop = false;
bool ctgTestDeadLoop = true;
int32_t ctgTestCurrentVgVersion = 0;
int32_t ctgTestVgVersion = 1;
@ -600,6 +600,7 @@ void *ctgTestSetCtableMetaThread(void *param) {
}
#if 0
TEST(tableMeta, normalTable) {
struct SCatalog* pCtg = NULL;
@ -767,7 +768,7 @@ TEST(tableMeta, superTableCase) {
ASSERT_EQ(tableMeta->tableInfo.rowSize, 12);
tableMeta = NULL;
code = catalogRenewAndGetTableMeta(pCtg, mockPointer, (const SEpSet *)mockPointer, &n, &tableMeta, 0);
code = catalogRenewAndGetTableMeta(pCtg, mockPointer, (const SEpSet *)mockPointer, &n, &tableMeta);
ASSERT_EQ(code, 0);
ASSERT_EQ(tableMeta->vgId, 9);
ASSERT_EQ(tableMeta->tableType, TSDB_CHILD_TABLE);
@ -998,6 +999,8 @@ TEST(multiThread, getSetDbVgroupCase) {
catalogDestroy();
}
#endif
TEST(multiThread, ctableMeta) {
struct SCatalog* pCtg = NULL;

View File

@ -115,7 +115,7 @@ typedef struct TFileCacheKey {
int32_t nColName;
} ICacheKey;
int indexFlushCacheTFile(SIndex* sIdx, void*);
int indexFlushCacheToTFile(SIndex* sIdx, void*);
int32_t indexSerialCacheKey(ICacheKey* key, char* buf);

View File

@ -21,9 +21,8 @@
#include "tskiplist.h"
// ----------------- key structure in skiplist ---------------------
/* A data row, the format is like below:
* content: |<--totalLen-->|<-- value len--->|<-- value -->|<--uid -->|<--version--->|<-- itermType -->|
* len : |<--int32_t -->|<--- int32_t --->|<--valuelen->|<--uint64_t->|<-- int32_t-->|<-- int8_t --->|
/* A data row, the format is like below
* content: |<---colVal---->|<-- version--->|<-- uid--->|<-- colType --->|<--operaType--->|
*/
#ifdef __cplusplus
@ -40,11 +39,12 @@ typedef struct IndexCache {
SIndex* index;
char* colName;
int32_t version;
int32_t nTerm;
int64_t occupiedMem;
int8_t type;
uint64_t suid;
pthread_mutex_t mtx;
pthread_cond_t finished;
} IndexCache;
#define CACHE_VERSION(cache) atomic_load_32(&cache->version)

View File

@ -384,7 +384,6 @@ static void indexMergeSameKey(SArray* result, TFileValue* tv) {
}
} else {
taosArrayPush(result, &tv);
// indexError("merge colVal: %s", tv->colVal);
}
}
static void indexDestroyTempResult(SArray* result) {
@ -395,10 +394,12 @@ static void indexDestroyTempResult(SArray* result) {
}
taosArrayDestroy(result);
}
int indexFlushCacheTFile(SIndex* sIdx, void* cache) {
int indexFlushCacheToTFile(SIndex* sIdx, void* cache) {
if (sIdx == NULL) { return -1; }
indexInfo("suid %" PRIu64 " merge cache into tindex", sIdx->suid);
int64_t st = taosGetTimestampUs();
IndexCache* pCache = (IndexCache*)cache;
TFileReader* pReader = tfileGetReaderByCol(sIdx->tindex, pCache->suid, pCache->colName);
if (pReader == NULL) { indexWarn("empty tfile reader found"); }
@ -458,6 +459,7 @@ int indexFlushCacheTFile(SIndex* sIdx, void* cache) {
}
int ret = indexGenTFile(sIdx, pCache, result);
indexDestroyTempResult(result);
indexCacheDestroyImm(pCache);
indexCacheIteratorDestroy(cacheIter);
@ -465,7 +467,14 @@ int indexFlushCacheTFile(SIndex* sIdx, void* cache) {
tfileReaderUnRef(pReader);
indexCacheUnRef(pCache);
return 0;
int64_t cost = taosGetTimestampUs() - st;
if (ret != 0) {
indexError("failed to merge, time cost: %" PRId64 "ms", cost / 1000);
} else {
indexInfo("success to merge , time cost: %" PRId64 "ms", cost / 1000);
}
return ret;
}
void iterateValueDestroy(IterateValue* value, bool destroy) {
if (destroy) {
@ -506,7 +515,10 @@ static int indexGenTFile(SIndex* sIdx, IndexCache* cache, SArray* batch) {
pthread_mutex_unlock(&sIdx->mtx);
return ret;
END:
tfileWriterClose(tw);
if (tw != NULL) {
writerCtxDestroy(tw->ctx, true);
free(tw);
}
return -1;
}

View File

@ -21,13 +21,15 @@
#define MAX_INDEX_KEY_LEN 256 // test only, change later
#define MEM_TERM_LIMIT 10 * 10000
#define MEM_THRESHOLD 1024 * 1024 * 2
#define MEM_ESTIMATE_RADIO 1.5
static void indexMemRef(MemTable* tbl);
static void indexMemUnRef(MemTable* tbl);
static void cacheTermDestroy(CacheTerm* ct);
static char* getIndexKey(const void* pData);
static int32_t compareKey(const void* l, const void* r);
static void indexCacheTermDestroy(CacheTerm* ct);
static int32_t indexCacheTermCompare(const void* l, const void* r);
static char* indexCacheTermGet(const void* pData);
static MemTable* indexInternalCacheCreate(int8_t type);
@ -43,14 +45,16 @@ IndexCache* indexCacheCreate(SIndex* idx, uint64_t suid, const char* colName, in
return NULL;
};
cache->mem = indexInternalCacheCreate(type);
cache->colName = calloc(1, strlen(colName) + 1);
memcpy(cache->colName, colName, strlen(colName));
cache->colName = tstrdup(colName);
cache->type = type;
cache->index = idx;
cache->version = 0;
cache->suid = suid;
cache->occupiedMem = 0;
pthread_mutex_init(&cache->mtx, NULL);
pthread_cond_init(&cache->finished, NULL);
indexCacheRef(cache);
return cache;
}
@ -121,6 +125,7 @@ void indexCacheDestroyImm(IndexCache* cache) {
pthread_mutex_lock(&cache->mtx);
tbl = cache->imm;
cache->imm = NULL; // or throw into bg thread
pthread_cond_broadcast(&cache->finished);
pthread_mutex_unlock(&cache->mtx);
indexMemUnRef(tbl);
@ -133,6 +138,9 @@ void indexCacheDestroy(void* cache) {
indexMemUnRef(pCache->imm);
free(pCache->colName);
pthread_mutex_destroy(&pCache->mtx);
pthread_cond_destroy(&pCache->finished);
free(pCache);
}
@ -173,19 +181,19 @@ int indexCacheSchedToMerge(IndexCache* pCache) {
}
static void indexCacheMakeRoomForWrite(IndexCache* cache) {
while (true) {
if (cache->nTerm < MEM_TERM_LIMIT) {
cache->nTerm += 1;
if (cache->occupiedMem * MEM_ESTIMATE_RADIO < MEM_THRESHOLD) {
break;
} else if (cache->imm != NULL) {
// TODO: wake up by condition variable
pthread_mutex_unlock(&cache->mtx);
taosMsleep(50);
pthread_mutex_lock(&cache->mtx);
pthread_cond_wait(&cache->finished, &cache->mtx);
// pthread_mutex_unlock(&cache->mtx);
// taosMsleep(50);
// pthread_mutex_lock(&cache->mtx);
} else {
indexCacheRef(cache);
cache->imm = cache->mem;
cache->mem = indexInternalCacheCreate(cache->type);
cache->nTerm = 1;
cache->occupiedMem = 0;
// sched to merge
// unref cache in bgwork
indexCacheSchedToMerge(cache);
@ -211,8 +219,9 @@ int indexCachePut(void* cache, SIndexTerm* term, uint64_t uid) {
ct->operaType = term->operType;
// ugly code, refactor later
int64_t estimate = sizeof(ct) + strlen(ct->colVal);
pthread_mutex_lock(&pCache->mtx);
pCache->occupiedMem += estimate;
indexCacheMakeRoomForWrite(pCache);
MemTable* tbl = pCache->mem;
indexMemRef(tbl);
@ -232,7 +241,7 @@ int indexCacheDel(void* cache, const char* fieldValue, int32_t fvlen, uint64_t u
static int indexQueryMem(MemTable* mem, CacheTerm* ct, EIndexQueryType qtype, SArray* result, STermValueType* s) {
if (mem == NULL) { return 0; }
char* key = getIndexKey(ct);
char* key = indexCacheTermGet(ct);
SSkipListIterator* iter = tSkipListCreateIterFromVal(mem->mem, key, TSDB_DATA_TYPE_BINARY, TSDB_ORDER_ASC);
while (tSkipListIterNext(iter)) {
@ -310,17 +319,16 @@ void indexMemUnRef(MemTable* tbl) {
}
}
static void cacheTermDestroy(CacheTerm* ct) {
static void indexCacheTermDestroy(CacheTerm* ct) {
if (ct == NULL) { return; }
free(ct->colVal);
free(ct);
}
static char* getIndexKey(const void* pData) {
static char* indexCacheTermGet(const void* pData) {
CacheTerm* p = (CacheTerm*)pData;
return (char*)p;
}
static int32_t compareKey(const void* l, const void* r) {
static int32_t indexCacheTermCompare(const void* l, const void* r) {
CacheTerm* lt = (CacheTerm*)l;
CacheTerm* rt = (CacheTerm*)r;
@ -333,8 +341,9 @@ static int32_t compareKey(const void* l, const void* r) {
static MemTable* indexInternalCacheCreate(int8_t type) {
MemTable* tbl = calloc(1, sizeof(MemTable));
indexMemRef(tbl);
if (type == TSDB_DATA_TYPE_BINARY) {
tbl->mem = tSkipListCreate(MAX_SKIP_LIST_LEVEL, type, MAX_INDEX_KEY_LEN, compareKey, SL_ALLOW_DUP_KEY, getIndexKey);
if (type == TSDB_DATA_TYPE_BINARY || type == TSDB_DATA_TYPE_NCHAR) {
tbl->mem = tSkipListCreate(MAX_SKIP_LIST_LEVEL, type, MAX_INDEX_KEY_LEN, indexCacheTermCompare, SL_ALLOW_DUP_KEY,
indexCacheTermGet);
}
return tbl;
}
@ -342,7 +351,7 @@ static MemTable* indexInternalCacheCreate(int8_t type) {
static void doMergeWork(SSchedMsg* msg) {
IndexCache* pCache = msg->ahandle;
SIndex* sidx = (SIndex*)pCache->index;
indexFlushCacheTFile(sidx, pCache);
indexFlushCacheToTFile(sidx, pCache);
}
static bool indexCacheIteratorNext(Iterate* itera) {
SSkipListIterator* iter = itera->iter;
@ -364,4 +373,7 @@ static bool indexCacheIteratorNext(Iterate* itera) {
return next;
}
static IterateValue* indexCacheIteratorGetValue(Iterate* iter) { return &iter->val; }
static IterateValue* indexCacheIteratorGetValue(Iterate* iter) {
// opt later
return &iter->val;
}

View File

@ -18,8 +18,6 @@
#include "tutil.h"
static int writeCtxDoWrite(WriterCtx* ctx, uint8_t* buf, int len) {
// if (ctx->offset + len > ctx->limit) { return -1; }
if (ctx->type == TFile) {
assert(len == tfWrite(ctx->file.fd, buf, len));
} else {
@ -125,6 +123,7 @@ void writerCtxDestroy(WriterCtx* ctx, bool remove) {
if (ctx->type == TMemory) {
free(ctx->mem.buf);
} else {
ctx->flush(ctx);
tfClose(ctx->file.fd);
if (ctx->file.readOnly) {
#ifdef USE_MMAP

View File

@ -346,9 +346,6 @@ int indexTFilePut(void* tfile, SIndexTerm* term, uint64_t uid) {
}
static bool tfileIteratorNext(Iterate* iiter) {
IterateValue* iv = &iiter->val;
if (iv->colVal != NULL && iv->val != NULL) {
// indexError("value in fst: colVal: %s, size: %d", iv->colVal, (int)taosArrayGetSize(iv->val));
}
iterateValueDestroy(iv, false);
char* colVal = NULL;

View File

@ -48,7 +48,7 @@ class FstWriter {
class FstReadMemory {
public:
FstReadMemory(size_t size, const std::string& fileName = fileName) {
FstReadMemory(size_t size, const std::string& fileName = "/tmp/tindex.tindex") {
tfInit();
_wc = writerCtxCreate(TFile, fileName.c_str(), true, 64 * 1024);
_w = fstCountingWriterCreate(_wc);
@ -307,7 +307,7 @@ void validateTFile(char* arg) {
tfCleanup();
}
int main(int argc, char* argv[]) {
// tool to check all kind of fst test
// tool to check all kind of fst test
// if (argc > 1) { validateTFile(argv[1]); }
// checkFstCheckIterator();
// checkFstLongTerm();

View File

@ -28,7 +28,7 @@
#include "tutil.h"
using namespace std;
#define NUM_OF_THREAD 5
#define NUM_OF_THREAD 10
class DebugInfo {
public:
@ -882,8 +882,8 @@ static void single_write_and_search(IndexObj* idx) {
static void multi_write_and_search(IndexObj* idx) {
int target = idx->SearchOne("tag1", "Hello");
target = idx->SearchOne("tag2", "Test");
idx->WriteMultiMillonData("tag1", "Hello", 100 * 10000);
idx->WriteMultiMillonData("tag2", "Test", 100 * 10000);
idx->WriteMultiMillonData("tag1", "hello world test", 100 * 10000);
idx->WriteMultiMillonData("tag2", "world test nothing", 100 * 10000);
}
TEST_F(IndexEnv2, testIndex_serarch_cache_and_tfile) {
std::string path = "/tmp/cache_and_tfile";

View File

@ -339,7 +339,6 @@ static int32_t doParseSerializeTagValue(SSchema* pTagSchema, int32_t numOfInputT
code = parseValueToken(&endPtr, pItem, pSchema, tsPrecision, tmpTokenBuf, KvRowAppend, &param, pMsgBuf);
if (code != TSDB_CODE_SUCCESS) {
tdDestroyKVRowBuilder(pKvRowBuilder);
return buildInvalidOperationMsg(pMsgBuf, msg1);
}
}
@ -393,6 +392,9 @@ static int32_t doCheckAndBuildCreateCTableReq(SCreateTableSql* pCreateTable, SPa
const char* msg3 = "tag value too long";
const char* msg4 = "illegal value or data overflow";
int32_t code = 0;
STableMeta* pSuperTableMeta = NULL;
SHashObj* pVgroupHashmap = taosHashInit(4, taosGetDefaultHashFunction(TSDB_DATA_TYPE_INT), false, HASH_NO_LOCK);
// super table name, create table by using dst
@ -401,29 +403,30 @@ static int32_t doCheckAndBuildCreateCTableReq(SCreateTableSql* pCreateTable, SPa
SCreatedTableInfo* pCreateTableInfo = taosArrayGet(pCreateTable->childTableInfo, j);
SToken* pSTableNameToken = &pCreateTableInfo->stbName;
int32_t code = parserValidateNameToken(pSTableNameToken);
code = parserValidateNameToken(pSTableNameToken);
if (code != TSDB_CODE_SUCCESS) {
return buildInvalidOperationMsg(pMsgBuf, msg1);
code = buildInvalidOperationMsg(pMsgBuf, msg1);
goto _error;
}
SName name = {0};
code = createSName(&name, pSTableNameToken, pCtx, pMsgBuf);
if (code != TSDB_CODE_SUCCESS) {
return code;
goto _error;
}
SKVRowBuilder kvRowBuilder = {0};
if (tdInitKVRowBuilder(&kvRowBuilder) < 0) {
return TSDB_CODE_TSC_OUT_OF_MEMORY;
code = TSDB_CODE_TSC_OUT_OF_MEMORY;
goto _error;
}
SArray* pValList = pCreateTableInfo->pTagVals;
size_t numOfInputTag = taosArrayGetSize(pValList);
STableMeta* pSuperTableMeta = NULL;
code = catalogGetTableMeta(pCtx->pCatalog, pCtx->pTransporter, &pCtx->mgmtEpSet, &name, &pSuperTableMeta);
if (code != TSDB_CODE_SUCCESS) {
return code;
goto _error;
}
assert(pSuperTableMeta != NULL);
@ -442,8 +445,8 @@ static int32_t doCheckAndBuildCreateCTableReq(SCreateTableSql* pCreateTable, SPa
if (numOfInputTag != numOfBoundTags || schemaSize < numOfInputTag) {
tdDestroyKVRowBuilder(&kvRowBuilder);
tfree(pSuperTableMeta);
return buildInvalidOperationMsg(pMsgBuf, msg2);
code = buildInvalidOperationMsg(pMsgBuf, msg2);
goto _error;
}
bool findColumnIndex = false;
@ -475,8 +478,8 @@ static int32_t doCheckAndBuildCreateCTableReq(SCreateTableSql* pCreateTable, SPa
if (pSchema->type == TSDB_DATA_TYPE_BINARY || pSchema->type == TSDB_DATA_TYPE_NCHAR) {
if (pItem->pVar.nLen > pSchema->bytes) {
tdDestroyKVRowBuilder(&kvRowBuilder);
tfree(pSuperTableMeta);
return buildInvalidOperationMsg(pMsgBuf, msg3);
code = buildInvalidOperationMsg(pMsgBuf, msg3);
goto _error;
}
} else if (pSchema->type == TSDB_DATA_TYPE_TIMESTAMP) {
if (pItem->pVar.nType == TSDB_DATA_TYPE_BINARY) {
@ -492,19 +495,19 @@ static int32_t doCheckAndBuildCreateCTableReq(SCreateTableSql* pCreateTable, SPa
code = taosVariantDump(&(pItem->pVar), tagVal, pSchema->type, true);
// check again after the convert since it may be converted from binary to nchar.
if (pSchema->type == TSDB_DATA_TYPE_BINARY || pSchema->type == TSDB_DATA_TYPE_NCHAR) {
if (IS_VAR_DATA_TYPE(pSchema->type)) {
int16_t len = varDataTLen(tagVal);
if (len > pSchema->bytes) {
tdDestroyKVRowBuilder(&kvRowBuilder);
tfree(pSuperTableMeta);
return buildInvalidOperationMsg(pMsgBuf, msg3);
code = buildInvalidOperationMsg(pMsgBuf, msg3);
goto _error;
}
}
if (code != TSDB_CODE_SUCCESS) {
tdDestroyKVRowBuilder(&kvRowBuilder);
tfree(pSuperTableMeta);
return buildInvalidOperationMsg(pMsgBuf, msg4);
code = buildInvalidOperationMsg(pMsgBuf, msg4);
goto _error;
}
tdAddColToKVRow(&kvRowBuilder, pSchema->colId, pSchema->type, tagVal);
@ -522,23 +525,22 @@ static int32_t doCheckAndBuildCreateCTableReq(SCreateTableSql* pCreateTable, SPa
} else {
if (schemaSize != numOfInputTag) {
tdDestroyKVRowBuilder(&kvRowBuilder);
tfree(pSuperTableMeta);
return buildInvalidOperationMsg(pMsgBuf, msg2);
code = buildInvalidOperationMsg(pMsgBuf, msg2);
goto _error;
}
code = doParseSerializeTagValue(pTagSchema, numOfInputTag, &kvRowBuilder, pValList, tinfo.precision, pMsgBuf);
if (code != TSDB_CODE_SUCCESS) {
tdDestroyKVRowBuilder(&kvRowBuilder);
tfree(pSuperTableMeta);
return code;
goto _error;
}
}
SKVRow row = tdGetKVRowFromBuilder(&kvRowBuilder);
tdDestroyKVRowBuilder(&kvRowBuilder);
if (row == NULL) {
tfree(pSuperTableMeta);
return TSDB_CODE_QRY_OUT_OF_MEMORY;
code = TSDB_CODE_QRY_OUT_OF_MEMORY;
goto _error;
}
tdSortKVRowByColIdx(row);
@ -546,22 +548,34 @@ static int32_t doCheckAndBuildCreateCTableReq(SCreateTableSql* pCreateTable, SPa
SName tableName = {0};
code = createSName(&tableName, &pCreateTableInfo->name, pCtx, pMsgBuf);
if (code != TSDB_CODE_SUCCESS) {
tfree(pSuperTableMeta);
return code;
goto _error;
}
// Find an appropriate vgroup to accommodate this table, according to the table name
SVgroupInfo info = {0};
catalogGetTableHashVgroup(pCtx->pCatalog, pCtx->pTransporter, &pCtx->mgmtEpSet, &tableName, &info);
code = catalogGetTableHashVgroup(pCtx->pCatalog, pCtx->pTransporter, &pCtx->mgmtEpSet, &tableName, &info);
if (code != TSDB_CODE_SUCCESS) {
goto _error;
}
addCreateTbReqIntoVgroup(pVgroupHashmap, &tableName, row, pSuperTableMeta->uid, &info);
tfree(pSuperTableMeta);
}
*pBufArray = doSerializeVgroupCreateTableInfo(pVgroupHashmap);
if (*pBufArray == NULL) {
code = terrno;
goto _error;
}
taosHashCleanup(pVgroupHashmap);
return TSDB_CODE_SUCCESS;
_error:
taosHashCleanup(pVgroupHashmap);
tfree(pSuperTableMeta);
terrno = code;
return code;
}
static int32_t serializeVgroupTablesBatchImpl(SVgroupTablesBatch* pTbBatch, SArray* pBufArray) {
@ -612,11 +626,12 @@ static int32_t doBuildSingleTableBatchReq(SName* pTableName, SArray* pColumns, S
int32_t doCheckAndBuildCreateTableReq(SCreateTableSql* pCreateTable, SParseBasicCtx* pCtx, SMsgBuf* pMsgBuf, char** pOutput, int32_t* len) {
SArray* pBufArray = NULL;
int32_t code = 0;
// it is a sql statement to create a normal table
if (pCreateTable->childTableInfo == NULL) {
assert(taosArrayGetSize(pCreateTable->colInfo.pColumns) > 0 && pCreateTable->colInfo.pTagColumns == NULL);
int32_t code = doCheckForCreateTable(pCreateTable, pMsgBuf);
code = doCheckForCreateTable(pCreateTable, pMsgBuf);
if (code != TSDB_CODE_SUCCESS) {
return code;
}
@ -645,7 +660,10 @@ int32_t doCheckAndBuildCreateTableReq(SCreateTableSql* pCreateTable, SParseBasic
destroyCreateTbReqBatch(&tbatch);
} else { // it is a child table, created according to a super table
doCheckAndBuildCreateCTableReq(pCreateTable, pCtx, pMsgBuf, &pBufArray);
code = doCheckAndBuildCreateCTableReq(pCreateTable, pCtx, pMsgBuf, &pBufArray);
if (code != 0) {
return code;
}
}
SVnodeModifOpStmtInfo* pStmtInfo = calloc(1, sizeof(SVnodeModifOpStmtInfo));

View File

@ -67,8 +67,8 @@ typedef struct SSchTask {
int32_t msgLen; // msg length
int8_t status; // task status
SQueryNodeAddr execAddr; // task actual executed node address
int8_t condidateIdx; // current try condidation index
SArray *condidateAddrs; // condidate node addresses, element is SQueryNodeAddr
int8_t candidateIdx; // index of the candidate currently being tried
SArray *candidateAddrs; // candidate node addresses, element is SQueryNodeAddr
SQueryProfileSummary summary; // task execution summary
int32_t childReady; // child task ready number
SArray *children; // the datasource tasks,from which to fetch the result, element is SQueryTask*
@ -82,12 +82,11 @@ typedef struct SSchJobAttr {
} SSchJobAttr;
typedef struct SSchJob {
uint64_t queryId;
int32_t levelNum;
int32_t levelIdx;
int8_t status;
SSchJobAttr attr;
SQueryProfileSummary summary;
uint64_t queryId;
int32_t levelNum;
int32_t levelIdx;
int8_t status;
SSchJobAttr attr;
SEpSet dataSrcEps;
SEpAddr resEp;
void *transport;
@ -95,18 +94,20 @@ typedef struct SSchJob {
tsem_t rspSem;
int32_t userFetch;
int32_t remoteFetch;
SSchTask *fetchTask;
int32_t errCode;
void *res;
int32_t resNumOfRows;
SHashObj *execTasks; // executing tasks, key:taskid, value:SQueryTask*
SHashObj *succTasks; // succeed tasks, key:taskid, value:SQueryTask*
SHashObj *failTasks; // failed tasks, key:taskid, value:SQueryTask*
SArray *levels; // Element is SQueryLevel, starting from 0.
SArray *subPlans; // Element is SArray*, and nested element is SSubplan. The execution level of subplan, starting from 0.
SHashObj *execTasks; // executing tasks, key:taskid, value:SQueryTask*
SHashObj *succTasks; // succeed tasks, key:taskid, value:SQueryTask*
SHashObj *failTasks; // failed tasks, key:taskid, value:SQueryTask*
SArray *levels; // Element is SQueryLevel, starting from 0. SArray<SSchLevel>
SArray *subPlans; // Element is SArray*, and nested element is SSubplan. The execution level of subplan, starting from 0. SArray<void*>
SQueryProfileSummary summary;
} SSchJob;
#define SCH_HAS_QNODE_IN_CLUSTER(type) (false) //TODO CLUSTER TYPE
@ -114,9 +115,8 @@ typedef struct SSchJob {
#define SCH_IS_DATA_SRC_TASK(task) ((task)->plan->type == QUERY_TYPE_SCAN)
#define SCH_TASK_NEED_WAIT_ALL(task) ((task)->plan->type == QUERY_TYPE_MODIFY)
#define SCH_JOB_ELOG(param, ...) qError("QID:% "PRIx64 param, job->queryId, __VA_ARGS__)
#define SCH_TASK_ELOG(param, ...) qError("QID:%"PRIx64",TID:% "PRIx64 param, job->queryId, task->taskId, __VA_ARGS__)
#define SCH_TASK_DLOG(param, ...) qDebug("QID:%"PRIx64",TID:% "PRIx64 param, job->queryId, task->taskId, __VA_ARGS__)
#define SCH_JOB_ERR_LOG(param, ...) qError("QID:%"PRIx64 param, job->queryId, __VA_ARGS__)
#define SCH_TASK_ERR_LOG(param, ...) qError("QID:%"PRIx64",TID:%"PRIx64 param, job->queryId, task->taskId, __VA_ARGS__)
#define SCH_ERR_RET(c) do { int32_t _code = c; if (_code != TSDB_CODE_SUCCESS) { terrno = _code; return _code; } } while (0)
#define SCH_RET(c) do { int32_t _code = c; if (_code != TSDB_CODE_SUCCESS) { terrno = _code; } return _code; } while (0)

View File

@ -18,8 +18,7 @@
#include "query.h"
#include "catalog.h"
SSchedulerMgmt schMgmt = {0};
static SSchedulerMgmt schMgmt = {0};
int32_t schBuildTaskRalation(SSchJob *job, SHashObj *planToTask) {
for (int32_t i = 0; i < job->levelNum; ++i) {
@ -93,11 +92,30 @@ int32_t schBuildTaskRalation(SSchJob *job, SHashObj *planToTask) {
return TSDB_CODE_SUCCESS;
}
static SSchTask initTask(SSchJob* pJob, SSubplan* plan, SSchLevel *pLevel) {
SSchTask task = {0};
if (plan->type == QUERY_TYPE_MODIFY) {
pJob->attr.needFetch = false;
} else {
pJob->attr.queryJob = true;
}
int32_t schValidateAndBuildJob(SQueryDag *dag, SSchJob *job) {
task.plan = plan;
task.level = pLevel;
task.status = JOB_TASK_STATUS_NOT_START;
task.taskId = atomic_add_fetch_64(&schMgmt.taskId, 1);
return task;
}
// Destroys the task's candidate address array (pTask->candidateAddrs) via taosArrayDestroy.
// No other member of the task is touched here.
static void cleanupTask(SSchTask* pTask) {
taosArrayDestroy(pTask->candidateAddrs);
}
int32_t schValidateAndBuildJob(SQueryDag *dag, SSchJob *pJob) {
int32_t code = 0;
job->queryId = dag->queryId;
pJob->queryId = dag->queryId;
if (dag->numOfSubplans <= 0) {
qError("invalid subplan num:%d", dag->numOfSubplans);
@ -115,20 +133,20 @@ int32_t schValidateAndBuildJob(SQueryDag *dag, SSchJob *job) {
qError("taosHashInit %d failed", SCHEDULE_DEFAULT_TASK_NUMBER);
SCH_ERR_RET(TSDB_CODE_QRY_OUT_OF_MEMORY);
}
job->levels = taosArrayInit(levelNum, sizeof(SSchLevel));
if (NULL == job->levels) {
pJob->levels = taosArrayInit(levelNum, sizeof(SSchLevel));
if (NULL == pJob->levels) {
qError("taosArrayInit %d failed", levelNum);
SCH_ERR_JRET(TSDB_CODE_QRY_OUT_OF_MEMORY);
}
//??
job->attr.needFetch = true;
job->levelNum = levelNum;
job->levelIdx = levelNum - 1;
pJob->attr.needFetch = true;
job->subPlans = dag->pSubplans;
pJob->levelNum = levelNum;
pJob->levelIdx = levelNum - 1;
pJob->subPlans = dag->pSubplans;
SSchLevel level = {0};
SArray *levelPlans = NULL;
@ -138,12 +156,12 @@ int32_t schValidateAndBuildJob(SQueryDag *dag, SSchJob *job) {
level.status = JOB_TASK_STATUS_NOT_START;
for (int32_t i = 0; i < levelNum; ++i) {
if (NULL == taosArrayPush(job->levels, &level)) {
if (NULL == taosArrayPush(pJob->levels, &level)) {
qError("taosArrayPush failed");
SCH_ERR_JRET(TSDB_CODE_QRY_OUT_OF_MEMORY);
}
pLevel = taosArrayGet(job->levels, i);
pLevel = taosArrayGet(pJob->levels, i);
pLevel->level = i;
levelPlans = taosArrayGetP(dag->pSubplans, i);
@ -168,20 +186,13 @@ int32_t schValidateAndBuildJob(SQueryDag *dag, SSchJob *job) {
for (int32_t n = 0; n < levelPlanNum; ++n) {
SSubplan *plan = taosArrayGetP(levelPlans, n);
SSchTask task = {0};
if (plan->type == QUERY_TYPE_MODIFY) {
job->attr.needFetch = false;
pJob->attr.needFetch = false;
} else {
job->attr.queryJob = true;
pJob->attr.queryJob = true;
}
task.taskId = atomic_add_fetch_64(&schMgmt.taskId, 1);
task.plan = plan;
task.level = pLevel;
task.status = JOB_TASK_STATUS_NOT_START;
SSchTask task = initTask(pJob, plan, pLevel);
void *p = taosArrayPush(pLevel->subTasks, &task);
if (NULL == p) {
qError("taosArrayPush failed");
@ -193,10 +204,9 @@ int32_t schValidateAndBuildJob(SQueryDag *dag, SSchJob *job) {
SCH_ERR_JRET(TSDB_CODE_QRY_OUT_OF_MEMORY);
}
}
}
SCH_ERR_JRET(schBuildTaskRalation(job, planToTask));
SCH_ERR_JRET(schBuildTaskRalation(pJob, planToTask));
if (planToTask) {
taosHashCleanup(planToTask);
@ -216,20 +226,20 @@ _return:
SCH_RET(code);
}
int32_t schSetTaskCondidateAddrs(SSchJob *job, SSchTask *task) {
if (task->condidateAddrs) {
int32_t schSetTaskCandidateAddrs(SSchJob *job, SSchTask *task) {
if (task->candidateAddrs) {
return TSDB_CODE_SUCCESS;
}
task->condidateIdx = 0;
task->condidateAddrs = taosArrayInit(SCH_MAX_CONDIDATE_EP_NUM, sizeof(SQueryNodeAddr));
if (NULL == task->condidateAddrs) {
task->candidateIdx = 0;
task->candidateAddrs = taosArrayInit(SCH_MAX_CONDIDATE_EP_NUM, sizeof(SQueryNodeAddr));
if (NULL == task->candidateAddrs) {
qError("taosArrayInit failed");
SCH_ERR_RET(TSDB_CODE_QRY_OUT_OF_MEMORY);
}
if (task->plan->execNode.numOfEps > 0) {
if (NULL == taosArrayPush(task->condidateAddrs, &task->plan->execNode)) {
if (NULL == taosArrayPush(task->candidateAddrs, &task->plan->execNode)) {
qError("taosArrayPush failed");
SCH_ERR_RET(TSDB_CODE_QRY_OUT_OF_MEMORY);
}
@ -243,7 +253,7 @@ int32_t schSetTaskCondidateAddrs(SSchJob *job, SSchTask *task) {
for (int32_t i = 0; i < nodeNum && addNum < SCH_MAX_CONDIDATE_EP_NUM; ++i) {
SQueryNodeAddr *naddr = taosArrayGet(job->nodeList, i);
if (NULL == taosArrayPush(task->condidateAddrs, &task->plan->execNode)) {
if (NULL == taosArrayPush(task->candidateAddrs, &task->plan->execNode)) {
qError("taosArrayPush failed");
SCH_ERR_RET(TSDB_CODE_QRY_OUT_OF_MEMORY);
}
@ -263,21 +273,20 @@ int32_t schSetTaskCondidateAddrs(SSchJob *job, SSchTask *task) {
return TSDB_CODE_SUCCESS;
}
// Records the task pointer in the job's execTasks hash, keyed by the task's taskId.
// Returns TSDB_CODE_SUCCESS, or TSDB_CODE_QRY_OUT_OF_MEMORY (through SCH_ERR_RET, which
// also sets terrno) when taosHashPut reports a failure.
int32_t schPushTaskToExecList(SSchJob *job, SSchTask *task) {
if (0 != taosHashPut(job->execTasks, &task->taskId, sizeof(task->taskId), &task, POINTER_BYTES)) {
qError("taosHashPut failed");
int32_t schPushTaskToExecList(SSchJob *pJob, SSchTask *pTask) {
if (0 != taosHashPut(pJob->execTasks, &pTask->taskId, sizeof(pTask->taskId), &pTask, POINTER_BYTES)) {
// both ids are logged: each PRIx64 needs its own '%' and its own argument
qError("failed to add new task, taskId:0x%"PRIx64", reqId:0x%"PRIx64", out of memory", pTask->taskId, pJob->queryId);
SCH_ERR_RET(TSDB_CODE_QRY_OUT_OF_MEMORY);
}
SCH_TASK_DLOG("push to %s list", "execTasks");
qDebug("add one task, taskId:0x%"PRIx64", numOfTasks:%d, reqId:0x%"PRIx64, pTask->taskId, taosHashGetSize(pJob->execTasks),
pJob->queryId);
return TSDB_CODE_SUCCESS;
}
int32_t schMoveTaskToSuccList(SSchJob *job, SSchTask *task, bool *moved) {
if (0 != taosHashRemove(job->execTasks, &task->taskId, sizeof(task->taskId))) {
qWarn("remove task[%"PRIx64"] from execTasks failed", task->taskId);
qError("remove task taskId:0x%"PRIx64" from execTasks failed, reqId:0x%"PRIx64, task->taskId, job->queryId);
return TSDB_CODE_SUCCESS;
}
@ -286,8 +295,6 @@ int32_t schMoveTaskToSuccList(SSchJob *job, SSchTask *task, bool *moved) {
SCH_ERR_RET(TSDB_CODE_QRY_OUT_OF_MEMORY);
}
SCH_TASK_DLOG("push to %s list", "succTasks");
*moved = true;
return TSDB_CODE_SUCCESS;
@ -303,8 +310,6 @@ int32_t schMoveTaskToFailList(SSchJob *job, SSchTask *task, bool *moved) {
SCH_ERR_RET(TSDB_CODE_QRY_OUT_OF_MEMORY);
}
SCH_TASK_DLOG("push to %s list", "failTasks");
*moved = true;
return TSDB_CODE_SUCCESS;
@ -342,11 +347,12 @@ _return:
int32_t schProcessOnJobPartialSuccess(SSchJob *job) {
job->status = JOB_TASK_STATUS_PARTIAL_SUCCEED;
bool needFetch = job->userFetch;
if ((!job->attr.needFetch) && job->attr.syncSchedule) {
tsem_post(&job->rspSem);
}
if (job->userFetch) {
if (needFetch) {
SCH_ERR_RET(schFetchFromRemote(job));
}
@ -378,7 +384,7 @@ int32_t schProcessOnTaskSuccess(SSchJob *job, SSchTask *task) {
SCH_ERR_RET(schMoveTaskToSuccList(job, task, &moved));
if (!moved) {
SCH_TASK_ELOG("task may already moved, status:%d", task->status);
SCH_TASK_ERR_LOG(" task may already moved, status:%d", task->status);
return TSDB_CODE_SUCCESS;
}
@ -416,7 +422,6 @@ int32_t schProcessOnTaskSuccess(SSchJob *job, SSchTask *task) {
}
job->fetchTask = task;
SCH_ERR_RET(schProcessOnJobPartialSuccess(job));
return TSDB_CODE_SUCCESS;
@ -453,11 +458,11 @@ int32_t schProcessOnTaskFailure(SSchJob *job, SSchTask *task, int32_t errCode) {
SCH_ERR_RET(schTaskCheckAndSetRetry(job, task, errCode, &needRetry));
if (!needRetry) {
SCH_TASK_ELOG("task failed[%x], no more retry", errCode);
SCH_TASK_ERR_LOG("task failed[%x], no more retry", errCode);
SCH_ERR_RET(schMoveTaskToFailList(job, task, &moved));
if (!moved) {
SCH_TASK_ELOG("task may already moved, status:%d", task->status);
SCH_TASK_ERR_LOG("task may already moved, status:%d", task->status);
}
if (SCH_TASK_NEED_WAIT_ALL(task)) {
@ -491,12 +496,12 @@ int32_t schProcessRspMsg(SSchJob *job, SSchTask *task, int32_t msgType, char *ms
if (rspCode != TSDB_CODE_SUCCESS) {
SCH_ERR_JRET(schProcessOnTaskFailure(job, task, rspCode));
} else {
// job->resNumOfRows += rsp->affectedRows;
code = schProcessOnTaskSuccess(job, task);
if (code) {
goto _task_error;
}
}
break;
}
case TDMT_VND_SUBMIT_RSP: {
@ -573,27 +578,24 @@ int32_t schHandleCallback(void* param, const SDataBuf* pMsg, int32_t msgType, in
int32_t code = 0;
SSchCallbackParam *pParam = (SSchCallbackParam *)param;
SSchJob **pjob = taosHashGet(schMgmt.jobs, &pParam->queryId, sizeof(pParam->queryId));
if (NULL == pjob || NULL == (*pjob)) {
SSchJob **job = taosHashGet(schMgmt.jobs, &pParam->queryId, sizeof(pParam->queryId));
if (NULL == job || NULL == (*job)) {
qError("taosHashGet queryId:%"PRIx64" not exist", pParam->queryId);
SCH_ERR_JRET(TSDB_CODE_SCH_INTERNAL_ERROR);
}
SSchJob *job = *pjob;
int32_t s = taosHashGetSize((*job)->execTasks);
assert(s != 0);
SSchTask **ptask = taosHashGet(job->execTasks, &pParam->taskId, sizeof(pParam->taskId));
if (NULL == ptask || NULL == (*ptask)) {
qError("taosHashGet taskId:%"PRIx64" not exist", pParam->taskId);
SSchTask **task = taosHashGet((*job)->execTasks, &pParam->taskId, sizeof(pParam->taskId));
if (NULL == task || NULL == (*task)) {
qError("failed to get task, taskId:%"PRIx64" not exist, reqId:0x%"PRIx64, pParam->taskId, (*job)->queryId);
SCH_ERR_JRET(TSDB_CODE_SCH_INTERNAL_ERROR);
}
SSchTask *task = *ptask;
SCH_TASK_DLOG("Got msg:%d, rspCode:%d", msgType, rspCode);
schProcessRspMsg(job, task, msgType, pMsg->pData, pMsg->len, rspCode);
schProcessRspMsg(*job, *task, msgType, pMsg->pData, pMsg->len, rspCode);
_return:
_return:
tfree(param);
SCH_RET(code);
}
@ -800,7 +802,7 @@ int32_t schBuildAndSendMsg(SSchJob *job, SSchTask *task, int32_t msgType) {
}
SEpSet epSet;
SQueryNodeAddr *addr = taosArrayGet(task->condidateAddrs, task->condidateIdx);
SQueryNodeAddr *addr = taosArrayGet(task->candidateAddrs, task->candidateIdx);
schConvertAddrToEpSet(addr, &epSet);
@ -818,17 +820,16 @@ _return:
int32_t schLaunchTask(SSchJob *job, SSchTask *task) {
SSubplan *plan = task->plan;
SCH_ERR_RET(qSubPlanToString(plan, &task->msg, &task->msgLen));
SCH_ERR_RET(schSetTaskCondidateAddrs(job, task));
SCH_ERR_RET(schSetTaskCandidateAddrs(job, task));
if (NULL == task->condidateAddrs || taosArrayGetSize(task->condidateAddrs) <= 0) {
SCH_TASK_ELOG("no valid condidate node for task:%"PRIx64, task->taskId);
if (NULL == task->candidateAddrs || taosArrayGetSize(task->candidateAddrs) <= 0) {
SCH_TASK_ERR_LOG("no valid candidate node for task:%"PRIx64, task->taskId);
SCH_ERR_RET(TSDB_CODE_SCH_INTERNAL_ERROR);
}
// int32_t msgType = (plan->type == QUERY_TYPE_MODIFY)? TDMT_VND_SUBMIT : TDMT_VND_QUERY;
SCH_ERR_RET(schBuildAndSendMsg(job, task, plan->msgType));
// NOTE: race condition: the task should be put into the hash table before send msg to server
SCH_ERR_RET(schPushTaskToExecList(job, task));
SCH_ERR_RET(schBuildAndSendMsg(job, task, plan->msgType));
task->status = JOB_TASK_STATUS_EXECUTING;
return TSDB_CODE_SUCCESS;
@ -850,18 +851,26 @@ void schDropJobAllTasks(SSchJob *job) {
void *pIter = taosHashIterate(job->succTasks, NULL);
while (pIter) {
SSchTask *task = *(SSchTask **)pIter;
int32_t msgType = task->plan->msgType;
if (msgType == TDMT_VND_CREATE_TABLE || msgType == TDMT_VND_SUBMIT) {
break;
}
schBuildAndSendMsg(job, task, TDMT_VND_DROP_TASK);
pIter = taosHashIterate(job->succTasks, pIter);
}
pIter = taosHashIterate(job->failTasks, NULL);
while (pIter) {
SSchTask *task = *(SSchTask **)pIter;
int32_t msgType = task->plan->msgType;
if (msgType == TDMT_VND_CREATE_TABLE || msgType == TDMT_VND_SUBMIT) {
break;
}
schBuildAndSendMsg(job, task, TDMT_VND_DROP_TASK);
pIter = taosHashIterate(job->succTasks, pIter);
}
}
@ -944,16 +953,15 @@ int32_t scheduleExecJobImpl(void *transport, SArray *nodeList, SQueryDag* pDag,
code = taosHashPut(schMgmt.jobs, &job->queryId, sizeof(job->queryId), &job, POINTER_BYTES);
if (0 != code) {
if (HASH_NODE_EXIST(code)) {
qError("taosHashPut queryId:%"PRIx64" already exist", job->queryId);
qError("taosHashPut queryId:0x%"PRIx64" already exist", job->queryId);
SCH_ERR_JRET(TSDB_CODE_QRY_INVALID_INPUT);
} else {
qError("taosHashPut queryId:%"PRIx64" failed", job->queryId);
qError("taosHashPut queryId:0x%"PRIx64" failed", job->queryId);
SCH_ERR_JRET(TSDB_CODE_SCH_INTERNAL_ERROR);
}
}
job->status = JOB_TASK_STATUS_NOT_START;
SCH_ERR_JRET(schLaunchJob(job));
*(SSchJob **)pJob = job;
@ -965,7 +973,6 @@ int32_t scheduleExecJobImpl(void *transport, SArray *nodeList, SQueryDag* pDag,
return TSDB_CODE_SUCCESS;
_return:
*(SSchJob **)pJob = NULL;
scheduleFreeJob(job);
@ -973,7 +980,7 @@ _return:
}
int32_t scheduleExecJob(void *transport, SArray *nodeList, SQueryDag* pDag, void** pJob, SQueryResult *pRes) {
if (NULL == transport || /* NULL == nodeList || */ NULL == pDag || NULL == pDag->pSubplans || NULL == pJob || NULL == pRes) {
if (NULL == transport || NULL == pDag || NULL == pDag->pSubplans || NULL == pJob || NULL == pRes) {
SCH_ERR_RET(TSDB_CODE_QRY_INVALID_INPUT);
}
@ -1074,8 +1081,26 @@ void scheduleFreeJob(void *pJob) {
schDropJobAllTasks(job);
}
//TODO free job
job->subPlans = NULL; // it is a reference to pDag->pSubplans
int32_t numOfLevels = taosArrayGetSize(job->levels);
for(int32_t i = 0; i < numOfLevels; ++i) {
SSchLevel *pLevel = taosArrayGet(job->levels, i);
int32_t numOfTasks = taosArrayGetSize(pLevel->subTasks);
for(int32_t j = 0; j < numOfTasks; ++j) {
SSchTask* pTask = taosArrayGet(pLevel->subTasks, j);
cleanupTask(pTask);
}
taosArrayDestroy(pLevel->subTasks);
}
taosHashCleanup(job->execTasks);
taosHashCleanup(job->failTasks);
taosHashCleanup(job->succTasks);
taosArrayDestroy(job->levels);
tfree(job);
}
void schedulerDestroy(void) {