[migration] split es_scroll/bulk_indexing pipeline task
This commit is contained in:
parent
dd68300ab2
commit
cd85d21ef0
|
@ -8,24 +8,27 @@ import (
|
||||||
"context"
|
"context"
|
||||||
"errors"
|
"errors"
|
||||||
"fmt"
|
"fmt"
|
||||||
elastic2 "infini.sh/framework/modules/elastic"
|
|
||||||
"net/http"
|
"net/http"
|
||||||
"strconv"
|
"strconv"
|
||||||
"strings"
|
"strings"
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
"infini.sh/framework/core/api/rbac/enum"
|
|
||||||
|
|
||||||
log "github.com/cihub/seelog"
|
log "github.com/cihub/seelog"
|
||||||
|
|
||||||
"infini.sh/console/model"
|
"infini.sh/console/model"
|
||||||
|
migration_model "infini.sh/console/plugin/migration/model"
|
||||||
|
migration_util "infini.sh/console/plugin/migration/util"
|
||||||
|
|
||||||
"infini.sh/framework/core/api"
|
"infini.sh/framework/core/api"
|
||||||
"infini.sh/framework/core/api/rbac"
|
"infini.sh/framework/core/api/rbac"
|
||||||
|
"infini.sh/framework/core/api/rbac/enum"
|
||||||
httprouter "infini.sh/framework/core/api/router"
|
httprouter "infini.sh/framework/core/api/router"
|
||||||
"infini.sh/framework/core/elastic"
|
"infini.sh/framework/core/elastic"
|
||||||
"infini.sh/framework/core/global"
|
"infini.sh/framework/core/global"
|
||||||
"infini.sh/framework/core/orm"
|
"infini.sh/framework/core/orm"
|
||||||
task2 "infini.sh/framework/core/task"
|
task2 "infini.sh/framework/core/task"
|
||||||
"infini.sh/framework/core/util"
|
"infini.sh/framework/core/util"
|
||||||
|
elastic2 "infini.sh/framework/modules/elastic"
|
||||||
)
|
)
|
||||||
|
|
||||||
func InitAPI() {
|
func InitAPI() {
|
||||||
|
@ -53,7 +56,7 @@ type APIHandler struct {
|
||||||
}
|
}
|
||||||
|
|
||||||
func (h *APIHandler) createDataMigrationTask(w http.ResponseWriter, req *http.Request, ps httprouter.Params) {
|
func (h *APIHandler) createDataMigrationTask(w http.ResponseWriter, req *http.Request, ps httprouter.Params) {
|
||||||
clusterTaskConfig := &ClusterMigrationTaskConfig{}
|
clusterTaskConfig := &migration_model.ClusterMigrationTaskConfig{}
|
||||||
err := h.DecodeJSON(req, clusterTaskConfig)
|
err := h.DecodeJSON(req, clusterTaskConfig)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
log.Error(err)
|
log.Error(err)
|
||||||
|
@ -178,7 +181,7 @@ func (h *APIHandler) searchDataMigrationTask(w http.ResponseWriter, req *http.Re
|
||||||
for _, hit := range searchRes.Hits.Hits {
|
for _, hit := range searchRes.Hits.Hits {
|
||||||
sourceM := util.MapStr(hit.Source)
|
sourceM := util.MapStr(hit.Source)
|
||||||
buf := util.MustToJSONBytes(sourceM["config"])
|
buf := util.MustToJSONBytes(sourceM["config"])
|
||||||
dataConfig := ClusterMigrationTaskConfig{}
|
dataConfig := migration_model.ClusterMigrationTaskConfig{}
|
||||||
err = util.FromJSONBytes(buf, &dataConfig)
|
err = util.FromJSONBytes(buf, &dataConfig)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
log.Error(err)
|
log.Error(err)
|
||||||
|
@ -246,7 +249,7 @@ func (h *APIHandler) startDataMigration(w http.ResponseWriter, req *http.Request
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
writeLog(&obj, &task2.TaskResult{
|
migration_util.WriteLog(&obj, &task2.TaskResult{
|
||||||
Success: true,
|
Success: true,
|
||||||
}, "task status manually set to ready")
|
}, "task status manually set to ready")
|
||||||
|
|
||||||
|
@ -311,7 +314,7 @@ func (h *APIHandler) stopDataMigrationTask(w http.ResponseWriter, req *http.Requ
|
||||||
h.WriteError(w, err.Error(), http.StatusInternalServerError)
|
h.WriteError(w, err.Error(), http.StatusInternalServerError)
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
writeLog(&obj, &task2.TaskResult{
|
migration_util.WriteLog(&obj, &task2.TaskResult{
|
||||||
Success: true,
|
Success: true,
|
||||||
}, "task status manually set to pending stop")
|
}, "task status manually set to pending stop")
|
||||||
|
|
||||||
|
@ -320,17 +323,6 @@ func (h *APIHandler) stopDataMigrationTask(w http.ResponseWriter, req *http.Requ
|
||||||
}, 200)
|
}, 200)
|
||||||
}
|
}
|
||||||
|
|
||||||
func getTaskConfig(task *task2.Task, config interface{}) error {
|
|
||||||
if task.Config_ == nil {
|
|
||||||
return util.FromJSONBytes([]byte(task.ConfigString), config)
|
|
||||||
}
|
|
||||||
buf, err := util.ToJSONBytes(task.Config_)
|
|
||||||
if err != nil {
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
return util.FromJSONBytes(buf, config)
|
|
||||||
}
|
|
||||||
|
|
||||||
func getIndexRefreshInterval(indexNames []string, targetESClient elastic.API) (map[string]string, error) {
|
func getIndexRefreshInterval(indexNames []string, targetESClient elastic.API) (map[string]string, error) {
|
||||||
const step = 50
|
const step = 50
|
||||||
var (
|
var (
|
||||||
|
@ -383,8 +375,8 @@ func (h *APIHandler) getIndexRefreshIntervals(w http.ResponseWriter, req *http.R
|
||||||
}, http.StatusNotFound)
|
}, http.StatusNotFound)
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
taskConfig := &ClusterMigrationTaskConfig{}
|
taskConfig := &migration_model.ClusterMigrationTaskConfig{}
|
||||||
err = getTaskConfig(&obj, taskConfig)
|
err = migration_util.GetTaskConfig(&obj, taskConfig)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
log.Error(err)
|
log.Error(err)
|
||||||
h.WriteError(w, err.Error(), http.StatusInternalServerError)
|
h.WriteError(w, err.Error(), http.StatusInternalServerError)
|
||||||
|
@ -421,8 +413,8 @@ func (h *APIHandler) getDataMigrationTaskInfo(w http.ResponseWriter, req *http.R
|
||||||
}, http.StatusNotFound)
|
}, http.StatusNotFound)
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
taskConfig := &ClusterMigrationTaskConfig{}
|
taskConfig := &migration_model.ClusterMigrationTaskConfig{}
|
||||||
err = getTaskConfig(&obj, taskConfig)
|
err = migration_util.GetTaskConfig(&obj, taskConfig)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
log.Error(err)
|
log.Error(err)
|
||||||
h.WriteError(w, err.Error(), http.StatusInternalServerError)
|
h.WriteError(w, err.Error(), http.StatusInternalServerError)
|
||||||
|
@ -471,7 +463,7 @@ func (h *APIHandler) getDataMigrationTaskInfo(w http.ResponseWriter, req *http.R
|
||||||
obj.Metadata.Labels["completed_indices"] = completedIndices
|
obj.Metadata.Labels["completed_indices"] = completedIndices
|
||||||
h.WriteJSON(w, obj, http.StatusOK)
|
h.WriteJSON(w, obj, http.StatusOK)
|
||||||
}
|
}
|
||||||
func getMajorTaskInfoByIndex(taskID string) (map[string]IndexStateInfo, error) {
|
func getMajorTaskInfoByIndex(taskID string) (map[string]migration_model.IndexStateInfo, error) {
|
||||||
query := util.MapStr{
|
query := util.MapStr{
|
||||||
"size": 0,
|
"size": 0,
|
||||||
"aggs": util.MapStr{
|
"aggs": util.MapStr{
|
||||||
|
@ -529,13 +521,13 @@ func getMajorTaskInfoByIndex(taskID string) (map[string]IndexStateInfo, error) {
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
resBody := map[string]IndexStateInfo{}
|
resBody := map[string]migration_model.IndexStateInfo{}
|
||||||
|
|
||||||
if taskAgg, ok := searchRes.Aggregations["group_by_task"]; ok {
|
if taskAgg, ok := searchRes.Aggregations["group_by_task"]; ok {
|
||||||
for _, bk := range taskAgg.Buckets {
|
for _, bk := range taskAgg.Buckets {
|
||||||
if key, ok := bk["key"].(string); ok {
|
if key, ok := bk["key"].(string); ok {
|
||||||
//resBody[key] = int(bk["doc_count"].(float64))
|
//resBody[key] = int(bk["doc_count"].(float64))
|
||||||
resBody[key] = IndexStateInfo{}
|
resBody[key] = migration_model.IndexStateInfo{}
|
||||||
if statusAgg, ok := bk["group_by_status"].(map[string]interface{}); ok {
|
if statusAgg, ok := bk["group_by_status"].(map[string]interface{}); ok {
|
||||||
if sbks, ok := statusAgg["buckets"].([]interface{}); ok {
|
if sbks, ok := statusAgg["buckets"].([]interface{}); ok {
|
||||||
for _, sbk := range sbks {
|
for _, sbk := range sbks {
|
||||||
|
@ -565,7 +557,7 @@ func getMajorTaskInfoByIndex(taskID string) (map[string]IndexStateInfo, error) {
|
||||||
return resBody, nil
|
return resBody, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func getIndexTaskDocCount(ctx context.Context, index *IndexConfig, targetESClient elastic.API) (int64, error) {
|
func getIndexTaskDocCount(ctx context.Context, index *migration_model.IndexConfig, targetESClient elastic.API) (int64, error) {
|
||||||
targetIndexName := index.Target.Name
|
targetIndexName := index.Target.Name
|
||||||
if targetIndexName == "" {
|
if targetIndexName == "" {
|
||||||
if v, ok := index.IndexRename[index.Source.Name].(string); ok {
|
if v, ok := index.IndexRename[index.Source.Name].(string); ok {
|
||||||
|
@ -741,8 +733,8 @@ func (h *APIHandler) getDataMigrationTaskOfIndex(w http.ResponseWriter, req *htt
|
||||||
startTime = subTasks[0].StartTimeInMillis
|
startTime = subTasks[0].StartTimeInMillis
|
||||||
}
|
}
|
||||||
for i, ptask := range subTasks {
|
for i, ptask := range subTasks {
|
||||||
cfg := IndexMigrationTaskConfig{}
|
cfg := migration_model.IndexMigrationTaskConfig{}
|
||||||
err := getTaskConfig(&ptask, &cfg)
|
err := migration_util.GetTaskConfig(&ptask, &cfg)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
log.Errorf("failed to get task config, err: %v", err)
|
log.Errorf("failed to get task config, err: %v", err)
|
||||||
continue
|
continue
|
||||||
|
@ -950,6 +942,11 @@ func (h *APIHandler) validateMultiType(w http.ResponseWriter, req *http.Request,
|
||||||
}, http.StatusOK)
|
}, http.StatusOK)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
type InitIndexRequest struct {
|
||||||
|
Mappings map[string]interface{} `json:"mappings"`
|
||||||
|
Settings map[string]interface{} `json:"settings"`
|
||||||
|
}
|
||||||
|
|
||||||
func (h *APIHandler) initIndex(w http.ResponseWriter, req *http.Request, ps httprouter.Params) {
|
func (h *APIHandler) initIndex(w http.ResponseWriter, req *http.Request, ps httprouter.Params) {
|
||||||
targetClusterID := ps.MustGetParameter("id")
|
targetClusterID := ps.MustGetParameter("id")
|
||||||
indexName := ps.MustGetParameter("index")
|
indexName := ps.MustGetParameter("index")
|
||||||
|
@ -978,7 +975,7 @@ func (h *APIHandler) initIndex(w http.ResponseWriter, req *http.Request, ps http
|
||||||
}
|
}
|
||||||
if ml := len(reqBody.Mappings); ml > 0 {
|
if ml := len(reqBody.Mappings); ml > 0 {
|
||||||
var (
|
var (
|
||||||
docType = ""
|
docType = ""
|
||||||
mapping interface{} = reqBody.Mappings
|
mapping interface{} = reqBody.Mappings
|
||||||
)
|
)
|
||||||
if ml == 1 {
|
if ml == 1 {
|
||||||
|
@ -989,7 +986,7 @@ func (h *APIHandler) initIndex(w http.ResponseWriter, req *http.Request, ps http
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
mappingBytes := util.MustToJSONBytes(mapping)
|
mappingBytes := util.MustToJSONBytes(mapping)
|
||||||
_, err = client.UpdateMapping(indexName, docType, mappingBytes)
|
_, err = client.UpdateMapping(indexName, docType, mappingBytes)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
log.Error(err)
|
log.Error(err)
|
||||||
|
@ -997,7 +994,7 @@ func (h *APIHandler) initIndex(w http.ResponseWriter, req *http.Request, ps http
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}else{
|
} else {
|
||||||
indexSettings := map[string]interface{}{}
|
indexSettings := map[string]interface{}{}
|
||||||
if len(reqBody.Settings) > 0 {
|
if len(reqBody.Settings) > 0 {
|
||||||
indexSettings["settings"] = reqBody.Settings
|
indexSettings["settings"] = reqBody.Settings
|
||||||
|
@ -1063,7 +1060,7 @@ func (h *APIHandler) deleteDataMigrationTask(w http.ResponseWriter, req *http.Re
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
err = orm.DeleteBy( &obj, util.MustToJSONBytes(q))
|
err = orm.DeleteBy(&obj, util.MustToJSONBytes(q))
|
||||||
if err != nil {
|
if err != nil {
|
||||||
h.WriteError(w, err.Error(), http.StatusInternalServerError)
|
h.WriteError(w, err.Error(), http.StatusInternalServerError)
|
||||||
log.Error(err)
|
log.Error(err)
|
||||||
|
@ -1076,7 +1073,7 @@ func (h *APIHandler) deleteDataMigrationTask(w http.ResponseWriter, req *http.Re
|
||||||
}, 200)
|
}, 200)
|
||||||
}
|
}
|
||||||
|
|
||||||
func getMajorTaskStatsFromInstances(majorTaskID string) (taskStats MajorTaskState, err error) {
|
func getMajorTaskStatsFromInstances(majorTaskID string) (taskStats migration_model.MajorTaskState, err error) {
|
||||||
taskQuery := util.MapStr{
|
taskQuery := util.MapStr{
|
||||||
"size": 500,
|
"size": 500,
|
||||||
"query": util.MapStr{
|
"query": util.MapStr{
|
||||||
|
@ -1179,7 +1176,7 @@ func getMajorTaskStatsFromInstances(majorTaskID string) (taskStats MajorTaskStat
|
||||||
return taskStats, nil
|
return taskStats, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func getMajorTaskByIndexFromES(majorTaskID string) (map[string]IndexStateInfo, error) {
|
func getMajorTaskByIndexFromES(majorTaskID string) (map[string]migration_model.IndexStateInfo, error) {
|
||||||
taskQuery := util.MapStr{
|
taskQuery := util.MapStr{
|
||||||
"size": 500,
|
"size": 500,
|
||||||
"query": util.MapStr{
|
"query": util.MapStr{
|
||||||
|
@ -1229,7 +1226,7 @@ func getMajorTaskByIndexFromES(majorTaskID string) (map[string]IndexStateInfo, e
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
state := map[string]IndexStateInfo{}
|
state := map[string]migration_model.IndexStateInfo{}
|
||||||
for instID, taskIDs := range pipelineTaskIDs {
|
for instID, taskIDs := range pipelineTaskIDs {
|
||||||
inst := &model.Instance{}
|
inst := &model.Instance{}
|
||||||
inst.ID = instID
|
inst.ID = instID
|
||||||
|
|
|
@ -2,7 +2,7 @@
|
||||||
* Web: https://infinilabs.com
|
* Web: https://infinilabs.com
|
||||||
* Email: hello#infini.ltd */
|
* Email: hello#infini.ltd */
|
||||||
|
|
||||||
package migration
|
package model
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"fmt"
|
"fmt"
|
||||||
|
@ -95,16 +95,6 @@ type ClusterInfo struct {
|
||||||
Distribution string `json:"distribution,omitempty"`
|
Distribution string `json:"distribution,omitempty"`
|
||||||
}
|
}
|
||||||
|
|
||||||
type TaskCompleteState struct {
|
|
||||||
IsComplete bool
|
|
||||||
Error string
|
|
||||||
PipelineIds []string
|
|
||||||
RunningPhase int
|
|
||||||
TotalDocs int64
|
|
||||||
SuccessDocs int64
|
|
||||||
ScrolledDocs int64
|
|
||||||
}
|
|
||||||
|
|
||||||
type MajorTaskState struct {
|
type MajorTaskState struct {
|
||||||
ScrolledDocs float64
|
ScrolledDocs float64
|
||||||
IndexDocs float64
|
IndexDocs float64
|
||||||
|
@ -116,11 +106,6 @@ type IndexStateInfo struct {
|
||||||
IndexDocs float64
|
IndexDocs float64
|
||||||
}
|
}
|
||||||
|
|
||||||
type InitIndexRequest struct {
|
|
||||||
Mappings map[string]interface{} `json:"mappings"`
|
|
||||||
Settings map[string]interface{} `json:"settings"`
|
|
||||||
}
|
|
||||||
|
|
||||||
type IndexMigrationTaskConfig struct {
|
type IndexMigrationTaskConfig struct {
|
||||||
Source IndexMigrationSourceConfig `json:"source"`
|
Source IndexMigrationSourceConfig `json:"source"`
|
||||||
Target IndexMigrationTargetConfig `json:"target"`
|
Target IndexMigrationTargetConfig `json:"target"`
|
|
@ -0,0 +1,7 @@
|
||||||
|
package model
|
||||||
|
|
||||||
|
import "infini.sh/framework/core/task"
|
||||||
|
|
||||||
|
type Processor interface {
|
||||||
|
Process(t *task.Task) (err error)
|
||||||
|
}
|
File diff suppressed because it is too large
Load Diff
|
@ -0,0 +1,402 @@
|
||||||
|
package pipeline_task
|
||||||
|
|
||||||
|
import (
|
||||||
|
"errors"
|
||||||
|
"fmt"
|
||||||
|
"time"
|
||||||
|
|
||||||
|
log "github.com/cihub/seelog"
|
||||||
|
|
||||||
|
"infini.sh/console/model"
|
||||||
|
migration_model "infini.sh/console/plugin/migration/model"
|
||||||
|
migration_util "infini.sh/console/plugin/migration/util"
|
||||||
|
|
||||||
|
"infini.sh/framework/core/elastic"
|
||||||
|
"infini.sh/framework/core/orm"
|
||||||
|
"infini.sh/framework/core/task"
|
||||||
|
"infini.sh/framework/core/util"
|
||||||
|
)
|
||||||
|
|
||||||
|
// processor carries the names needed to reach the task store and its logs.
type processor struct {
	// Elasticsearch is the identifier handed to elastic.GetClient.
	Elasticsearch string
	// IndexName is the index that task documents are searched in and written to.
	IndexName string
	// LogIndexName is the index that pipeline log documents are searched in.
	LogIndexName string
}
|
||||||
|
|
||||||
|
func NewProcessor(elasticsearch, indexName, logIndexName string) migration_model.Processor {
|
||||||
|
return &processor{
|
||||||
|
Elasticsearch: elasticsearch,
|
||||||
|
IndexName: indexName,
|
||||||
|
LogIndexName: logIndexName,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func (p *processor) Process(t *task.Task) (err error) {
|
||||||
|
switch t.Status {
|
||||||
|
case task.StatusReady:
|
||||||
|
// schedule pipeline task & create pipeline
|
||||||
|
err = p.handleReadyPipelineTask(t)
|
||||||
|
case task.StatusRunning:
|
||||||
|
// check pipeline log
|
||||||
|
err = p.handleRunningPipelineTask(t)
|
||||||
|
case task.StatusPendingStop:
|
||||||
|
// stop pipeline
|
||||||
|
err = p.handlePendingStopPipelineTask(t)
|
||||||
|
}
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
func (p *processor) handleReadyPipelineTask(taskItem *task.Task) error {
|
||||||
|
cleanPipeline, cleanQueue := true, false
|
||||||
|
|
||||||
|
switch taskItem.Metadata.Labels["pipeline_id"] {
|
||||||
|
case "es_scroll":
|
||||||
|
// try to clear queue before running es_scroll
|
||||||
|
cleanQueue = true
|
||||||
|
case "bulk_indexing":
|
||||||
|
default:
|
||||||
|
return fmt.Errorf("task [%s] has unknown pipeline_id [%s]", taskItem.ID, taskItem.Metadata.Labels["pipeline_id"])
|
||||||
|
}
|
||||||
|
|
||||||
|
instance, err := p.cleanGatewayPipeline(taskItem, cleanPipeline, cleanQueue)
|
||||||
|
if err != nil {
|
||||||
|
log.Errorf("failed to prepare instance before running pipeline, err: %v", err)
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
taskItem.RetryTimes++
|
||||||
|
|
||||||
|
cfg := migration_model.PipelineTaskConfig{}
|
||||||
|
err = migration_util.GetTaskConfig(taskItem, &cfg)
|
||||||
|
if err != nil {
|
||||||
|
log.Errorf("failed to get task config, err: %v", err)
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
cfg.Labels["retry_times"] = taskItem.RetryTimes
|
||||||
|
|
||||||
|
// call instance api to create pipeline task
|
||||||
|
err = instance.CreatePipeline(util.MustToJSONBytes(cfg))
|
||||||
|
if err != nil {
|
||||||
|
log.Errorf("create pipeline task [%s] failed, err: %+v", taskItem.ID, err)
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
taskItem.Status = task.StatusRunning
|
||||||
|
taskItem.StartTimeInMillis = time.Now().UnixMilli()
|
||||||
|
p.saveTaskAndWriteLog(taskItem, "wait_for", &task.TaskResult{
|
||||||
|
Success: true,
|
||||||
|
}, fmt.Sprintf("pipeline task [%s] started", taskItem.ID))
|
||||||
|
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func (p *processor) handleRunningPipelineTask(taskItem *task.Task) error {
|
||||||
|
switch taskItem.Metadata.Labels["pipeline_id"] {
|
||||||
|
case "es_scroll":
|
||||||
|
return p.handleRunningEsScrollPipelineTask(taskItem)
|
||||||
|
case "bulk_indexing":
|
||||||
|
return p.handleRunningBulkIndexingPipelineTask(taskItem)
|
||||||
|
default:
|
||||||
|
return fmt.Errorf("task [%s] has unknown pipeline_id [%s]", taskItem.ID, taskItem.Metadata.Labels["pipeline_id"])
|
||||||
|
}
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func (p *processor) handleRunningEsScrollPipelineTask(taskItem *task.Task) error {
|
||||||
|
scrolledDocs, totalHits, scrolled, err := p.getEsScrollTaskState(taskItem)
|
||||||
|
|
||||||
|
if !scrolled {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
var errMsg string
|
||||||
|
if err != nil {
|
||||||
|
errMsg = err.Error()
|
||||||
|
}
|
||||||
|
if errMsg == "" {
|
||||||
|
if scrolledDocs < totalHits {
|
||||||
|
errMsg = fmt.Sprintf("scrolled finished but docs count unmatch: %d / %d", scrolledDocs, totalHits)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
now := time.Now()
|
||||||
|
taskItem.CompletedTime = &now
|
||||||
|
taskItem.Metadata.Labels["scrolled_docs"] = scrolledDocs
|
||||||
|
if errMsg != "" {
|
||||||
|
taskItem.Status = task.StatusError
|
||||||
|
} else {
|
||||||
|
taskItem.Status = task.StatusComplete
|
||||||
|
}
|
||||||
|
|
||||||
|
p.saveTaskAndWriteLog(taskItem, "", &task.TaskResult{
|
||||||
|
Success: errMsg == "",
|
||||||
|
Error: errMsg,
|
||||||
|
}, fmt.Sprintf("pipeline task [%s] completed", taskItem.ID))
|
||||||
|
// clean queue if scroll failed
|
||||||
|
p.cleanGatewayPipeline(taskItem, true, taskItem.Status == task.StatusError)
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func (p *processor) handleRunningBulkIndexingPipelineTask(taskItem *task.Task) error {
|
||||||
|
successDocs, indexDocs, bulked, err := p.getBulkIndexingTaskState(taskItem)
|
||||||
|
if !bulked {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
var errMsg string
|
||||||
|
if err != nil {
|
||||||
|
errMsg = err.Error()
|
||||||
|
}
|
||||||
|
// TODO: handle multiple run bulk_indexing pipeline tasks and total_docs from index_migration
|
||||||
|
now := time.Now()
|
||||||
|
taskItem.CompletedTime = &now
|
||||||
|
taskItem.Metadata.Labels["index_docs"] = indexDocs
|
||||||
|
taskItem.Metadata.Labels["success_docs"] = successDocs
|
||||||
|
if errMsg != "" {
|
||||||
|
taskItem.Status = task.StatusError
|
||||||
|
} else {
|
||||||
|
taskItem.Status = task.StatusComplete
|
||||||
|
}
|
||||||
|
|
||||||
|
p.saveTaskAndWriteLog(taskItem, "", &task.TaskResult{
|
||||||
|
Success: errMsg == "",
|
||||||
|
Error: errMsg,
|
||||||
|
}, fmt.Sprintf("pipeline task [%s] completed", taskItem.ID))
|
||||||
|
// clean queue if bulk completed
|
||||||
|
p.cleanGatewayPipeline(taskItem, true, taskItem.Status == task.StatusComplete)
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func (p *processor) handlePendingStopPipelineTask(taskItem *task.Task) error {
|
||||||
|
switch taskItem.Metadata.Labels["pipeline_id"] {
|
||||||
|
case "es_scroll":
|
||||||
|
case "bulk_indexing":
|
||||||
|
default:
|
||||||
|
return fmt.Errorf("task [%s] has unknown pipeline_id [%s]", taskItem.ID, taskItem.Metadata.Labels["pipeline_id"])
|
||||||
|
}
|
||||||
|
|
||||||
|
hits, err := p.getPipelineLogs(taskItem, []string{"STOPPED"})
|
||||||
|
if err != nil {
|
||||||
|
log.Errorf("failed to get pipeline logs for task [%s], err: %v", taskItem.ID, err)
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
stopped := len(hits) > 0
|
||||||
|
|
||||||
|
if stopped {
|
||||||
|
taskItem.Status = task.StatusStopped
|
||||||
|
p.saveTaskAndWriteLog(taskItem, "", nil, fmt.Sprintf("task [%s] stopped", taskItem.ID))
|
||||||
|
// clean all stuffs if manually stopped
|
||||||
|
p.cleanGatewayPipeline(taskItem, true, true)
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
_, instance, err := p.getPipelineExecutionInstance(taskItem)
|
||||||
|
if err != nil {
|
||||||
|
log.Errorf("failed to get execution instance for task [%s], err: %v", taskItem.ID, err)
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
err = instance.StopPipelineWithTimeout(taskItem.ID, time.Second)
|
||||||
|
if err != nil {
|
||||||
|
log.Errorf("failed to stop pipeline, err: %v", err)
|
||||||
|
}
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func (p *processor) cleanGatewayPipeline(taskItem *task.Task, pipeline, queue bool) (instance model.Instance, err error) {
|
||||||
|
parentTask, instance, err := p.getPipelineExecutionInstance(taskItem)
|
||||||
|
if err != nil {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
if pipeline {
|
||||||
|
err = instance.DeletePipeline(taskItem.ID)
|
||||||
|
if err != nil {
|
||||||
|
log.Errorf("delete pipeline failed, err: %v", err)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if queue {
|
||||||
|
selector := util.MapStr{
|
||||||
|
"labels": util.MapStr{
|
||||||
|
"migration_task_id": parentTask.ID,
|
||||||
|
},
|
||||||
|
}
|
||||||
|
err = instance.DeleteQueueBySelector(selector)
|
||||||
|
if err != nil {
|
||||||
|
log.Errorf("failed to delete queue, err: %v", err)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return instance, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func (p *processor) getPipelineExecutionInstance(taskItem *task.Task) (parentTask *task.Task, instance model.Instance, err error) {
|
||||||
|
parentTask, err = p.getParentTask(taskItem, "index_migration")
|
||||||
|
if err != nil {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
// Use sub task's execution instance
|
||||||
|
instanceID := parentTask.Metadata.Labels["execution_instance_id"]
|
||||||
|
instance.ID, _ = util.ExtractString(instanceID)
|
||||||
|
_, err = orm.Get(&instance)
|
||||||
|
if err != nil {
|
||||||
|
log.Errorf("failed to get instance, err: %v", err)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
func (p *processor) getParentTask(taskItem *task.Task, taskType string) (*task.Task, error) {
|
||||||
|
queryDsl := util.MapStr{
|
||||||
|
"size": 1,
|
||||||
|
"query": util.MapStr{
|
||||||
|
"bool": util.MapStr{
|
||||||
|
"must": []util.MapStr{
|
||||||
|
{
|
||||||
|
"terms": util.MapStr{
|
||||||
|
"_id": taskItem.ParentId,
|
||||||
|
},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"term": util.MapStr{
|
||||||
|
"metadata.type": taskType,
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
esClient := elastic.GetClient(p.Elasticsearch)
|
||||||
|
res, err := esClient.SearchWithRawQueryDSL(p.IndexName, util.MustToJSONBytes(queryDsl))
|
||||||
|
if err != nil {
|
||||||
|
log.Errorf("query tasks from es failed, err: %v", err)
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
if res.GetTotal() == 0 {
|
||||||
|
return nil, errors.New("no parent task found")
|
||||||
|
}
|
||||||
|
for _, hit := range res.Hits.Hits {
|
||||||
|
buf, err := util.ToJSONBytes(hit.Source)
|
||||||
|
if err != nil {
|
||||||
|
log.Errorf("marshal task json failed, err: %v", err)
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
tk := task.Task{}
|
||||||
|
err = util.FromJSONBytes(buf, &tk)
|
||||||
|
if err != nil {
|
||||||
|
log.Errorf("unmarshal task json failed, err: %v", err)
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
return &tk, nil
|
||||||
|
}
|
||||||
|
return nil, errors.New("not reachable")
|
||||||
|
}
|
||||||
|
|
||||||
|
func (p *processor) getEsScrollTaskState(taskItem *task.Task) (scrolledDocs int64, totalHits int64, scrolled bool, err error) {
|
||||||
|
hits, err := p.getPipelineLogs(taskItem, []string{"FINISHED", "FAILED"})
|
||||||
|
if err != nil {
|
||||||
|
log.Errorf("failed to get pipeline logs for task [%s], err: %v", taskItem.ID, err)
|
||||||
|
err = nil
|
||||||
|
return
|
||||||
|
}
|
||||||
|
for _, hit := range hits {
|
||||||
|
scrolled = true
|
||||||
|
resultErr, _ := util.MapStr(hit.Source).GetValue("payload.pipeline.logging.result.error")
|
||||||
|
if errStr, ok := resultErr.(string); ok && errStr != "" {
|
||||||
|
err = errors.New(errStr)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
m := util.MapStr(hit.Source)
|
||||||
|
scroll, total := migration_util.GetMapIntValue(m, "payload.pipeline.logging.context.es_scroll.scrolled_docs"), migration_util.GetMapIntValue(m, "payload.pipeline.logging.context.es_scroll.total_hits")
|
||||||
|
|
||||||
|
scrolledDocs += scroll
|
||||||
|
totalHits += total
|
||||||
|
}
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
func (p *processor) getBulkIndexingTaskState(taskItem *task.Task) (successDocs int64, indexDocs int64, bulked bool, err error) {
|
||||||
|
hits, err := p.getPipelineLogs(taskItem, []string{"FINISHED", "FAILED"})
|
||||||
|
if err != nil {
|
||||||
|
log.Errorf("failed to get pipeline logs for task [%s], err: %v", taskItem.ID, err)
|
||||||
|
err = nil
|
||||||
|
return
|
||||||
|
}
|
||||||
|
for _, hit := range hits {
|
||||||
|
bulked = true
|
||||||
|
resultErr, _ := util.MapStr(hit.Source).GetValue("payload.pipeline.logging.result.error")
|
||||||
|
if errStr, ok := resultErr.(string); ok && errStr != "" {
|
||||||
|
err = errors.New(errStr)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
m := util.MapStr(hit.Source)
|
||||||
|
success, failure, invalid := migration_util.GetMapIntValue(m, "payload.pipeline.logging.context.bulk_indexing.success.count"), migration_util.GetMapIntValue(m, "payload.pipeline.logging.context.bulk_indexing.failure.count"), migration_util.GetMapIntValue(m, "payload.pipeline.logging.context.bulk_indexing.invalid.count")
|
||||||
|
successDocs += success
|
||||||
|
indexDocs += success + invalid + failure
|
||||||
|
}
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
func (p *processor) getPipelineLogs(taskItem *task.Task, status []string) ([]elastic.IndexDocument, error) {
|
||||||
|
query := util.MapStr{
|
||||||
|
"sort": []util.MapStr{
|
||||||
|
{
|
||||||
|
"timestamp": util.MapStr{
|
||||||
|
"order": "desc",
|
||||||
|
},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"payload.pipeline.logging.steps": util.MapStr{
|
||||||
|
"order": "desc",
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
"collapse": util.MapStr{
|
||||||
|
"field": "metadata.labels.task_id",
|
||||||
|
},
|
||||||
|
"query": util.MapStr{
|
||||||
|
"bool": util.MapStr{
|
||||||
|
"must": []util.MapStr{
|
||||||
|
{
|
||||||
|
"term": util.MapStr{
|
||||||
|
"metadata.labels.task_id": taskItem.ID,
|
||||||
|
},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"terms": util.MapStr{
|
||||||
|
"payload.pipeline.logging.status": status,
|
||||||
|
},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"range": util.MapStr{
|
||||||
|
"metadata.labels.retry_times": util.MapStr{
|
||||||
|
"gte": taskItem.RetryTimes,
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
}
|
||||||
|
esClient := elastic.GetClient(p.Elasticsearch)
|
||||||
|
res, err := esClient.SearchWithRawQueryDSL(p.LogIndexName, util.MustToJSONBytes(query))
|
||||||
|
if err != nil {
|
||||||
|
log.Errorf("search task log from es failed, err: %v", err)
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
return res.Hits.Hits, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func (p *processor) saveTaskAndWriteLog(taskItem *task.Task, refresh string, taskResult *task.TaskResult, message string) {
|
||||||
|
esClient := elastic.GetClient(p.Elasticsearch)
|
||||||
|
_, err := esClient.Index(p.IndexName, "", taskItem.ID, taskItem, refresh)
|
||||||
|
if err != nil {
|
||||||
|
log.Errorf("failed to update task, err: %v", err)
|
||||||
|
}
|
||||||
|
if message != "" {
|
||||||
|
migration_util.WriteLog(taskItem, taskResult, message)
|
||||||
|
}
|
||||||
|
}
|
|
@ -0,0 +1,66 @@
|
||||||
|
package util
|
||||||
|
|
||||||
|
import (
|
||||||
|
log "github.com/cihub/seelog"
|
||||||
|
|
||||||
|
"infini.sh/framework/core/event"
|
||||||
|
"infini.sh/framework/core/task"
|
||||||
|
"infini.sh/framework/core/util"
|
||||||
|
)
|
||||||
|
|
||||||
|
func WriteLog(taskItem *task.Task, taskResult *task.TaskResult, message string) {
|
||||||
|
labels := util.MapStr{}
|
||||||
|
labels.Update(util.MapStr(taskItem.Metadata.Labels))
|
||||||
|
labels["task_type"] = taskItem.Metadata.Type
|
||||||
|
labels["task_id"] = taskItem.ID
|
||||||
|
labels["parent_task_id"] = taskItem.ParentId
|
||||||
|
labels["retry_no"] = taskItem.RetryTimes
|
||||||
|
event.SaveLog(event.Event{
|
||||||
|
Metadata: event.EventMetadata{
|
||||||
|
Category: "task",
|
||||||
|
Name: "logging",
|
||||||
|
Datatype: "event",
|
||||||
|
Labels: labels,
|
||||||
|
},
|
||||||
|
Fields: util.MapStr{
|
||||||
|
"task": util.MapStr{
|
||||||
|
"logging": util.MapStr{
|
||||||
|
"config": taskItem.ConfigString,
|
||||||
|
"status": taskItem.Status,
|
||||||
|
"message": message,
|
||||||
|
"result": taskResult,
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
var runningTaskStatus = []string{task.StatusRunning, task.StatusReady, task.StatusReady1}
|
||||||
|
|
||||||
|
func IsRunningState(status string) bool {
|
||||||
|
return util.StringInArray(runningTaskStatus, status)
|
||||||
|
}
|
||||||
|
|
||||||
|
func GetTaskConfig(task *task.Task, config interface{}) error {
|
||||||
|
if task.Config_ == nil {
|
||||||
|
return util.FromJSONBytes([]byte(task.ConfigString), config)
|
||||||
|
}
|
||||||
|
buf, err := util.ToJSONBytes(task.Config_)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
return util.FromJSONBytes(buf, config)
|
||||||
|
}
|
||||||
|
|
||||||
|
func GetMapIntValue(m util.MapStr, key string) int64 {
|
||||||
|
v, err := m.GetValue(key)
|
||||||
|
if err != nil {
|
||||||
|
return 0
|
||||||
|
}
|
||||||
|
vv, err := util.ExtractInt(v)
|
||||||
|
if err != nil {
|
||||||
|
log.Errorf("got %s but failed to extract, err: %v", key, err)
|
||||||
|
return 0
|
||||||
|
}
|
||||||
|
return vv
|
||||||
|
}
|
Loading…
Reference in New Issue