根据标签匹配任务

Former-commit-id: cf60ab6a9ef1dc5c19bf442199bca280d30270b0
This commit is contained in:
zhangwei 2023-07-26 14:56:22 +08:00
parent fffd0e3382
commit 77dfd5c84c
18 changed files with 537 additions and 465 deletions

View File

@ -181,7 +181,7 @@ type (
}
TaskYaml {
TaskId int64 `yaml:"taskId"`
serviceName string `yaml:"serviceName"`
taskType string `yaml:"taskType"`
metadata interface{} `yaml:"metadata"`
}
)

View File

@ -23,43 +23,43 @@ info(
service pcm {
@handler scheduleTaskByYamlHandler
post /core/scheduleTaskByYaml (scheduleTaskByYamlReq) returns ()
@handler scheduleTaskHandler
post /core/scheduleTask (scheduleTaskReq) returns ()
@handler TaskListHandler
get /core/taskList () returns (taskListResp)
@handler JobTotalHandler
get /core/jobTotal () returns (jobTotalResp)
@handler listCenterHandler
get /core/listCenter () returns (listCenterResp)
@handler listClusterHandler
get /core/listCluster/:centerId (listClusterReq) returns (listClusterResp)
@handler submitJobHandler
post /core/submitJob (submitJobReq) returns (submitJobResp)
@handler getRegionHandler
get /core/getRegion () returns (getRegionResp)
@handler listRegionHandler
get /core/listRegion () returns (listRegionResp)
@handler getComputingPowerHandler
get /core/getComputingPower returns (cpResp)
@handler getGeneralInfoHandler
get /core/getGeneralInfo () returns (GiResp)
@handler listDomainResourceHandler
get /core/listDomainResource returns (DomainResourceResp)
@handler getResourcePanelConfigHandler
get /core/getResourcePanelConfigHandler () returns (ResourcePanelConfigResp)
@handler putResourcePanelConfigHandler
put /core/resourcePanelConfigHandler (ResourcePanelConfigReq) returns ()
}
@ -72,7 +72,7 @@ service pcm {
service pcm {
@handler listJobHandler
get /hpc/listJob (listJobReq) returns (listJobResp)
@handler listHistoryJobHandler
get /hpc/listHistoryJob (listHistoryJobReq) returns (listHistoryJobResp)
}
@ -85,10 +85,10 @@ service pcm {
service pcm {
@handler listDataSetHandler
get /ai/listDataSet/:projectId (DataSetReq) returns (DataSetResp)
@handler CreateDataSetHandler
post /ai/createDataSet/:projectId (CreateDataSetReq) returns (CreateDataSetResp)
@handler DeleteDataSetHandler
delete /ai/deleteDataSet/:projectId/:datasetId (DeleteDataSetReq) returns (DeleteDataSetResp)
// create task 创建导入任务
@ -119,7 +119,7 @@ service pcm {
// ShowAlgorithmByUuid 展示算法详情
@handler ShowAlgorithmByUuid
get /ai/ShowAlgorithmByUuid/:projectId/:algorithmId (ShowAlgorithmByUuidReq) returns (ShowAlgorithmByUuidResp)
// create export task 创建导出任务
@handler CreateExportTaskHandler
post /ai/CreateExportTask/:projectId/:datasetId (CreateExportTaskReq) returns (ExportTaskDataResp)
@ -127,11 +127,11 @@ service pcm {
get /ai/GetExportTasksOfDataset/:projectId/:datasetId (GetExportTasksOfDatasetReq) returns (GetExportTasksOfDatasetResp)
@handler GetExportTaskStatusOfDatasetHandler
get /ai/GetExportTaskStatusOfDataset/:projectId/:resourceId/:taskId (GetExportTaskStatusOfDatasetReq) returns (GetExportTaskStatusOfDatasetResp)
// create processor task 创建处理任务
@handler CreateProcessorTaskHandler
post /ai/CreateProcessorTask (CreateProcessorTaskReq) returns (CreateProcessorTaskResp)
// create service 创建服务
@handler CreateServiceHandler
post /ai/CreateService/:projectId (CreateServiceReq) returns (CreateServiceResp)
@ -147,7 +147,7 @@ service pcm {
// ListClusters查询专属资源池列表
@handler ListClustersHandler
get /ai/ListClusters (ListClustersReq) returns (ListClustersResp)
/******************Notebook Method start*************************/
@handler listNotebookHandler
get /ai/listNotebook (ListNotebookReq) returns (ListNotebookResp)
@ -162,7 +162,7 @@ service pcm {
@handler mountNotebookStorageHandler
post /ai/mountNotebookStorage (MountNotebookStorageReq) returns (MountNotebookStorageResp)
/******************Notebook Method end*************************/
/******************Visualization Job Method start*************************/
@handler getVisualizationJobHandler
get /ai/getVisualizationJob (GetVisualizationJobReq) returns (GetVisualizationJobResp)
@ -193,16 +193,16 @@ service pcm {
service pcm {
@handler uploadHandler
post /upload () returns ()
@handler chunkHandler
post /chunk () returns ()
@handler imageListHandler
get /image/list () returns (imageListResp)
@handler dataSetCheckHandler
get /dataSet/check/:fileMd5 (checkReq) returns (checkResp)
@handler uploadDataSetHandler
post /dataSet/upload () returns ()
}

View File

@ -308,6 +308,5 @@ func RegisterHandlers(server *rest.Server, serverCtx *svc.ServiceContext) {
},
},
rest.WithPrefix("/pcm/v1"),
rest.WithMaxBytes(1111111111),
)
}

View File

@ -56,12 +56,12 @@ func (l *ScheduleTaskByYamlLogic) ScheduleTaskByYaml(req *types.ScheduleTaskByYa
logx.Error(err)
return err
}
switch task.ServiceName {
case "kubeNative":
switch task.TaskType {
case "cloud":
l.svcCtx.ScheduleCloudClient.Push(string(reqMessage))
case "ac", "th":
case "hpc":
l.svcCtx.ScheduleHpcClient.Push(string(reqMessage))
case "modelArts":
case "ai":
l.svcCtx.ScheduleAiClient.Push(string(reqMessage))
}
}

View File

@ -61,7 +61,7 @@ func (l *ScheduleTaskLogic) ScheduleTask(req *types.ScheduleTaskReq) (err error)
logx.Error(err)
return err
}
switch task.ServiceName {
switch task.TaskType {
case "kubeNative":
l.svcCtx.ScheduleCloudClient.Push(string(reqMessage))
case "ac", "th":

View File

@ -30,10 +30,9 @@ func (l *ScheduleAiMq) Consume(_, val string) error {
var task *types.TaskInfo
json.Unmarshal([]byte(val), &task)
ai := model.Ai{
TaskId: task.TaskId,
Status: "Saved",
ServiceName: task.ServiceName,
YamlString: val,
TaskId: task.TaskId,
Status: "Saved",
YamlString: val,
}
tool.Convert(task.Metadata, &ai)
// 存储数据

View File

@ -5,6 +5,7 @@ import (
"context"
"encoding/json"
"github.com/zeromicro/go-zero/core/logx"
"gitlink.org.cn/jcce-pcm/pcm-coordinator/api/internal/pkg/scheduler"
"gitlink.org.cn/jcce-pcm/pcm-coordinator/api/internal/svc"
"gitlink.org.cn/jcce-pcm/pcm-coordinator/api/internal/types"
"gitlink.org.cn/jcce-pcm/pcm-coordinator/model"
@ -54,13 +55,12 @@ func UnMarshalK8sStruct(yamlString string, taskId int64) model.Cloud {
unstructureObj := &unstructured.Unstructured{Object: unstructuredMap}
cloud = model.Cloud{
TaskId: taskId,
ApiVersion: unstructureObj.GetAPIVersion(),
Name: unstructureObj.GetName(),
Kind: unstructureObj.GetKind(),
Namespace: unstructureObj.GetNamespace(),
Status: "Saved",
ServiceName: "kubeNative",
TaskId: taskId,
ApiVersion: unstructureObj.GetAPIVersion(),
Name: unstructureObj.GetName(),
Kind: unstructureObj.GetKind(),
Namespace: unstructureObj.GetNamespace(),
Status: "Saved",
}
}
return cloud
@ -69,6 +69,10 @@ func UnMarshalK8sStruct(yamlString string, taskId int64) model.Cloud {
func (l *ScheduleCloudMq) Consume(_, val string) error {
var task *types.TaskInfo
json.Unmarshal([]byte(val), &task)
participantId, err := scheduler.MatchLabels(l.svcCtx.DbEngin, task)
if err != nil {
return err
}
// 构建提交作业到云算的结构体
bytes, err := json.Marshal(task.Metadata)
if err != nil {
@ -76,6 +80,9 @@ func (l *ScheduleCloudMq) Consume(_, val string) error {
}
cloud := UnMarshalK8sStruct(string(bytes), task.TaskId)
cloud.YamlString = string(bytes)
if len(participantId) != 0 {
cloud.ParticipantId = participantId[0]
}
// 存储数据
tx := l.svcCtx.DbEngin.Create(&cloud)
if tx.Error != nil {

View File

@ -33,7 +33,7 @@ func (l *ScheduleHpcMq) Consume(_, val string) error {
hpc := model.Hpc{
TaskId: task.TaskId,
Status: "Saved",
ServiceName: task.ServiceName,
ServiceName: task.TaskType,
YamlString: val,
}
tool.Convert(task.Metadata, &hpc)

View File

@ -0,0 +1 @@
package scheduler

View File

@ -0,0 +1,62 @@
package scheduler
import (
"gitlink.org.cn/jcce-pcm/pcm-coordinator/api/internal/types"
"gorm.io/gorm"
"math/rand"
"time"
)
// MatchLabels picks a participant whose labels satisfy every key/value pair
// in task.MatchLabels.
//
// For each label it queries sc_participant_label_info for the participant ids
// carrying that exact key and value, and keeps only the ids common to all
// labels. One id is then chosen at random from the survivors.
//
// It returns an empty (possibly nil) slice with a nil error when task is nil
// or when no participant satisfies all labels. A database failure is returned
// as the error instead of being treated as "no match".
func MatchLabels(dbEngin *gorm.DB, task *types.TaskInfo) ([]int64, error) {
	// A nil task carries no labels; report "no match" rather than panicking.
	if task == nil {
		return nil, nil
	}

	var ids []int64
	first := true
	for key, value := range task.MatchLabels {
		var participantId []int64
		err := dbEngin.Raw("select participant_id from sc_participant_label_info where `key` = ? and value = ?", key, value).Scan(&participantId).Error
		if err != nil {
			return nil, err
		}
		// One unmatched label is enough to rule out every participant.
		if len(participantId) == 0 {
			return nil, nil
		}
		if first {
			// The first label seeds the candidate set; nothing to intersect with yet.
			ids = participantId
			first = false
			continue
		}
		ids = intersect(ids, participantId)
		if len(ids) == 0 {
			return nil, nil
		}
	}
	return micsSlice(ids, 1), nil
}
// intersect returns the values present in both slice1 and slice2.
//
// Each common value appears exactly once in the result, in the order of its
// first occurrence in slice2. Duplicates within either input do not affect
// membership. The result is never nil.
func intersect(slice1, slice2 []int64) []int64 {
	// pending[v] is true while v is in slice1 and has not been emitted yet.
	pending := make(map[int64]bool, len(slice1))
	for _, v := range slice1 {
		pending[v] = true
	}

	common := make([]int64, 0)
	for _, v := range slice2 {
		if pending[v] {
			common = append(common, v)
			// Clear the mark so a value repeated in slice2 is reported once.
			pending[v] = false
		}
	}
	return common
}
// micsSlice returns up to count elements of origin, chosen uniformly at
// random without replacement. origin itself is left untouched.
//
// If count is zero or negative, or origin is empty, the result is an empty
// non-nil slice. If count exceeds len(origin), every element is returned in
// shuffled order.
func micsSlice(origin []int64, count int) []int64 {
	if count <= 0 || len(origin) == 0 {
		return []int64{}
	}
	if count > len(origin) {
		count = len(origin)
	}

	// Shuffle a private copy so the caller's slice keeps its order.
	shuffled := make([]int64, len(origin))
	copy(shuffled, origin)

	// Use a local generator seeded with nanosecond resolution: reseeding the
	// package-level source on every call disturbs other users of math/rand,
	// and a per-second seed repeats the same permutation within one second.
	rng := rand.New(rand.NewSource(time.Now().UnixNano()))
	rng.Shuffle(len(shuffled), func(i, j int) {
		shuffled[i], shuffled[j] = shuffled[j], shuffled[i]
	})

	// Cap the capacity so appends by the caller cannot expose the unused tail.
	return shuffled[:count:count]
}

View File

@ -161,9 +161,9 @@ type ScheduleTaskByYamlReq struct {
}
type TaskYaml struct {
TaskId int64 `yaml:"taskId"`
ServiceName string `yaml:"serviceName"`
Metadata interface{} `yaml:"metadata"`
TaskId int64 `yaml:"taskId"`
TaskType string `yaml:"taskType"`
Metadata interface{} `yaml:"metadata"`
}
type ScheduleTaskReq struct {
@ -175,9 +175,10 @@ type ScheduleTaskReq struct {
}
type TaskInfo struct {
TaskId int64 `json:"taskId,optional"`
ServiceName string `json:"serviceName"`
Metadata interface{} `json:"metadata"`
TaskId int64 `json:"taskId,optional"`
TaskType string `json:"taskType"`
MatchLabels map[string]string `json:"matchLabels"`
Metadata interface{} `json:"metadata"`
}
type JobTotalResp struct {

View File

@ -64,7 +64,6 @@ func main() {
for _, mq := range services {
serviceGroup.Add(mq)
}
logx.Infof("Starting server at %s:%d...\n", c.Host, c.Port)
serviceGroup.Start()

View File

@ -36,18 +36,19 @@ type (
}
Ai struct {
Id int64 `db:"id"` // id
TaskId int64 `db:"task_id"` // 任务id
ProjectId string `db:"project_id"` // 项目id
Name string `db:"name"` // 名称
Status string `db:"status"` // 状态
StartTime string `db:"start_time"` // 开始时间
RunningTime int64 `db:"running_time"` // 运行时间
CreatedBy int64 `db:"created_by"` // 创建人
CreatedTime sql.NullTime `db:"created_time"` // 创建时间
UpdatedBy int64 `db:"updated_by"` // 更新人
UpdatedTime sql.NullTime `db:"updated_time"` // 更新时间
DeletedFlag int64 `db:"deleted_flag"` // 是否删除0-否1-是)
Id int64 `db:"id"` // id
TaskId int64 `db:"task_id"` // 任务id
ParticipantId int64 `db:"participant_id"` // 集群静态信息id
ProjectId string `db:"project_id"` // 项目id
Name string `db:"name"` // 名称
Status string `db:"status"` // 状态
StartTime string `db:"start_time"` // 开始时间
RunningTime int64 `db:"running_time"` // 运行时间
CreatedBy int64 `db:"created_by"` // 创建人
CreatedTime sql.NullTime `db:"created_time"` // 创建时间
UpdatedBy int64 `db:"updated_by"` // 更新人
UpdatedTime sql.NullTime `db:"updated_time"` // 更新时间
DeletedFlag int64 `db:"deleted_flag"` // 是否删除0-否1-是)
ServiceName string `db:"service_name"`
Result string `db:"result"`
YamlString string `db:"yaml_string"`

View File

@ -36,23 +36,24 @@ type (
}
Cloud struct {
Id int64 `db:"id"` // id
TaskId int64 `db:"task_id"` // 任务id
ApiVersion string `db:"api_version"`
Name string `db:"name"` // 名称
Namespace string `db:"namespace"` // 命名空间
Kind string `db:"kind"` // 种类
Status string `db:"status"` // 状态
StartTime string `db:"start_time"` // 开始时间
RunningTime int64 `db:"running_time"` // 运行时长
CreatedBy int64 `db:"created_by"` // 创建人
CreatedTime sql.NullTime `db:"created_time"` // 创建时间
UpdatedBy int64 `db:"updated_by"` // 更新人
UpdatedTime sql.NullTime `db:"updated_time"` // 更新时间
DeletedFlag int64 `db:"deleted_flag"` // 是否删除0-否1-是)
ServiceName string `db:"service_name"`
YamlString string `db:"yaml_string"`
Result string `db:"result"`
Id int64 `db:"id"` // id
TaskId int64 `db:"task_id"` // 任务id
ParticipantId int64 `db:"participant_id"` // 集群静态信息id
ApiVersion string `db:"api_version"`
Name string `db:"name"` // 名称
Namespace string `db:"namespace"` // 命名空间
Kind string `db:"kind"` // 种类
Status string `db:"status"` // 状态
StartTime string `db:"start_time"` // 开始时间
RunningTime int64 `db:"running_time"` // 运行时长
CreatedBy int64 `db:"created_by"` // 创建人
CreatedTime sql.NullTime `db:"created_time"` // 创建时间
UpdatedBy int64 `db:"updated_by"` // 更新人
UpdatedTime sql.NullTime `db:"updated_time"` // 更新时间
DeletedFlag int64 `db:"deleted_flag"` // 是否删除0-否1-是)
ServiceName string `db:"service_name"`
YamlString string `db:"yaml_string"`
Result string `db:"result"`
}
)

View File

@ -36,34 +36,35 @@ type (
}
Hpc struct {
Id int64 `db:"id"` // id
TaskId int64 `db:"task_id"` // 任务id
JobId string `db:"job_id"` // 作业id
ServiceName string `db:"service_name"` // 服务名称
Name string `db:"name"` // 名称
Status string `db:"status"` // 状态
StartTime string `db:"start_time"` // 开始时间
RunningTime int64 `db:"running_time"` // 运行时间
CardCount int64 `db:"card_count"` // 卡数
CreatedBy int64 `db:"created_by"` // 创建人
CreatedTime sql.NullTime `db:"created_time"` // 创建时间
UpdatedBy int64 `db:"updated_by"` // 更新人
UpdatedTime sql.NullTime `db:"updated_time"` // 更新时间
DeletedFlag int64 `db:"deleted_flag"` // 是否删除0-否1-是)
WorkDir string `db:"work_dir"`
WallTime string `db:"wall_time"`
Result string `db:"result"`
YamlString string `db:"yaml_string"`
CmdScript string `db:"cmd_script"`
derivedEs string `db:"derived_es"`
cluster string `db:"cluster"`
blockId string `db:"block_id"`
allocNodes uint32 `db:"alloc_nodes"`
allocCpu uint32 `db:"alloc_cpu"`
version string `db:"version"`
account string `db:"account"`
exitCode uint32 `db:"exit_code"`
assocId uint32 `db:"assoc_id"`
Id int64 `db:"id"` // id
TaskId int64 `db:"task_id"` // 任务id
ParticipantId int64 `db:"participant_id"` // 集群静态信息id
JobId string `db:"job_id"` // 作业id
ServiceName string `db:"service_name"` // 服务名称
Name string `db:"name"` // 名称
Status string `db:"status"` // 状态
StartTime string `db:"start_time"` // 开始时间
RunningTime int64 `db:"running_time"` // 运行时间
CardCount int64 `db:"card_count"` // 卡数
CreatedBy int64 `db:"created_by"` // 创建人
CreatedTime sql.NullTime `db:"created_time"` // 创建时间
UpdatedBy int64 `db:"updated_by"` // 更新人
UpdatedTime sql.NullTime `db:"updated_time"` // 更新时间
DeletedFlag int64 `db:"deleted_flag"` // 是否删除0-否1-是)
WorkDir string `db:"work_dir"`
WallTime string `db:"wall_time"`
Result string `db:"result"`
YamlString string `db:"yaml_string"`
CmdScript string `db:"cmd_script"`
derivedEs string `db:"derived_es"`
cluster string `db:"cluster"`
blockId string `db:"block_id"`
allocNodes uint32 `db:"alloc_nodes"`
allocCpu uint32 `db:"alloc_cpu"`
version string `db:"version"`
account string `db:"account"`
exitCode uint32 `db:"exit_code"`
assocId uint32 `db:"assoc_id"`
}
)

View File

@ -4,7 +4,7 @@ package pcmCore;
option go_package = "/pcmCore";
message SyncInfoReq {
string serviceName = 1;
int64 participantId = 1;
string kind = 2;
repeated HpcInfo HpcInfoList = 3;
repeated CloudInfo CloudInfoList = 4;
@ -12,7 +12,7 @@ message SyncInfoReq {
}
message AiInfo {
string serviceName = 1;
int64 participantId = 1;
int64 taskId = 2;
string project_id = 3;
string name = 4;
@ -30,7 +30,7 @@ message AiInfo {
}
message CloudInfo {
string serviceName = 1;
int64 participant = 1;
int64 taskId = 2;
string apiVersion = 3;
string kind = 4;
@ -44,7 +44,7 @@ message CloudInfo {
}
message HpcInfo {
string serviceName = 1;
int64 participantId = 1;
int64 taskId = 2;
string jobId = 3;
@ -74,7 +74,7 @@ message SyncInfoResp{
message InfoListReq{
string kind = 1;
string serviceName = 2;
string participantId = 2;
}
message InfoListResp{
@ -158,7 +158,7 @@ message NodePhyInfo {
message ParticipantHeartbeatReq{
int64 participantId = 1; //participantId
string host = 2; //host
int32 port = 3; //port
string port = 3; //port
}
// participant

File diff suppressed because it is too large Load Diff

View File

@ -1,8 +1,8 @@
// Code generated by protoc-gen-go-grpc. DO NOT EDIT.
// versions:
// - protoc-gen-go-grpc v1.3.0
// - protoc v4.23.4
// source: pb/pcmCore.proto
// - protoc v3.19.4
// source: pcmCore.proto
package pcmCore
@ -27,9 +27,9 @@ const (
//
// For semantics around ctx use and closing/ending streaming RPCs, please refer to https://pkg.go.dev/google.golang.org/grpc/?tab=doc#ClientConn.NewStream.
type PcmCoreClient interface {
// SyncInfo Synchronous data information
//SyncInfo Synchronous data information
SyncInfo(ctx context.Context, in *SyncInfoReq, opts ...grpc.CallOption) (*SyncInfoResp, error)
// InfoList
//InfoList
InfoList(ctx context.Context, in *InfoListReq, opts ...grpc.CallOption) (*InfoListResp, error)
}
@ -63,9 +63,9 @@ func (c *pcmCoreClient) InfoList(ctx context.Context, in *InfoListReq, opts ...g
// All implementations must embed UnimplementedPcmCoreServer
// for forward compatibility
type PcmCoreServer interface {
// SyncInfo Synchronous data information
//SyncInfo Synchronous data information
SyncInfo(context.Context, *SyncInfoReq) (*SyncInfoResp, error)
// InfoList
//InfoList
InfoList(context.Context, *InfoListReq) (*InfoListResp, error)
mustEmbedUnimplementedPcmCoreServer()
}
@ -146,7 +146,7 @@ var PcmCore_ServiceDesc = grpc.ServiceDesc{
},
},
Streams: []grpc.StreamDesc{},
Metadata: "pb/pcmCore.proto",
Metadata: "pcmCore.proto",
}
const (
@ -160,7 +160,7 @@ const (
type ParticipantServiceClient interface {
// registerParticipant Participant注册接口
RegisterParticipant(ctx context.Context, in *ParticipantPhyReq, opts ...grpc.CallOption) (*ParticipantPhyResp, error)
// 心跳
//心跳
ReportHeartbeat(ctx context.Context, in *ParticipantHeartbeatReq, opts ...grpc.CallOption) (*HealthCheckResp, error)
}
@ -196,7 +196,7 @@ func (c *participantServiceClient) ReportHeartbeat(ctx context.Context, in *Part
type ParticipantServiceServer interface {
// registerParticipant Participant注册接口
RegisterParticipant(context.Context, *ParticipantPhyReq) (*ParticipantPhyResp, error)
// 心跳
//心跳
ReportHeartbeat(context.Context, *ParticipantHeartbeatReq) (*HealthCheckResp, error)
mustEmbedUnimplementedParticipantServiceServer()
}
@ -277,5 +277,5 @@ var ParticipantService_ServiceDesc = grpc.ServiceDesc{
},
},
Streams: []grpc.StreamDesc{},
Metadata: "pb/pcmCore.proto",
Metadata: "pcmCore.proto",
}