超算任务提交修改

Former-commit-id: e6dfc882d8ee008b632763f6e04a52ee364abad0
This commit is contained in:
zhangwei 2024-10-22 22:09:51 +08:00
parent a1b3aa4ba6
commit b1cb906370
5 changed files with 68 additions and 68 deletions

View File

@ -75,49 +75,49 @@ type PushNoticeResp struct {
} }
// HpcInfo is the JSON representation of one HPC (supercomputing) task record.
//
// Field names, types and JSON tags are kept exactly as declared so existing
// producers and consumers of this payload keep working. Environment is a
// plain string; it is filled from the cluster record when a task is committed.
type HpcInfo struct {
	Id          int64        `json:"id"`         // id
	TaskId      int64        `json:"task_id"`    // task id
	JobId       string       `json:"job_id"`     // job id (the job's id in the third-party system)
	AdapterId   int64        `json:"adapter_id"` // id of the adapter executing the task
	AdapterName string       `json:"adapterName,omitempty,optional"`
	ClusterId   int64        `json:"cluster_id"` // id of the cluster executing the task
	ClusterName string       `json:"clusterName,omitempty,optional"`
	ClusterType string       `json:"cluster_type"` // type of the cluster executing the task
	Name        string       `json:"name"`         // name
	Status      string       `json:"status"`       // status
	CmdScript   string       `json:"cmd_script"`
	StartTime   string       `json:"start_time"`   // start time
	RunningTime int64        `json:"running_time"` // running time
	DerivedEs   string       `json:"derived_es"`
	Cluster     string       `json:"cluster"`
	BlockId     int64        `json:"block_id"`
	AllocNodes  int64        `json:"alloc_nodes"`
	AllocCpu    int64        `json:"alloc_cpu"`
	CardCount   int64        `json:"card_count"` // number of cards
	Version     string       `json:"version"`
	Account     string       `json:"account"`
	WorkDir     string       `json:"work_dir"` // working directory
	AssocId     int64        `json:"assoc_id"`
	Partition   string       `json:"partition,omitempty,optional"`
	ExitCode    int64        `json:"exit_code"`
	WallTime    string       `json:"wall_time"`  // maximum running time
	Result      string       `json:"result"`     // run result
	DeletedAt   sql.NullTime `json:"deleted_at"` // deletion time
	YamlString  string       `json:"yaml_string"`
	AppType     string       `json:"app_type"`     // application type
	AppName     string       `json:"app_name"`     // application name
	Queue       string       `json:"queue"`        // queue name
	SubmitType  string       `json:"submit_type"`  // cmd (command-line mode)
	NNode       string       `json:"n_node"`       // number of nodes; when this is set GAP_NODE_STRING must be ""
	StdOutFile  string       `json:"std_out_file"` // <work dir>/std.out.%j (the previous comment said std.err.%j, presumably a copy-paste slip; TODO confirm)
	StdErrFile  string       `json:"std_err_file"` // <work dir>/std.err.%j
	StdInput    string       `json:"std_input"`
	Environment string       `json:"environment"`
	DeletedFlag int64        `json:"deleted_flag"` // deleted flag (0 = no, 1 = yes)
	CreatedBy   int64        `json:"created_by"`   // creator
	CreatedTime time.Time    `json:"created_time"` // creation time
	UpdatedBy   int64        `json:"updated_by"`   // last updater
	UpdatedTime time.Time    `json:"updated_time"` // update time
}
type CloudInfo struct { type CloudInfo struct {

View File

@ -27,10 +27,7 @@ type (
Queue string `json:"queue,optional"` Queue string `json:"queue,optional"`
NNode string `json:"nNode,optional"` NNode string `json:"nNode,optional"`
SubmitType string `json:"submitType,optional"` SubmitType string `json:"submitType,optional"`
StdOutFile string `json:"stdOutFile,optional"`
StdErrFile string `json:"stdErrFile,optional"`
StdInput string `json:"stdInput,optional"` StdInput string `json:"stdInput,optional"`
Environment map[string]string `json:"environment,optional"`
ClusterType string `json:"clusterType,optional"` ClusterType string `json:"clusterType,optional"`
Partition string `json:"partition"` Partition string `json:"partition"`
} }

View File

@ -5,8 +5,6 @@ import (
"errors" "errors"
clientCore "gitlink.org.cn/JointCloud/pcm-coordinator/client" clientCore "gitlink.org.cn/JointCloud/pcm-coordinator/client"
"gitlink.org.cn/JointCloud/pcm-coordinator/pkg/models" "gitlink.org.cn/JointCloud/pcm-coordinator/pkg/models"
"k8s.io/apimachinery/pkg/util/json"
"math/rand"
"time" "time"
"gitlink.org.cn/JointCloud/pcm-coordinator/internal/svc" "gitlink.org.cn/JointCloud/pcm-coordinator/internal/svc"
@ -31,10 +29,21 @@ func NewCommitHpcTaskLogic(ctx context.Context, svcCtx *svc.ServiceContext) *Com
func (l *CommitHpcTaskLogic) CommitHpcTask(req *types.CommitHpcTaskReq) (resp *types.CommitHpcTaskResp, err error) { func (l *CommitHpcTaskLogic) CommitHpcTask(req *types.CommitHpcTaskReq) (resp *types.CommitHpcTaskResp, err error) {
var clusterInfo types.ClusterInfo
l.svcCtx.DbEngin.Raw("SELECT * FROM `t_cluster` where id = ? and label = ?", req.ClusterId, req.ClusterType).First(&clusterInfo)
if len(clusterInfo.Id) == 0 {
resp.Code = 400
resp.Msg = "no cluster found"
return resp, nil
}
// 构建主任务结构体 // 构建主任务结构体
taskModel := models.Task{ taskModel := models.Task{
Name: req.Name, Name: req.Name,
Description: req.Description, Description: req.Description,
CommitTime: time.Now(),
Status: "Saved",
AdapterTypeDict: "2", AdapterTypeDict: "2",
} }
@ -44,31 +53,21 @@ func (l *CommitHpcTaskLogic) CommitHpcTask(req *types.CommitHpcTaskReq) (resp *t
return nil, tx.Error return nil, tx.Error
} }
var clusterIds []int64
l.svcCtx.DbEngin.Raw("SELECT id FROM `t_cluster` where adapter_id in ? and label = ?", req.AdapterIds, req.ClusterType).Scan(&clusterIds)
if len(clusterIds) == 0 || clusterIds == nil {
resp.Code = 400
resp.Msg = "no cluster found"
return resp, nil
}
var clusterName string var clusterName string
var adapterId int64 var adapterId int64
var adapterName string var adapterName string
clusterId := clusterIds[rand.Intn(len(clusterIds))] l.svcCtx.DbEngin.Raw("SELECT nickname FROM `t_cluster` where id = ?", req.ClusterId).Scan(&clusterName)
l.svcCtx.DbEngin.Raw("SELECT nickname FROM `t_cluster` where id = ?", clusterId).Scan(&clusterName) l.svcCtx.DbEngin.Raw("SELECT adapter_id FROM `t_cluster` where id = ?", req.ClusterId).Scan(&adapterId)
l.svcCtx.DbEngin.Raw("SELECT adapter_id FROM `t_cluster` where id = ?", clusterId).Scan(&adapterId)
l.svcCtx.DbEngin.Raw("SELECT name FROM `t_adapter` where id = ?", adapterId).Scan(&adapterName) l.svcCtx.DbEngin.Raw("SELECT name FROM `t_adapter` where id = ?", adapterId).Scan(&adapterName)
if len(adapterName) == 0 || adapterName == "" { if len(adapterName) == 0 || adapterName == "" {
return nil, errors.New("no corresponding adapter found") return nil, errors.New("no corresponding adapter found")
} }
env, _ := json.Marshal(req.Environment)
hpcInfo := models.TaskHpc{ hpcInfo := models.TaskHpc{
TaskId: taskModel.Id, TaskId: taskModel.Id,
AdapterId: uint(adapterId), AdapterId: adapterId,
AdapterName: adapterName, AdapterName: adapterName,
ClusterId: uint(clusterId), ClusterId: req.ClusterId,
ClusterName: clusterName, ClusterName: clusterName,
Name: taskModel.Name, Name: taskModel.Name,
CmdScript: req.CmdScript, CmdScript: req.CmdScript,
@ -81,14 +80,17 @@ func (l *CommitHpcTaskLogic) CommitHpcTask(req *types.CommitHpcTaskReq) (resp *t
Queue: req.Queue, Queue: req.Queue,
SubmitType: req.SubmitType, SubmitType: req.SubmitType,
NNode: req.NNode, NNode: req.NNode,
Account: req.Account, Account: clusterInfo.Username,
StdOutFile: req.StdOutFile, StdOutFile: req.StdOutFile,
StdErrFile: req.StdErrFile, StdErrFile: req.StdErrFile,
StdInput: req.StdInput, StdInput: req.StdInput,
Partition: req.Partition, Partition: req.Partition,
Environment: string(env), Environment: clusterInfo.Environment,
CreatedTime: time.Now(),
UpdatedTime: time.Now(),
Status: "Saved",
} }
hpcInfo.WorkDir = clusterInfo.WorkDir + "/" + req.WorkDir
tx = l.svcCtx.DbEngin.Create(&hpcInfo) tx = l.svcCtx.DbEngin.Create(&hpcInfo)
if tx.Error != nil { if tx.Error != nil {
return nil, tx.Error return nil, tx.Error
@ -96,7 +98,7 @@ func (l *CommitHpcTaskLogic) CommitHpcTask(req *types.CommitHpcTaskReq) (resp *t
noticeInfo := clientCore.NoticeInfo{ noticeInfo := clientCore.NoticeInfo{
AdapterId: adapterId, AdapterId: adapterId,
AdapterName: adapterName, AdapterName: adapterName,
ClusterId: clusterId, ClusterId: req.ClusterId,
ClusterName: clusterName, ClusterName: clusterName,
NoticeType: "create", NoticeType: "create",
TaskName: req.Name, TaskName: req.Name,

View File

@ -835,6 +835,7 @@ type ClusterInfo struct {
Location string `json:"location,omitempty" db:"location"` Location string `json:"location,omitempty" db:"location"`
CreateTime string `json:"createTime,omitempty" db:"created_time" gorm:"autoCreateTime"` CreateTime string `json:"createTime,omitempty" db:"created_time" gorm:"autoCreateTime"`
Environment string `json:"environment,omitempty" db:"environment"` Environment string `json:"environment,omitempty" db:"environment"`
WorkDir string `json:"workDir,omitempty" db:"work_dir"`
} }
type ClusterDelReq struct { type ClusterDelReq struct {

View File

@ -39,9 +39,9 @@ type (
Id int64 `db:"id"` // id Id int64 `db:"id"` // id
TaskId int64 `db:"task_id"` // 任务id TaskId int64 `db:"task_id"` // 任务id
JobId string `db:"job_id"` // 作业id(在第三方系统中的作业id) JobId string `db:"job_id"` // 作业id(在第三方系统中的作业id)
AdapterId uint `db:"adapter_d"` // 适配器id AdapterId int64 `db:"adapter_d"` // 适配器id
AdapterName string `db:"adapter_name"` //适配器名称 AdapterName string `db:"adapter_name"` //适配器名称
ClusterId uint `db:"cluster_id"` //集群id ClusterId int64 `db:"cluster_id"` //集群id
ClusterName string `db:"cluster_name"` //集群名称 ClusterName string `db:"cluster_name"` //集群名称
Name string `db:"name"` // 名称 Name string `db:"name"` // 名称
Status string `db:"status"` // 状态 Status string `db:"status"` // 状态