hpc job submit

Former-commit-id: b8bbe255f022d1a2ea7ce405bf3ed3378210451d
zhouqunjie 2024-03-12 15:13:30 +08:00
parent 271175d0c2
commit 9824a3e38a
12 changed files with 307 additions and 463 deletions

View File

@ -7,142 +7,6 @@ info(
email: "450705171@qq.com"
)
type (
submitJobReq {
SlurmVersion string `json:"slurmVersion"`
/****************params from ac********************/
Apptype string `json:"apptype,optional"`
Appname string `json:"appname,optional"`
StrJobManagerID int64 `json:"strJobManagerID,optional"`
MapAppJobInfo MapAppJobInfo `json:"mapAppJobInfo,optional"`
/****************params from ac********************/
Account string `json:"account,optional"` //
Acctg_freq string `json:"acctg_freq,optional"`
Alloc_node string `json:"alloc_node,optional"`
Alloc_resp_port int32 `json:"alloc_resp_port,optional"`
Alloc_sid int32 `json:"alloc_sid,optional"`
Argc int32 `json:"argc,optional"`
Argv []Argv `json:"Argv,optional"`
Array_inx string `json:"array_inx,optional"`
Begin_time int64 `json:"begin_time,optional"`
Ckpt_interval int32 `json:"ckpt_interval,optional"`
Ckpt_dir string `json:"ckpt_dir,optional"`
Comment string `json:"comment,optional"`
Contiguous int32 `json:"contiguous,optional"`
Cpu_bind string `json:"cpu_bind,optional"`
Cpu_bind_type int32 `json:"cpu_bind_type,optional"`
Dependency string `json:"dependency,optional"`
End_time int64 `json:"end_time,optional"`
Environment []Environment `json:"Environment,optional"`
Env_size int32 `json:"env_size,optional"`
Exc_nodes string `json:"exc_nodes,optional"`
Features string `json:"features,optional"`
Gres string `json:"gres,optional"`
Group_id int32 `json:"group_id,optional"`
Immediate int32 `json:"immediate,optional"`
Job_id int32 `json:"job_id,optional"`
Kill_on_node_fail int32 `json:"kill_on_node_fail,optional"`
Licenses string `json:"licenses,optional"`
Mail_type int32 `json:"mail_type,optional"`
Mail_user string `json:"mail_user,optional"`
Mem_bind string `json:"mem_bind,optional"`
Mem_bind_type int32 `json:"mem_bind_type,optional"`
Name string `json:"name,optional"` //
Network string `json:"network,optional"`
Nice int32 `json:"nice,optional"`
Num_tasks int32 `json:"num_tasks,optional"`
Open_mode int32 `json:"open_mode,optional"`
Other_port int32 `json:"other_port,optional"`
Overcommit int32 `json:"overcommit,optional"`
Partition string `json:"partition,optional"`
Plane_size int32 `json:"plane_size,optional"`
Priority int32 `json:"priority,optional"`
Profile int32 `json:"profile,optional"`
Qos string `json:"qos,optional"`
Resp_host string `json:"resp_host,optional"`
Req_nodes string `json:"req_nodes,optional"`
Requeue int32 `json:"requeue,optional"`
Reservation string `json:"reservation,optional"`
Script string `json:"script,optional"` //
Shared int32 `json:"shared,optional"`
Spank_job_env_size int32 `json:"spank_job_env_size,optional"`
Task_dist int32 `json:"task_dist,optional"`
Time_limit int32 `json:"time_limit,optional"`
Time_min int32 `json:"time_min,optional"`
User_id int32 `json:"user_id,optional"` //
Wait_all_nodes int32 `json:"wait_all_nodes,optional"`
Warn_signal int32 `json:"warn_signal,optional"`
Warn_time int32 `json:"warn_time,optional"`
Work_dir string `json:"work_dir,optional"`
Cpus_per_task int32 `json:"cpus_per_task,optional"`
Min_cpus int32 `json:"min_cpus,optional"` //
Max_cpus int32 `json:"max_cpus,optional"`
Min_nodes int32 `json:"min_nodes,optional"`
Max_nodes int32 `json:"max_nodes,optional"`
Boards_per_node int32 `json:"boards_per_node,optional"`
Sockets_per_board int32 `json:"sockets_per_board,optional"`
Sockets_per_node int32 `json:"sockets_per_node,optional"`
Cores_per_socket int32 `json:"cores_per_socket,optional"`
Threads_per_core int32 `json:"threads_per_core,optional"`
Ntasks_per_node int32 `json:"ntasks_per_node,optional"`
Ntasks_per_socket int32 `json:"ntasks_per_socket,optional"`
Ntasks_per_core int32 `json:"ntasks_per_core,optional"`
Ntasks_per_board int32 `json:"ntasks_per_board,optional"`
Pn_min_cpus int32 `json:"pn_min_cpus,optional"`
Pn_min_memory int32 `json:"pn_min_memory,optional"`
Pn_min_tmp_disk int32 `json:"pn_min_tmp_disk,optional"`
Reboot int32 `json:"reboot,optional"`
Rotate int32 `json:"rotate,optional"`
Req_switch int32 `json:"req_switch,optional"`
Std_err string `json:"std_err,optional"`
Std_in string `json:"std_in,optional"`
Std_out string `json:"std_out,optional"`
Wait4switch int32 `json:"wait4switch,optional"`
Wckey string `json:"wckey,optional"`
}
submitJobResp {
//Job_id int32 `json:"job_id"`
//Step_id int32 `json:"step_id"`
//Error_code int32 `json:"error_code"`
Code string `json:"code"` //Error_code return code in Tianhe
Msg string `json:"msg"`
Data string `json:"data"` //Job_id job id in Tianhe
}
MapAppJobInfo {
GAP_CMD_FILE string `json:"GAP_CMD_FILE"` //command-line content
GAP_NNODE string `json:"GAP_NNODE"` //number of nodes; when this is set, GAP_NODE_STRING must be ""
GAP_NODE_STRING string `json:"GAP_NODE_STRING,optional"` //specified nodes; when this is set, GAP_NNODE must be ""
GAP_SUBMIT_TYPE string `json:"GAP_SUBMIT_TYPE"` //cmd: command-line mode
GAP_JOB_NAME string `json:"GAP_JOB_NAME"` //job name
GAP_WORK_DIR string `json:"GAP_WORK_DIR"` //working directory
GAP_QUEUE string `json:"GAP_QUEUE"` //queue name
GAP_NPROC string `json:"GAP_NPROC,optional"` //total number of cores; fill in either GAP_NPROC or GAP_PPN
GAP_PPN string `json:"GAP_PPN,optional"` //CPU cores per node; fill in either GAP_NPROC or GAP_PPN
GAP_NGPU string `json:"GAP_NGPU,optional"` //GPU cards per node
GAP_NDCU string `json:"GAP_NDCU,optional"` //DCU cards per node
GAP_JOB_MEM string `json:"GAP_JOB_MEM,optional"` //memory per node, in MB/GB
GAP_WALL_TIME string `json:"GAP_WALL_TIME"` //maximum runtime, HH:MM:ss
GAP_EXCLUSIVE string `json:"GAP_EXCLUSIVE,optional"` //whether to use nodes exclusively: 1 = exclusive, empty = shared
GAP_APPNAME string `json:"GAP_APPNAME"` //BASE for base applications; a specific application's English name may also be given
GAP_MULTI_SUB string `json:"GAP_MULTI_SUB,optional"` //job-group length; a positive integer of at most 50 is recommended
GAP_STD_OUT_FILE string `json:"GAP_STD_OUT_FILE"` //working directory/std.out.%j
GAP_STD_ERR_FILE string `json:"GAP_STD_ERR_FILE"` //working directory/std.err.%j
}
Argv {
Argv string `json:"argv,optional"`
}
Environment {
Environment string `json:"environment,optional"`
}
)
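The GAP_* fields above carry mutually exclusive pairs (GAP_NNODE vs. GAP_NODE_STRING, GAP_NPROC vs. GAP_PPN). As context for the AC-style payload shape this commit removes from the coordinator API, a hypothetical cmd-mode value of the generated MapAppJobInfo type might look like the sketch below (all values are placeholders):

info := types.MapAppJobInfo{
    GAP_CMD_FILE:     "./run.sh",                    // command-line content
    GAP_NNODE:        "2",                           // node count set, so GAP_NODE_STRING stays ""
    GAP_SUBMIT_TYPE:  "cmd",
    GAP_JOB_NAME:     "demo-job",
    GAP_WORK_DIR:     "/public/home/demo",
    GAP_QUEUE:        "normal",
    GAP_PPN:          "32",                          // cores per node set, so GAP_NPROC stays ""
    GAP_WALL_TIME:    "01:00:00",
    GAP_APPNAME:      "BASE",
    GAP_STD_OUT_FILE: "/public/home/demo/std.out.%j",
    GAP_STD_ERR_FILE: "/public/home/demo/std.err.%j",
}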
type (
getRegionResp {
Code int32 `json:"code"`
@ -221,36 +85,6 @@ type (
}
)
type (
commitHpcTaskReq {
Name string `json:"name"`
Description string `json:"description,optional"`
tenantId int64 `json:"tenantId,optional"`
TaskId int64 `json:"taskId,optional"`
participantId int64 `json:"participantId,optional"`
matchLabels map[string]string `json:"matchLabels,optional"`
cardCount int64 `json:"cardCount,optional"`
workDir string `json:"workDir,optional"`
wallTime string `json:"wallTime,optional"`
cmdScript string `json:"cmdScript,optional"`
appType string `json:"appType,optional"`
appName string `json:"appName,optional"`
queue string `json:"queue,optional"`
nNode string `json:"nNode,optional"`
submitType string `json:"submitType,optional"`
stdOutFile string `json:"stdOutFile,optional"`
stdErrFile string `json:"stdErrFile,optional"`
stdInput string `json:"stdInput,optional"`
environment map[string]string `json:"environment,optional"`
}
)
type (
commitHpcTaskResp {
TaskId int64 `json:"taskId"`
}
)
type (
commitVmTaskReq {
server ServerCommit `json:"server,optional"`

View File

@ -16,6 +16,37 @@ type Job {
StateofJob string `json:"StateofJob"`
}
type (
commitHpcTaskReq {
Name string `json:"name"` // paratera:jobName
Description string `json:"description,optional"`
tenantId int64 `json:"tenantId,optional"`
TaskId int64 `json:"taskId,optional"`
participantId int64 `json:"participantId,optional"`
matchLabels map[string]string `json:"matchLabels,optional"`
cardCount int64 `json:"cardCount,optional"`
workDir string `json:"workDir,optional"` //paratera:workingDir
wallTime string `json:"wallTime,optional"`
cmdScript string `json:"cmdScript,optional"` // paratera:bootScript
appType string `json:"appType,optional"`
appName string `json:"appName,optional"` // paratera:jobGroupName ac:appname
queue string `json:"queue,optional"`
nNode string `json:"nNode,optional"`
submitType string `json:"submitType,optional"`
stdOutFile string `json:"stdOutFile,optional"`
stdErrFile string `json:"stdErrFile,optional"`
stdInput string `json:"stdInput,optional"`
environment map[string]string `json:"environment,optional"`
}
)
type (
commitHpcTaskResp {
TaskId int64 `json:"taskId"`
}
)
type (
listJobReq {
}

View File

@ -37,10 +37,6 @@ service pcm {
@handler commitTaskHandler
post /core/commitTask (commitTaskReq)
@doc "提交超算任务"
@handler commitHpcTaskHandler
post /core/commitHpcTask (commitHpcTaskReq) returns (commitHpcTaskResp)
@doc "提交虚拟机任务"
@handler commitVmTaskHandler
post /core/commitVmTask (commitVmTaskReq) returns (commitVmTaskResp)
@ -69,10 +65,6 @@ service pcm {
@handler listClusterHandler
get /core/listCluster/:centerId (listClusterReq) returns (listClusterResp)
@doc "提交任务(超算)"
@handler submitJobHandler
post /core/submitJob (submitJobReq) returns (submitJobResp)
@doc "获取region"
@handler getRegionHandler
get /core/getRegion returns (getRegionResp)
@ -120,6 +112,10 @@ service pcm {
group : hpc
)
service pcm {
@doc "提交超算任务"
@handler commitHpcTaskHandler
post /hpc/commitHpcTask (commitHpcTaskReq) returns (commitHpcTaskResp)
@doc "超算查询任务列表"
@handler listJobHandler
get /hpc/listJob (listJobReq) returns (listJobResp)
@ -130,7 +126,7 @@ service pcm {
@doc "超算查询资产列表"
@handler queueAssetsHandler
get /queue/assets returns (QueueAssetsResp)
get /hpc/queueAssets returns (QueueAssetsResp)
}
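With this move, the HPC commit endpoint lives under the hpc group; a minimal client sketch (host is an assumption, the port comes from the pcm.yaml shown below, field values are placeholders):

package main

import (
    "bytes"
    "encoding/json"
    "fmt"
    "net/http"
)

func main() {
    // Placeholder body following commitHpcTaskReq; only a few string fields are shown.
    body, _ := json.Marshal(map[string]string{
        "name":       "demo-job",
        "cmdScript":  "./run.sh",
        "queue":      "normal",
        "nNode":      "1",
        "workDir":    "/public/home/demo",
        "wallTime":   "01:00:00",
        "submitType": "cmd",
    })
    // commitHpcTask moved from /core/commitHpcTask to /hpc/commitHpcTask in this commit.
    resp, err := http.Post("http://localhost:8999/hpc/commitHpcTask", "application/json", bytes.NewReader(body))
    if err != nil {
        panic(err)
    }
    defer resp.Body.Close()
    fmt.Println(resp.Status) // a successful call returns a JSON body carrying taskId
}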
//cloud second-level APIs

View File

@ -5,7 +5,7 @@ Port: 8999
Timeout: 50000
DB:
DataSource: root:uJpLd6u-J?HC1@(10.206.0.12:3306)/pcm?parseTime=true&loc=Local
DataSource: root:uJpLd6u-J?HC1@(47.92.88.143:3306)/pcm?parseTime=true&loc=Local
Redis:
Host: 10.206.0.12:6379

View File

@ -1,42 +0,0 @@
/*
Copyright (c) [2023] [pcm]
[pcm-coordinator] is licensed under Mulan PSL v2.
You can use this software according to the terms and conditions of the Mulan PSL v2.
You may obtain a copy of Mulan PSL v2 at:
http://license.coscl.org.cn/MulanPSL2
THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
See the Mulan PSL v2 for more details.
*/
package core
import (
"net/http"
"github.com/zeromicro/go-zero/rest/httpx"
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/logic/core"
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/svc"
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/types"
)
func SubmitJobHandler(svcCtx *svc.ServiceContext) http.HandlerFunc {
return func(w http.ResponseWriter, r *http.Request) {
var req types.SubmitJobReq
if err := httpx.Parse(r, &req); err != nil {
httpx.ErrorCtx(r.Context(), w, err)
return
}
l := core.NewSubmitJobLogic(r.Context(), svcCtx)
resp, err := l.SubmitJob(&req)
if err != nil {
httpx.ErrorCtx(r.Context(), w, err)
} else {
httpx.OkJsonCtx(r.Context(), w, resp)
}
}
}

View File

@ -1,10 +1,10 @@
package core
package hpc
import (
"net/http"
"github.com/zeromicro/go-zero/rest/httpx"
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/logic/core"
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/logic/hpc"
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/svc"
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/types"
)
@ -17,7 +17,7 @@ func CommitHpcTaskHandler(svcCtx *svc.ServiceContext) http.HandlerFunc {
return
}
l := core.NewCommitHpcTaskLogic(r.Context(), svcCtx)
l := hpc.NewCommitHpcTaskLogic(r.Context(), svcCtx)
resp, err := l.CommitHpcTask(&req)
if err != nil {
httpx.ErrorCtx(r.Context(), w, err)

View File

@ -1,84 +0,0 @@
/*
Copyright (c) [2023] [pcm]
[pcm-coordinator] is licensed under Mulan PSL v2.
You can use this software according to the terms and conditions of the Mulan PSL v2.
You may obtain a copy of Mulan PSL v2 at:
http://license.coscl.org.cn/MulanPSL2
THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
See the Mulan PSL v2 for more details.
*/
package core
import (
"context"
"github.com/jinzhu/copier"
"github.com/pkg/errors"
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/svc"
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/types"
"gitlink.org.cn/JointCloud/pcm-coordinator/pkg/helper/xerr"
"gitlink.org.cn/JointCloud/pcm-coordinator/pkg/utils"
slurmClient "gitlink.org.cn/JointCloud/pcm-slurm/slurmclient"
"gitlink.org.cn/jcce-pcm/pcm-ac/hpcAC"
"strconv"
"github.com/zeromicro/go-zero/core/logx"
)
type SubmitJobLogic struct {
logx.Logger
ctx context.Context
svcCtx *svc.ServiceContext
}
func NewSubmitJobLogic(ctx context.Context, svcCtx *svc.ServiceContext) *SubmitJobLogic {
return &SubmitJobLogic{
Logger: logx.WithContext(ctx),
ctx: ctx,
svcCtx: svcCtx,
}
}
func (l *SubmitJobLogic) SubmitJob(req *types.SubmitJobReq) (resp *types.SubmitJobResp, err error) {
coreResp := &types.SubmitJobResp{}
version := req.SlurmVersion
switch version {
case "ac":
acReq := &hpcAC.SubmitJobReq{}
err = copier.CopyWithOption(acReq, req, copier.Option{Converters: utils.Converters})
acResp, err := l.svcCtx.ACRpc.SubmitJob(l.ctx, acReq)
if err != nil {
return nil, errors.Wrapf(xerr.NewErrMsg("Failed to submit job to AC"), "Failed to submit job to Shuguang err : %v ,req:%+v", err, req)
}
if acResp.Data == "" {
return nil, errors.Wrapf(xerr.NewErrMsg("Failed to submit job to AC, job id is empty"), "Failed to submit job to Shuguang err : %v", err)
}
coreResp.Msg = acResp.Msg
coreResp.Code = acResp.Code
coreResp.Data = acResp.Data
case "th":
thReq := &slurmClient.SubmitJobReq{}
err = copier.CopyWithOption(thReq, req, copier.Option{Converters: utils.Converters})
tianheResp, err := l.svcCtx.THRpc.SubmitJob(l.ctx, thReq)
if err != nil {
return nil, errors.Wrapf(xerr.NewErrMsg("Failed to submit job to Tianhe"), "Failed to submit job to Tianhe err : %v ,req:%+v", err, req)
}
coreResp.Msg = "success"
coreResp.Code = strconv.Itoa(200)
coreResp.Data = strconv.Itoa(int(tianheResp.JobId))
}
return coreResp, nil
}

View File

@ -0,0 +1,86 @@
package hpc
import (
"context"
"gitlink.org.cn/JointCloud/pcm-coordinator/pkg/constants"
"gitlink.org.cn/JointCloud/pcm-coordinator/pkg/models"
"k8s.io/apimachinery/pkg/util/json"
"time"
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/svc"
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/types"
"github.com/zeromicro/go-zero/core/logx"
)
type CommitHpcTaskLogic struct {
logx.Logger
ctx context.Context
svcCtx *svc.ServiceContext
}
func NewCommitHpcTaskLogic(ctx context.Context, svcCtx *svc.ServiceContext) *CommitHpcTaskLogic {
return &CommitHpcTaskLogic{
Logger: logx.WithContext(ctx),
ctx: ctx,
svcCtx: svcCtx,
}
}
func (l *CommitHpcTaskLogic) CommitHpcTask(req *types.CommitHpcTaskReq) (resp *types.CommitHpcTaskResp, err error) {
// build the main task struct
taskModel := models.Task{
Status: constants.Saved,
Description: req.Description,
Name: req.Name,
CommitTime: time.Now(),
}
// save the task record to the database
tx := l.svcCtx.DbEngin.Create(&taskModel)
if tx.Error != nil {
return nil, tx.Error
}
env, _ := json.Marshal(req.Environment)
hpcInfo := models.TaskHpc{
TaskId: taskModel.Id,
ClusterId: 1706858330967773111,
Name: taskModel.Name,
Status: "Saved",
CmdScript: req.CmdScript,
StartTime: time.Now().String(),
CardCount: req.CardCount,
WorkDir: req.WorkDir,
WallTime: req.WallTime,
AppType: req.AppType,
AppName: req.AppName,
Queue: req.Queue,
SubmitType: req.SubmitType,
NNode: req.NNode,
StdOutFile: req.StdOutFile,
StdErrFile: req.StdErrFile,
StdInput: req.StdInput,
DeletedFlag: 0,
CreatedBy: req.ParticipantId,
CreatedTime: time.Now(),
UpdatedBy: req.ParticipantId,
UpdatedTime: time.Now(),
Environment: string(env),
}
tx = l.svcCtx.DbEngin.Create(&hpcInfo)
if tx.Error != nil {
return nil, tx.Error
}
// convert the task data into a message body
//reqMessage, err := json.Marshal(mqInfo)
//if err != nil {
// logx.Error(err)
// return nil, err
//}
//publish := l.svcCtx.RedisClient.Publish(context.Background(), mqInfo.TaskType, reqMessage)
//if publish.Err() != nil {
// return nil, publish.Err()
//}
return &types.CommitHpcTaskResp{TaskId: taskModel.Id}, nil
}
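Because Environment is persisted as a JSON string, a reader of task_hpc would reverse the json.Marshal above; a minimal sketch under that assumption (decodeEnv is a hypothetical helper, not part of this commit):

// decodeEnv turns the stored environment JSON back into a map.
func decodeEnv(stored string) (map[string]string, error) {
    env := map[string]string{}
    if stored == "" {
        return env, nil
    }
    if err := json.Unmarshal([]byte(stored), &env); err != nil {
        return nil, err
    }
    return env, nil
}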

View File

@ -1,132 +1,6 @@
// Code generated by goctl. DO NOT EDIT.
package types
type SubmitJobReq struct {
SlurmVersion string `json:"slurmVersion"`
Apptype string `json:"apptype,optional"`
Appname string `json:"appname,optional"`
StrJobManagerID int64 `json:"strJobManagerID,optional"`
MapAppJobInfo MapAppJobInfo `json:"mapAppJobInfo,optional"`
Account string `json:"account,optional"` //
Acctg_freq string `json:"acctg_freq,optional"`
Alloc_node string `json:"alloc_node,optional"`
Alloc_resp_port int32 `json:"alloc_resp_port,optional"`
Alloc_sid int32 `json:"alloc_sid,optional"`
Argc int32 `json:"argc,optional"`
Argv []Argv `json:"Argv,optional"`
Array_inx string `json:"array_inx,optional"`
Begin_time int64 `json:"begin_time,optional"`
Ckpt_interval int32 `json:"ckpt_interval,optional"`
Ckpt_dir string `json:"ckpt_dir,optional"`
Comment string `json:"comment,optional"`
Contiguous int32 `json:"contiguous,optional"`
Cpu_bind string `json:"cpu_bind,optional"`
Cpu_bind_type int32 `json:"cpu_bind_type,optional"`
Dependency string `json:"dependency,optional"`
End_time int64 `json:"end_time,optional"`
Environment []Environment `json:"Environment,optional"`
Env_size int32 `json:"env_size,optional"`
Exc_nodes string `json:"exc_nodes,optional"`
Features string `json:"features,optional"`
Gres string `json:"gres,optional"`
Group_id int32 `json:"group_id,optional"`
Immediate int32 `json:"immediate,optional"`
Job_id int32 `json:"job_id,optional"`
Kill_on_node_fail int32 `json:"kill_on_node_fail,optional"`
Licenses string `json:"licenses,optional"`
Mail_type int32 `json:"mail_type,optional"`
Mail_user string `json:"mail_user,optional"`
Mem_bind string `json:"mem_bind,optional"`
Mem_bind_type int32 `json:"mem_bind_type,optional"`
Name string `json:"name,optional"` //
Network string `json:"network,optional"`
Nice int32 `json:"nice,optional"`
Num_tasks int32 `json:"num_tasks,optional"`
Open_mode int32 `json:"open_mode,optional"`
Other_port int32 `json:"other_port,optional"`
Overcommit int32 `json:"overcommit,optional"`
Partition string `json:"partition,optional"`
Plane_size int32 `json:"plane_size,optional"`
Priority int32 `json:"priority,optional"`
Profile int32 `json:"profile,optional"`
Qos string `json:"qos,optional"`
Resp_host string `json:"resp_host,optional"`
Req_nodes string `json:"req_nodes,optional"`
Requeue int32 `json:"requeue,optional"`
Reservation string `json:"reservation,optional"`
Script string `json:"script,optional"` //
Shared int32 `json:"shared,optional"`
Spank_job_env_size int32 `json:"spank_job_env_size,optional"`
Task_dist int32 `json:"task_dist,optional"`
Time_limit int32 `json:"time_limit,optional"`
Time_min int32 `json:"time_min,optional"`
User_id int32 `json:"user_id,optional"` //
Wait_all_nodes int32 `json:"wait_all_nodes,optional"`
Warn_signal int32 `json:"warn_signal,optional"`
Warn_time int32 `json:"warn_time,optional"`
Work_dir string `json:"work_dir,optional"`
Cpus_per_task int32 `json:"cpus_per_task,optional"`
Min_cpus int32 `json:"min_cpus,optional"` //
Max_cpus int32 `json:"max_cpus,optional"`
Min_nodes int32 `json:"min_nodes,optional"`
Max_nodes int32 `json:"max_nodes,optional"`
Boards_per_node int32 `json:"boards_per_node,optional"`
Sockets_per_board int32 `json:"sockets_per_board,optional"`
Sockets_per_node int32 `json:"sockets_per_node,optional"`
Cores_per_socket int32 `json:"cores_per_socket,optional"`
Threads_per_core int32 `json:"threads_per_core,optional"`
Ntasks_per_node int32 `json:"ntasks_per_node,optional"`
Ntasks_per_socket int32 `json:"ntasks_per_socket,optional"`
Ntasks_per_core int32 `json:"ntasks_per_core,optional"`
Ntasks_per_board int32 `json:"ntasks_per_board,optional"`
Pn_min_cpus int32 `json:"pn_min_cpus,optional"`
Pn_min_memory int32 `json:"pn_min_memory,optional"`
Pn_min_tmp_disk int32 `json:"pn_min_tmp_disk,optional"`
Reboot int32 `json:"reboot,optional"`
Rotate int32 `json:"rotate,optional"`
Req_switch int32 `json:"req_switch,optional"`
Std_err string `json:"std_err,optional"`
Std_in string `json:"std_in,optional"`
Std_out string `json:"std_out,optional"`
Wait4switch int32 `json:"wait4switch,optional"`
Wckey string `json:"wckey,optional"`
}
type SubmitJobResp struct {
Code string `json:"code"` //Error_code return code in Tianhe
Msg string `json:"msg"`
Data string `json:"data"` //Job_id job id in Tianhe
}
type MapAppJobInfo struct {
GAP_CMD_FILE string `json:"GAP_CMD_FILE"` //command-line content
GAP_NNODE string `json:"GAP_NNODE"` //number of nodes; when this is set, GAP_NODE_STRING must be ""
GAP_NODE_STRING string `json:"GAP_NODE_STRING,optional"` //specified nodes; when this is set, GAP_NNODE must be ""
GAP_SUBMIT_TYPE string `json:"GAP_SUBMIT_TYPE"` //cmd: command-line mode
GAP_JOB_NAME string `json:"GAP_JOB_NAME"` //job name
GAP_WORK_DIR string `json:"GAP_WORK_DIR"` //working directory
GAP_QUEUE string `json:"GAP_QUEUE"` //queue name
GAP_NPROC string `json:"GAP_NPROC,optional"` //total number of cores; fill in either GAP_NPROC or GAP_PPN
GAP_PPN string `json:"GAP_PPN,optional"` //CPU cores per node; fill in either GAP_NPROC or GAP_PPN
GAP_NGPU string `json:"GAP_NGPU,optional"` //GPU cards per node
GAP_NDCU string `json:"GAP_NDCU,optional"` //DCU cards per node
GAP_JOB_MEM string `json:"GAP_JOB_MEM,optional"` //memory per node, in MB/GB
GAP_WALL_TIME string `json:"GAP_WALL_TIME"` //maximum runtime, HH:MM:ss
GAP_EXCLUSIVE string `json:"GAP_EXCLUSIVE,optional"` //whether to use nodes exclusively: 1 = exclusive, empty = shared
GAP_APPNAME string `json:"GAP_APPNAME"` //BASE for base applications; a specific application's English name may also be given
GAP_MULTI_SUB string `json:"GAP_MULTI_SUB,optional"` //job-group length; a positive integer of at most 50 is recommended
GAP_STD_OUT_FILE string `json:"GAP_STD_OUT_FILE"` //working directory/std.out.%j
GAP_STD_ERR_FILE string `json:"GAP_STD_ERR_FILE"` //working directory/std.err.%j
}
type Argv struct {
Argv string `json:"argv,optional"`
}
type Environment struct {
Environment string `json:"environment,optional"`
}
type GetRegionResp struct {
Code int32 `json:"code"`
Msg string `json:"msg"`
@ -202,32 +76,6 @@ type TaskYaml struct {
Metadata interface{} `yaml:"metadata"`
}
type CommitHpcTaskReq struct {
Name string `json:"name"`
Description string `json:"description,optional"`
TenantId int64 `json:"tenantId,optional"`
TaskId int64 `json:"taskId,optional"`
ParticipantId int64 `json:"participantId,optional"`
MatchLabels map[string]string `json:"matchLabels,optional"`
CardCount int64 `json:"cardCount,optional"`
WorkDir string `json:"workDir,optional"`
WallTime string `json:"wallTime,optional"`
CmdScript string `json:"cmdScript,optional"`
AppType string `json:"appType,optional"`
AppName string `json:"appName,optional"`
Queue string `json:"queue,optional"`
NNode string `json:"nNode,optional"`
SubmitType string `json:"submitType,optional"`
StdOutFile string `json:"stdOutFile,optional"`
StdErrFile string `json:"stdErrFile,optional"`
StdInput string `json:"stdInput,optional"`
Environment map[string]string `json:"environment,optional"`
}
type CommitHpcTaskResp struct {
TaskId int64 `json:"taskId"`
}
type CommitVmTaskReq struct {
Server ServerCommit `json:"server,optional"`
Platform string `json:"platform,optional"`
@ -902,6 +750,32 @@ type Job struct {
StateofJob string `json:"StateofJob"`
}
type CommitHpcTaskReq struct {
Name string `json:"name"` // paratera:jobName
Description string `json:"description,optional"`
TenantId int64 `json:"tenantId,optional"`
TaskId int64 `json:"taskId,optional"`
ParticipantId int64 `json:"participantId,optional"`
MatchLabels map[string]string `json:"matchLabels,optional"`
CardCount int64 `json:"cardCount,optional"`
WorkDir string `json:"workDir,optional"` //paratera:workingDir
WallTime string `json:"wallTime,optional"`
CmdScript string `json:"cmdScript,optional"` // paratera:bootScript
AppType string `json:"appType,optional"`
AppName string `json:"appName,optional"` // paratera:jobGroupName ac:appname
Queue string `json:"queue,optional"`
NNode string `json:"nNode,optional"`
SubmitType string `json:"submitType,optional"`
StdOutFile string `json:"stdOutFile,optional"`
StdErrFile string `json:"stdErrFile,optional"`
StdInput string `json:"stdInput,optional"`
Environment map[string]string `json:"environment,optional"`
}
type CommitHpcTaskResp struct {
TaskId int64 `json:"taskId"`
}
type ListJobReq struct {
}

View File

@ -0,0 +1,29 @@
package models
import "github.com/zeromicro/go-zero/core/stores/sqlx"
var _ TaskHpcModel = (*customTaskHpcModel)(nil)
type (
// TaskHpcModel is an interface to be customized, add more methods here,
// and implement the added methods in customTaskHpcModel.
TaskHpcModel interface {
taskHpcModel
withSession(session sqlx.Session) TaskHpcModel
}
customTaskHpcModel struct {
*defaultTaskHpcModel
}
)
// NewTaskHpcModel returns a model for the database table.
func NewTaskHpcModel(conn sqlx.SqlConn) TaskHpcModel {
return &customTaskHpcModel{
defaultTaskHpcModel: newTaskHpcModel(conn),
}
}
func (m *customTaskHpcModel) withSession(session sqlx.Session) TaskHpcModel {
return NewTaskHpcModel(sqlx.NewSqlConnFromSession(session))
}

View File

@ -0,0 +1,121 @@
// Code generated by goctl. DO NOT EDIT.
package models
import (
"context"
"database/sql"
"fmt"
"strings"
"time"
"github.com/zeromicro/go-zero/core/stores/builder"
"github.com/zeromicro/go-zero/core/stores/sqlc"
"github.com/zeromicro/go-zero/core/stores/sqlx"
"github.com/zeromicro/go-zero/core/stringx"
)
var (
taskHpcFieldNames = builder.RawFieldNames(&TaskHpc{})
taskHpcRows = strings.Join(taskHpcFieldNames, ",")
taskHpcRowsExpectAutoSet = strings.Join(stringx.Remove(taskHpcFieldNames, "`id`", "`create_at`", "`create_time`", "`created_at`", "`update_at`", "`update_time`", "`updated_at`"), ",")
taskHpcRowsWithPlaceHolder = strings.Join(stringx.Remove(taskHpcFieldNames, "`id`", "`create_at`", "`create_time`", "`created_at`", "`update_at`", "`update_time`", "`updated_at`"), "=?,") + "=?"
)
type (
taskHpcModel interface {
Insert(ctx context.Context, data *TaskHpc) (sql.Result, error)
FindOne(ctx context.Context, id int64) (*TaskHpc, error)
Update(ctx context.Context, data *TaskHpc) error
Delete(ctx context.Context, id int64) error
}
defaultTaskHpcModel struct {
conn sqlx.SqlConn
table string
}
TaskHpc struct {
Id int64 `db:"id"` // id
TaskId int64 `db:"task_id"` // task id
JobId string `db:"job_id"` // job id (the job's id in the third-party system)
ClusterId int64 `db:"cluster_id"` // id of the cluster that executes the task
Name string `db:"name"` // name
Status string `db:"status"` // status
CmdScript string `db:"cmd_script"`
StartTime string `db:"start_time"` // start time
RunningTime int64 `db:"running_time"` // running time
DerivedEs string `db:"derived_es"`
Cluster string `db:"cluster"`
BlockId int64 `db:"block_id"`
AllocNodes int64 `db:"alloc_nodes"`
AllocCpu int64 `db:"alloc_cpu"`
CardCount int64 `db:"card_count"` // number of cards
Version string `db:"version"`
Account string `db:"account"`
WorkDir string `db:"work_dir"` // working directory
AssocId int64 `db:"assoc_id"`
ExitCode int64 `db:"exit_code"`
WallTime string `db:"wall_time"` // maximum runtime
Result string `db:"result"` // run result
DeletedAt sql.NullTime `db:"deleted_at"` // deletion time
YamlString string `db:"yaml_string"`
AppType string `db:"app_type"` // application type
AppName string `db:"app_name"` // application name
Queue string `db:"queue"` // queue name
SubmitType string `db:"submit_type"` // cmd: command-line mode
NNode string `db:"n_node"` // number of nodes; when set, GAP_NODE_STRING must be ""
StdOutFile string `db:"std_out_file"` // working directory/std.out.%j
StdErrFile string `db:"std_err_file"` // working directory/std.err.%j
StdInput string `db:"std_input"`
Environment string `db:"environment"`
DeletedFlag int64 `db:"deleted_flag"` // whether deleted (0 = no, 1 = yes)
CreatedBy int64 `db:"created_by"` // created by
CreatedTime time.Time `db:"created_time"` // creation time
UpdatedBy int64 `db:"updated_by"` // updated by
UpdatedTime time.Time `db:"updated_time"` // update time
}
)
func newTaskHpcModel(conn sqlx.SqlConn) *defaultTaskHpcModel {
return &defaultTaskHpcModel{
conn: conn,
table: "`task_hpc`",
}
}
func (m *defaultTaskHpcModel) Delete(ctx context.Context, id int64) error {
query := fmt.Sprintf("delete from %s where `id` = ?", m.table)
_, err := m.conn.ExecCtx(ctx, query, id)
return err
}
func (m *defaultTaskHpcModel) FindOne(ctx context.Context, id int64) (*TaskHpc, error) {
query := fmt.Sprintf("select %s from %s where `id` = ? limit 1", taskHpcRows, m.table)
var resp TaskHpc
err := m.conn.QueryRowCtx(ctx, &resp, query, id)
switch err {
case nil:
return &resp, nil
case sqlc.ErrNotFound:
return nil, ErrNotFound
default:
return nil, err
}
}
func (m *defaultTaskHpcModel) Insert(ctx context.Context, data *TaskHpc) (sql.Result, error) {
query := fmt.Sprintf("insert into %s (%s) values (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)", m.table, taskHpcRowsExpectAutoSet)
ret, err := m.conn.ExecCtx(ctx, query, data.TaskId, data.JobId, data.ClusterId, data.Name, data.Status, data.CmdScript, data.StartTime, data.RunningTime, data.DerivedEs, data.Cluster, data.BlockId, data.AllocNodes, data.AllocCpu, data.CardCount, data.Version, data.Account, data.WorkDir, data.AssocId, data.ExitCode, data.WallTime, data.Result, data.DeletedAt, data.YamlString, data.AppType, data.AppName, data.Queue, data.SubmitType, data.NNode, data.StdOutFile, data.StdErrFile, data.StdInput, data.Environment, data.DeletedFlag, data.CreatedBy, data.CreatedTime, data.UpdatedBy, data.UpdatedTime)
return ret, err
}
func (m *defaultTaskHpcModel) Update(ctx context.Context, data *TaskHpc) error {
query := fmt.Sprintf("update %s set %s where `id` = ?", m.table, taskHpcRowsWithPlaceHolder)
_, err := m.conn.ExecCtx(ctx, query, data.TaskId, data.JobId, data.ClusterId, data.Name, data.Status, data.CmdScript, data.StartTime, data.RunningTime, data.DerivedEs, data.Cluster, data.BlockId, data.AllocNodes, data.AllocCpu, data.CardCount, data.Version, data.Account, data.WorkDir, data.AssocId, data.ExitCode, data.WallTime, data.Result, data.DeletedAt, data.YamlString, data.AppType, data.AppName, data.Queue, data.SubmitType, data.NNode, data.StdOutFile, data.StdErrFile, data.StdInput, data.Environment, data.DeletedFlag, data.CreatedBy, data.CreatedTime, data.UpdatedBy, data.UpdatedTime, data.Id)
return err
}
func (m *defaultTaskHpcModel) tableName() string {
return m.table
}
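A minimal sketch of using the generated model directly (the DSN and field values are assumptions; in the coordinator the insert goes through DbEngin instead):

package main

import (
    "context"
    "time"

    "github.com/zeromicro/go-zero/core/stores/sqlx"
    "gitlink.org.cn/JointCloud/pcm-coordinator/pkg/models"
)

func main() {
    conn := sqlx.NewMysql("user:pass@tcp(127.0.0.1:3306)/pcm?parseTime=true&loc=Local")
    taskHpcModel := models.NewTaskHpcModel(conn)

    now := time.Now()
    // Columns not set here fall back to their zero values.
    _, err := taskHpcModel.Insert(context.Background(), &models.TaskHpc{
        TaskId:      1,
        Name:        "demo-job",
        Status:      "Saved",
        StartTime:   now.String(),
        CreatedTime: now,
        UpdatedTime: now,
    })
    if err != nil {
        panic(err)
    }
}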

View File

@ -16,7 +16,6 @@ package tracker
import (
"testing"
"time"
)
func TestGetNamedMetrics(t *testing.T) {
@ -28,9 +27,9 @@ func TestGetNamedMetrics(t *testing.T) {
//})
//println("zzz", result[0].MetricValues[0].Sample.Value())
client, _ := NewPrometheus("http://10.105.20.4:30766")
result := client.GetNamedMetricsByTime([]string{"pod_cpu_usage", "pod_memory_usage_wo_cache"}, "1700521446", "1700551446", 10*time.Minute, ControllerOption{
PodsName: "sealos-task-112703-65c776b4b5-q4jgf",
})
println("zzz", result)
//client, _ := NewPrometheus("http://10.105.20.4:30766")
//result := client.GetNamedMetricsByTime([]string{"pod_cpu_usage", "pod_memory_usage_wo_cache"}, "1700521446", "1700551446", 10*time.Minute, ControllerOption{
// PodsName: "sealos-task-112703-65c776b4b5-q4jgf",
//})
//println("zzz", result)
}