Merge branch 'master' of https://gitlink.org.cn/JointCloud/pcm-coordinator
# Conflicts: # Dockerfile # etc/pcm.yaml # go.mod # go.sum # internal/logic/inference/deployinstancelistlogic.go # internal/logic/inference/getdeploytasksbytypelogic.go # internal/logic/inference/startallbydeploytaskidlogic.go # internal/logic/inference/stopallbydeploytaskidlogic.go # internal/storeLink/modelarts.go # internal/storeLink/storeLink.go Former-commit-id: 6bcef559a95de140a7790257734f2838e8e92bad
This commit is contained in:
commit
5cdb491d05
|
@ -0,0 +1,53 @@
|
|||
version: 2
|
||||
name: aly
|
||||
description: 发布环境:https://comnet.jointcloud.net
|
||||
global:
|
||||
concurrent: 1
|
||||
workflow:
|
||||
- ref: start
|
||||
name: 开始
|
||||
task: start
|
||||
- ref: end
|
||||
name: 结束
|
||||
task: end
|
||||
needs:
|
||||
- ssh_cmd_0
|
||||
- ref: git_clone_0
|
||||
name: git clone
|
||||
task: git_clone@1.2.9
|
||||
input:
|
||||
remote_url: '"https://gitlink.org.cn/JointCloud/pcm-coordinator.git"'
|
||||
ref: '"refs/heads/master"'
|
||||
commit_id: '""'
|
||||
depth: 1
|
||||
needs:
|
||||
- start
|
||||
- ref: docker_image_build_0
|
||||
name: docker镜像构建
|
||||
task: docker_image_build@1.6.0
|
||||
input:
|
||||
docker_username: ((aly.docker_user))
|
||||
docker_password: ((aly.docker_password))
|
||||
image_name: '"registry.cn-hangzhou.aliyuncs.com/jcce/pcm-core-api"'
|
||||
image_tag: '"latest"'
|
||||
registry_address: '"registry.cn-hangzhou.aliyuncs.com"'
|
||||
docker_file: '"Dockerfile"'
|
||||
docker_build_path: '"."'
|
||||
workspace: git_clone_0.git_path
|
||||
image_clean: true
|
||||
image_push: true
|
||||
build_args: '""'
|
||||
needs:
|
||||
- git_clone_0
|
||||
- ref: ssh_cmd_0
|
||||
name: ssh执行命令
|
||||
task: ssh_cmd@1.1.1
|
||||
input:
|
||||
ssh_private_key: ((aly.ssh_private_key))
|
||||
ssh_ip: '"47.92.39.128"'
|
||||
ssh_port: '"22"'
|
||||
ssh_user: '"root"'
|
||||
ssh_cmd: '"kubectl rollout restart deployment pcm-core-api -n ns-admin"'
|
||||
needs:
|
||||
- docker_image_build_0
|
||||
|
|
@ -0,0 +1,55 @@
|
|||
name: Docker
|
||||
|
||||
on:
|
||||
push:
|
||||
branches: [ "master" ]
|
||||
tags: [ 'v*.*.*' ]
|
||||
pull_request:
|
||||
branches: [ "master" ]
|
||||
workflow_dispatch: # 允许手动触发
|
||||
|
||||
env:
|
||||
REGISTRY: registry.cn-hangzhou.aliyuncs.com # 修改为你的阿里云镜像仓库地址
|
||||
IMAGE_NAME: jcce/pcm-core-api # 修改为你的阿里云镜像仓库名称
|
||||
IMAGE_TAG: latest
|
||||
|
||||
jobs:
|
||||
build:
|
||||
runs-on: ubuntu-latest
|
||||
permissions:
|
||||
contents: read
|
||||
packages: write
|
||||
id-token: write
|
||||
|
||||
steps:
|
||||
- name: Checkout repository
|
||||
uses: actions/checkout@v4
|
||||
|
||||
- name: Set up QEMU
|
||||
uses: docker/setup-qemu-action@v3
|
||||
|
||||
- name: Set up Docker Buildx
|
||||
uses: docker/setup-buildx-action@v3
|
||||
|
||||
- name: Login to Docker Hub
|
||||
uses: docker/login-action@v3
|
||||
with:
|
||||
registry: ${{ env.REGISTRY }}
|
||||
username: ${{ secrets.ALIYUN_USERNAME }}
|
||||
password: ${{ secrets.ALIYUN_PASSWORD }}
|
||||
|
||||
- name: Build and push
|
||||
run: |
|
||||
docker build -t ${{env.REGISTRY}}/${{env.IMAGE_NAME}}:${{env.IMAGE_TAG}} .
|
||||
docker push ${{env.REGISTRY}}/${{env.IMAGE_NAME}}:${{env.IMAGE_TAG}}
|
||||
|
||||
- name: Set up SSH key
|
||||
run: |
|
||||
mkdir -p ~/.ssh
|
||||
echo "${{ secrets.SSH_PRIVATE_KEY }}" > ~/.ssh/id_rsa
|
||||
chmod 600 ~/.ssh/id_rsa
|
||||
echo -e "Host *\n\tStrictHostKeyChecking no\n\n" > ~/.ssh/config
|
||||
|
||||
- name: SSH to remote server and restart deployment
|
||||
if: github.event_name != 'pull_request'
|
||||
run: ssh ${{ secrets.SSH_USER }}@${{ secrets.SSH_HOST }} "kubectl rollout restart deployment ${{ secrets.SSH_DEPLOYMENT }} -n ${{ secrets.SSH_NAMESPACE }}"
|
|
@ -0,0 +1,25 @@
|
|||
name: Sync Mirror Repository
|
||||
|
||||
on:
|
||||
schedule:
|
||||
- cron: '0 * * * *' # 每小时同步一次
|
||||
workflow_dispatch: # 允许手动触发
|
||||
|
||||
jobs:
|
||||
mirror:
|
||||
runs-on: ubuntu-latest
|
||||
|
||||
steps:
|
||||
- name: Checkout target repository
|
||||
uses: actions/checkout@v2
|
||||
with:
|
||||
fetch-depth: 1 # shallow clone: fetches only the latest commit (set to 0 to fetch the complete commit history)
|
||||
|
||||
- name: Mirror source repository
|
||||
uses: wearerequired/git-mirror-action@v1
|
||||
with:
|
||||
source-repo: "https://gitlink.org.cn/JointCloud/pcm-coordinator.git" # 源仓库的URL
|
||||
destination-repo: "git@github.com:${{ github.repository }}.git" # 目标仓库的URL
|
||||
ssh: true
|
||||
env:
|
||||
SSH_PRIVATE_KEY: ${{ secrets.SSH_PRIVATE_KEY }}
|
|
@ -1,4 +1,4 @@
|
|||
FROM registry.cn-hangzhou.aliyuncs.com/jcce-images/golang:1.22.4-alpine3.20 AS builder
|
||||
FROM golang:1.22.4-alpine3.20 AS builder
|
||||
|
||||
WORKDIR /app
|
||||
COPY . .
|
||||
|
@ -9,12 +9,11 @@ RUN go mod download
|
|||
RUN CGO_ENABLED=0 go build -ldflags="-w -s" -o pcm-core-api
|
||||
|
||||
|
||||
FROM registry.cn-hangzhou.aliyuncs.com/jcce-images/alpine:3.20
|
||||
FROM alpine:latest
|
||||
WORKDIR /app
|
||||
|
||||
#修改alpine源为上海交通大学
|
||||
RUN sed -i 's/dl-cdn.alpinelinux.org/mirrors.sjtug.sjtu.edu.cn/g' /etc/apk/repositories && \
|
||||
apk add --no-cache ca-certificates && update-ca-certificates && \
|
||||
RUN apk add --no-cache ca-certificates && update-ca-certificates && \
|
||||
apk add --update tzdata && \
|
||||
rm -rf /var/cache/apk/*
|
||||
|
||||
|
|
|
@ -3,11 +3,11 @@ Host: 0.0.0.0
|
|||
Port: 8999
|
||||
MaxBytes: 524288000
|
||||
|
||||
Timeout: 50000
|
||||
Timeout: 500000
|
||||
|
||||
DB:
|
||||
# DataSource: root:uJpLd6u-J?HC1@(10.206.0.12:3306)/pcm?parseTime=true&loc=Local
|
||||
DataSource: root:uJpLd6u-J?HC1@(47.92.88.143:3306)/pcm?parseTime=true&loc=Local
|
||||
DataSource: root:uJpLd6u-J?HC1@(10.206.0.12:3306)/pcm?parseTime=true&loc=Local
|
||||
#DataSource: root:uJpLd6u-J?HC1@(47.92.88.143:3306)/pcm?parseTime=true&loc=Local
|
||||
Redis:
|
||||
Host: 10.206.0.12:6379
|
||||
Pass: redisPW123
|
||||
|
@ -27,7 +27,7 @@ ModelArtsRpcConf:
|
|||
Endpoints:
|
||||
- 127.0.0.1:2002
|
||||
NonBlock: true
|
||||
Timeout: 20000
|
||||
Timeout: 500000
|
||||
|
||||
#rpc
|
||||
ModelArtsImgRpcConf:
|
||||
|
|
32
go.mod
32
go.mod
|
@ -14,12 +14,12 @@ require (
|
|||
github.com/pkg/errors v0.9.1
|
||||
github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring v0.71.2
|
||||
github.com/prometheus/alertmanager v0.27.0
|
||||
github.com/prometheus/client_golang v1.20.2
|
||||
github.com/prometheus/common v0.57.0
|
||||
github.com/prometheus/client_golang v1.20.3
|
||||
github.com/prometheus/common v0.59.1
|
||||
github.com/robfig/cron/v3 v3.0.1
|
||||
github.com/zeromicro/go-zero v1.7.0
|
||||
github.com/zeromicro/go-zero v1.7.2
|
||||
gitlink.org.cn/JointCloud/pcm-ac v0.0.0-20240725071305-f751eec4dde1
|
||||
gitlink.org.cn/JointCloud/pcm-modelarts v0.0.0-20240831031531-89bdb156378a
|
||||
gitlink.org.cn/JointCloud/pcm-modelarts v0.0.0-20240909072501-939c3144cd9e
|
||||
gitlink.org.cn/JointCloud/pcm-octopus v0.0.0-20240817071412-44397870b110
|
||||
gitlink.org.cn/JointCloud/pcm-openstack v0.0.0-20240403033338-e7edabad4203
|
||||
gitlink.org.cn/JointCloud/pcm-slurm v0.0.0-20240301080743-8b94bbaf57f5
|
||||
|
@ -126,7 +126,7 @@ require (
|
|||
go.etcd.io/etcd/api/v3 v3.5.15 // indirect
|
||||
go.etcd.io/etcd/client/pkg/v3 v3.5.15 // indirect
|
||||
go.etcd.io/etcd/client/v3 v3.5.15 // indirect
|
||||
go.mongodb.org/mongo-driver v1.16.0 // indirect
|
||||
go.mongodb.org/mongo-driver v1.16.1 // indirect
|
||||
go.opentelemetry.io/otel v1.29.0 // indirect
|
||||
go.opentelemetry.io/otel/exporters/jaeger v1.17.0 // indirect
|
||||
go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.29.0 // indirect
|
||||
|
@ -140,27 +140,27 @@ require (
|
|||
go.uber.org/automaxprocs v1.5.3 // indirect
|
||||
go.uber.org/multierr v1.11.0 // indirect
|
||||
go.uber.org/zap v1.27.0 // indirect
|
||||
golang.org/x/crypto v0.26.0 // indirect
|
||||
golang.org/x/crypto v0.27.0 // indirect
|
||||
golang.org/x/exp v0.0.0-20240119083558-1b970713d09a // indirect
|
||||
golang.org/x/mod v0.20.0 // indirect
|
||||
golang.org/x/net v0.28.0 // indirect
|
||||
golang.org/x/oauth2 v0.22.0 // indirect
|
||||
golang.org/x/mod v0.21.0 // indirect
|
||||
golang.org/x/net v0.29.0 // indirect
|
||||
golang.org/x/oauth2 v0.23.0 // indirect
|
||||
golang.org/x/sync v0.8.0 // indirect
|
||||
golang.org/x/sys v0.24.0 // indirect
|
||||
golang.org/x/term v0.23.0 // indirect
|
||||
golang.org/x/text v0.17.0 // indirect
|
||||
golang.org/x/sys v0.25.0 // indirect
|
||||
golang.org/x/term v0.24.0 // indirect
|
||||
golang.org/x/text v0.18.0 // indirect
|
||||
golang.org/x/time v0.6.0 // indirect
|
||||
golang.org/x/tools v0.24.0 // indirect
|
||||
google.golang.org/genproto/googleapis/api v0.0.0-20240827150818-7e3bb234dfed // indirect
|
||||
google.golang.org/genproto/googleapis/rpc v0.0.0-20240827150818-7e3bb234dfed // indirect
|
||||
google.golang.org/genproto/googleapis/api v0.0.0-20240903143218-8af14fe29dc1 // indirect
|
||||
google.golang.org/genproto/googleapis/rpc v0.0.0-20240903143218-8af14fe29dc1 // indirect
|
||||
google.golang.org/protobuf v1.34.2 // indirect
|
||||
gopkg.in/inf.v0 v0.9.1 // indirect
|
||||
gopkg.in/yaml.v2 v2.4.0 // indirect
|
||||
gopkg.in/yaml.v3 v3.0.1 // indirect
|
||||
k8s.io/api v0.31.0 // indirect
|
||||
k8s.io/klog/v2 v2.130.1 // indirect
|
||||
k8s.io/kube-openapi v0.0.0-20240827152857-f7e401e7b4c2 // indirect
|
||||
k8s.io/utils v0.0.0-20240821151609-f90d01438635 // indirect
|
||||
k8s.io/kube-openapi v0.0.0-20240903163716-9e1beecbcb38 // indirect
|
||||
k8s.io/utils v0.0.0-20240902221715-702e33fdd3c3 // indirect
|
||||
sigs.k8s.io/json v0.0.0-20221116044647-bc3834ca7abd // indirect
|
||||
sigs.k8s.io/structured-merge-diff/v4 v4.4.1 // indirect
|
||||
)
|
||||
|
|
64
go.sum
64
go.sum
|
@ -393,8 +393,8 @@ github.com/prometheus/client_golang v1.0.0/go.mod h1:db9x61etRT2tGnBNRi70OPL5Fsn
|
|||
github.com/prometheus/client_golang v1.4.0/go.mod h1:e9GMxYsXl05ICDXkRhurwBS4Q3OK1iX/F2sw+iXX5zU=
|
||||
github.com/prometheus/client_golang v1.7.1/go.mod h1:PY5Wy2awLA44sXw4AOSfFBetzPP4j5+D6mVACh+pe2M=
|
||||
github.com/prometheus/client_golang v1.11.0/go.mod h1:Z6t4BnS23TR94PD6BsDNk8yVqroYurpAkEiz0P2BEV0=
|
||||
github.com/prometheus/client_golang v1.20.2 h1:5ctymQzZlyOON1666svgwn3s6IKWgfbjsejTMiXIyjg=
|
||||
github.com/prometheus/client_golang v1.20.2/go.mod h1:PIEt8X02hGcP8JWbeHyeZ53Y/jReSnHgO035n//V5WE=
|
||||
github.com/prometheus/client_golang v1.20.3 h1:oPksm4K8B+Vt35tUhw6GbSNSgVlVSBH0qELP/7u83l4=
|
||||
github.com/prometheus/client_golang v1.20.3/go.mod h1:PIEt8X02hGcP8JWbeHyeZ53Y/jReSnHgO035n//V5WE=
|
||||
github.com/prometheus/client_model v0.0.0-20180712105110-5c3871d89910/go.mod h1:MbSGuTsp3dbXC40dX6PRTWyKYBIrTGTE9sqQNg2J8bo=
|
||||
github.com/prometheus/client_model v0.0.0-20190129233127-fd36f4220a90/go.mod h1:xMI15A0UPsDsEKsMN9yxemIoYk6Tm2C1GtYGdfGttqA=
|
||||
github.com/prometheus/client_model v0.0.0-20190812154241-14fe0d1b01d4/go.mod h1:xMI15A0UPsDsEKsMN9yxemIoYk6Tm2C1GtYGdfGttqA=
|
||||
|
@ -406,8 +406,8 @@ github.com/prometheus/common v0.9.1/go.mod h1:yhUN8i9wzaXS3w1O07YhxHEBxD+W35wd8b
|
|||
github.com/prometheus/common v0.10.0/go.mod h1:Tlit/dnDKsSWFlCLTWaA1cyBgKHSMdTB80sz/V91rCo=
|
||||
github.com/prometheus/common v0.26.0/go.mod h1:M7rCNAaPfAosfx8veZJCuw84e35h3Cfd9VFqTh1DIvc=
|
||||
github.com/prometheus/common v0.29.0/go.mod h1:vu+V0TpY+O6vW9J44gczi3Ap/oXXR10b+M/gUGO4Hls=
|
||||
github.com/prometheus/common v0.57.0 h1:Ro/rKjwdq9mZn1K5QPctzh+MA4Lp0BuYk5ZZEVhoNcY=
|
||||
github.com/prometheus/common v0.57.0/go.mod h1:7uRPFSUTbfZWsJ7MHY56sqt7hLQu3bxXHDnNhl8E9qI=
|
||||
github.com/prometheus/common v0.59.1 h1:LXb1quJHWm1P6wq/U824uxYi4Sg0oGvNeUm1z5dJoX0=
|
||||
github.com/prometheus/common v0.59.1/go.mod h1:GpWM7dewqmVYcd7SmRaiWVe9SSqjf0UrwnYnpEZNuT0=
|
||||
github.com/prometheus/common/sigv4 v0.1.0 h1:qoVebwtwwEhS85Czm2dSROY5fTo2PAPEVdDeppTwGX4=
|
||||
github.com/prometheus/common/sigv4 v0.1.0/go.mod h1:2Jkxxk9yYvCkE5G1sQT7GuEXm57JrvHu9k5YwTjsNtI=
|
||||
github.com/prometheus/exporter-toolkit v0.11.0 h1:yNTsuZ0aNCNFQ3aFTD2uhPOvr4iD7fdBvKPAEGkNf+g=
|
||||
|
@ -464,12 +464,12 @@ github.com/yuin/goldmark v1.3.5/go.mod h1:mwnBkeHKe2W/ZEtQ+71ViKU8L12m81fl3OWwC1
|
|||
github.com/yuin/goldmark v1.4.13/go.mod h1:6yULJ656Px+3vBD8DxQVa3kxgyrAnzto9xy5taEt/CY=
|
||||
github.com/yuin/gopher-lua v1.1.1 h1:kYKnWBjvbNP4XLT3+bPEwAXJx262OhaHDWDVOPjL46M=
|
||||
github.com/yuin/gopher-lua v1.1.1/go.mod h1:GBR0iDaNXjAgGg9zfCvksxSRnQx76gclCIb7kdAd1Pw=
|
||||
github.com/zeromicro/go-zero v1.7.0 h1:B+y7tUVlo3qVQ6F0I0R9bi+Dq4I1QdO9ZB+dz1r0p1s=
|
||||
github.com/zeromicro/go-zero v1.7.0/go.mod h1:ypW4PzQI+jUrMcNJDDQ+7YW+pE+tMua9Xj/pmtmS1Dc=
|
||||
github.com/zeromicro/go-zero v1.7.2 h1:a8lyVOG3KXG4LrAy6ZmtJTJtisX4Ostc4Pst4fE704I=
|
||||
github.com/zeromicro/go-zero v1.7.2/go.mod h1:WFXfF92Exw0O7WECifS6r99JSzv4KEN49x9RhAfgkMc=
|
||||
gitlink.org.cn/JointCloud/pcm-ac v0.0.0-20240725071305-f751eec4dde1 h1:DicBXoQiC6mumMBeyqSPNrsjtqJIgk5Pv2hscu2xryw=
|
||||
gitlink.org.cn/JointCloud/pcm-ac v0.0.0-20240725071305-f751eec4dde1/go.mod h1:3eECiw9O2bIFkkePlloKyLNXiqBAhOxNrDoGaaGseGY=
|
||||
gitlink.org.cn/JointCloud/pcm-modelarts v0.0.0-20240831031531-89bdb156378a h1:x3qNoZu7leHTx5gaDuLNR/T9ubwIpCXZH5hS6ZfwltQ=
|
||||
gitlink.org.cn/JointCloud/pcm-modelarts v0.0.0-20240831031531-89bdb156378a/go.mod h1:/eOmBFZKWGoabG3sRVkVvIbLwsd2631k4jkUBR6x1AA=
|
||||
gitlink.org.cn/JointCloud/pcm-modelarts v0.0.0-20240909072501-939c3144cd9e h1:6LYJggBoeAQxy/otzWjt40Pa7gnVvUR4c5YMi6A/NdU=
|
||||
gitlink.org.cn/JointCloud/pcm-modelarts v0.0.0-20240909072501-939c3144cd9e/go.mod h1:/eOmBFZKWGoabG3sRVkVvIbLwsd2631k4jkUBR6x1AA=
|
||||
gitlink.org.cn/JointCloud/pcm-octopus v0.0.0-20240817071412-44397870b110 h1:GaXwr5sgDh0raHjUf9IewTvnRvajYea7zbLsaerYyXo=
|
||||
gitlink.org.cn/JointCloud/pcm-octopus v0.0.0-20240817071412-44397870b110/go.mod h1:QOD5+/l2D+AYBjF2h5T0mdJyfGAmF78QmeKdbBXbjLQ=
|
||||
gitlink.org.cn/JointCloud/pcm-openstack v0.0.0-20240403033338-e7edabad4203 h1:s6PsZ1+bev294IWdZRlV7mnOwI1+UzFcldVW/BqhQzI=
|
||||
|
@ -484,8 +484,8 @@ go.etcd.io/etcd/client/pkg/v3 v3.5.15 h1:fo0HpWz/KlHGMCC+YejpiCmyWDEuIpnTDzpJLB5
|
|||
go.etcd.io/etcd/client/pkg/v3 v3.5.15/go.mod h1:mXDI4NAOwEiszrHCb0aqfAYNCrZP4e9hRca3d1YK8EU=
|
||||
go.etcd.io/etcd/client/v3 v3.5.15 h1:23M0eY4Fd/inNv1ZfU3AxrbbOdW79r9V9Rl62Nm6ip4=
|
||||
go.etcd.io/etcd/client/v3 v3.5.15/go.mod h1:CLSJxrYjvLtHsrPKsy7LmZEE+DK2ktfd2bN4RhBMwlU=
|
||||
go.mongodb.org/mongo-driver v1.16.0 h1:tpRsfBJMROVHKpdGyc1BBEzzjDUWjItxbVSZ8Ls4BQ4=
|
||||
go.mongodb.org/mongo-driver v1.16.0/go.mod h1:oB6AhJQvFQL4LEHyXi6aJzQJtBiTQHiAd83l0GdFaiw=
|
||||
go.mongodb.org/mongo-driver v1.16.1 h1:rIVLL3q0IHM39dvE+z2ulZLp9ENZKThVfuvN/IiN4l8=
|
||||
go.mongodb.org/mongo-driver v1.16.1/go.mod h1:oB6AhJQvFQL4LEHyXi6aJzQJtBiTQHiAd83l0GdFaiw=
|
||||
go.opencensus.io v0.21.0/go.mod h1:mSImk1erAIZhrmZN+AvHh14ztQfjbGwt4TtuofqLduU=
|
||||
go.opencensus.io v0.22.0/go.mod h1:+kGneAE2xo2IficOXnaByMWTGM9T73dGwxeWcUqIpI8=
|
||||
go.opencensus.io v0.22.2/go.mod h1:yxeiOL68Rb0Xd1ddK5vPZ/oVn4vY4Ynel7k9FzqtOIw=
|
||||
|
@ -533,8 +533,8 @@ golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPh
|
|||
golang.org/x/crypto v0.0.0-20210921155107-089bfa567519/go.mod h1:GvvjBRRGRdwPK5ydBHafDWAxML/pGHZbMvKqRZ5+Abc=
|
||||
golang.org/x/crypto v0.19.0/go.mod h1:Iy9bg/ha4yyC70EfRS8jz+B6ybOBKMaSxLj6P6oBDfU=
|
||||
golang.org/x/crypto v0.21.0/go.mod h1:0BP7YvVV9gBbVKyeTG0Gyn+gZm94bibOW5BjDEYAOMs=
|
||||
golang.org/x/crypto v0.26.0 h1:RrRspgV4mU+YwB4FYnuBoKsUapNIL5cohGAmSH3azsw=
|
||||
golang.org/x/crypto v0.26.0/go.mod h1:GY7jblb9wI+FOo5y8/S2oY4zWP07AkOJ4+jxCqdqn54=
|
||||
golang.org/x/crypto v0.27.0 h1:GXm2NjJrPaiv/h1tb2UH8QfgC/hOf/+z0p6PT8o1w7A=
|
||||
golang.org/x/crypto v0.27.0/go.mod h1:1Xngt8kV6Dvbssa53Ziq6Eqn0HqbZi5Z6R0ZpwQzt70=
|
||||
golang.org/x/exp v0.0.0-20190121172915-509febef88a4/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA=
|
||||
golang.org/x/exp v0.0.0-20190306152737-a1d7652674e8/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA=
|
||||
golang.org/x/exp v0.0.0-20190510132918-efd6b22b2522/go.mod h1:ZjyILWgesfNpC6sMxTJOJm9Kp84zZh5NQWvqDGG3Qr8=
|
||||
|
@ -570,8 +570,8 @@ golang.org/x/mod v0.3.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA=
|
|||
golang.org/x/mod v0.4.2/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA=
|
||||
golang.org/x/mod v0.6.0-dev.0.20220419223038-86c51ed26bb4/go.mod h1:jJ57K6gSWd91VN4djpZkiMVwK6gcyfeH4XE8wZrZaV4=
|
||||
golang.org/x/mod v0.8.0/go.mod h1:iBbtSCu2XBx23ZKBPSOrRkjjQPZFPuis4dIYUhu/chs=
|
||||
golang.org/x/mod v0.20.0 h1:utOm6MM3R3dnawAiJgn0y+xvuYRsm1RKM/4giyfDgV0=
|
||||
golang.org/x/mod v0.20.0/go.mod h1:hTbmBsO62+eylJbnUtE2MGJUyE7QWk4xUqPFrRgJ+7c=
|
||||
golang.org/x/mod v0.21.0 h1:vvrHzRwRfVKSiLrG+d4FMl/Qi4ukBCE6kZlTUkDYRT0=
|
||||
golang.org/x/mod v0.21.0/go.mod h1:6SkKJ3Xj0I0BrPOZoBy3bdMptDDU9oJrpohJ3eWZ1fY=
|
||||
golang.org/x/net v0.0.0-20180724234803-3673e40ba225/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
|
||||
golang.org/x/net v0.0.0-20180826012351-8a410e7b638d/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
|
||||
golang.org/x/net v0.0.0-20181114220301-adae6a3d119a/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
|
||||
|
@ -611,16 +611,16 @@ golang.org/x/net v0.6.0/go.mod h1:2Tu9+aMcznHK/AK1HMvgo6xiTLG5rD5rZLDS+rp2Bjs=
|
|||
golang.org/x/net v0.10.0/go.mod h1:0qNGK6F8kojg2nk9dLZ2mShWaEBan6FAoqfSigmmuDg=
|
||||
golang.org/x/net v0.21.0/go.mod h1:bIjVDfnllIU7BJ2DNgfnXvpSvtn8VRwhlsaeUTyUS44=
|
||||
golang.org/x/net v0.22.0/go.mod h1:JKghWKKOSdJwpW2GEx0Ja7fmaKnMsbu+MWVZTokSYmg=
|
||||
golang.org/x/net v0.28.0 h1:a9JDOJc5GMUJ0+UDqmLT86WiEy7iWyIhz8gz8E4e5hE=
|
||||
golang.org/x/net v0.28.0/go.mod h1:yqtgsTWOOnlGLG9GFRrK3++bGOUEkNBoHZc8MEDWPNg=
|
||||
golang.org/x/net v0.29.0 h1:5ORfpBpCs4HzDYoodCDBbwHzdR5UrLBZ3sOnUJmFoHo=
|
||||
golang.org/x/net v0.29.0/go.mod h1:gLkgy8jTGERgjzMic6DS9+SP0ajcu6Xu3Orq/SpETg0=
|
||||
golang.org/x/oauth2 v0.0.0-20180821212333-d2e6202438be/go.mod h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U=
|
||||
golang.org/x/oauth2 v0.0.0-20190226205417-e64efc72b421/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw=
|
||||
golang.org/x/oauth2 v0.0.0-20190604053449-0f29369cfe45/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw=
|
||||
golang.org/x/oauth2 v0.0.0-20191202225959-858c2ad4c8b6/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw=
|
||||
golang.org/x/oauth2 v0.0.0-20200107190931-bf48bf16ab8d/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw=
|
||||
golang.org/x/oauth2 v0.0.0-20210514164344-f6687ab2804c/go.mod h1:KelEdhl1UZF7XfJ4dDtk6s++YSgaE7mD/BuKKDLBl4A=
|
||||
golang.org/x/oauth2 v0.22.0 h1:BzDx2FehcG7jJwgWLELCdmLuxk2i+x9UDpSiss2u0ZA=
|
||||
golang.org/x/oauth2 v0.22.0/go.mod h1:XYTD2NtWslqkgxebSiOHnXEap4TF09sJSc7H1sXbhtI=
|
||||
golang.org/x/oauth2 v0.23.0 h1:PbgcYx2W7i4LvjJWEbf0ngHV6qJYr86PkAV3bXdLEbs=
|
||||
golang.org/x/oauth2 v0.23.0/go.mod h1:XYTD2NtWslqkgxebSiOHnXEap4TF09sJSc7H1sXbhtI=
|
||||
golang.org/x/sync v0.0.0-20180314180146-1d60e4601c6f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
|
||||
golang.org/x/sync v0.0.0-20181108010431-42b317875d0f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
|
||||
golang.org/x/sync v0.0.0-20181221193216-37e7f081c4d4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
|
||||
|
@ -686,16 +686,16 @@ golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
|
|||
golang.org/x/sys v0.8.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
|
||||
golang.org/x/sys v0.17.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
|
||||
golang.org/x/sys v0.18.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
|
||||
golang.org/x/sys v0.24.0 h1:Twjiwq9dn6R1fQcyiK+wQyHWfaz/BJB+YIpzU/Cv3Xg=
|
||||
golang.org/x/sys v0.24.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
|
||||
golang.org/x/sys v0.25.0 h1:r+8e+loiHxRqhXVl6ML1nO3l1+oFoWbnlu2Ehimmi34=
|
||||
golang.org/x/sys v0.25.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
|
||||
golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo=
|
||||
golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8=
|
||||
golang.org/x/term v0.5.0/go.mod h1:jMB1sMXY+tzblOD4FWmEbocvup2/aLOaQEp7JmGp78k=
|
||||
golang.org/x/term v0.8.0/go.mod h1:xPskH00ivmX89bAKVGSKKtLOWNx2+17Eiy94tnKShWo=
|
||||
golang.org/x/term v0.17.0/go.mod h1:lLRBjIVuehSbZlaOtGMbcMncT+aqLLLmKrsjNrUguwk=
|
||||
golang.org/x/term v0.18.0/go.mod h1:ILwASektA3OnRv7amZ1xhE/KTR+u50pbXfZ03+6Nx58=
|
||||
golang.org/x/term v0.23.0 h1:F6D4vR+EHoL9/sWAWgAR1H2DcHr4PareCbAaCo1RpuU=
|
||||
golang.org/x/term v0.23.0/go.mod h1:DgV24QBUrK6jhZXl+20l6UWznPlwAHm1Q1mGHtydmSk=
|
||||
golang.org/x/term v0.24.0 h1:Mh5cbb+Zk2hqqXNO7S1iTjEphVL+jb8ZWaqh/g+JWkM=
|
||||
golang.org/x/term v0.24.0/go.mod h1:lOBK/LVxemqiMij05LGJ0tzNr8xlmwBRJ81PX6wVLH8=
|
||||
golang.org/x/text v0.0.0-20170915032832-14c0d48ead0c/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
|
||||
golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
|
||||
golang.org/x/text v0.3.1-0.20180807135948-17ff2d5776d2/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
|
||||
|
@ -706,8 +706,8 @@ golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ=
|
|||
golang.org/x/text v0.7.0/go.mod h1:mrYo+phRRbMaCq/xk9113O4dZlRixOauAjOtrjsXDZ8=
|
||||
golang.org/x/text v0.9.0/go.mod h1:e1OnstbJyHTd6l/uOt8jFFHp6TRDWZR/bV3emEE/zU8=
|
||||
golang.org/x/text v0.14.0/go.mod h1:18ZOQIKpY8NJVqYksKHtTdi31H5itFRjB5/qKTNYzSU=
|
||||
golang.org/x/text v0.17.0 h1:XtiM5bkSOt+ewxlOE/aE/AKEHibwj/6gvWMl9Rsh0Qc=
|
||||
golang.org/x/text v0.17.0/go.mod h1:BuEKDfySbSR4drPmRPG/7iBdf8hvFMuRexcpahXilzY=
|
||||
golang.org/x/text v0.18.0 h1:XvMDiNzPAl0jr17s6W9lcaIhGUfUORdGCNsuLmPG224=
|
||||
golang.org/x/text v0.18.0/go.mod h1:BuEKDfySbSR4drPmRPG/7iBdf8hvFMuRexcpahXilzY=
|
||||
golang.org/x/time v0.0.0-20181108054448-85acf8d2951c/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ=
|
||||
golang.org/x/time v0.0.0-20190308202827-9d24e82272b4/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ=
|
||||
golang.org/x/time v0.0.0-20191024005414-555d28b269f0/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ=
|
||||
|
@ -819,10 +819,10 @@ google.golang.org/genproto v0.0.0-20200618031413-b414f8b61790/go.mod h1:jDfRM7Fc
|
|||
google.golang.org/genproto v0.0.0-20200729003335-053ba62fc06f/go.mod h1:FWY/as6DDZQgahTzZj3fqbO1CbirC29ZNUFHwi0/+no=
|
||||
google.golang.org/genproto v0.0.0-20200804131852-c06518451d9c/go.mod h1:FWY/as6DDZQgahTzZj3fqbO1CbirC29ZNUFHwi0/+no=
|
||||
google.golang.org/genproto v0.0.0-20200825200019-8632dd797987/go.mod h1:FWY/as6DDZQgahTzZj3fqbO1CbirC29ZNUFHwi0/+no=
|
||||
google.golang.org/genproto/googleapis/api v0.0.0-20240827150818-7e3bb234dfed h1:3RgNmBoI9MZhsj3QxC+AP/qQhNwpCLOvYDYYsFrhFt0=
|
||||
google.golang.org/genproto/googleapis/api v0.0.0-20240827150818-7e3bb234dfed/go.mod h1:OCdP9MfskevB/rbYvHTsXTtKC+3bHWajPdoKgjcYkfo=
|
||||
google.golang.org/genproto/googleapis/rpc v0.0.0-20240827150818-7e3bb234dfed h1:J6izYgfBXAI3xTKLgxzTmUltdYaLsuBxFCgDHWJ/eXg=
|
||||
google.golang.org/genproto/googleapis/rpc v0.0.0-20240827150818-7e3bb234dfed/go.mod h1:UqMtugtsSgubUsoxbuAoiCXvqvErP7Gf0so0mK9tHxU=
|
||||
google.golang.org/genproto/googleapis/api v0.0.0-20240903143218-8af14fe29dc1 h1:hjSy6tcFQZ171igDaN5QHOw2n6vx40juYbC/x67CEhc=
|
||||
google.golang.org/genproto/googleapis/api v0.0.0-20240903143218-8af14fe29dc1/go.mod h1:qpvKtACPCQhAdu3PyQgV4l3LMXZEtft7y8QcarRsp9I=
|
||||
google.golang.org/genproto/googleapis/rpc v0.0.0-20240903143218-8af14fe29dc1 h1:pPJltXNxVzT4pK9yD8vR9X75DaWYYmLGMsEvBfFQZzQ=
|
||||
google.golang.org/genproto/googleapis/rpc v0.0.0-20240903143218-8af14fe29dc1/go.mod h1:UqMtugtsSgubUsoxbuAoiCXvqvErP7Gf0so0mK9tHxU=
|
||||
google.golang.org/grpc v1.19.0/go.mod h1:mqu4LbDTu4XGKhr4mRzUsmM4RtVoemTSY81AxZiDr8c=
|
||||
google.golang.org/grpc v1.20.1/go.mod h1:10oTOabMzJvdu6/UiuZezV6QK5dSlG84ov/aaiqXj38=
|
||||
google.golang.org/grpc v1.21.1/go.mod h1:oYelfM1adQP15Ek0mdvEgi9Df8B9CZIaU1084ijfRaM=
|
||||
|
@ -902,10 +902,10 @@ k8s.io/client-go v0.31.0 h1:QqEJzNjbN2Yv1H79SsS+SWnXkBgVu4Pj3CJQgbx0gI8=
|
|||
k8s.io/client-go v0.31.0/go.mod h1:Y9wvC76g4fLjmU0BA+rV+h2cncoadjvjjkkIGoTLcGU=
|
||||
k8s.io/klog/v2 v2.130.1 h1:n9Xl7H1Xvksem4KFG4PYbdQCQxqc/tTUyrgXaOhHSzk=
|
||||
k8s.io/klog/v2 v2.130.1/go.mod h1:3Jpz1GvMt720eyJH1ckRHK1EDfpxISzJ7I9OYgaDtPE=
|
||||
k8s.io/kube-openapi v0.0.0-20240827152857-f7e401e7b4c2 h1:GKE9U8BH16uynoxQii0auTjmmmuZ3O0LFMN6S0lPPhI=
|
||||
k8s.io/kube-openapi v0.0.0-20240827152857-f7e401e7b4c2/go.mod h1:coRQXBK9NxO98XUv3ZD6AK3xzHCxV6+b7lrquKwaKzA=
|
||||
k8s.io/utils v0.0.0-20240821151609-f90d01438635 h1:2wThSvJoW/Ncn9TmQEYXRnevZXi2duqHWf5OX9S3zjI=
|
||||
k8s.io/utils v0.0.0-20240821151609-f90d01438635/go.mod h1:OLgZIPagt7ERELqWJFomSt595RzquPNLL48iOWgYOg0=
|
||||
k8s.io/kube-openapi v0.0.0-20240903163716-9e1beecbcb38 h1:1dWzkmJrrprYvjGwh9kEUxmcUV/CtNU8QM7h1FLWQOo=
|
||||
k8s.io/kube-openapi v0.0.0-20240903163716-9e1beecbcb38/go.mod h1:coRQXBK9NxO98XUv3ZD6AK3xzHCxV6+b7lrquKwaKzA=
|
||||
k8s.io/utils v0.0.0-20240902221715-702e33fdd3c3 h1:b2FmK8YH+QEwq/Sy2uAEhmqL5nPfGYbJOcaqjeYYZoA=
|
||||
k8s.io/utils v0.0.0-20240902221715-702e33fdd3c3/go.mod h1:OLgZIPagt7ERELqWJFomSt595RzquPNLL48iOWgYOg0=
|
||||
rsc.io/binaryregexp v0.2.0/go.mod h1:qTv7/COck+e2FymRvadv62gMdZztPaShugOCi3I+8D8=
|
||||
rsc.io/quote/v3 v3.1.0/go.mod h1:yEA65RcK8LyAZtP9Kv3t0HmxON59tX3rD+tICJqUlj0=
|
||||
rsc.io/sampler v1.3.0/go.mod h1:T1hPZKmBbMNahiBKFy5HrXp6adAjACjK9JXDnKaTXpA=
|
||||
|
|
|
@ -38,6 +38,14 @@ func (l *DeployInstanceListLogic) DeployInstanceList(req *types.DeployInstanceLi
|
|||
return nil, tx.Error
|
||||
}
|
||||
|
||||
if len(tasklist) == 0 {
|
||||
resp.List = nil
|
||||
resp.PageSize = req.PageSize
|
||||
resp.PageNum = req.PageNum
|
||||
resp.Total = 0
|
||||
return
|
||||
}
|
||||
|
||||
//count total
|
||||
var total int64
|
||||
err = tx.Count(&total).Error
|
||||
|
|
|
@ -70,12 +70,6 @@ func removeItem(items *[]*models.AiDeployInstanceTask, id int64) {
|
|||
if len(*items) == 0 {
|
||||
return
|
||||
}
|
||||
if len(*items) == 1 {
|
||||
if (*items)[0].Id == id {
|
||||
(*items) = nil
|
||||
return
|
||||
}
|
||||
}
|
||||
for i := len(*items) - 1; i >= 0; i-- {
|
||||
if (*items)[i].Id == id {
|
||||
*items = append((*items)[:i], (*items)[i+1:]...)
|
||||
|
|
|
@ -3,12 +3,15 @@ package inference
|
|||
import (
|
||||
"context"
|
||||
"errors"
|
||||
"fmt"
|
||||
"github.com/zeromicro/go-zero/core/logx"
|
||||
"gitlink.org.cn/JointCloud/pcm-coordinator/internal/scheduler/service/inference"
|
||||
"gitlink.org.cn/JointCloud/pcm-coordinator/internal/storeLink"
|
||||
"gitlink.org.cn/JointCloud/pcm-coordinator/internal/svc"
|
||||
"gitlink.org.cn/JointCloud/pcm-coordinator/internal/types"
|
||||
"gitlink.org.cn/JointCloud/pcm-coordinator/pkg/models"
|
||||
"strconv"
|
||||
"sync"
|
||||
)
|
||||
|
||||
type StartAllByDeployTaskIdLogic struct {
|
||||
|
@ -35,17 +38,8 @@ func (l *StartAllByDeployTaskIdLogic) StartAllByDeployTaskId(req *types.StartAll
|
|||
return nil, err
|
||||
}
|
||||
|
||||
for _, ins := range list {
|
||||
in, err := l.svcCtx.Scheduler.AiService.InferenceAdapterMap[strconv.FormatInt(ins.AdapterId, 10)][strconv.FormatInt(ins.ClusterId, 10)].GetInferDeployInstance(l.ctx, ins.InstanceId)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if checkStopStatus(in) {
|
||||
success := l.svcCtx.Scheduler.AiService.InferenceAdapterMap[strconv.FormatInt(ins.AdapterId, 10)][strconv.FormatInt(ins.ClusterId, 10)].StartInferDeployInstance(l.ctx, ins.InstanceId)
|
||||
if !success {
|
||||
return nil, errors.New(ins.InstanceName + " start failed")
|
||||
}
|
||||
}
|
||||
if len(list) == 0 {
|
||||
return nil, errors.New("instances are empty")
|
||||
}
|
||||
|
||||
err = l.svcCtx.Scheduler.AiStorages.UpdateDeployTaskById(id)
|
||||
|
@ -53,9 +47,96 @@ func (l *StartAllByDeployTaskIdLogic) StartAllByDeployTaskId(req *types.StartAll
|
|||
return nil, err
|
||||
}
|
||||
|
||||
err = l.startAll(list)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
return resp, nil
|
||||
}
|
||||
|
||||
func (l *StartAllByDeployTaskIdLogic) startAll(list []*models.AiInferDeployInstance) error {
|
||||
var wg sync.WaitGroup
|
||||
var errCh = make(chan interface{}, len(list))
|
||||
var errs []interface{}
|
||||
buf := make(chan bool, 2)
|
||||
|
||||
for _, instance := range list {
|
||||
wg.Add(1)
|
||||
ins := instance
|
||||
buf <- true
|
||||
go func() {
|
||||
in, err := l.svcCtx.Scheduler.AiService.InferenceAdapterMap[strconv.FormatInt(ins.AdapterId, 10)][strconv.FormatInt(ins.ClusterId, 10)].GetInferDeployInstance(l.ctx, ins.InstanceId)
|
||||
if err != nil {
|
||||
e := struct {
|
||||
errTyp uint8
|
||||
err error
|
||||
instanceName string
|
||||
clusterName string
|
||||
}{
|
||||
errTyp: 1,
|
||||
err: err,
|
||||
instanceName: ins.InstanceName,
|
||||
clusterName: ins.ClusterName,
|
||||
}
|
||||
errCh <- e
|
||||
wg.Done()
|
||||
<-buf
|
||||
return
|
||||
}
|
||||
if checkStopStatus(in) {
|
||||
success := l.svcCtx.Scheduler.AiService.InferenceAdapterMap[strconv.FormatInt(ins.AdapterId, 10)][strconv.FormatInt(ins.ClusterId, 10)].StartInferDeployInstance(l.ctx, ins.InstanceId)
|
||||
if !success {
|
||||
e := struct {
|
||||
errTyp uint8
|
||||
err error
|
||||
instanceName string
|
||||
clusterName string
|
||||
}{
|
||||
errTyp: 2,
|
||||
err: err,
|
||||
instanceName: ins.InstanceName,
|
||||
clusterName: ins.ClusterName,
|
||||
}
|
||||
errCh <- e
|
||||
wg.Done()
|
||||
<-buf
|
||||
return
|
||||
}
|
||||
}
|
||||
wg.Done()
|
||||
<-buf
|
||||
}()
|
||||
}
|
||||
wg.Wait()
|
||||
close(errCh)
|
||||
|
||||
for e := range errCh {
|
||||
errs = append(errs, e)
|
||||
}
|
||||
|
||||
if len(errs) != 0 {
|
||||
var msg string
|
||||
for _, err := range errs {
|
||||
e := (err).(struct {
|
||||
errTyp uint8
|
||||
err error
|
||||
instanceName string
|
||||
clusterName string
|
||||
})
|
||||
switch e.errTyp {
|
||||
case 1:
|
||||
msg += fmt.Sprintf("GetInstance Failed # clusterName: %v , instanceName: %v , error: %v \n", e.clusterName, e.instanceName, e.err.Error())
|
||||
case 2:
|
||||
msg += fmt.Sprintf("StartInstance Failed # clusterName: %v , instanceName: %v , error: %v \n", e.clusterName, e.instanceName, e.err.Error())
|
||||
}
|
||||
}
|
||||
return errors.New(msg)
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func checkStopStatus(in *inference.DeployInstance) bool {
|
||||
switch in.ClusterType {
|
||||
case storeLink.TYPE_OCTOPUS:
|
||||
|
|
|
@ -3,11 +3,14 @@ package inference
|
|||
import (
|
||||
"context"
|
||||
"errors"
|
||||
"fmt"
|
||||
"gitlink.org.cn/JointCloud/pcm-coordinator/internal/scheduler/service/inference"
|
||||
"gitlink.org.cn/JointCloud/pcm-coordinator/internal/storeLink"
|
||||
"gitlink.org.cn/JointCloud/pcm-coordinator/internal/svc"
|
||||
"gitlink.org.cn/JointCloud/pcm-coordinator/internal/types"
|
||||
"gitlink.org.cn/JointCloud/pcm-coordinator/pkg/models"
|
||||
"strconv"
|
||||
"sync"
|
||||
|
||||
"github.com/zeromicro/go-zero/core/logx"
|
||||
)
|
||||
|
@ -36,17 +39,8 @@ func (l *StopAllByDeployTaskIdLogic) StopAllByDeployTaskId(req *types.StopAllByD
|
|||
return nil, err
|
||||
}
|
||||
|
||||
for _, ins := range list {
|
||||
in, err := l.svcCtx.Scheduler.AiService.InferenceAdapterMap[strconv.FormatInt(ins.AdapterId, 10)][strconv.FormatInt(ins.ClusterId, 10)].GetInferDeployInstance(l.ctx, ins.InstanceId)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if checkStatus(in) {
|
||||
success := l.svcCtx.Scheduler.AiService.InferenceAdapterMap[strconv.FormatInt(ins.AdapterId, 10)][strconv.FormatInt(ins.ClusterId, 10)].StopInferDeployInstance(l.ctx, ins.InstanceId)
|
||||
if !success {
|
||||
return nil, errors.New(ins.InstanceName + " stop failed")
|
||||
}
|
||||
}
|
||||
if len(list) == 0 {
|
||||
return nil, errors.New("instances are empty")
|
||||
}
|
||||
|
||||
err = l.svcCtx.Scheduler.AiStorages.UpdateDeployTaskById(id)
|
||||
|
@ -54,9 +48,96 @@ func (l *StopAllByDeployTaskIdLogic) StopAllByDeployTaskId(req *types.StopAllByD
|
|||
return nil, err
|
||||
}
|
||||
|
||||
err = l.stopAll(list)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
return resp, nil
|
||||
}
|
||||
|
||||
func (l *StopAllByDeployTaskIdLogic) stopAll(list []*models.AiInferDeployInstance) error {
|
||||
var wg sync.WaitGroup
|
||||
var errCh = make(chan interface{}, len(list))
|
||||
var errs []interface{}
|
||||
buf := make(chan bool, 2)
|
||||
|
||||
for _, instance := range list {
|
||||
wg.Add(1)
|
||||
ins := instance
|
||||
buf <- true
|
||||
go func() {
|
||||
in, err := l.svcCtx.Scheduler.AiService.InferenceAdapterMap[strconv.FormatInt(ins.AdapterId, 10)][strconv.FormatInt(ins.ClusterId, 10)].GetInferDeployInstance(l.ctx, ins.InstanceId)
|
||||
if err != nil {
|
||||
e := struct {
|
||||
errTyp uint8
|
||||
err error
|
||||
instanceName string
|
||||
clusterName string
|
||||
}{
|
||||
errTyp: 1,
|
||||
err: err,
|
||||
instanceName: ins.InstanceName,
|
||||
clusterName: ins.ClusterName,
|
||||
}
|
||||
errCh <- e
|
||||
wg.Done()
|
||||
<-buf
|
||||
return
|
||||
}
|
||||
if checkStatus(in) {
|
||||
success := l.svcCtx.Scheduler.AiService.InferenceAdapterMap[strconv.FormatInt(ins.AdapterId, 10)][strconv.FormatInt(ins.ClusterId, 10)].StopInferDeployInstance(l.ctx, ins.InstanceId)
|
||||
if !success {
|
||||
e := struct {
|
||||
errTyp uint8
|
||||
err error
|
||||
instanceName string
|
||||
clusterName string
|
||||
}{
|
||||
errTyp: 2,
|
||||
err: err,
|
||||
instanceName: ins.InstanceName,
|
||||
clusterName: ins.ClusterName,
|
||||
}
|
||||
errCh <- e
|
||||
wg.Done()
|
||||
<-buf
|
||||
return
|
||||
}
|
||||
}
|
||||
wg.Done()
|
||||
<-buf
|
||||
}()
|
||||
}
|
||||
wg.Wait()
|
||||
close(errCh)
|
||||
|
||||
for e := range errCh {
|
||||
errs = append(errs, e)
|
||||
}
|
||||
|
||||
if len(errs) != 0 {
|
||||
var msg string
|
||||
for _, err := range errs {
|
||||
e := (err).(struct {
|
||||
errTyp uint8
|
||||
err error
|
||||
instanceName string
|
||||
clusterName string
|
||||
})
|
||||
switch e.errTyp {
|
||||
case 1:
|
||||
msg += fmt.Sprintf("GetInstance Failed # clusterName: %v , instanceName: %v , error: %v \n", e.clusterName, e.instanceName, e.err.Error())
|
||||
case 2:
|
||||
msg += fmt.Sprintf("StopInstance Failed # clusterName: %v , instanceName: %v , error: %v \n", e.clusterName, e.instanceName, e.err.Error())
|
||||
}
|
||||
}
|
||||
return errors.New(msg)
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func checkStatus(in *inference.DeployInstance) bool {
|
||||
switch in.ClusterType {
|
||||
case storeLink.TYPE_OCTOPUS:
|
||||
|
|
|
@ -16,6 +16,7 @@ package storeLink
|
|||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"github.com/pkg/errors"
|
||||
"gitlink.org.cn/JointCloud/pcm-coordinator/internal/scheduler/schedulers/option"
|
||||
"gitlink.org.cn/JointCloud/pcm-coordinator/internal/scheduler/service/collector"
|
||||
|
@ -27,15 +28,19 @@ import (
|
|||
"gitlink.org.cn/JointCloud/pcm-modelarts/client/modelartsservice"
|
||||
"gitlink.org.cn/JointCloud/pcm-modelarts/modelarts"
|
||||
modelartsclient "gitlink.org.cn/JointCloud/pcm-modelarts/modelarts"
|
||||
"log"
|
||||
"mime/multipart"
|
||||
"strconv"
|
||||
"strings"
|
||||
"sync"
|
||||
"time"
|
||||
)
|
||||
|
||||
const (
	Ascend = "Ascend"
	// Npu is appended to the service name when an inference service is created.
	Npu = "npu"

	// ImageNetResnet50Cmd and ChatGLM6BCmd are the start commands passed as Cmd
	// when the corresponding model is created. Steps are chained with "&&" so
	// each one runs only after the previous one succeeded; a single "&" would
	// background the "cd" and start python from the original directory.
	ImageNetResnet50Cmd = "cd /home/ma-user && python ./inference_ascend.py"
	ChatGLM6BCmd        = "cd /home/ma-user && python ./download_model.py && python ./inference_chatGLM.py"
)
|
||||
|
||||
type ModelArtsLink struct {
|
||||
|
@ -45,6 +50,60 @@ type ModelArtsLink struct {
|
|||
participantId int64
|
||||
pageIndex int32
|
||||
pageSize int32
|
||||
SourceLocation string
|
||||
Version string
|
||||
ModelId string
|
||||
ModelType string
|
||||
}
|
||||
|
||||
// Version is a three-component numeric version of the form major.minor.patch.
type Version struct {
	Major, Minor, Patch int
}

// ParseVersion parses a version string of the form "major.minor.patch".
//
// Each component must be a plain unsigned decimal integer. An error is
// returned when the string does not have exactly three dot-separated
// components or when any component is empty, signed, or not a number.
func ParseVersion(versionStr string) (*Version, error) {
	parts := strings.Split(versionStr, ".")
	if len(parts) != 3 {
		return nil, fmt.Errorf("invalid version format: %s", versionStr)
	}

	var nums [3]int
	for i, name := range [...]string{"major", "minor", "patch"} {
		n, err := parseVersionPart(name, parts[i])
		if err != nil {
			return nil, fmt.Errorf("invalid version format: %s: %w", versionStr, err)
		}
		nums[i] = n
	}

	return &Version{Major: nums[0], Minor: nums[1], Patch: nums[2]}, nil
}

// parseVersionPart converts one version component to an int, rejecting empty
// and explicitly signed values that strconv.Atoi alone would let through
// ("-2", "+1").
func parseVersionPart(name, part string) (int, error) {
	if part == "" || part[0] == '+' || part[0] == '-' {
		return 0, fmt.Errorf("%s component %q is not an unsigned integer", name, part)
	}
	n, err := strconv.Atoi(part)
	if err != nil {
		return 0, fmt.Errorf("%s component %q: %w", name, part, err)
	}
	return n, nil
}

// Increment advances the version by one step in place.
//
// Patch is bumped while it is below 9. Otherwise Patch resets to 0 and the
// carry moves to Minor, which follows the same rule; when Minor is also at 9
// or above it resets to 0 and Major is incremented.
func (v *Version) Increment() {
	if v.Patch < 9 {
		v.Patch++
		return
	}
	v.Patch = 0

	if v.Minor < 9 {
		v.Minor++
		return
	}
	v.Minor = 0
	v.Major++
}

// String renders the version back into "major.minor.patch" form.
func (v *Version) String() string {
	return fmt.Sprintf("%d.%d.%d", v.Major, v.Minor, v.Patch)
}
|
||||
|
||||
func NewModelArtsLink(modelArtsRpc modelartsservice.ModelArtsService, modelArtsImgRpc imagesservice.ImagesService, name string, id int64, nickname string) *ModelArtsLink {
|
||||
|
@ -260,39 +319,142 @@ func (m *ModelArtsLink) UploadAlgorithmCode(ctx context.Context, resourceType st
|
|||
return nil
|
||||
}
|
||||
|
||||
// Determine whether there is a necessary image in image management and query the image name based on the image name
|
||||
func (m *ModelArtsLink) getSourceLocationFromImages(ctx context.Context, option *option.InferOption) error {
|
||||
req := &modelarts.ListImagesReq{
|
||||
//Platform: m.platform,
|
||||
Limit: 50,
|
||||
Offset: 0,
|
||||
}
|
||||
|
||||
ListImagesResp, err := m.modelArtsRpc.ListImages(ctx, req)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if ListImagesResp.Code != 200 {
|
||||
return errors.New("failed to get ListImages")
|
||||
}
|
||||
|
||||
for _, ListImages := range ListImagesResp.Data {
|
||||
if option.ModelName == "ChatGLM-6B" {
|
||||
if ListImages.Name == "chatglm-6b" {
|
||||
m.SourceLocation = ListImages.SwrPath
|
||||
return nil
|
||||
}
|
||||
} else {
|
||||
if ListImages.Name == option.ModelName {
|
||||
m.SourceLocation = ListImages.SwrPath
|
||||
return nil
|
||||
}
|
||||
}
|
||||
}
|
||||
return errors.New("SourceLocation not set")
|
||||
}
|
||||
|
||||
// Get AI Application List
|
||||
func (m *ModelArtsLink) GetModelId(ctx context.Context, option *option.InferOption) error {
|
||||
req := &modelarts.ListModelReq{
|
||||
Platform: m.platform,
|
||||
ModelName: option.ModelName,
|
||||
ModelType: "Image",
|
||||
Limit: int64(m.pageIndex),
|
||||
Offset: int64(m.pageSize),
|
||||
//ModelType: "Image",
|
||||
Limit: int64(m.pageIndex),
|
||||
Offset: int64(m.pageSize),
|
||||
}
|
||||
ListModelResp, err := m.modelArtsRpc.ListModels(ctx, req)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if ListModelResp.Code != 200 {
|
||||
return errors.New("failed to get ModelId")
|
||||
}
|
||||
for _, ListModel := range ListModelResp.Models {
|
||||
if ListModel.ModelName == option.ModelName {
|
||||
option.ModelId = ListModel.ModelId
|
||||
return nil
|
||||
if ListModelResp.Code == 200 {
|
||||
//return errors.New("failed to get ModelId")
|
||||
for _, ListModel := range ListModelResp.Models {
|
||||
if ListModel.ModelName == option.ModelName {
|
||||
option.ModelId = ListModel.ModelId
|
||||
m.Version = ListModel.ModelVersion
|
||||
return nil
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
err = m.CreateModel(ctx, option)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// 创建ai应用
|
||||
func (m *ModelArtsLink) GetModel(ctx context.Context, option *option.InferOption) string {
|
||||
req := &modelarts.ShowModelReq{
|
||||
Platform: m.platform,
|
||||
ModelId: option.ModelId,
|
||||
}
|
||||
ctx, cancel := context.WithTimeout(context.Background(), 50*time.Second)
|
||||
defer cancel()
|
||||
ShowModelsResp, err := m.modelArtsRpc.ShowModels(ctx, req)
|
||||
if err != nil {
|
||||
if err == context.DeadlineExceeded {
|
||||
log.Println("Request timed out")
|
||||
// 重试请求或其他处理
|
||||
} else {
|
||||
log.Fatalf("could not call method: %v", err)
|
||||
}
|
||||
}
|
||||
if ShowModelsResp.Code != 200 {
|
||||
errors.New("failed to get findModelsStatus")
|
||||
}
|
||||
m.ModelType = ShowModelsResp.ShowModelDetail.ModelAlgorithm
|
||||
return ShowModelsResp.ShowModelDetail.ModelStatus
|
||||
}
|
||||
|
||||
// Get AI Application List
|
||||
func (m *ModelArtsLink) GetModelStatus(ctx context.Context, option *option.InferOption) error {
|
||||
var wg sync.WaitGroup
|
||||
wg.Add(1)
|
||||
// 使用goroutine进行轮询
|
||||
//defer wg.Done()
|
||||
for {
|
||||
status := m.GetModel(ctx, option)
|
||||
if status == "published" {
|
||||
fmt.Println("Model is now published.")
|
||||
break // 一旦状态变为published,就退出循环
|
||||
}
|
||||
fmt.Println("Waiting for model to be published...")
|
||||
time.Sleep(5 * time.Second) // 等待一段时间后再次检查
|
||||
}
|
||||
// 在这里执行模型状态为published后需要进行的操作
|
||||
fmt.Println("Continuing with the program...")
|
||||
return nil
|
||||
}
|
||||
|
||||
// Create an AI application
|
||||
func (m *ModelArtsLink) CreateModel(ctx context.Context, option *option.InferOption) error {
|
||||
//Before creating an AI application, check if there are any images that can be created
|
||||
err := m.getSourceLocationFromImages(ctx, option)
|
||||
if err != nil { //
|
||||
return errors.New("No image available for creationd")
|
||||
}
|
||||
|
||||
//
|
||||
var CMD string
|
||||
if option.ModelName == "imagenet_resnet50" {
|
||||
CMD = ImageNetResnet50Cmd
|
||||
} else if option.ModelName == "ChatGLM-6B" {
|
||||
CMD = ChatGLM6BCmd
|
||||
}
|
||||
|
||||
if m.Version == "" {
|
||||
m.Version = "0.0.1"
|
||||
}
|
||||
version, err := ParseVersion(m.Version)
|
||||
version.Increment()
|
||||
req := &modelarts.CreateModelReq{
|
||||
Platform: m.platform,
|
||||
ModelName: option.ModelName,
|
||||
ModelType: "PyTorch",
|
||||
ModelVersion: "0.0.1",
|
||||
SourceLocation: "",
|
||||
ModelType: "Image",
|
||||
ModelVersion: version.String(),
|
||||
SourceLocation: m.SourceLocation,
|
||||
InstallType: []string{"real-time"},
|
||||
Cmd: CMD,
|
||||
ModelAlgorithm: option.ModelType,
|
||||
}
|
||||
ModelResp, err := m.modelArtsRpc.CreateModel(ctx, req)
|
||||
if err != nil {
|
||||
|
@ -600,6 +762,8 @@ func (m *ModelArtsLink) GetInferDeployInstance(ctx context.Context, id string) (
|
|||
ins.ClusterName = m.platform
|
||||
ins.CreatedTime = string(resp.StartTime)
|
||||
ins.ClusterType = TYPE_MODELARTS
|
||||
ins.ModelName = resp.Config[0].ModelName
|
||||
ins.ModelType = m.ModelType
|
||||
return ins, nil
|
||||
}
|
||||
|
||||
|
@ -609,7 +773,12 @@ func (m *ModelArtsLink) GetInferResult(ctx context.Context, url string, file mul
|
|||
|
||||
func (m *ModelArtsLink) CreateInferDeployInstance(ctx context.Context, option *option.InferOption) (string, error) {
|
||||
|
||||
err := m.CreateModel(ctx, option)
|
||||
err := m.GetModelId(ctx, option)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
|
||||
err = m.GetModelStatus(ctx, option)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
|
@ -626,8 +795,10 @@ func (m *ModelArtsLink) CreateInferDeployInstance(ctx context.Context, option *o
|
|||
Platform: m.platform,
|
||||
Config: configItems,
|
||||
InferType: "real-time",
|
||||
ServiceName: option.TaskName,
|
||||
ServiceName: option.ModelName + "_" + option.ModelType + "_" + Npu,
|
||||
}
|
||||
ctx, cancel := context.WithTimeout(context.Background(), 150*time.Second)
|
||||
defer cancel()
|
||||
resp, err := m.modelArtsRpc.CreateService(ctx, req)
|
||||
if err != nil {
|
||||
return "", err
|
||||
|
|
|
@ -80,7 +80,7 @@ var (
|
|||
}
|
||||
ModelTypeMap = map[string][]string{
|
||||
"image_classification": {"imagenet_resnet50"},
|
||||
"text_to_text": {"chatGLM_6B"},
|
||||
"text_to_text": {"ChatGLM-6B"},
|
||||
"image_to_text": {"blip-image-captioning-base"},
|
||||
"text_to_image": {"stable-diffusion-xl-base-1.0"},
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue