diff --git a/.devops/aly.yml b/.devops/aly.yml new file mode 100644 index 00000000..1f6b4ada --- /dev/null +++ b/.devops/aly.yml @@ -0,0 +1,53 @@ +version: 2 +name: aly +description: 发布环境:https://comnet.jointcloud.net +global: + concurrent: 1 +workflow: + - ref: start + name: 开始 + task: start + - ref: end + name: 结束 + task: end + needs: + - ssh_cmd_0 + - ref: git_clone_0 + name: git clone + task: git_clone@1.2.9 + input: + remote_url: '"https://gitlink.org.cn/JointCloud/pcm-coordinator.git"' + ref: '"refs/heads/master"' + commit_id: '""' + depth: 1 + needs: + - start + - ref: docker_image_build_0 + name: docker镜像构建 + task: docker_image_build@1.6.0 + input: + docker_username: ((aly.docker_user)) + docker_password: ((aly.docker_password)) + image_name: '"registry.cn-hangzhou.aliyuncs.com/jcce/pcm-core-api"' + image_tag: '"latest"' + registry_address: '"registry.cn-hangzhou.aliyuncs.com"' + docker_file: '"Dockerfile"' + docker_build_path: '"."' + workspace: git_clone_0.git_path + image_clean: true + image_push: true + build_args: '""' + needs: + - git_clone_0 + - ref: ssh_cmd_0 + name: ssh执行命令 + task: ssh_cmd@1.1.1 + input: + ssh_private_key: ((aly.ssh_private_key)) + ssh_ip: '"47.92.39.128"' + ssh_port: '"22"' + ssh_user: '"root"' + ssh_cmd: '"kubectl rollout restart deployment pcm-core-api -n ns-admin"' + needs: + - docker_image_build_0 + diff --git a/.github/workflows/docker-publish.yml b/.github/workflows/docker-publish.yml new file mode 100644 index 00000000..0fe3a82a --- /dev/null +++ b/.github/workflows/docker-publish.yml @@ -0,0 +1,55 @@ +name: Docker + +on: + push: + branches: [ "master" ] + tags: [ 'v*.*.*' ] + pull_request: + branches: [ "master" ] + workflow_dispatch: # 允许手动触发 + +env: + REGISTRY: registry.cn-hangzhou.aliyuncs.com # 修改为你的阿里云镜像仓库地址 + IMAGE_NAME: jcce/pcm-core-api # 修改为你的阿里云镜像仓库名称 + IMAGE_TAG: latest + +jobs: + build: + runs-on: ubuntu-latest + permissions: + contents: read + packages: write + id-token: write + + steps: + - name: Checkout 
repository + uses: actions/checkout@v4 + + - name: Set up QEMU + uses: docker/setup-qemu-action@v3 + + - name: Set up Docker Buildx + uses: docker/setup-buildx-action@v3 + + - name: Login to Docker Hub + uses: docker/login-action@v3 + with: + registry: ${{ env.REGISTRY }} + username: ${{ secrets.ALIYUN_USERNAME }} + password: ${{ secrets.ALIYUN_PASSWORD }} + + - name: Build and push + run: | + docker build -t ${{env.REGISTRY}}/${{env.IMAGE_NAME}}:${{env.IMAGE_TAG}} . + docker push ${{env.REGISTRY}}/${{env.IMAGE_NAME}}:${{env.IMAGE_TAG}} + + - name: Set up SSH key + run: | + mkdir -p ~/.ssh + echo "${{ secrets.SSH_PRIVATE_KEY }}" > ~/.ssh/id_rsa + chmod 600 ~/.ssh/id_rsa + echo -e "Host *\n\tStrictHostKeyChecking no\n\n" > ~/.ssh/config + + - name: SSH to remote server and restart deployment + if: github.event_name != 'pull_request' + run: ssh ${{ secrets.SSH_USER }}@${{ secrets.SSH_HOST }} "kubectl rollout restart deployment ${{ secrets.SSH_DEPLOYMENT }} -n ${{ secrets.SSH_NAMESPACE }}" \ No newline at end of file diff --git a/.github/workflows/sync.yml b/.github/workflows/sync.yml new file mode 100644 index 00000000..8944fda0 --- /dev/null +++ b/.github/workflows/sync.yml @@ -0,0 +1,25 @@ +name: Sync Mirror Repository + +on: + schedule: + - cron: '0 * * * *' # 每小时同步一次 + workflow_dispatch: # 允许手动触发 + +jobs: + mirror: + runs-on: ubuntu-latest + + steps: + - name: Checkout target repository + uses: actions/checkout@v2 + with: + fetch-depth: 1 # 获取完整的提交历史 + + - name: Mirror source repository + uses: wearerequired/git-mirror-action@v1 + with: + source-repo: "https://gitlink.org.cn/JointCloud/pcm-coordinator.git" # 源仓库的URL + destination-repo: "git@github.com:${{ github.repository }}.git" # 目标仓库的URL + ssh: true + env: + SSH_PRIVATE_KEY: ${{ secrets.SSH_PRIVATE_KEY }} \ No newline at end of file diff --git a/Dockerfile b/Dockerfile index 81784518..414298e9 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,4 +1,4 @@ -FROM 
registry.cn-hangzhou.aliyuncs.com/jcce-images/golang:1.22.4-alpine3.20 AS builder +FROM golang:1.22.4-alpine3.20 AS builder WORKDIR /app COPY . . @@ -9,12 +9,11 @@ RUN go mod download RUN CGO_ENABLED=0 go build -ldflags="-w -s" -o pcm-core-api -FROM registry.cn-hangzhou.aliyuncs.com/jcce-images/alpine:3.20 +FROM alpine:latest WORKDIR /app #修改alpine源为上海交通大学 -RUN sed -i 's/dl-cdn.alpinelinux.org/mirrors.sjtug.sjtu.edu.cn/g' /etc/apk/repositories && \ - apk add --no-cache ca-certificates && update-ca-certificates && \ +RUN apk add --no-cache ca-certificates && update-ca-certificates && \ apk add --update tzdata && \ rm -rf /var/cache/apk/* diff --git a/etc/pcm.yaml b/etc/pcm.yaml index 11ef6519..24254896 100644 --- a/etc/pcm.yaml +++ b/etc/pcm.yaml @@ -3,11 +3,11 @@ Host: 0.0.0.0 Port: 8999 MaxBytes: 524288000 -Timeout: 50000 +Timeout: 500000 DB: -# DataSource: root:uJpLd6u-J?HC1@(10.206.0.12:3306)/pcm?parseTime=true&loc=Local - DataSource: root:uJpLd6u-J?HC1@(47.92.88.143:3306)/pcm?parseTime=true&loc=Local + DataSource: root:uJpLd6u-J?HC1@(10.206.0.12:3306)/pcm?parseTime=true&loc=Local + #DataSource: root:uJpLd6u-J?HC1@(47.92.88.143:3306)/pcm?parseTime=true&loc=Local Redis: Host: 10.206.0.12:6379 Pass: redisPW123 @@ -27,7 +27,7 @@ ModelArtsRpcConf: Endpoints: - 127.0.0.1:2002 NonBlock: true - Timeout: 20000 + Timeout: 500000 #rpc ModelArtsImgRpcConf: diff --git a/go.mod b/go.mod index 893a1787..99eda213 100644 --- a/go.mod +++ b/go.mod @@ -14,12 +14,12 @@ require ( github.com/pkg/errors v0.9.1 github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring v0.71.2 github.com/prometheus/alertmanager v0.27.0 - github.com/prometheus/client_golang v1.20.2 - github.com/prometheus/common v0.57.0 + github.com/prometheus/client_golang v1.20.3 + github.com/prometheus/common v0.59.1 github.com/robfig/cron/v3 v3.0.1 - github.com/zeromicro/go-zero v1.7.0 + github.com/zeromicro/go-zero v1.7.2 gitlink.org.cn/JointCloud/pcm-ac v0.0.0-20240725071305-f751eec4dde1 - 
gitlink.org.cn/JointCloud/pcm-modelarts v0.0.0-20240831031531-89bdb156378a + gitlink.org.cn/JointCloud/pcm-modelarts v0.0.0-20240909072501-939c3144cd9e gitlink.org.cn/JointCloud/pcm-octopus v0.0.0-20240817071412-44397870b110 gitlink.org.cn/JointCloud/pcm-openstack v0.0.0-20240403033338-e7edabad4203 gitlink.org.cn/JointCloud/pcm-slurm v0.0.0-20240301080743-8b94bbaf57f5 @@ -126,7 +126,7 @@ require ( go.etcd.io/etcd/api/v3 v3.5.15 // indirect go.etcd.io/etcd/client/pkg/v3 v3.5.15 // indirect go.etcd.io/etcd/client/v3 v3.5.15 // indirect - go.mongodb.org/mongo-driver v1.16.0 // indirect + go.mongodb.org/mongo-driver v1.16.1 // indirect go.opentelemetry.io/otel v1.29.0 // indirect go.opentelemetry.io/otel/exporters/jaeger v1.17.0 // indirect go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.29.0 // indirect @@ -140,27 +140,27 @@ require ( go.uber.org/automaxprocs v1.5.3 // indirect go.uber.org/multierr v1.11.0 // indirect go.uber.org/zap v1.27.0 // indirect - golang.org/x/crypto v0.26.0 // indirect + golang.org/x/crypto v0.27.0 // indirect golang.org/x/exp v0.0.0-20240119083558-1b970713d09a // indirect - golang.org/x/mod v0.20.0 // indirect - golang.org/x/net v0.28.0 // indirect - golang.org/x/oauth2 v0.22.0 // indirect + golang.org/x/mod v0.21.0 // indirect + golang.org/x/net v0.29.0 // indirect + golang.org/x/oauth2 v0.23.0 // indirect golang.org/x/sync v0.8.0 // indirect - golang.org/x/sys v0.24.0 // indirect - golang.org/x/term v0.23.0 // indirect - golang.org/x/text v0.17.0 // indirect + golang.org/x/sys v0.25.0 // indirect + golang.org/x/term v0.24.0 // indirect + golang.org/x/text v0.18.0 // indirect golang.org/x/time v0.6.0 // indirect golang.org/x/tools v0.24.0 // indirect - google.golang.org/genproto/googleapis/api v0.0.0-20240827150818-7e3bb234dfed // indirect - google.golang.org/genproto/googleapis/rpc v0.0.0-20240827150818-7e3bb234dfed // indirect + google.golang.org/genproto/googleapis/api v0.0.0-20240903143218-8af14fe29dc1 // indirect + 
google.golang.org/genproto/googleapis/rpc v0.0.0-20240903143218-8af14fe29dc1 // indirect google.golang.org/protobuf v1.34.2 // indirect gopkg.in/inf.v0 v0.9.1 // indirect gopkg.in/yaml.v2 v2.4.0 // indirect gopkg.in/yaml.v3 v3.0.1 // indirect k8s.io/api v0.31.0 // indirect k8s.io/klog/v2 v2.130.1 // indirect - k8s.io/kube-openapi v0.0.0-20240827152857-f7e401e7b4c2 // indirect - k8s.io/utils v0.0.0-20240821151609-f90d01438635 // indirect + k8s.io/kube-openapi v0.0.0-20240903163716-9e1beecbcb38 // indirect + k8s.io/utils v0.0.0-20240902221715-702e33fdd3c3 // indirect sigs.k8s.io/json v0.0.0-20221116044647-bc3834ca7abd // indirect sigs.k8s.io/structured-merge-diff/v4 v4.4.1 // indirect ) diff --git a/go.sum b/go.sum index 3179e303..4b6cd18b 100644 --- a/go.sum +++ b/go.sum @@ -393,8 +393,8 @@ github.com/prometheus/client_golang v1.0.0/go.mod h1:db9x61etRT2tGnBNRi70OPL5Fsn github.com/prometheus/client_golang v1.4.0/go.mod h1:e9GMxYsXl05ICDXkRhurwBS4Q3OK1iX/F2sw+iXX5zU= github.com/prometheus/client_golang v1.7.1/go.mod h1:PY5Wy2awLA44sXw4AOSfFBetzPP4j5+D6mVACh+pe2M= github.com/prometheus/client_golang v1.11.0/go.mod h1:Z6t4BnS23TR94PD6BsDNk8yVqroYurpAkEiz0P2BEV0= -github.com/prometheus/client_golang v1.20.2 h1:5ctymQzZlyOON1666svgwn3s6IKWgfbjsejTMiXIyjg= -github.com/prometheus/client_golang v1.20.2/go.mod h1:PIEt8X02hGcP8JWbeHyeZ53Y/jReSnHgO035n//V5WE= +github.com/prometheus/client_golang v1.20.3 h1:oPksm4K8B+Vt35tUhw6GbSNSgVlVSBH0qELP/7u83l4= +github.com/prometheus/client_golang v1.20.3/go.mod h1:PIEt8X02hGcP8JWbeHyeZ53Y/jReSnHgO035n//V5WE= github.com/prometheus/client_model v0.0.0-20180712105110-5c3871d89910/go.mod h1:MbSGuTsp3dbXC40dX6PRTWyKYBIrTGTE9sqQNg2J8bo= github.com/prometheus/client_model v0.0.0-20190129233127-fd36f4220a90/go.mod h1:xMI15A0UPsDsEKsMN9yxemIoYk6Tm2C1GtYGdfGttqA= github.com/prometheus/client_model v0.0.0-20190812154241-14fe0d1b01d4/go.mod h1:xMI15A0UPsDsEKsMN9yxemIoYk6Tm2C1GtYGdfGttqA= @@ -406,8 +406,8 @@ github.com/prometheus/common 
v0.9.1/go.mod h1:yhUN8i9wzaXS3w1O07YhxHEBxD+W35wd8b github.com/prometheus/common v0.10.0/go.mod h1:Tlit/dnDKsSWFlCLTWaA1cyBgKHSMdTB80sz/V91rCo= github.com/prometheus/common v0.26.0/go.mod h1:M7rCNAaPfAosfx8veZJCuw84e35h3Cfd9VFqTh1DIvc= github.com/prometheus/common v0.29.0/go.mod h1:vu+V0TpY+O6vW9J44gczi3Ap/oXXR10b+M/gUGO4Hls= -github.com/prometheus/common v0.57.0 h1:Ro/rKjwdq9mZn1K5QPctzh+MA4Lp0BuYk5ZZEVhoNcY= -github.com/prometheus/common v0.57.0/go.mod h1:7uRPFSUTbfZWsJ7MHY56sqt7hLQu3bxXHDnNhl8E9qI= +github.com/prometheus/common v0.59.1 h1:LXb1quJHWm1P6wq/U824uxYi4Sg0oGvNeUm1z5dJoX0= +github.com/prometheus/common v0.59.1/go.mod h1:GpWM7dewqmVYcd7SmRaiWVe9SSqjf0UrwnYnpEZNuT0= github.com/prometheus/common/sigv4 v0.1.0 h1:qoVebwtwwEhS85Czm2dSROY5fTo2PAPEVdDeppTwGX4= github.com/prometheus/common/sigv4 v0.1.0/go.mod h1:2Jkxxk9yYvCkE5G1sQT7GuEXm57JrvHu9k5YwTjsNtI= github.com/prometheus/exporter-toolkit v0.11.0 h1:yNTsuZ0aNCNFQ3aFTD2uhPOvr4iD7fdBvKPAEGkNf+g= @@ -464,12 +464,12 @@ github.com/yuin/goldmark v1.3.5/go.mod h1:mwnBkeHKe2W/ZEtQ+71ViKU8L12m81fl3OWwC1 github.com/yuin/goldmark v1.4.13/go.mod h1:6yULJ656Px+3vBD8DxQVa3kxgyrAnzto9xy5taEt/CY= github.com/yuin/gopher-lua v1.1.1 h1:kYKnWBjvbNP4XLT3+bPEwAXJx262OhaHDWDVOPjL46M= github.com/yuin/gopher-lua v1.1.1/go.mod h1:GBR0iDaNXjAgGg9zfCvksxSRnQx76gclCIb7kdAd1Pw= -github.com/zeromicro/go-zero v1.7.0 h1:B+y7tUVlo3qVQ6F0I0R9bi+Dq4I1QdO9ZB+dz1r0p1s= -github.com/zeromicro/go-zero v1.7.0/go.mod h1:ypW4PzQI+jUrMcNJDDQ+7YW+pE+tMua9Xj/pmtmS1Dc= +github.com/zeromicro/go-zero v1.7.2 h1:a8lyVOG3KXG4LrAy6ZmtJTJtisX4Ostc4Pst4fE704I= +github.com/zeromicro/go-zero v1.7.2/go.mod h1:WFXfF92Exw0O7WECifS6r99JSzv4KEN49x9RhAfgkMc= gitlink.org.cn/JointCloud/pcm-ac v0.0.0-20240725071305-f751eec4dde1 h1:DicBXoQiC6mumMBeyqSPNrsjtqJIgk5Pv2hscu2xryw= gitlink.org.cn/JointCloud/pcm-ac v0.0.0-20240725071305-f751eec4dde1/go.mod h1:3eECiw9O2bIFkkePlloKyLNXiqBAhOxNrDoGaaGseGY= -gitlink.org.cn/JointCloud/pcm-modelarts v0.0.0-20240831031531-89bdb156378a 
h1:x3qNoZu7leHTx5gaDuLNR/T9ubwIpCXZH5hS6ZfwltQ= -gitlink.org.cn/JointCloud/pcm-modelarts v0.0.0-20240831031531-89bdb156378a/go.mod h1:/eOmBFZKWGoabG3sRVkVvIbLwsd2631k4jkUBR6x1AA= +gitlink.org.cn/JointCloud/pcm-modelarts v0.0.0-20240909072501-939c3144cd9e h1:6LYJggBoeAQxy/otzWjt40Pa7gnVvUR4c5YMi6A/NdU= +gitlink.org.cn/JointCloud/pcm-modelarts v0.0.0-20240909072501-939c3144cd9e/go.mod h1:/eOmBFZKWGoabG3sRVkVvIbLwsd2631k4jkUBR6x1AA= gitlink.org.cn/JointCloud/pcm-octopus v0.0.0-20240817071412-44397870b110 h1:GaXwr5sgDh0raHjUf9IewTvnRvajYea7zbLsaerYyXo= gitlink.org.cn/JointCloud/pcm-octopus v0.0.0-20240817071412-44397870b110/go.mod h1:QOD5+/l2D+AYBjF2h5T0mdJyfGAmF78QmeKdbBXbjLQ= gitlink.org.cn/JointCloud/pcm-openstack v0.0.0-20240403033338-e7edabad4203 h1:s6PsZ1+bev294IWdZRlV7mnOwI1+UzFcldVW/BqhQzI= @@ -484,8 +484,8 @@ go.etcd.io/etcd/client/pkg/v3 v3.5.15 h1:fo0HpWz/KlHGMCC+YejpiCmyWDEuIpnTDzpJLB5 go.etcd.io/etcd/client/pkg/v3 v3.5.15/go.mod h1:mXDI4NAOwEiszrHCb0aqfAYNCrZP4e9hRca3d1YK8EU= go.etcd.io/etcd/client/v3 v3.5.15 h1:23M0eY4Fd/inNv1ZfU3AxrbbOdW79r9V9Rl62Nm6ip4= go.etcd.io/etcd/client/v3 v3.5.15/go.mod h1:CLSJxrYjvLtHsrPKsy7LmZEE+DK2ktfd2bN4RhBMwlU= -go.mongodb.org/mongo-driver v1.16.0 h1:tpRsfBJMROVHKpdGyc1BBEzzjDUWjItxbVSZ8Ls4BQ4= -go.mongodb.org/mongo-driver v1.16.0/go.mod h1:oB6AhJQvFQL4LEHyXi6aJzQJtBiTQHiAd83l0GdFaiw= +go.mongodb.org/mongo-driver v1.16.1 h1:rIVLL3q0IHM39dvE+z2ulZLp9ENZKThVfuvN/IiN4l8= +go.mongodb.org/mongo-driver v1.16.1/go.mod h1:oB6AhJQvFQL4LEHyXi6aJzQJtBiTQHiAd83l0GdFaiw= go.opencensus.io v0.21.0/go.mod h1:mSImk1erAIZhrmZN+AvHh14ztQfjbGwt4TtuofqLduU= go.opencensus.io v0.22.0/go.mod h1:+kGneAE2xo2IficOXnaByMWTGM9T73dGwxeWcUqIpI8= go.opencensus.io v0.22.2/go.mod h1:yxeiOL68Rb0Xd1ddK5vPZ/oVn4vY4Ynel7k9FzqtOIw= @@ -533,8 +533,8 @@ golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPh golang.org/x/crypto v0.0.0-20210921155107-089bfa567519/go.mod h1:GvvjBRRGRdwPK5ydBHafDWAxML/pGHZbMvKqRZ5+Abc= golang.org/x/crypto 
v0.19.0/go.mod h1:Iy9bg/ha4yyC70EfRS8jz+B6ybOBKMaSxLj6P6oBDfU= golang.org/x/crypto v0.21.0/go.mod h1:0BP7YvVV9gBbVKyeTG0Gyn+gZm94bibOW5BjDEYAOMs= -golang.org/x/crypto v0.26.0 h1:RrRspgV4mU+YwB4FYnuBoKsUapNIL5cohGAmSH3azsw= -golang.org/x/crypto v0.26.0/go.mod h1:GY7jblb9wI+FOo5y8/S2oY4zWP07AkOJ4+jxCqdqn54= +golang.org/x/crypto v0.27.0 h1:GXm2NjJrPaiv/h1tb2UH8QfgC/hOf/+z0p6PT8o1w7A= +golang.org/x/crypto v0.27.0/go.mod h1:1Xngt8kV6Dvbssa53Ziq6Eqn0HqbZi5Z6R0ZpwQzt70= golang.org/x/exp v0.0.0-20190121172915-509febef88a4/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA= golang.org/x/exp v0.0.0-20190306152737-a1d7652674e8/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA= golang.org/x/exp v0.0.0-20190510132918-efd6b22b2522/go.mod h1:ZjyILWgesfNpC6sMxTJOJm9Kp84zZh5NQWvqDGG3Qr8= @@ -570,8 +570,8 @@ golang.org/x/mod v0.3.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= golang.org/x/mod v0.4.2/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= golang.org/x/mod v0.6.0-dev.0.20220419223038-86c51ed26bb4/go.mod h1:jJ57K6gSWd91VN4djpZkiMVwK6gcyfeH4XE8wZrZaV4= golang.org/x/mod v0.8.0/go.mod h1:iBbtSCu2XBx23ZKBPSOrRkjjQPZFPuis4dIYUhu/chs= -golang.org/x/mod v0.20.0 h1:utOm6MM3R3dnawAiJgn0y+xvuYRsm1RKM/4giyfDgV0= -golang.org/x/mod v0.20.0/go.mod h1:hTbmBsO62+eylJbnUtE2MGJUyE7QWk4xUqPFrRgJ+7c= +golang.org/x/mod v0.21.0 h1:vvrHzRwRfVKSiLrG+d4FMl/Qi4ukBCE6kZlTUkDYRT0= +golang.org/x/mod v0.21.0/go.mod h1:6SkKJ3Xj0I0BrPOZoBy3bdMptDDU9oJrpohJ3eWZ1fY= golang.org/x/net v0.0.0-20180724234803-3673e40ba225/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= golang.org/x/net v0.0.0-20180826012351-8a410e7b638d/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= golang.org/x/net v0.0.0-20181114220301-adae6a3d119a/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= @@ -611,16 +611,16 @@ golang.org/x/net v0.6.0/go.mod h1:2Tu9+aMcznHK/AK1HMvgo6xiTLG5rD5rZLDS+rp2Bjs= golang.org/x/net v0.10.0/go.mod h1:0qNGK6F8kojg2nk9dLZ2mShWaEBan6FAoqfSigmmuDg= golang.org/x/net 
v0.21.0/go.mod h1:bIjVDfnllIU7BJ2DNgfnXvpSvtn8VRwhlsaeUTyUS44= golang.org/x/net v0.22.0/go.mod h1:JKghWKKOSdJwpW2GEx0Ja7fmaKnMsbu+MWVZTokSYmg= -golang.org/x/net v0.28.0 h1:a9JDOJc5GMUJ0+UDqmLT86WiEy7iWyIhz8gz8E4e5hE= -golang.org/x/net v0.28.0/go.mod h1:yqtgsTWOOnlGLG9GFRrK3++bGOUEkNBoHZc8MEDWPNg= +golang.org/x/net v0.29.0 h1:5ORfpBpCs4HzDYoodCDBbwHzdR5UrLBZ3sOnUJmFoHo= +golang.org/x/net v0.29.0/go.mod h1:gLkgy8jTGERgjzMic6DS9+SP0ajcu6Xu3Orq/SpETg0= golang.org/x/oauth2 v0.0.0-20180821212333-d2e6202438be/go.mod h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U= golang.org/x/oauth2 v0.0.0-20190226205417-e64efc72b421/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw= golang.org/x/oauth2 v0.0.0-20190604053449-0f29369cfe45/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw= golang.org/x/oauth2 v0.0.0-20191202225959-858c2ad4c8b6/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw= golang.org/x/oauth2 v0.0.0-20200107190931-bf48bf16ab8d/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw= golang.org/x/oauth2 v0.0.0-20210514164344-f6687ab2804c/go.mod h1:KelEdhl1UZF7XfJ4dDtk6s++YSgaE7mD/BuKKDLBl4A= -golang.org/x/oauth2 v0.22.0 h1:BzDx2FehcG7jJwgWLELCdmLuxk2i+x9UDpSiss2u0ZA= -golang.org/x/oauth2 v0.22.0/go.mod h1:XYTD2NtWslqkgxebSiOHnXEap4TF09sJSc7H1sXbhtI= +golang.org/x/oauth2 v0.23.0 h1:PbgcYx2W7i4LvjJWEbf0ngHV6qJYr86PkAV3bXdLEbs= +golang.org/x/oauth2 v0.23.0/go.mod h1:XYTD2NtWslqkgxebSiOHnXEap4TF09sJSc7H1sXbhtI= golang.org/x/sync v0.0.0-20180314180146-1d60e4601c6f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20181108010431-42b317875d0f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20181221193216-37e7f081c4d4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= @@ -686,16 +686,16 @@ golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.8.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.17.0/go.mod 
h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= golang.org/x/sys v0.18.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= -golang.org/x/sys v0.24.0 h1:Twjiwq9dn6R1fQcyiK+wQyHWfaz/BJB+YIpzU/Cv3Xg= -golang.org/x/sys v0.24.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= +golang.org/x/sys v0.25.0 h1:r+8e+loiHxRqhXVl6ML1nO3l1+oFoWbnlu2Ehimmi34= +golang.org/x/sys v0.25.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo= golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8= golang.org/x/term v0.5.0/go.mod h1:jMB1sMXY+tzblOD4FWmEbocvup2/aLOaQEp7JmGp78k= golang.org/x/term v0.8.0/go.mod h1:xPskH00ivmX89bAKVGSKKtLOWNx2+17Eiy94tnKShWo= golang.org/x/term v0.17.0/go.mod h1:lLRBjIVuehSbZlaOtGMbcMncT+aqLLLmKrsjNrUguwk= golang.org/x/term v0.18.0/go.mod h1:ILwASektA3OnRv7amZ1xhE/KTR+u50pbXfZ03+6Nx58= -golang.org/x/term v0.23.0 h1:F6D4vR+EHoL9/sWAWgAR1H2DcHr4PareCbAaCo1RpuU= -golang.org/x/term v0.23.0/go.mod h1:DgV24QBUrK6jhZXl+20l6UWznPlwAHm1Q1mGHtydmSk= +golang.org/x/term v0.24.0 h1:Mh5cbb+Zk2hqqXNO7S1iTjEphVL+jb8ZWaqh/g+JWkM= +golang.org/x/term v0.24.0/go.mod h1:lOBK/LVxemqiMij05LGJ0tzNr8xlmwBRJ81PX6wVLH8= golang.org/x/text v0.0.0-20170915032832-14c0d48ead0c/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= golang.org/x/text v0.3.1-0.20180807135948-17ff2d5776d2/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= @@ -706,8 +706,8 @@ golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ= golang.org/x/text v0.7.0/go.mod h1:mrYo+phRRbMaCq/xk9113O4dZlRixOauAjOtrjsXDZ8= golang.org/x/text v0.9.0/go.mod h1:e1OnstbJyHTd6l/uOt8jFFHp6TRDWZR/bV3emEE/zU8= golang.org/x/text v0.14.0/go.mod h1:18ZOQIKpY8NJVqYksKHtTdi31H5itFRjB5/qKTNYzSU= -golang.org/x/text v0.17.0 
h1:XtiM5bkSOt+ewxlOE/aE/AKEHibwj/6gvWMl9Rsh0Qc= -golang.org/x/text v0.17.0/go.mod h1:BuEKDfySbSR4drPmRPG/7iBdf8hvFMuRexcpahXilzY= +golang.org/x/text v0.18.0 h1:XvMDiNzPAl0jr17s6W9lcaIhGUfUORdGCNsuLmPG224= +golang.org/x/text v0.18.0/go.mod h1:BuEKDfySbSR4drPmRPG/7iBdf8hvFMuRexcpahXilzY= golang.org/x/time v0.0.0-20181108054448-85acf8d2951c/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= golang.org/x/time v0.0.0-20190308202827-9d24e82272b4/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= golang.org/x/time v0.0.0-20191024005414-555d28b269f0/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= @@ -819,10 +819,10 @@ google.golang.org/genproto v0.0.0-20200618031413-b414f8b61790/go.mod h1:jDfRM7Fc google.golang.org/genproto v0.0.0-20200729003335-053ba62fc06f/go.mod h1:FWY/as6DDZQgahTzZj3fqbO1CbirC29ZNUFHwi0/+no= google.golang.org/genproto v0.0.0-20200804131852-c06518451d9c/go.mod h1:FWY/as6DDZQgahTzZj3fqbO1CbirC29ZNUFHwi0/+no= google.golang.org/genproto v0.0.0-20200825200019-8632dd797987/go.mod h1:FWY/as6DDZQgahTzZj3fqbO1CbirC29ZNUFHwi0/+no= -google.golang.org/genproto/googleapis/api v0.0.0-20240827150818-7e3bb234dfed h1:3RgNmBoI9MZhsj3QxC+AP/qQhNwpCLOvYDYYsFrhFt0= -google.golang.org/genproto/googleapis/api v0.0.0-20240827150818-7e3bb234dfed/go.mod h1:OCdP9MfskevB/rbYvHTsXTtKC+3bHWajPdoKgjcYkfo= -google.golang.org/genproto/googleapis/rpc v0.0.0-20240827150818-7e3bb234dfed h1:J6izYgfBXAI3xTKLgxzTmUltdYaLsuBxFCgDHWJ/eXg= -google.golang.org/genproto/googleapis/rpc v0.0.0-20240827150818-7e3bb234dfed/go.mod h1:UqMtugtsSgubUsoxbuAoiCXvqvErP7Gf0so0mK9tHxU= +google.golang.org/genproto/googleapis/api v0.0.0-20240903143218-8af14fe29dc1 h1:hjSy6tcFQZ171igDaN5QHOw2n6vx40juYbC/x67CEhc= +google.golang.org/genproto/googleapis/api v0.0.0-20240903143218-8af14fe29dc1/go.mod h1:qpvKtACPCQhAdu3PyQgV4l3LMXZEtft7y8QcarRsp9I= +google.golang.org/genproto/googleapis/rpc v0.0.0-20240903143218-8af14fe29dc1 h1:pPJltXNxVzT4pK9yD8vR9X75DaWYYmLGMsEvBfFQZzQ= 
+google.golang.org/genproto/googleapis/rpc v0.0.0-20240903143218-8af14fe29dc1/go.mod h1:UqMtugtsSgubUsoxbuAoiCXvqvErP7Gf0so0mK9tHxU= google.golang.org/grpc v1.19.0/go.mod h1:mqu4LbDTu4XGKhr4mRzUsmM4RtVoemTSY81AxZiDr8c= google.golang.org/grpc v1.20.1/go.mod h1:10oTOabMzJvdu6/UiuZezV6QK5dSlG84ov/aaiqXj38= google.golang.org/grpc v1.21.1/go.mod h1:oYelfM1adQP15Ek0mdvEgi9Df8B9CZIaU1084ijfRaM= @@ -902,10 +902,10 @@ k8s.io/client-go v0.31.0 h1:QqEJzNjbN2Yv1H79SsS+SWnXkBgVu4Pj3CJQgbx0gI8= k8s.io/client-go v0.31.0/go.mod h1:Y9wvC76g4fLjmU0BA+rV+h2cncoadjvjjkkIGoTLcGU= k8s.io/klog/v2 v2.130.1 h1:n9Xl7H1Xvksem4KFG4PYbdQCQxqc/tTUyrgXaOhHSzk= k8s.io/klog/v2 v2.130.1/go.mod h1:3Jpz1GvMt720eyJH1ckRHK1EDfpxISzJ7I9OYgaDtPE= -k8s.io/kube-openapi v0.0.0-20240827152857-f7e401e7b4c2 h1:GKE9U8BH16uynoxQii0auTjmmmuZ3O0LFMN6S0lPPhI= -k8s.io/kube-openapi v0.0.0-20240827152857-f7e401e7b4c2/go.mod h1:coRQXBK9NxO98XUv3ZD6AK3xzHCxV6+b7lrquKwaKzA= -k8s.io/utils v0.0.0-20240821151609-f90d01438635 h1:2wThSvJoW/Ncn9TmQEYXRnevZXi2duqHWf5OX9S3zjI= -k8s.io/utils v0.0.0-20240821151609-f90d01438635/go.mod h1:OLgZIPagt7ERELqWJFomSt595RzquPNLL48iOWgYOg0= +k8s.io/kube-openapi v0.0.0-20240903163716-9e1beecbcb38 h1:1dWzkmJrrprYvjGwh9kEUxmcUV/CtNU8QM7h1FLWQOo= +k8s.io/kube-openapi v0.0.0-20240903163716-9e1beecbcb38/go.mod h1:coRQXBK9NxO98XUv3ZD6AK3xzHCxV6+b7lrquKwaKzA= +k8s.io/utils v0.0.0-20240902221715-702e33fdd3c3 h1:b2FmK8YH+QEwq/Sy2uAEhmqL5nPfGYbJOcaqjeYYZoA= +k8s.io/utils v0.0.0-20240902221715-702e33fdd3c3/go.mod h1:OLgZIPagt7ERELqWJFomSt595RzquPNLL48iOWgYOg0= rsc.io/binaryregexp v0.2.0/go.mod h1:qTv7/COck+e2FymRvadv62gMdZztPaShugOCi3I+8D8= rsc.io/quote/v3 v3.1.0/go.mod h1:yEA65RcK8LyAZtP9Kv3t0HmxON59tX3rD+tICJqUlj0= rsc.io/sampler v1.3.0/go.mod h1:T1hPZKmBbMNahiBKFy5HrXp6adAjACjK9JXDnKaTXpA= diff --git a/internal/logic/inference/deployinstancelistlogic.go b/internal/logic/inference/deployinstancelistlogic.go index 6f17c7cd..434dd72f 100644 --- a/internal/logic/inference/deployinstancelistlogic.go +++ 
b/internal/logic/inference/deployinstancelistlogic.go @@ -38,6 +38,14 @@ func (l *DeployInstanceListLogic) DeployInstanceList(req *types.DeployInstanceLi return nil, tx.Error } + if len(tasklist) == 0 { + resp.List = nil + resp.PageSize = req.PageSize + resp.PageNum = req.PageNum + resp.Total = 0 + return + } + //count total var total int64 err = tx.Count(&total).Error diff --git a/internal/logic/inference/getdeploytasksbytypelogic.go b/internal/logic/inference/getdeploytasksbytypelogic.go index f1cd5268..54d9fbe6 100644 --- a/internal/logic/inference/getdeploytasksbytypelogic.go +++ b/internal/logic/inference/getdeploytasksbytypelogic.go @@ -70,12 +70,6 @@ func removeItem(items *[]*models.AiDeployInstanceTask, id int64) { if len(*items) == 0 { return } - if len(*items) == 1 { - if (*items)[0].Id == id { - (*items) = nil - return - } - } for i := len(*items) - 1; i >= 0; i-- { if (*items)[i].Id == id { *items = append((*items)[:i], (*items)[i+1:]...) diff --git a/internal/logic/inference/startallbydeploytaskidlogic.go b/internal/logic/inference/startallbydeploytaskidlogic.go index db87a515..d78f98b9 100644 --- a/internal/logic/inference/startallbydeploytaskidlogic.go +++ b/internal/logic/inference/startallbydeploytaskidlogic.go @@ -3,12 +3,15 @@ package inference import ( "context" "errors" + "fmt" "github.com/zeromicro/go-zero/core/logx" "gitlink.org.cn/JointCloud/pcm-coordinator/internal/scheduler/service/inference" "gitlink.org.cn/JointCloud/pcm-coordinator/internal/storeLink" "gitlink.org.cn/JointCloud/pcm-coordinator/internal/svc" "gitlink.org.cn/JointCloud/pcm-coordinator/internal/types" + "gitlink.org.cn/JointCloud/pcm-coordinator/pkg/models" "strconv" + "sync" ) type StartAllByDeployTaskIdLogic struct { @@ -35,17 +38,8 @@ func (l *StartAllByDeployTaskIdLogic) StartAllByDeployTaskId(req *types.StartAll return nil, err } - for _, ins := range list { - in, err := l.svcCtx.Scheduler.AiService.InferenceAdapterMap[strconv.FormatInt(ins.AdapterId, 
10)][strconv.FormatInt(ins.ClusterId, 10)].GetInferDeployInstance(l.ctx, ins.InstanceId) - if err != nil { - return nil, err - } - if checkStopStatus(in) { - success := l.svcCtx.Scheduler.AiService.InferenceAdapterMap[strconv.FormatInt(ins.AdapterId, 10)][strconv.FormatInt(ins.ClusterId, 10)].StartInferDeployInstance(l.ctx, ins.InstanceId) - if !success { - return nil, errors.New(ins.InstanceName + " start failed") - } - } + if len(list) == 0 { + return nil, errors.New("instances are empty") } err = l.svcCtx.Scheduler.AiStorages.UpdateDeployTaskById(id) @@ -53,9 +47,96 @@ func (l *StartAllByDeployTaskIdLogic) StartAllByDeployTaskId(req *types.StartAll return nil, err } + err = l.startAll(list) + if err != nil { + return nil, err + } + return resp, nil } +func (l *StartAllByDeployTaskIdLogic) startAll(list []*models.AiInferDeployInstance) error { + var wg sync.WaitGroup + var errCh = make(chan interface{}, len(list)) + var errs []interface{} + buf := make(chan bool, 2) + + for _, instance := range list { + wg.Add(1) + ins := instance + buf <- true + go func() { + in, err := l.svcCtx.Scheduler.AiService.InferenceAdapterMap[strconv.FormatInt(ins.AdapterId, 10)][strconv.FormatInt(ins.ClusterId, 10)].GetInferDeployInstance(l.ctx, ins.InstanceId) + if err != nil { + e := struct { + errTyp uint8 + err error + instanceName string + clusterName string + }{ + errTyp: 1, + err: err, + instanceName: ins.InstanceName, + clusterName: ins.ClusterName, + } + errCh <- e + wg.Done() + <-buf + return + } + if checkStopStatus(in) { + success := l.svcCtx.Scheduler.AiService.InferenceAdapterMap[strconv.FormatInt(ins.AdapterId, 10)][strconv.FormatInt(ins.ClusterId, 10)].StartInferDeployInstance(l.ctx, ins.InstanceId) + if !success { + e := struct { + errTyp uint8 + err error + instanceName string + clusterName string + }{ + errTyp: 2, + err: err, + instanceName: ins.InstanceName, + clusterName: ins.ClusterName, + } + errCh <- e + wg.Done() + <-buf + return + } + } + wg.Done() + <-buf + 
}() + } + wg.Wait() + close(errCh) + + for e := range errCh { + errs = append(errs, e) + } + + if len(errs) != 0 { + var msg string + for _, err := range errs { + e := (err).(struct { + errTyp uint8 + err error + instanceName string + clusterName string + }) + switch e.errTyp { + case 1: + msg += fmt.Sprintf("GetInstance Failed # clusterName: %v , instanceName: %v , error: %v \n", e.clusterName, e.instanceName, e.err.Error()) + case 2: + msg += fmt.Sprintf("StartInstance Failed # clusterName: %v , instanceName: %v , error: %v \n", e.clusterName, e.instanceName, e.err.Error()) + } + } + return errors.New(msg) + } + + return nil +} + func checkStopStatus(in *inference.DeployInstance) bool { switch in.ClusterType { case storeLink.TYPE_OCTOPUS: diff --git a/internal/logic/inference/stopallbydeploytaskidlogic.go b/internal/logic/inference/stopallbydeploytaskidlogic.go index d0ebc23a..c6eaad01 100644 --- a/internal/logic/inference/stopallbydeploytaskidlogic.go +++ b/internal/logic/inference/stopallbydeploytaskidlogic.go @@ -3,11 +3,14 @@ package inference import ( "context" "errors" + "fmt" "gitlink.org.cn/JointCloud/pcm-coordinator/internal/scheduler/service/inference" "gitlink.org.cn/JointCloud/pcm-coordinator/internal/storeLink" "gitlink.org.cn/JointCloud/pcm-coordinator/internal/svc" "gitlink.org.cn/JointCloud/pcm-coordinator/internal/types" + "gitlink.org.cn/JointCloud/pcm-coordinator/pkg/models" "strconv" + "sync" "github.com/zeromicro/go-zero/core/logx" ) @@ -36,17 +39,8 @@ func (l *StopAllByDeployTaskIdLogic) StopAllByDeployTaskId(req *types.StopAllByD return nil, err } - for _, ins := range list { - in, err := l.svcCtx.Scheduler.AiService.InferenceAdapterMap[strconv.FormatInt(ins.AdapterId, 10)][strconv.FormatInt(ins.ClusterId, 10)].GetInferDeployInstance(l.ctx, ins.InstanceId) - if err != nil { - return nil, err - } - if checkStatus(in) { - success := l.svcCtx.Scheduler.AiService.InferenceAdapterMap[strconv.FormatInt(ins.AdapterId, 
10)][strconv.FormatInt(ins.ClusterId, 10)].StopInferDeployInstance(l.ctx, ins.InstanceId) - if !success { - return nil, errors.New(ins.InstanceName + " stop failed") - } - } + if len(list) == 0 { + return nil, errors.New("instances are empty") } err = l.svcCtx.Scheduler.AiStorages.UpdateDeployTaskById(id) @@ -54,9 +48,96 @@ func (l *StopAllByDeployTaskIdLogic) StopAllByDeployTaskId(req *types.StopAllByD return nil, err } + err = l.stopAll(list) + if err != nil { + return nil, err + } + return resp, nil } +func (l *StopAllByDeployTaskIdLogic) stopAll(list []*models.AiInferDeployInstance) error { + var wg sync.WaitGroup + var errCh = make(chan interface{}, len(list)) + var errs []interface{} + buf := make(chan bool, 2) + + for _, instance := range list { + wg.Add(1) + ins := instance + buf <- true + go func() { + in, err := l.svcCtx.Scheduler.AiService.InferenceAdapterMap[strconv.FormatInt(ins.AdapterId, 10)][strconv.FormatInt(ins.ClusterId, 10)].GetInferDeployInstance(l.ctx, ins.InstanceId) + if err != nil { + e := struct { + errTyp uint8 + err error + instanceName string + clusterName string + }{ + errTyp: 1, + err: err, + instanceName: ins.InstanceName, + clusterName: ins.ClusterName, + } + errCh <- e + wg.Done() + <-buf + return + } + if checkStatus(in) { + success := l.svcCtx.Scheduler.AiService.InferenceAdapterMap[strconv.FormatInt(ins.AdapterId, 10)][strconv.FormatInt(ins.ClusterId, 10)].StopInferDeployInstance(l.ctx, ins.InstanceId) + if !success { + e := struct { + errTyp uint8 + err error + instanceName string + clusterName string + }{ + errTyp: 2, + err: err, + instanceName: ins.InstanceName, + clusterName: ins.ClusterName, + } + errCh <- e + wg.Done() + <-buf + return + } + } + wg.Done() + <-buf + }() + } + wg.Wait() + close(errCh) + + for e := range errCh { + errs = append(errs, e) + } + + if len(errs) != 0 { + var msg string + for _, err := range errs { + e := (err).(struct { + errTyp uint8 + err error + instanceName string + clusterName string + }) + 
switch e.errTyp {
+			case 1:
+				msg += fmt.Sprintf("GetInstance Failed # clusterName: %v , instanceName: %v , error: %v \n", e.clusterName, e.instanceName, e.err.Error())
+			case 2:
+				msg += fmt.Sprintf("StopInstance Failed # clusterName: %v , instanceName: %v , error: %v \n", e.clusterName, e.instanceName, e.err.Error())
+			}
+		}
+		return errors.New(msg)
+	}
+
+	return nil
+}
+
 func checkStatus(in *inference.DeployInstance) bool {
 	switch in.ClusterType {
 	case storeLink.TYPE_OCTOPUS:
diff --git a/internal/storeLink/modelarts.go b/internal/storeLink/modelarts.go
index 467098f0..acc624d5 100644
--- a/internal/storeLink/modelarts.go
+++ b/internal/storeLink/modelarts.go
@@ -16,6 +16,7 @@ package storeLink
 
 import (
 	"context"
+	"fmt"
 	"github.com/pkg/errors"
 	"gitlink.org.cn/JointCloud/pcm-coordinator/internal/scheduler/schedulers/option"
 	"gitlink.org.cn/JointCloud/pcm-coordinator/internal/scheduler/service/collector"
@@ -27,15 +28,19 @@ import (
 	"gitlink.org.cn/JointCloud/pcm-modelarts/client/modelartsservice"
 	"gitlink.org.cn/JointCloud/pcm-modelarts/modelarts"
 	modelartsclient "gitlink.org.cn/JointCloud/pcm-modelarts/modelarts"
+	"log"
 	"mime/multipart"
 	"strconv"
 	"strings"
+	"sync"
 	"time"
 )
 
 const (
-	Ascend = "Ascend"
-	Npu    = "npu"
+	Ascend              = "Ascend"
+	Npu                 = "npu"
+	ImageNetResnet50Cmd = "cd /home/ma-user && python ./inference_ascend.py"
+	ChatGLM6BCmd        = "cd /home/ma-user && python ./download_model.py && python ./inference_chatGLM.py"
 )
 
 type ModelArtsLink struct {
@@ -45,6 +50,60 @@ type ModelArtsLink struct {
 	participantId  int64
 	pageIndex      int32
 	pageSize       int32
+	SourceLocation string
+	Version        string
+	ModelId        string
+	ModelType      string
+}
+
+// Version is a three-part version number (major.minor.patch).
+type Version struct {
+	Major, Minor, Patch int
+}
+
+// ParseVersion parses a "major.minor.patch" string; it fails unless there are exactly three integer parts.
+func ParseVersion(versionStr string) (*Version, error) {
+	parts := strings.Split(versionStr, ".")
+	if len(parts) != 3 {
+		return nil, fmt.Errorf("invalid version format: %s", versionStr)
+	}
+
+	major, err := strconv.Atoi(parts[0])
+	if err != nil {
+		return nil, err
+	}
+
+	minor, err := strconv.Atoi(parts[1])
+	if err != nil {
+		return nil, err
+	}
+
+	patch, err := strconv.Atoi(parts[2])
+	if err != nil {
+		return nil, err
+	}
+
+	return &Version{Major: major, Minor: minor, Patch: patch}, nil
+}
+
+// Increment bumps the version one step; Patch and Minor roll over after 9 (0.0.9 -> 0.1.0, 0.9.9 -> 1.0.0).
+func (v *Version) Increment() {
+	if v.Patch < 9 {
+		v.Patch++
+	} else {
+		v.Patch = 0
+		if v.Minor < 9 {
+			v.Minor++
+		} else {
+			v.Minor = 0
+			v.Major++
+		}
+	}
+}
+
+// String formats the version as "major.minor.patch".
+func (v *Version) String() string {
+	return fmt.Sprintf("%d.%d.%d", v.Major, v.Minor, v.Patch)
 }
 
 func NewModelArtsLink(modelArtsRpc modelartsservice.ModelArtsService, modelArtsImgRpc imagesservice.ImagesService, name string, id int64, nickname string) *ModelArtsLink {
@@ -260,39 +319,163 @@ func (m *ModelArtsLink) UploadAlgorithmCode(ctx context.Context, resourceType st
 	return nil
 }
 
+// getSourceLocationFromImages lists up to 50 images and stores the SwrPath of
+// the one whose name matches option.ModelName in m.SourceLocation. It returns
+// an error when the listing fails or when no image matches.
+func (m *ModelArtsLink) getSourceLocationFromImages(ctx context.Context, option *option.InferOption) error {
+	req := &modelarts.ListImagesReq{
+		//Platform: m.platform,
+		Limit:  50,
+		Offset: 0,
+	}
+
+	ListImagesResp, err := m.modelArtsRpc.ListImages(ctx, req)
+	if err != nil {
+		return err
+	}
+	if ListImagesResp.Code != 200 {
+		return errors.New("failed to get ListImages")
+	}
+
+	// The ChatGLM-6B model is matched against a lower-case image name.
+	imageName := option.ModelName
+	if imageName == "ChatGLM-6B" {
+		imageName = "chatglm-6b"
+	}
+	for _, image := range ListImagesResp.Data {
+		if image.Name == imageName {
+			m.SourceLocation = image.SwrPath
+			return nil
+		}
+	}
+	return errors.New("SourceLocation not set")
+}
+
 // Get AI Application List
 func (m *ModelArtsLink) GetModelId(ctx context.Context, option *option.InferOption) error {
 	req := &modelarts.ListModelReq{
 		Platform:  m.platform,
 		ModelName: option.ModelName,
-		ModelType: "Image",
-		Limit:     int64(m.pageIndex),
-		Offset:    int64(m.pageSize),
+		//ModelType: "Image",
+		// NOTE(review): Limit is fed from pageIndex and Offset from pageSize, which
+		// looks swapped — confirm against the ListModels API before changing.
+		Limit:  int64(m.pageIndex),
+		Offset: int64(m.pageSize),
 	}
 	ListModelResp, err := m.modelArtsRpc.ListModels(ctx, req)
 	if err != nil {
 		return err
 	}
-	if ListModelResp.Code != 200 {
-		return errors.New("failed to get ModelId")
-	}
-	for _, ListModel := range ListModelResp.Models {
-		if ListModel.ModelName == option.ModelName {
-			option.ModelId = ListModel.ModelId
-			return nil
+	if ListModelResp.Code == 200 {
+		//return errors.New("failed to get ModelId")
+		for _, ListModel := range ListModelResp.Models {
+			if ListModel.ModelName == option.ModelName {
+				option.ModelId = ListModel.ModelId
+				m.Version = ListModel.ModelVersion
+				return nil
+			}
 		}
+
+	}
+	// No model with this name was found (or the listing did not return 200): create one.
+	err = m.CreateModel(ctx, option)
+	if err != nil {
+		return err
 	}
 	return nil
 }
 
-// 创建ai应用
+// GetModel fetches the details of the model identified by option.ModelId,
+// records its algorithm in m.ModelType and returns its status. It returns ""
+// when the lookup fails, so that a polling caller can simply try again.
+func (m *ModelArtsLink) GetModel(ctx context.Context, option *option.InferOption) string {
+	req := &modelarts.ShowModelReq{
+		Platform: m.platform,
+		ModelId:  option.ModelId,
+	}
+	// NOTE(review): this shadows the caller's ctx with a fresh background context,
+	// so caller cancellation never reaches the call — confirm that is intended.
+	ctx, cancel := context.WithTimeout(context.Background(), 50*time.Second)
+	defer cancel()
+	ShowModelsResp, err := m.modelArtsRpc.ShowModels(ctx, req)
+	if err != nil {
+		// Do not log.Fatalf here: it would terminate the whole process. Do not
+		// fall through either: the response cannot be trusted after an error.
+		log.Printf("could not call ShowModels: %v", err)
+		return ""
+	}
+	if ShowModelsResp.Code != 200 {
+		log.Printf("failed to get findModelsStatus, code: %v", ShowModelsResp.Code)
+		return ""
+	}
+	m.ModelType = ShowModelsResp.ShowModelDetail.ModelAlgorithm
+	return ShowModelsResp.ShowModelDetail.ModelStatus
+}
+
+// GetModelStatus blocks until the model identified by option.ModelId reports
+// the "published" status, polling every 5 seconds. It returns an error if the
+// model is still not published after 30 minutes, instead of polling forever.
+func (m *ModelArtsLink) GetModelStatus(ctx context.Context, option *option.InferOption) error {
+	// NOTE(review): wg is never waited on; it is kept only so that the "sync"
+	// import stays in use. Remove both together once confirmed unused.
+	var wg sync.WaitGroup
+	wg.Add(1)
+	//defer wg.Done()
+
+	// Generous upper bound on the wait; tune to the slowest expected model build.
+	deadline := time.Now().Add(30 * time.Minute)
+	for {
+		status := m.GetModel(ctx, option)
+		if status == "published" {
+			fmt.Println("Model is now published.")
+			break // stop polling as soon as the model is published
+		}
+		if time.Now().After(deadline) {
+			return errors.Errorf("model %v was not published in time, last status: %q", option.ModelId, status)
+		}
+		fmt.Println("Waiting for model to be published...")
+		time.Sleep(5 * time.Second) // wait before checking again
+	}
+	// Work that requires a published model continues from here.
+	fmt.Println("Continuing with the program...")
+	return nil
+}
+
+// CreateModel creates an AI application (model) from the image found for option.ModelName.
 func (m *ModelArtsLink) CreateModel(ctx context.Context, option *option.InferOption) error {
+	// Before creating an AI application, make sure a usable image exists.
+	err := m.getSourceLocationFromImages(ctx, option)
+	if err != nil {
+		// Keep the underlying cause instead of discarding it.
+		return errors.Wrap(err, "No image available for creationd")
+	}
+
+	// Start command for the known models; left empty for any other model.
+	var CMD string
+	if option.ModelName == "imagenet_resnet50" {
+		CMD = ImageNetResnet50Cmd
+	} else if option.ModelName == "ChatGLM-6B" {
+		CMD = ChatGLM6BCmd
+	}
+
+	if m.Version == "" {
+		m.Version = "0.0.1"
+	}
+	version, err := ParseVersion(m.Version)
+	if err != nil {
+		// ParseVersion returns a nil *Version on failure; bail out before using it.
+		return err
+	}
+	version.Increment()
 	req := &modelarts.CreateModelReq{
 		Platform:       m.platform,
 		ModelName:      option.ModelName,
-		ModelType:      "PyTorch",
-		ModelVersion:   "0.0.1",
-		SourceLocation: "",
+		ModelType:      "Image",
+		ModelVersion:   version.String(),
+		SourceLocation: m.SourceLocation,
+		InstallType:    []string{"real-time"},
+		Cmd:            CMD,
+		ModelAlgorithm: option.ModelType,
 	}
 	ModelResp, err := m.modelArtsRpc.CreateModel(ctx, req)
 	if err != nil {
@@ -600,6 +783,11 @@ func (m *ModelArtsLink) GetInferDeployInstance(ctx context.Context, id string) (
 	ins.ClusterName = m.platform
 	ins.CreatedTime = string(resp.StartTime)
 	ins.ClusterType = TYPE_MODELARTS
+	// Config may be empty; only read the model name when an entry exists.
+	if len(resp.Config) > 0 {
+		ins.ModelName = resp.Config[0].ModelName
+	}
+	ins.ModelType = m.ModelType
 	return ins, nil
 }
 
@@ -609,7 +797,12 @@ func (m *ModelArtsLink) GetInferResult(ctx context.Context, url string, file mul
 
 func (m *ModelArtsLink) CreateInferDeployInstance(ctx context.Context, option *option.InferOption) (string, error) {
 
-	err := m.CreateModel(ctx, option)
+	err := m.GetModelId(ctx, option)
+	if err != nil {
+		return "", err
+	}
+
+	err = m.GetModelStatus(ctx, option)
 	if err != nil {
 		return "", err
 	}
@@ -626,8 +819,10 @@ func (m *ModelArtsLink) CreateInferDeployInstance(ctx context.Context, option *o
 		Platform:    m.platform,
 		Config:      configItems,
 		InferType:   "real-time",
-		ServiceName: option.TaskName,
+		ServiceName: option.ModelName + "_" + option.ModelType + "_" + Npu,
 	}
+	ctx, cancel := context.WithTimeout(context.Background(), 150*time.Second)
+	defer cancel()
 	resp, err := m.modelArtsRpc.CreateService(ctx, req)
 	if err != nil {
 		return "", err
diff --git a/internal/storeLink/storeLink.go b/internal/storeLink/storeLink.go
index 8c7dc042..70ce291b 100644
--- a/internal/storeLink/storeLink.go
+++ b/internal/storeLink/storeLink.go
@@ -80,7 +80,7 @@ var (
 	}
 	ModelTypeMap = map[string][]string{
 		"image_classification": {"imagenet_resnet50"},
-		"text_to_text":         {"chatGLM_6B"},
+		"text_to_text":         {"ChatGLM-6B"},
 		"image_to_text":        {"blip-image-captioning-base"},
 		"text_to_image":        {"stable-diffusion-xl-base-1.0"},
 	}