diff --git a/.devops/aly.yml b/.devops/aly.yml index 60e7480a..1f6b4ada 100644 --- a/.devops/aly.yml +++ b/.devops/aly.yml @@ -32,7 +32,7 @@ workflow: image_tag: '"latest"' registry_address: '"registry.cn-hangzhou.aliyuncs.com"' docker_file: '"Dockerfile"' - docker_build_path: git_clone_0.git_path + docker_build_path: '"."' workspace: git_clone_0.git_path image_clean: true image_push: true diff --git a/.github/workflows/docker-publish.yml b/.github/workflows/docker-publish.yml new file mode 100644 index 00000000..0fe3a82a --- /dev/null +++ b/.github/workflows/docker-publish.yml @@ -0,0 +1,55 @@ +name: Docker + +on: + push: + branches: [ "master" ] + tags: [ 'v*.*.*' ] + pull_request: + branches: [ "master" ] + workflow_dispatch: # 允许手动触发 + +env: + REGISTRY: registry.cn-hangzhou.aliyuncs.com # 修改为你的阿里云镜像仓库地址 + IMAGE_NAME: jcce/pcm-core-api # 修改为你的阿里云镜像仓库名称 + IMAGE_TAG: latest + +jobs: + build: + runs-on: ubuntu-latest + permissions: + contents: read + packages: write + id-token: write + + steps: + - name: Checkout repository + uses: actions/checkout@v4 + + - name: Set up QEMU + uses: docker/setup-qemu-action@v3 + + - name: Set up Docker Buildx + uses: docker/setup-buildx-action@v3 + + - name: Login to Docker Hub + uses: docker/login-action@v3 + with: + registry: ${{ env.REGISTRY }} + username: ${{ secrets.ALIYUN_USERNAME }} + password: ${{ secrets.ALIYUN_PASSWORD }} + + - name: Build and push + run: | + docker build -t ${{env.REGISTRY}}/${{env.IMAGE_NAME}}:${{env.IMAGE_TAG}} . 
+ docker push ${{env.REGISTRY}}/${{env.IMAGE_NAME}}:${{env.IMAGE_TAG}} + + - name: Set up SSH key + run: | + mkdir -p ~/.ssh + echo "${{ secrets.SSH_PRIVATE_KEY }}" > ~/.ssh/id_rsa + chmod 600 ~/.ssh/id_rsa + echo -e "Host *\n\tStrictHostKeyChecking no\n\n" > ~/.ssh/config + + - name: SSH to remote server and restart deployment + if: github.event_name != 'pull_request' + run: ssh ${{ secrets.SSH_USER }}@${{ secrets.SSH_HOST }} "kubectl rollout restart deployment ${{ secrets.SSH_DEPLOYMENT }} -n ${{ secrets.SSH_NAMESPACE }}" \ No newline at end of file diff --git a/.github/workflows/sync.yml b/.github/workflows/sync.yml new file mode 100644 index 00000000..8944fda0 --- /dev/null +++ b/.github/workflows/sync.yml @@ -0,0 +1,25 @@ +name: Sync Mirror Repository + +on: + schedule: + - cron: '0 * * * *' # 每小时同步一次 + workflow_dispatch: # 允许手动触发 + +jobs: + mirror: + runs-on: ubuntu-latest + + steps: + - name: Checkout target repository + uses: actions/checkout@v2 + with: + fetch-depth: 1 # shallow clone: only the latest commit is fetched (use 0 for full history) + + - name: Mirror source repository + uses: wearerequired/git-mirror-action@v1 + with: + source-repo: "https://gitlink.org.cn/JointCloud/pcm-coordinator.git" # 源仓库的URL + destination-repo: "git@github.com:${{ github.repository }}.git" # 目标仓库的URL + ssh: true + env: + SSH_PRIVATE_KEY: ${{ secrets.SSH_PRIVATE_KEY }} \ No newline at end of file diff --git a/Dockerfile b/Dockerfile index 81784518..414298e9 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,4 +1,4 @@ -FROM registry.cn-hangzhou.aliyuncs.com/jcce-images/golang:1.22.4-alpine3.20 AS builder +FROM golang:1.22.4-alpine3.20 AS builder WORKDIR /app COPY . . 
@@ -9,12 +9,11 @@ RUN go mod download RUN CGO_ENABLED=0 go build -ldflags="-w -s" -o pcm-core-api -FROM registry.cn-hangzhou.aliyuncs.com/jcce-images/alpine:3.20 +FROM alpine:latest WORKDIR /app #修改alpine源为上海交通大学 -RUN sed -i 's/dl-cdn.alpinelinux.org/mirrors.sjtug.sjtu.edu.cn/g' /etc/apk/repositories && \ - apk add --no-cache ca-certificates && update-ca-certificates && \ +RUN apk add --no-cache ca-certificates && update-ca-certificates && \ apk add --update tzdata && \ rm -rf /var/cache/apk/* diff --git a/go.mod b/go.mod index 3844129c..99eda213 100644 --- a/go.mod +++ b/go.mod @@ -14,12 +14,12 @@ require ( github.com/pkg/errors v0.9.1 github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring v0.71.2 github.com/prometheus/alertmanager v0.27.0 - github.com/prometheus/client_golang v1.20.2 - github.com/prometheus/common v0.58.0 + github.com/prometheus/client_golang v1.20.3 + github.com/prometheus/common v0.59.1 github.com/robfig/cron/v3 v3.0.1 github.com/zeromicro/go-zero v1.7.2 gitlink.org.cn/JointCloud/pcm-ac v0.0.0-20240725071305-f751eec4dde1 - gitlink.org.cn/JointCloud/pcm-modelarts v0.0.0-20240905092954-07ff355339dd + gitlink.org.cn/JointCloud/pcm-modelarts v0.0.0-20240909072501-939c3144cd9e gitlink.org.cn/JointCloud/pcm-octopus v0.0.0-20240817071412-44397870b110 gitlink.org.cn/JointCloud/pcm-openstack v0.0.0-20240403033338-e7edabad4203 gitlink.org.cn/JointCloud/pcm-slurm v0.0.0-20240301080743-8b94bbaf57f5 @@ -140,10 +140,10 @@ require ( go.uber.org/automaxprocs v1.5.3 // indirect go.uber.org/multierr v1.11.0 // indirect go.uber.org/zap v1.27.0 // indirect - golang.org/x/crypto v0.26.0 // indirect + golang.org/x/crypto v0.27.0 // indirect golang.org/x/exp v0.0.0-20240119083558-1b970713d09a // indirect golang.org/x/mod v0.21.0 // indirect - golang.org/x/net v0.28.0 // indirect + golang.org/x/net v0.29.0 // indirect golang.org/x/oauth2 v0.23.0 // indirect golang.org/x/sync v0.8.0 // indirect golang.org/x/sys v0.25.0 // indirect diff --git a/go.sum 
b/go.sum index 424d966c..4b6cd18b 100644 --- a/go.sum +++ b/go.sum @@ -393,8 +393,8 @@ github.com/prometheus/client_golang v1.0.0/go.mod h1:db9x61etRT2tGnBNRi70OPL5Fsn github.com/prometheus/client_golang v1.4.0/go.mod h1:e9GMxYsXl05ICDXkRhurwBS4Q3OK1iX/F2sw+iXX5zU= github.com/prometheus/client_golang v1.7.1/go.mod h1:PY5Wy2awLA44sXw4AOSfFBetzPP4j5+D6mVACh+pe2M= github.com/prometheus/client_golang v1.11.0/go.mod h1:Z6t4BnS23TR94PD6BsDNk8yVqroYurpAkEiz0P2BEV0= -github.com/prometheus/client_golang v1.20.2 h1:5ctymQzZlyOON1666svgwn3s6IKWgfbjsejTMiXIyjg= -github.com/prometheus/client_golang v1.20.2/go.mod h1:PIEt8X02hGcP8JWbeHyeZ53Y/jReSnHgO035n//V5WE= +github.com/prometheus/client_golang v1.20.3 h1:oPksm4K8B+Vt35tUhw6GbSNSgVlVSBH0qELP/7u83l4= +github.com/prometheus/client_golang v1.20.3/go.mod h1:PIEt8X02hGcP8JWbeHyeZ53Y/jReSnHgO035n//V5WE= github.com/prometheus/client_model v0.0.0-20180712105110-5c3871d89910/go.mod h1:MbSGuTsp3dbXC40dX6PRTWyKYBIrTGTE9sqQNg2J8bo= github.com/prometheus/client_model v0.0.0-20190129233127-fd36f4220a90/go.mod h1:xMI15A0UPsDsEKsMN9yxemIoYk6Tm2C1GtYGdfGttqA= github.com/prometheus/client_model v0.0.0-20190812154241-14fe0d1b01d4/go.mod h1:xMI15A0UPsDsEKsMN9yxemIoYk6Tm2C1GtYGdfGttqA= @@ -406,8 +406,8 @@ github.com/prometheus/common v0.9.1/go.mod h1:yhUN8i9wzaXS3w1O07YhxHEBxD+W35wd8b github.com/prometheus/common v0.10.0/go.mod h1:Tlit/dnDKsSWFlCLTWaA1cyBgKHSMdTB80sz/V91rCo= github.com/prometheus/common v0.26.0/go.mod h1:M7rCNAaPfAosfx8veZJCuw84e35h3Cfd9VFqTh1DIvc= github.com/prometheus/common v0.29.0/go.mod h1:vu+V0TpY+O6vW9J44gczi3Ap/oXXR10b+M/gUGO4Hls= -github.com/prometheus/common v0.58.0 h1:N+N8vY4/23r6iYfD3UQZUoJPnUYAo7v6LG5XZxjZTXo= -github.com/prometheus/common v0.58.0/go.mod h1:GpWM7dewqmVYcd7SmRaiWVe9SSqjf0UrwnYnpEZNuT0= +github.com/prometheus/common v0.59.1 h1:LXb1quJHWm1P6wq/U824uxYi4Sg0oGvNeUm1z5dJoX0= +github.com/prometheus/common v0.59.1/go.mod h1:GpWM7dewqmVYcd7SmRaiWVe9SSqjf0UrwnYnpEZNuT0= github.com/prometheus/common/sigv4 
v0.1.0 h1:qoVebwtwwEhS85Czm2dSROY5fTo2PAPEVdDeppTwGX4= github.com/prometheus/common/sigv4 v0.1.0/go.mod h1:2Jkxxk9yYvCkE5G1sQT7GuEXm57JrvHu9k5YwTjsNtI= github.com/prometheus/exporter-toolkit v0.11.0 h1:yNTsuZ0aNCNFQ3aFTD2uhPOvr4iD7fdBvKPAEGkNf+g= @@ -468,8 +468,8 @@ github.com/zeromicro/go-zero v1.7.2 h1:a8lyVOG3KXG4LrAy6ZmtJTJtisX4Ostc4Pst4fE70 github.com/zeromicro/go-zero v1.7.2/go.mod h1:WFXfF92Exw0O7WECifS6r99JSzv4KEN49x9RhAfgkMc= gitlink.org.cn/JointCloud/pcm-ac v0.0.0-20240725071305-f751eec4dde1 h1:DicBXoQiC6mumMBeyqSPNrsjtqJIgk5Pv2hscu2xryw= gitlink.org.cn/JointCloud/pcm-ac v0.0.0-20240725071305-f751eec4dde1/go.mod h1:3eECiw9O2bIFkkePlloKyLNXiqBAhOxNrDoGaaGseGY= -gitlink.org.cn/JointCloud/pcm-modelarts v0.0.0-20240905092954-07ff355339dd h1:rwEf9EHx9/KRHtA0VXDfSS3sEsi8e79C/4LVd9PgcFw= -gitlink.org.cn/JointCloud/pcm-modelarts v0.0.0-20240905092954-07ff355339dd/go.mod h1:/eOmBFZKWGoabG3sRVkVvIbLwsd2631k4jkUBR6x1AA= +gitlink.org.cn/JointCloud/pcm-modelarts v0.0.0-20240909072501-939c3144cd9e h1:6LYJggBoeAQxy/otzWjt40Pa7gnVvUR4c5YMi6A/NdU= +gitlink.org.cn/JointCloud/pcm-modelarts v0.0.0-20240909072501-939c3144cd9e/go.mod h1:/eOmBFZKWGoabG3sRVkVvIbLwsd2631k4jkUBR6x1AA= gitlink.org.cn/JointCloud/pcm-octopus v0.0.0-20240817071412-44397870b110 h1:GaXwr5sgDh0raHjUf9IewTvnRvajYea7zbLsaerYyXo= gitlink.org.cn/JointCloud/pcm-octopus v0.0.0-20240817071412-44397870b110/go.mod h1:QOD5+/l2D+AYBjF2h5T0mdJyfGAmF78QmeKdbBXbjLQ= gitlink.org.cn/JointCloud/pcm-openstack v0.0.0-20240403033338-e7edabad4203 h1:s6PsZ1+bev294IWdZRlV7mnOwI1+UzFcldVW/BqhQzI= @@ -533,8 +533,8 @@ golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPh golang.org/x/crypto v0.0.0-20210921155107-089bfa567519/go.mod h1:GvvjBRRGRdwPK5ydBHafDWAxML/pGHZbMvKqRZ5+Abc= golang.org/x/crypto v0.19.0/go.mod h1:Iy9bg/ha4yyC70EfRS8jz+B6ybOBKMaSxLj6P6oBDfU= golang.org/x/crypto v0.21.0/go.mod h1:0BP7YvVV9gBbVKyeTG0Gyn+gZm94bibOW5BjDEYAOMs= -golang.org/x/crypto v0.26.0 
h1:RrRspgV4mU+YwB4FYnuBoKsUapNIL5cohGAmSH3azsw= -golang.org/x/crypto v0.26.0/go.mod h1:GY7jblb9wI+FOo5y8/S2oY4zWP07AkOJ4+jxCqdqn54= +golang.org/x/crypto v0.27.0 h1:GXm2NjJrPaiv/h1tb2UH8QfgC/hOf/+z0p6PT8o1w7A= +golang.org/x/crypto v0.27.0/go.mod h1:1Xngt8kV6Dvbssa53Ziq6Eqn0HqbZi5Z6R0ZpwQzt70= golang.org/x/exp v0.0.0-20190121172915-509febef88a4/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA= golang.org/x/exp v0.0.0-20190306152737-a1d7652674e8/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA= golang.org/x/exp v0.0.0-20190510132918-efd6b22b2522/go.mod h1:ZjyILWgesfNpC6sMxTJOJm9Kp84zZh5NQWvqDGG3Qr8= @@ -611,8 +611,8 @@ golang.org/x/net v0.6.0/go.mod h1:2Tu9+aMcznHK/AK1HMvgo6xiTLG5rD5rZLDS+rp2Bjs= golang.org/x/net v0.10.0/go.mod h1:0qNGK6F8kojg2nk9dLZ2mShWaEBan6FAoqfSigmmuDg= golang.org/x/net v0.21.0/go.mod h1:bIjVDfnllIU7BJ2DNgfnXvpSvtn8VRwhlsaeUTyUS44= golang.org/x/net v0.22.0/go.mod h1:JKghWKKOSdJwpW2GEx0Ja7fmaKnMsbu+MWVZTokSYmg= -golang.org/x/net v0.28.0 h1:a9JDOJc5GMUJ0+UDqmLT86WiEy7iWyIhz8gz8E4e5hE= -golang.org/x/net v0.28.0/go.mod h1:yqtgsTWOOnlGLG9GFRrK3++bGOUEkNBoHZc8MEDWPNg= +golang.org/x/net v0.29.0 h1:5ORfpBpCs4HzDYoodCDBbwHzdR5UrLBZ3sOnUJmFoHo= +golang.org/x/net v0.29.0/go.mod h1:gLkgy8jTGERgjzMic6DS9+SP0ajcu6Xu3Orq/SpETg0= golang.org/x/oauth2 v0.0.0-20180821212333-d2e6202438be/go.mod h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U= golang.org/x/oauth2 v0.0.0-20190226205417-e64efc72b421/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw= golang.org/x/oauth2 v0.0.0-20190604053449-0f29369cfe45/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw= diff --git a/internal/storeLink/modelarts.go b/internal/storeLink/modelarts.go index 9317f519..acc624d5 100644 --- a/internal/storeLink/modelarts.go +++ b/internal/storeLink/modelarts.go @@ -37,8 +37,10 @@ import ( ) const ( - Ascend = "Ascend" - Npu = "npu" + Ascend = "Ascend" + Npu = "npu" + ImageNetResnet50Cmd = "cd /home/ma-user & python ./inference_ascend.py" + ChatGLM6BCmd = "cd /home/ma-user && 
python ./download_model.py && python ./inference_chatGLM.py" ) type ModelArtsLink struct { @@ -51,6 +53,7 @@ type ModelArtsLink struct { SourceLocation string Version string ModelId string + ModelType string } // Version 结构体表示版本号 @@ -320,8 +323,8 @@ func (m *ModelArtsLink) UploadAlgorithmCode(ctx context.Context, resourceType st func (m *ModelArtsLink) getSourceLocationFromImages(ctx context.Context, option *option.InferOption) error { req := &modelarts.ListImagesReq{ //Platform: m.platform, - Limit: m.pageIndex, - Offset: 10, + Limit: 50, + Offset: 0, } ListImagesResp, err := m.modelArtsRpc.ListImages(ctx, req) @@ -333,9 +336,16 @@ func (m *ModelArtsLink) getSourceLocationFromImages(ctx context.Context, option } for _, ListImages := range ListImagesResp.Data { - if ListImages.Name == option.ModelName { - m.SourceLocation = ListImages.SwrPath - return nil + if option.ModelName == "ChatGLM-6B" { + if ListImages.Name == "chatglm-6b" { + m.SourceLocation = ListImages.SwrPath + return nil + } + } else { + if ListImages.Name == option.ModelName { + m.SourceLocation = ListImages.SwrPath + return nil + } } } return errors.New("SourceLocation not set") @@ -346,23 +356,28 @@ func (m *ModelArtsLink) GetModelId(ctx context.Context, option *option.InferOpti req := &modelarts.ListModelReq{ Platform: m.platform, ModelName: option.ModelName, - ModelType: "Image", - Limit: int64(m.pageIndex), - Offset: int64(m.pageSize), + //ModelType: "Image", + Limit: int64(m.pageIndex), + Offset: int64(m.pageSize), } ListModelResp, err := m.modelArtsRpc.ListModels(ctx, req) if err != nil { return err } - if ListModelResp.Code != 200 { - return errors.New("failed to get ModelId") - } - for _, ListModel := range ListModelResp.Models { - if ListModel.ModelName == option.ModelName { - option.ModelId = ListModel.ModelId - m.Version = ListModel.ModelVersion - return nil + if ListModelResp.Code == 200 { + //return errors.New("failed to get ModelId") + for _, ListModel := range ListModelResp.Models { + 
if ListModel.ModelName == option.ModelName { + option.ModelId = ListModel.ModelId + m.Version = ListModel.ModelVersion + return nil + } } + + } + err = m.CreateModel(ctx, option) + if err != nil { + return err } return nil } @@ -386,6 +401,7 @@ func (m *ModelArtsLink) GetModel(ctx context.Context, option *option.InferOption if ShowModelsResp.Code != 200 { errors.New("failed to get findModelsStatus") } + m.ModelType = ShowModelsResp.ShowModelDetail.ModelAlgorithm return ShowModelsResp.ShowModelDetail.ModelStatus } @@ -417,15 +433,28 @@ func (m *ModelArtsLink) CreateModel(ctx context.Context, option *option.InferOpt return errors.New("No image available for creationd") } + // + var CMD string + if option.ModelName == "imagenet_resnet50" { + CMD = ImageNetResnet50Cmd + } else if option.ModelName == "ChatGLM-6B" { + CMD = ChatGLM6BCmd + } + + if m.Version == "" { + m.Version = "0.0.1" + } version, err := ParseVersion(m.Version) version.Increment() - req := &modelarts.CreateModelReq{ Platform: m.platform, ModelName: option.ModelName, ModelType: "Image", ModelVersion: version.String(), SourceLocation: m.SourceLocation, + InstallType: []string{"real-time"}, + Cmd: CMD, + ModelAlgorithm: option.ModelType, } ModelResp, err := m.modelArtsRpc.CreateModel(ctx, req) if err != nil { @@ -733,6 +762,8 @@ func (m *ModelArtsLink) GetInferDeployInstance(ctx context.Context, id string) ( ins.ClusterName = m.platform ins.CreatedTime = string(resp.StartTime) ins.ClusterType = TYPE_MODELARTS + ins.ModelName = resp.Config[0].ModelName + ins.ModelType = m.ModelType return ins, nil } @@ -747,11 +778,6 @@ func (m *ModelArtsLink) CreateInferDeployInstance(ctx context.Context, option *o return "", err } - err = m.CreateModel(ctx, option) - if err != nil { - return "", err - } - err = m.GetModelStatus(ctx, option) if err != nil { return "", err @@ -769,7 +795,7 @@ func (m *ModelArtsLink) CreateInferDeployInstance(ctx context.Context, option *o Platform: m.platform, Config: configItems, 
InferType: "real-time", - ServiceName: option.TaskName, + ServiceName: option.ModelName + "_" + option.ModelType + "_" + Npu, } ctx, cancel := context.WithTimeout(context.Background(), 150*time.Second) defer cancel()