   build:
     needs: test
     runs-on: ubuntu-latest
+    strategy:
+      fail-fast: false
+      matrix:
+        include:
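+          # Empty-string fields are deliberate: the "Prepare build args" step
+          # below only appends a value when its matrix field is non-empty.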
+          - name: cpu
+            target: final-llamacpp
+            platforms: "linux/amd64, linux/arm64"
+            tag_suffix: ""
+            variant: ""
+            base_image: ""
+            extra_build_args: ""
+
+          - name: cuda
+            target: final-llamacpp
+            platforms: "linux/amd64, linux/arm64"
+            tag_suffix: "-cuda"
+            variant: "cuda"
+            base_image: "nvidia/cuda:12.9.0-runtime-ubuntu24.04"
+            extra_build_args: ""
+
+          - name: vllm-cuda
+            target: final-vllm
+            platforms: "linux/amd64, linux/arm64"
+            tag_suffix: "-vllm-cuda"
+            variant: "cuda"
+            base_image: "nvidia/cuda:13.0.2-runtime-ubuntu24.04"
+            extra_build_args: |
+              VLLM_CUDA_VERSION=cu130
+              VLLM_PYTHON_TAG=cp38-abi3
+
+          - name: sglang-cuda
+            target: final-sglang
+            platforms: "linux/amd64"
+            tag_suffix: "-sglang-cuda"
+            variant: "cuda"
+            base_image: "nvidia/cuda:12.9.0-runtime-ubuntu24.04"
+            extra_build_args: ""
+
+          - name: rocm
+            target: final-llamacpp
+            platforms: "linux/amd64"
+            tag_suffix: "-rocm"
+            variant: "rocm"
+            base_image: "rocm/dev-ubuntu-22.04"
+            extra_build_args: ""
+
     steps:
       - name: Checkout repo
         uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8
@@ -63,46 +109,10 @@ jobs:
         id: tags
         shell: bash
         run: |
66- echo "cpu<<EOF" >> "$GITHUB_OUTPUT"
67- echo "docker/model-runner:${{ inputs.releaseTag }}" >> "$GITHUB_OUTPUT"
68- if [ "${{ inputs.pushLatest }}" == "true" ]; then
69- echo "docker/model-runner:latest" >> "$GITHUB_OUTPUT"
70- fi
71- echo 'EOF' >> "$GITHUB_OUTPUT"
72- echo "cuda<<EOF" >> "$GITHUB_OUTPUT"
73- echo "docker/model-runner:${{ inputs.releaseTag }}-cuda" >> "$GITHUB_OUTPUT"
74- if [ "${{ inputs.pushLatest }}" == "true" ]; then
75- echo "docker/model-runner:latest-cuda" >> "$GITHUB_OUTPUT"
76- fi
77- echo 'EOF' >> "$GITHUB_OUTPUT"
78- echo "vllm-cuda<<EOF" >> "$GITHUB_OUTPUT"
79- echo "docker/model-runner:${{ inputs.releaseTag }}-vllm-cuda" >> "$GITHUB_OUTPUT"
80- if [ "${{ inputs.pushLatest }}" == "true" ]; then
81- echo "docker/model-runner:latest-vllm-cuda" >> "$GITHUB_OUTPUT"
82- fi
83- echo 'EOF' >> "$GITHUB_OUTPUT"
84- echo "sglang-cuda<<EOF" >> "$GITHUB_OUTPUT"
85- echo "docker/model-runner:${{ inputs.releaseTag }}-sglang-cuda" >> "$GITHUB_OUTPUT"
86- if [ "${{ inputs.pushLatest }}" == "true" ]; then
87- echo "docker/model-runner:latest-sglang-cuda" >> "$GITHUB_OUTPUT"
88- fi
89- echo 'EOF' >> "$GITHUB_OUTPUT"
90- echo "rocm<<EOF" >> "$GITHUB_OUTPUT"
91- echo "docker/model-runner:${{ inputs.releaseTag }}-rocm" >> "$GITHUB_OUTPUT"
92- if [ "${{ inputs.pushLatest }}" == "true" ]; then
93- echo "docker/model-runner:latest-rocm" >> "$GITHUB_OUTPUT"
94- fi
95- echo 'EOF' >> "$GITHUB_OUTPUT"
96- echo "musa<<EOF" >> "$GITHUB_OUTPUT"
97- echo "docker/model-runner:${{ inputs.releaseTag }}-musa" >> "$GITHUB_OUTPUT"
98- if [ "${{ inputs.pushLatest }}" == "true" ]; then
99- echo "docker/model-runner:latest-musa" >> "$GITHUB_OUTPUT"
100- fi
101- echo 'EOF' >> "$GITHUB_OUTPUT"
102- echo "cann<<EOF" >> "$GITHUB_OUTPUT"
103- echo "docker/model-runner:${{ inputs.releaseTag }}-cann" >> "$GITHUB_OUTPUT"
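+          # name<<EOF…EOF is GitHub's heredoc syntax for multi-line step outputs,
+          # letting the single "tags" output carry one image reference per line.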
112+ echo "tags<<EOF" >> "$GITHUB_OUTPUT"
113+ echo "docker/model-runner:${{ inputs.releaseTag }}${{ matrix.tag_suffix }}" >> "$GITHUB_OUTPUT"
104114 if [ "${{ inputs.pushLatest }}" == "true" ]; then
105- echo "docker/model-runner:latest-cann " >> "$GITHUB_OUTPUT"
115+ echo "docker/model-runner:latest${{ matrix.tag_suffix }} " >> "$GITHUB_OUTPUT"
106116 fi
107117 echo 'EOF' >> "$GITHUB_OUTPUT"
 
@@ -120,111 +130,118 @@ jobs:
           endpoint: "docker/make-product-smarter"
           install: true
 
-      - name: Build CPU image
-        uses: docker/build-push-action@263435318d21b8e681c14492fe198d362a7d2c83
-        with:
-          file: Dockerfile
-          target: final-llamacpp
-          platforms: linux/amd64, linux/arm64
-          build-args: |
-            "LLAMA_SERVER_VERSION=${{ inputs.llamaServerVersion }}"
-          push: true
-          sbom: true
-          provenance: mode=max
-          tags: ${{ steps.tags.outputs.cpu }}
+      - name: Prepare build args
+        id: build_args
+        shell: bash
+        run: |
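+          # Assemble a newline-separated KEY=VALUE list; build-push-action's
+          # build-args input accepts exactly this shape.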
+          ARGS="LLAMA_SERVER_VERSION=${{ inputs.llamaServerVersion }}"
+
+          if [ -n "${{ matrix.variant }}" ]; then
+            ARGS="${ARGS}
+          LLAMA_SERVER_VARIANT=${{ matrix.variant }}"
+          fi
+
+          if [ -n "${{ matrix.base_image }}" ]; then
+            ARGS="${ARGS}
+          BASE_IMAGE=${{ matrix.base_image }}"
+          fi
+
+          # Add vLLM version for vllm builds
+          if [ "${{ matrix.name }}" == "vllm-cuda" ]; then
+            ARGS="${ARGS}
+          VLLM_VERSION=${{ inputs.vllmVersion }}"
+          fi
+
+          # Add SGLang version for sglang builds
+          if [ "${{ matrix.name }}" == "sglang-cuda" ]; then
+            ARGS="${ARGS}
+          SGLANG_VERSION=${{ inputs.sglangVersion }}"
+          fi
+
+          # Add extra build args if present
+          if [ -n "${{ matrix.extra_build_args }}" ]; then
+            ARGS="${ARGS}
+          ${{ matrix.extra_build_args }}"
+          fi
+
+          echo "args<<EOF" >> "$GITHUB_OUTPUT"
+          echo "$ARGS" >> "$GITHUB_OUTPUT"
+          echo "EOF" >> "$GITHUB_OUTPUT"
 
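+      # A single parameterized build-and-push step now covers every matrix flavor.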
-      - name: Build CUDA image
+      - name: Build and push ${{ matrix.name }} image
         uses: docker/build-push-action@263435318d21b8e681c14492fe198d362a7d2c83
         with:
           file: Dockerfile
-          target: final-llamacpp
-          platforms: linux/amd64, linux/arm64
-          build-args: |
-            "LLAMA_SERVER_VERSION=${{ inputs.llamaServerVersion }}"
-            "LLAMA_SERVER_VARIANT=cuda"
-            "BASE_IMAGE=nvidia/cuda:12.9.0-runtime-ubuntu24.04"
+          target: ${{ matrix.target }}
+          platforms: ${{ matrix.platforms }}
+          build-args: ${{ steps.build_args.outputs.args }}
           push: true
           sbom: true
           provenance: mode=max
-          tags: ${{ steps.tags.outputs.cuda }}
+          tags: ${{ steps.tags.outputs.tags }}
 
-      - name: Build vLLM CUDA image
-        uses: docker/build-push-action@263435318d21b8e681c14492fe198d362a7d2c83
-        with:
-          file: Dockerfile
-          target: final-vllm
-          platforms: linux/amd64, linux/arm64
-          build-args: |
-            "LLAMA_SERVER_VERSION=${{ inputs.llamaServerVersion }}"
-            "LLAMA_SERVER_VARIANT=cuda"
-            "BASE_IMAGE=nvidia/cuda:13.0.2-runtime-ubuntu24.04"
-            "VLLM_VERSION=${{ inputs.vllmVersion }}"
-            "VLLM_CUDA_VERSION=cu130"
-            "VLLM_PYTHON_TAG=cp38-abi3"
-          push: true
-          sbom: true
-          provenance: mode=max
-          tags: ${{ steps.tags.outputs.vllm-cuda }}
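+  # Mirrors the "build" job, but opt-in via the buildMusaCann input.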
+  build-musa-cann:
+    needs: test
+    if: ${{ inputs.buildMusaCann }}
+    runs-on: ubuntu-latest
+    strategy:
+      fail-fast: false
+      matrix:
+        include:
+          - name: musa
+            target: final-llamacpp
+            platforms: "linux/amd64"
+            tag_suffix: "-musa"
+            variant: "musa"
+            base_image: "mthreads/musa:rc4.3.0-runtime-ubuntu22.04-amd64"
 
-      - name: Build SGLang CUDA image
-        uses: docker/build-push-action@263435318d21b8e681c14492fe198d362a7d2c83
-        with:
-          file: Dockerfile
-          target: final-sglang
-          platforms: linux/amd64
-          build-args: |
-            "LLAMA_SERVER_VERSION=${{ inputs.llamaServerVersion }}"
-            "LLAMA_SERVER_VARIANT=cuda"
-            "BASE_IMAGE=nvidia/cuda:12.9.0-runtime-ubuntu24.04"
-            "SGLANG_VERSION=${{ inputs.sglangVersion }}"
-          push: true
-          sbom: true
-          provenance: mode=max
-          tags: ${{ steps.tags.outputs.sglang-cuda }}
+          - name: cann
+            target: final-llamacpp
+            platforms: "linux/arm64, linux/amd64"
+            tag_suffix: "-cann"
+            variant: "cann"
+            base_image: "ascendai/cann:8.2.rc2-910b-ubuntu22.04-py3.11"
 
-      - name: Build ROCm image
-        uses: docker/build-push-action@263435318d21b8e681c14492fe198d362a7d2c83
+    steps:
+      - name: Checkout repo
+        uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8
+
+      - name: Format tags
+        id: tags
+        shell: bash
+        run: |
+          echo "tags<<EOF" >> "$GITHUB_OUTPUT"
+          echo "docker/model-runner:${{ inputs.releaseTag }}${{ matrix.tag_suffix }}" >> "$GITHUB_OUTPUT"
+          if [ "${{ inputs.pushLatest }}" == "true" ]; then
+            echo "docker/model-runner:latest${{ matrix.tag_suffix }}" >> "$GITHUB_OUTPUT"
+          fi
+          echo 'EOF' >> "$GITHUB_OUTPUT"
+
+      - name: Log in to DockerHub
+        uses: docker/login-action@5e57cd118135c172c3672efd75eb46360885c0ef
         with:
-          file: Dockerfile
-          target: final-llamacpp
-          platforms: linux/amd64
-          build-args: |
-            "LLAMA_SERVER_VERSION=${{ inputs.llamaServerVersion }}"
-            "LLAMA_SERVER_VARIANT=rocm"
-            "BASE_IMAGE=rocm/dev-ubuntu-22.04"
-          push: true
-          sbom: true
-          provenance: mode=max
-          tags: ${{ steps.tags.outputs.rocm }}
+          username: "docker"
+          password: ${{ secrets.ORG_ACCESS_TOKEN }}
 
-      - name: Build MUSA image
-        if: ${{ inputs.buildMusaCann }}
-        uses: docker/build-push-action@263435318d21b8e681c14492fe198d362a7d2c83
+      - name: Set up Buildx
+        uses: docker/setup-buildx-action@e468171a9de216ec08956ac3ada2f0791b6bd435
         with:
-          file: Dockerfile
-          target: final-llamacpp
-          platforms: linux/amd64
-          build-args: |
-            "LLAMA_SERVER_VERSION=${{ inputs.llamaServerVersion }}"
-            "LLAMA_SERVER_VARIANT=musa"
-            "BASE_IMAGE=mthreads/musa:rc4.3.0-runtime-ubuntu22.04-amd64"
-          push: true
-          sbom: true
-          provenance: mode=max
-          tags: ${{ steps.tags.outputs.musa }}
+          version: "lab:latest"
+          driver: cloud
+          endpoint: "docker/make-product-smarter"
+          install: true
 
-      - name: Build CANN image
-        if: ${{ inputs.buildMusaCann }}
+      - name: Build and push ${{ matrix.name }} image
         uses: docker/build-push-action@263435318d21b8e681c14492fe198d362a7d2c83
         with:
           file: Dockerfile
-          target: final-llamacpp
-          platforms: linux/arm64, linux/amd64
+          target: ${{ matrix.target }}
+          platforms: ${{ matrix.platforms }}
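+          # Only the three shared build args apply to musa/cann, so they are
+          # inlined here instead of assembled by a prep step.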
           build-args: |
224- " LLAMA_SERVER_VERSION=${{ inputs.llamaServerVersion }}"
225- " LLAMA_SERVER_VARIANT=cann"
226- " BASE_IMAGE=ascendai/cann:8.2.rc2-910b-ubuntu22.04-py3.11"
241+ LLAMA_SERVER_VERSION=${{ inputs.llamaServerVersion }}
242+ LLAMA_SERVER_VARIANT=${{ matrix.variant }}
243+ BASE_IMAGE=${{ matrix.base_image }}
227244 push : true
228245 sbom : true
229246 provenance : mode=max
230- tags : ${{ steps.tags.outputs.cann }}
247+ tags : ${{ steps.tags.outputs.tags }}