19 changes: 19 additions & 0 deletions .github/workflows/release.yml
@@ -87,6 +87,12 @@ jobs:
            echo "docker/model-runner:latest-sglang-cuda" >> "$GITHUB_OUTPUT"
          fi
          echo 'EOF' >> "$GITHUB_OUTPUT"
+         echo "diffusers<<EOF" >> "$GITHUB_OUTPUT"
+         echo "docker/model-runner:${{ inputs.releaseTag }}-diffusers" >> "$GITHUB_OUTPUT"
+         if [ "${{ inputs.pushLatest }}" == "true" ]; then
+           echo "docker/model-runner:latest-diffusers" >> "$GITHUB_OUTPUT"
+         fi
+         echo 'EOF' >> "$GITHUB_OUTPUT"
          echo "rocm<<EOF" >> "$GITHUB_OUTPUT"
          echo "docker/model-runner:${{ inputs.releaseTag }}-rocm" >> "$GITHUB_OUTPUT"
          if [ "${{ inputs.pushLatest }}" == "true" ]; then
@@ -182,6 +188,19 @@ jobs:
          provenance: mode=max
          tags: ${{ steps.tags.outputs.sglang-cuda }}

+     - name: Build Diffusers image
+       uses: docker/build-push-action@263435318d21b8e681c14492fe198d362a7d2c83
+       with:
+         file: Dockerfile
+         target: final-diffusers
+         platforms: linux/amd64, linux/arm64
+         build-args: |
+           "LLAMA_SERVER_VERSION=${{ inputs.llamaServerVersion }}"
+         push: true
+         sbom: true
+         provenance: mode=max
+         tags: ${{ steps.tags.outputs.diffusers }}
+
      - name: Build ROCm image
        uses: docker/build-push-action@263435318d21b8e681c14492fe198d362a7d2c83
        with:
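The `diffusers<<EOF … EOF` heredoc form is what lets the single `diffusers` step output hold multiple image tags, one per line. The tag logic itself is tiny; purely as a hedged illustration (the helper below is hypothetical, not part of the workflow), the same expansion in Go:

```go
package main

import "fmt"

// diffusersTags mirrors the shell logic in the workflow step above:
// always emit the release-tagged image, and append the "latest" alias
// when pushLatest is set. Hypothetical helper, for illustration only.
func diffusersTags(releaseTag string, pushLatest bool) []string {
	tags := []string{fmt.Sprintf("docker/model-runner:%s-diffusers", releaseTag)}
	if pushLatest {
		tags = append(tags, "docker/model-runner:latest-diffusers")
	}
	return tags
}

func main() {
	fmt.Println(diffusersTags("v1.2.3", true))
	// [docker/model-runner:v1.2.3-diffusers docker/model-runner:latest-diffusers]
}
```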
18 changes: 15 additions & 3 deletions cmd/cli/commands/install-runner.go
@@ -5,6 +5,7 @@ import (
	"errors"
	"fmt"
	"os"
+	"strings"
	"time"

	"github.com/docker/docker/api/types/container"
@@ -13,6 +14,7 @@ import (
	gpupkg "github.com/docker/model-runner/cmd/cli/pkg/gpu"
	"github.com/docker/model-runner/cmd/cli/pkg/standalone"
	"github.com/docker/model-runner/cmd/cli/pkg/types"
+	"github.com/docker/model-runner/pkg/inference/backends/diffusers"
	"github.com/docker/model-runner/pkg/inference/backends/llamacpp"
	"github.com/docker/model-runner/pkg/inference/backends/vllm"
	"github.com/spf13/cobra"
@@ -26,7 +28,7 @@ const (
	// installation will try to reach the model runner while waiting for it to
	// be ready.
	installWaitRetryInterval = 500 * time.Millisecond
-	backendUsage = "Specify backend (" + llamacpp.Name + "|" + vllm.Name + "). Default: " + llamacpp.Name
+	backendUsage = "Specify backend (" + llamacpp.Name + "|" + vllm.Name + "|" + diffusers.Name + "). Default: " + llamacpp.Name
)

// waitForStandaloneRunnerAfterInstall waits for a standalone model runner
@@ -322,8 +324,18 @@ func runInstallOrStart(cmd *cobra.Command, opts runnerOptions, debug bool) error
	}

	// Validate backend selection
-	if opts.backend != "" && opts.backend != llamacpp.Name && opts.backend != vllm.Name {
-		return fmt.Errorf("unknown backend: %q (supported: %s, %s)", opts.backend, llamacpp.Name, vllm.Name)
+	validBackends := []string{llamacpp.Name, vllm.Name, diffusers.Name}
+	if opts.backend != "" {
+		isValid := false
+		for _, valid := range validBackends {
+			if opts.backend == valid {
+				isValid = true
+				break
+			}
+		}
+		if !isValid {
+			return fmt.Errorf("unknown backend: %q (supported: %s)", opts.backend, strings.Join(validBackends, ", "))
+		}
	}

	// Validate backend-GPU compatibility
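Since Go 1.21, the standard library's `slices` package can replace a hand-rolled membership loop like the one added above. A minimal sketch of an equivalent, more compact validation (the `validateBackend` helper is hypothetical, not part of this change):

```go
package main

import (
	"fmt"
	"slices"
	"strings"
)

// validateBackend sketches the same check using slices.Contains (Go 1.21+):
// an empty backend is allowed, anything else must be a known backend name.
func validateBackend(backend string, validBackends []string) error {
	if backend != "" && !slices.Contains(validBackends, backend) {
		return fmt.Errorf("unknown backend: %q (supported: %s)", backend, strings.Join(validBackends, ", "))
	}
	return nil
}

func main() {
	valid := []string{"llama.cpp", "vllm", "diffusers"}
	fmt.Println(validateBackend("diffusers", valid)) // <nil>
	fmt.Println(validateBackend("mlx", valid))       // unknown backend: "mlx" ...
}
```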
2 changes: 1 addition & 1 deletion cmd/cli/docs/reference/docker_model_install-runner.yaml
@@ -8,7 +8,7 @@ plink: docker_model.yaml
options:
    - option: backend
      value_type: string
-     description: 'Specify backend (llama.cpp|vllm). Default: llama.cpp'
+     description: 'Specify backend (llama.cpp|vllm|diffusers). Default: llama.cpp'
      deprecated: false
      hidden: false
      experimental: false
2 changes: 1 addition & 1 deletion cmd/cli/docs/reference/docker_model_reinstall-runner.yaml
@@ -8,7 +8,7 @@ plink: docker_model.yaml
options:
    - option: backend
      value_type: string
-     description: 'Specify backend (llama.cpp|vllm). Default: llama.cpp'
+     description: 'Specify backend (llama.cpp|vllm|diffusers). Default: llama.cpp'
      deprecated: false
      hidden: false
      experimental: false
2 changes: 1 addition & 1 deletion cmd/cli/docs/reference/docker_model_start-runner.yaml
@@ -10,7 +10,7 @@ plink: docker_model.yaml
options:
    - option: backend
      value_type: string
-     description: 'Specify backend (llama.cpp|vllm). Default: llama.cpp'
+     description: 'Specify backend (llama.cpp|vllm|diffusers). Default: llama.cpp'
      deprecated: false
      hidden: false
      experimental: false
2 changes: 1 addition & 1 deletion cmd/cli/docs/reference/model_install-runner.md
@@ -7,7 +7,7 @@ Install Docker Model Runner (Docker Engine only)

| Name | Type | Default | Description |
|:-----------------|:---------|:------------|:-------------------------------------------------------------------------------------------------------|
- | `--backend` | `string` | | Specify backend (llama.cpp\|vllm). Default: llama.cpp |
+ | `--backend` | `string` | | Specify backend (llama.cpp\|vllm\|diffusers). Default: llama.cpp |
| `--debug` | `bool` | | Enable debug logging |
| `--do-not-track` | `bool` | | Do not track models usage in Docker Model Runner |
| `--gpu` | `string` | `auto` | Specify GPU support (none\|auto\|cuda\|rocm\|musa\|cann) |
2 changes: 1 addition & 1 deletion cmd/cli/docs/reference/model_reinstall-runner.md
@@ -7,7 +7,7 @@ Reinstall Docker Model Runner (Docker Engine only)

| Name | Type | Default | Description |
|:-----------------|:---------|:------------|:-------------------------------------------------------------------------------------------------------|
- | `--backend` | `string` | | Specify backend (llama.cpp\|vllm). Default: llama.cpp |
+ | `--backend` | `string` | | Specify backend (llama.cpp\|vllm\|diffusers). Default: llama.cpp |
| `--debug` | `bool` | | Enable debug logging |
| `--do-not-track` | `bool` | | Do not track models usage in Docker Model Runner |
| `--gpu` | `string` | `auto` | Specify GPU support (none\|auto\|cuda\|rocm\|musa\|cann) |
2 changes: 1 addition & 1 deletion cmd/cli/docs/reference/model_start-runner.md
@@ -7,7 +7,7 @@ Start Docker Model Runner (Docker Engine only)

| Name | Type | Default | Description |
|:-----------------|:---------|:------------|:-------------------------------------------------------------------------------------------------------|
- | `--backend` | `string` | | Specify backend (llama.cpp\|vllm). Default: llama.cpp |
+ | `--backend` | `string` | | Specify backend (llama.cpp\|vllm\|diffusers). Default: llama.cpp |
| `--debug` | `bool` | | Enable debug logging |
| `--do-not-track` | `bool` | | Do not track models usage in Docker Model Runner |
| `--gpu` | `string` | `auto` | Specify GPU support (none\|auto\|cuda\|rocm\|musa\|cann) |
5 changes: 5 additions & 0 deletions cmd/cli/pkg/standalone/controller_image.go
@@ -4,6 +4,7 @@ import (
	"os"

	gpupkg "github.com/docker/model-runner/cmd/cli/pkg/gpu"
+	"github.com/docker/model-runner/pkg/inference/backends/diffusers"
	"github.com/docker/model-runner/pkg/inference/backends/vllm"
)

@@ -32,6 +33,10 @@ func controllerImageVariant(detectedGPU gpupkg.GPUSupport, backend string) string
	if backend == vllm.Name {
		return "vllm-cuda"
	}
+	// If diffusers backend is requested, return diffusers variant
+	if backend == diffusers.Name {
+		return "diffusers"
+	}
	// Default to llama.cpp backend behavior
	switch detectedGPU {
	case gpupkg.GPUSupportCUDA:
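The diffusers override (like the vllm one above it) short-circuits before the detected GPU is consulted. A minimal table-driven test sketch pinning down just the two branches visible in this diff (the llama.cpp GPU cases are truncated here, so they are not asserted):

```go
package standalone

import (
	"testing"

	gpupkg "github.com/docker/model-runner/cmd/cli/pkg/gpu"
	"github.com/docker/model-runner/pkg/inference/backends/diffusers"
	"github.com/docker/model-runner/pkg/inference/backends/vllm"
)

func TestControllerImageVariantBackendOverrides(t *testing.T) {
	cases := []struct {
		backend string
		want    string
	}{
		{vllm.Name, "vllm-cuda"},
		{diffusers.Name, "diffusers"},
	}
	for _, c := range cases {
		// Explicit backend selection should win regardless of the
		// detected GPU, so CUDA is used here only as a stand-in value.
		if got := controllerImageVariant(gpupkg.GPUSupportCUDA, c.backend); got != c.want {
			t.Errorf("controllerImageVariant(CUDA, %q) = %q, want %q", c.backend, got, c.want)
		}
	}
}
```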