diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml
index 077ad4c92..ed9c082dd 100644
--- a/.github/workflows/release.yml
+++ b/.github/workflows/release.yml
@@ -87,6 +87,12 @@ jobs:
             echo "docker/model-runner:latest-sglang-cuda" >> "$GITHUB_OUTPUT"
           fi
           echo 'EOF' >> "$GITHUB_OUTPUT"
+          echo "diffusers<<EOF" >> "$GITHUB_OUTPUT"
+          echo "docker/model-runner:${{ inputs.releaseTag }}-diffusers" >> "$GITHUB_OUTPUT"
+          if [ "${{ inputs.pushLatest }}" == "true" ]; then
+            echo "docker/model-runner:latest-diffusers" >> "$GITHUB_OUTPUT"
+          fi
+          echo 'EOF' >> "$GITHUB_OUTPUT"
           echo "rocm<<EOF" >> "$GITHUB_OUTPUT"
           echo "docker/model-runner:${{ inputs.releaseTag }}-rocm" >> "$GITHUB_OUTPUT"
           if [ "${{ inputs.pushLatest }}" == "true" ]; then
@@ -182,6 +188,19 @@ jobs:
           provenance: mode=max
           tags: ${{ steps.tags.outputs.sglang-cuda }}
 
+      - name: Build Diffusers image
+        uses: docker/build-push-action@263435318d21b8e681c14492fe198d362a7d2c83
+        with:
+          file: Dockerfile
+          target: final-diffusers
+          platforms: linux/amd64, linux/arm64
+          build-args: |
+            "LLAMA_SERVER_VERSION=${{ inputs.llamaServerVersion }}"
+          push: true
+          sbom: true
+          provenance: mode=max
+          tags: ${{ steps.tags.outputs.diffusers }}
+
       - name: Build ROCm image
         uses: docker/build-push-action@263435318d21b8e681c14492fe198d362a7d2c83
         with:
diff --git a/cmd/cli/commands/install-runner.go b/cmd/cli/commands/install-runner.go
index a3640c1b6..b986a93ff 100644
--- a/cmd/cli/commands/install-runner.go
+++ b/cmd/cli/commands/install-runner.go
@@ -5,6 +5,7 @@ import (
 	"errors"
 	"fmt"
 	"os"
+	"strings"
 	"time"
 
 	"github.com/docker/docker/api/types/container"
@@ -13,6 +14,7 @@ import (
 	gpupkg "github.com/docker/model-runner/cmd/cli/pkg/gpu"
 	"github.com/docker/model-runner/cmd/cli/pkg/standalone"
 	"github.com/docker/model-runner/cmd/cli/pkg/types"
+	"github.com/docker/model-runner/pkg/inference/backends/diffusers"
 	"github.com/docker/model-runner/pkg/inference/backends/llamacpp"
 	"github.com/docker/model-runner/pkg/inference/backends/vllm"
 	"github.com/spf13/cobra"
@@ -26,7 +28,7 @@ const (
 	// installation will try to reach the model runner while waiting for it to
 	// be ready.
 	installWaitRetryInterval = 500 * time.Millisecond
-	backendUsage             = "Specify backend (" + llamacpp.Name + "|" + vllm.Name + "). Default: " + llamacpp.Name
+	backendUsage             = "Specify backend (" + llamacpp.Name + "|" + vllm.Name + "|" + diffusers.Name + "). Default: " + llamacpp.Name
 )
 
 // waitForStandaloneRunnerAfterInstall waits for a standalone model runner
@@ -322,8 +324,18 @@ func runInstallOrStart(cmd *cobra.Command, opts runnerOptions, debug bool) error
 	}
 
 	// Validate backend selection
-	if opts.backend != "" && opts.backend != llamacpp.Name && opts.backend != vllm.Name {
-		return fmt.Errorf("unknown backend: %q (supported: %s, %s)", opts.backend, llamacpp.Name, vllm.Name)
+	validBackends := []string{llamacpp.Name, vllm.Name, diffusers.Name}
+	if opts.backend != "" {
+		isValid := false
+		for _, valid := range validBackends {
+			if opts.backend == valid {
+				isValid = true
+				break
+			}
+		}
+		if !isValid {
+			return fmt.Errorf("unknown backend: %q (supported: %s)", opts.backend, strings.Join(validBackends, ", "))
+		}
 	}
 
 	// Validate backend-GPU compatibility
diff --git a/cmd/cli/docs/reference/docker_model_install-runner.yaml b/cmd/cli/docs/reference/docker_model_install-runner.yaml
index d8640fd83..562fd6bbf 100644
--- a/cmd/cli/docs/reference/docker_model_install-runner.yaml
+++ b/cmd/cli/docs/reference/docker_model_install-runner.yaml
@@ -8,7 +8,7 @@ plink: docker_model.yaml
 options:
     - option: backend
       value_type: string
-      description: 'Specify backend (llama.cpp|vllm). Default: llama.cpp'
+      description: 'Specify backend (llama.cpp|vllm|diffusers). Default: llama.cpp'
       deprecated: false
       hidden: false
       experimental: false
diff --git a/cmd/cli/docs/reference/docker_model_reinstall-runner.yaml b/cmd/cli/docs/reference/docker_model_reinstall-runner.yaml
index 28b56666d..35d328b55 100644
--- a/cmd/cli/docs/reference/docker_model_reinstall-runner.yaml
+++ b/cmd/cli/docs/reference/docker_model_reinstall-runner.yaml
@@ -8,7 +8,7 @@ plink: docker_model.yaml
 options:
     - option: backend
      value_type: string
-      description: 'Specify backend (llama.cpp|vllm). Default: llama.cpp'
+      description: 'Specify backend (llama.cpp|vllm|diffusers). Default: llama.cpp'
      deprecated: false
      hidden: false
      experimental: false
diff --git a/cmd/cli/docs/reference/docker_model_start-runner.yaml b/cmd/cli/docs/reference/docker_model_start-runner.yaml
index 5fa9df426..740e36c53 100644
--- a/cmd/cli/docs/reference/docker_model_start-runner.yaml
+++ b/cmd/cli/docs/reference/docker_model_start-runner.yaml
@@ -10,7 +10,7 @@ plink: docker_model.yaml
 options:
     - option: backend
      value_type: string
-      description: 'Specify backend (llama.cpp|vllm). Default: llama.cpp'
+      description: 'Specify backend (llama.cpp|vllm|diffusers). Default: llama.cpp'
      deprecated: false
      hidden: false
      experimental: false
diff --git a/cmd/cli/docs/reference/model_install-runner.md b/cmd/cli/docs/reference/model_install-runner.md
index 2d9e1c014..de40a5028 100644
--- a/cmd/cli/docs/reference/model_install-runner.md
+++ b/cmd/cli/docs/reference/model_install-runner.md
@@ -7,7 +7,7 @@ Install Docker Model Runner (Docker Engine only)
 
 | Name             | Type     | Default     | Description |
 |:-----------------|:---------|:------------|:-------------------------------------------------------------------------------------------------------|
-| `--backend`      | `string` |             | Specify backend (llama.cpp\|vllm). Default: llama.cpp |
+| `--backend`      | `string` |             | Specify backend (llama.cpp\|vllm\|diffusers). Default: llama.cpp |
 | `--debug`        | `bool`   |             | Enable debug logging |
 | `--do-not-track` | `bool`   |             | Do not track models usage in Docker Model Runner |
 | `--gpu`          | `string` | `auto`      | Specify GPU support (none\|auto\|cuda\|rocm\|musa\|cann) |
diff --git a/cmd/cli/docs/reference/model_reinstall-runner.md b/cmd/cli/docs/reference/model_reinstall-runner.md
index 2ec744310..457b322e5 100644
--- a/cmd/cli/docs/reference/model_reinstall-runner.md
+++ b/cmd/cli/docs/reference/model_reinstall-runner.md
@@ -7,7 +7,7 @@ Reinstall Docker Model Runner (Docker Engine only)
 
 | Name             | Type     | Default     | Description |
 |:-----------------|:---------|:------------|:-------------------------------------------------------------------------------------------------------|
-| `--backend`      | `string` |             | Specify backend (llama.cpp\|vllm). Default: llama.cpp |
+| `--backend`      | `string` |             | Specify backend (llama.cpp\|vllm\|diffusers). Default: llama.cpp |
 | `--debug`        | `bool`   |             | Enable debug logging |
 | `--do-not-track` | `bool`   |             | Do not track models usage in Docker Model Runner |
 | `--gpu`          | `string` | `auto`      | Specify GPU support (none\|auto\|cuda\|rocm\|musa\|cann) |
diff --git a/cmd/cli/docs/reference/model_start-runner.md b/cmd/cli/docs/reference/model_start-runner.md
index 4ca1acc33..24cf2fe12 100644
--- a/cmd/cli/docs/reference/model_start-runner.md
+++ b/cmd/cli/docs/reference/model_start-runner.md
@@ -7,7 +7,7 @@ Start Docker Model Runner (Docker Engine only)
 
 | Name             | Type     | Default     | Description |
 |:-----------------|:---------|:------------|:-------------------------------------------------------------------------------------------------------|
-| `--backend`      | `string` |             | Specify backend (llama.cpp\|vllm). Default: llama.cpp |
+| `--backend`      | `string` |             | Specify backend (llama.cpp\|vllm\|diffusers). Default: llama.cpp |
 | `--debug`        | `bool`   |             | Enable debug logging |
 | `--do-not-track` | `bool`   |             | Do not track models usage in Docker Model Runner |
 | `--gpu`          | `string` | `auto`      | Specify GPU support (none\|auto\|cuda\|rocm\|musa\|cann) |
diff --git a/cmd/cli/pkg/standalone/controller_image.go b/cmd/cli/pkg/standalone/controller_image.go
index d948bdd61..34237519e 100644
--- a/cmd/cli/pkg/standalone/controller_image.go
+++ b/cmd/cli/pkg/standalone/controller_image.go
@@ -4,6 +4,7 @@ import (
 	"os"
 
 	gpupkg "github.com/docker/model-runner/cmd/cli/pkg/gpu"
+	"github.com/docker/model-runner/pkg/inference/backends/diffusers"
 	"github.com/docker/model-runner/pkg/inference/backends/vllm"
 )
 
@@ -32,6 +33,10 @@ func controllerImageVariant(detectedGPU gpupkg.GPUSupport, backend string) strin
 	if backend == vllm.Name {
 		return "vllm-cuda"
 	}
+	// If diffusers backend is requested, return diffusers variant
+	if backend == diffusers.Name {
+		return "diffusers"
+	}
 	// Default to llama.cpp backend behavior
 	switch detectedGPU {
 	case gpupkg.GPUSupportCUDA:
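
Taken together, the diff adds "diffusers" as a third selectable backend, publishes a matching `-diffusers` controller image, and teaches `controllerImageVariant` to resolve it. As a rough usage sketch (not part of the diff itself, and assuming the standalone-runner CLI form shown in the reference docs above), the new value would be passed through the existing `--backend` flag:

    docker model install-runner --backend diffusers

With that selection, the runner would use the diffusers image variant built by the new release step, e.g. `docker/model-runner:latest-diffusers` when `pushLatest` is true.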