From adcfa12b191a18465262287ddd55eb585a61a240 Mon Sep 17 00:00:00 2001
From: Christopher Haen <me@chrishaen.com>
Date: Sat, 20 Dec 2025 13:02:35 -0500
Subject: [PATCH 1/2] server: add --models-allow-extra-args flag for router /models/load

---
 common/arg.cpp                 |  8 ++++++++
 common/common.h                |  1 +
 tools/server/server-models.cpp | 23 +++++++++++++++++++++--
 tools/server/server-models.h   |  2 +-
 4 files changed, 31 insertions(+), 3 deletions(-)

diff --git a/common/arg.cpp b/common/arg.cpp
index 476bc0084a4..0523264adf7 100644
--- a/common/arg.cpp
+++ b/common/arg.cpp
@@ -2811,6 +2811,14 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
             params.models_autoload = value;
         }
     ).set_examples({LLAMA_EXAMPLE_SERVER}).set_env("LLAMA_ARG_MODELS_AUTOLOAD"));
+    add_opt(common_arg(
+        {"--models-allow-extra-args"},
+        {"--no-models-allow-extra-args"},
+        string_format("for router server, whether to allow extra_args in /models/load endpoint (default: %s)", params.models_allow_extra_args ? "enabled" : "disabled"),
+        [](common_params & params, bool value) {
+            params.models_allow_extra_args = value;
+        }
+    ).set_examples({LLAMA_EXAMPLE_SERVER}).set_env("LLAMA_ARG_MODELS_ALLOW_EXTRA_ARGS"));
     add_opt(common_arg(
         {"--jinja"},
         {"--no-jinja"},
diff --git a/common/common.h b/common/common.h
index 3e314f4c802..821419a8ed1 100644
--- a/common/common.h
+++ b/common/common.h
@@ -498,6 +498,7 @@ struct common_params {
     std::string models_preset = ""; // directory containing model presets for the router server
     int models_max = 4;             // maximum number of models to load simultaneously
     bool models_autoload = true;    // automatically load models when requested via the router server
+    bool models_allow_extra_args = false; // router server: accept client-supplied extra_args in /models/load and append them to the child instance's args
 
     bool log_json = false;
 
diff --git a/tools/server/server-models.cpp b/tools/server/server-models.cpp
index 08a0da5c875..8187b1cf775 100644
--- a/tools/server/server-models.cpp
+++ b/tools/server/server-models.cpp
@@ -379,7 +379,7 @@ void server_models::unload_lru() {
     }
 }
 
-void server_models::load(const std::string & name) {
+void server_models::load(const std::string & name, const std::vector<std::string> & extra_args) {
     if (!has_model(name)) {
         throw std::runtime_error("model name=" + name + " is not found");
     }
@@ -411,6 +411,10 @@ void server_models::load(const std::string & name) {
         inst.meta.update_args(ctx_preset, bin_path); // render args
 
         std::vector<std::string> child_args = inst.meta.args; // copy
+        // append caller-supplied extra_args after the rendered preset args; the --models-allow-extra-args gate is enforced by the /models/load route handler, not here
+        if (!extra_args.empty()) {
+            child_args.insert(child_args.end(), extra_args.begin(), extra_args.end());
+        }
         std::vector<std::string> child_env  = base_env; // copy
         child_env.push_back("LLAMA_SERVER_ROUTER_PORT=" + std::to_string(base_params.port));
 
@@ -743,7 +747,22 @@ void server_models_routes::init_routes() {
             res_err(res, format_error_response("model is already loaded", ERROR_TYPE_INVALID_REQUEST));
             return res;
         }
-        models.load(name);
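+        // collect extra_args from the request body; reject the request if an array is supplied while --models-allow-extra-args is off (a non-array value and non-string elements are silently ignored)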
+        std::vector<std::string> extra_args;
+        if (body.contains("extra_args") && body["extra_args"].is_array()) {
+            if (!params.models_allow_extra_args) {
+                res_err(res, format_error_response(
+                    "extra_args not allowed; start server with --models-allow-extra-args to enable",
+                    ERROR_TYPE_INVALID_REQUEST));
+                return res;
+            }
+            for (const auto & arg : body["extra_args"]) {
+                if (arg.is_string()) {
+                    extra_args.push_back(arg.get<std::string>());
+                }
+            }
+        }
+        models.load(name, extra_args);
         res_ok(res, {{"success", true}});
         return res;
     };
diff --git a/tools/server/server-models.h b/tools/server/server-models.h
index 3e1868c27cc..b85d32fbde6 100644
--- a/tools/server/server-models.h
+++ b/tools/server/server-models.h
@@ -114,7 +114,7 @@ struct server_models {
 
     // load and unload model instances
     // these functions are thread-safe
-    void load(const std::string & name);
+    void load(const std::string & name, const std::vector<std::string> & extra_args = {});
     void unload(const std::string & name);
     void unload_all();
 

From 0f55dd575f0e757256fd8c3338ffcd1ec6db2b28 Mon Sep 17 00:00:00 2001
From: Christopher Haen <me@chrishaen.com>
Date: Sat, 20 Dec 2025 23:04:22 -0500
Subject: [PATCH 2/2] ci: disable s390x and ppc64le builds (requires special
 runners)

---
 .github/workflows/build.yml   | 9 +++++----
 .github/workflows/release.yml | 5 +++--
 2 files changed, 8 insertions(+), 6 deletions(-)

diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml
index de3ad060656..090cfad4b61 100644
--- a/.github/workflows/build.yml
+++ b/.github/workflows/build.yml
@@ -181,10 +181,11 @@ jobs:
             os: ubuntu-22.04
           - build: 'arm64'
             os: ubuntu-22.04-arm
-          - build: 's390x'
-            os: ubuntu-24.04-s390x
-          - build: 'ppc64le'
-            os: ubuntu-24.04-ppc64le
+          # Disabled - requires special GitHub runners not available on forks
+          # - build: 's390x'
+          #   os: ubuntu-24.04-s390x
+          # - build: 'ppc64le'
+          #   os: ubuntu-24.04-ppc64le
 
     runs-on: ${{ matrix.os }}
 
diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml
index 11f850511f5..f543ed49ed0 100644
--- a/.github/workflows/release.yml
+++ b/.github/workflows/release.yml
@@ -134,8 +134,9 @@ jobs:
         include:
           - build: 'x64'
             os: ubuntu-22.04
-          - build: 's390x'
-            os: ubuntu-24.04-s390x
+          # Disabled - requires special GitHub runners not available on forks
+          # - build: 's390x'
+          #   os: ubuntu-24.04-s390x
           # GGML_BACKEND_DL and GGML_CPU_ALL_VARIANTS are not currently supported on arm
           # - build: 'arm64'
           #   os: ubuntu-22.04-arm