From adcfa12b191a18465262287ddd55eb585a61a240 Mon Sep 17 00:00:00 2001 From: Christopher Haen Date: Sat, 20 Dec 2025 13:02:35 -0500 Subject: [PATCH 1/2] router allow extra args flag --- common/arg.cpp | 8 ++++++++ common/common.h | 1 + tools/server/server-models.cpp | 23 +++++++++++++++++++++-- tools/server/server-models.h | 2 +- 4 files changed, 31 insertions(+), 3 deletions(-) diff --git a/common/arg.cpp b/common/arg.cpp index 476bc0084a4..0523264adf7 100644 --- a/common/arg.cpp +++ b/common/arg.cpp @@ -2811,6 +2811,14 @@ common_params_context common_params_parser_init(common_params & params, llama_ex params.models_autoload = value; } ).set_examples({LLAMA_EXAMPLE_SERVER}).set_env("LLAMA_ARG_MODELS_AUTOLOAD")); + add_opt(common_arg( + {"--models-allow-extra-args"}, + {"--no-models-allow-extra-args"}, + string_format("for router server, whether to allow extra_args in /models/load endpoint (default: %s)", params.models_allow_extra_args ? "enabled" : "disabled"), + [](common_params & params, bool value) { + params.models_allow_extra_args = value; + } + ).set_examples({LLAMA_EXAMPLE_SERVER}).set_env("LLAMA_ARG_MODELS_ALLOW_EXTRA_ARGS")); add_opt(common_arg( {"--jinja"}, {"--no-jinja"}, diff --git a/common/common.h b/common/common.h index 3e314f4c802..821419a8ed1 100644 --- a/common/common.h +++ b/common/common.h @@ -498,6 +498,7 @@ struct common_params { std::string models_preset = ""; // directory containing model presets for the router server int models_max = 4; // maximum number of models to load simultaneously bool models_autoload = true; // automatically load models when requested via the router server + bool models_allow_extra_args = false; // allow extra_args in /models/load endpoint bool log_json = false; diff --git a/tools/server/server-models.cpp b/tools/server/server-models.cpp index 08a0da5c875..8187b1cf775 100644 --- a/tools/server/server-models.cpp +++ b/tools/server/server-models.cpp @@ -379,7 +379,7 @@ void server_models::unload_lru() { } } -void 
server_models::load(const std::string & name) { +void server_models::load(const std::string & name, const std::vector<std::string> & extra_args) { if (!has_model(name)) { throw std::runtime_error("model name=" + name + " is not found"); } @@ -411,6 +411,10 @@ void server_models::load(const std::string & name) { inst.meta.update_args(ctx_preset, bin_path); // render args std::vector<std::string> child_args = inst.meta.args; // copy + // append extra_args if provided (requires --models-allow-extra-args) + if (!extra_args.empty()) { + child_args.insert(child_args.end(), extra_args.begin(), extra_args.end()); + } std::vector<std::string> child_env = base_env; // copy child_env.push_back("LLAMA_SERVER_ROUTER_PORT=" + std::to_string(base_params.port)); @@ -743,7 +747,22 @@ void server_models_routes::init_routes() { res_err(res, format_error_response("model is already loaded", ERROR_TYPE_INVALID_REQUEST)); return res; } - models.load(name); + // parse extra_args if provided and allowed + std::vector<std::string> extra_args; + if (body.contains("extra_args") && body["extra_args"].is_array()) { + if (!params.models_allow_extra_args) { + res_err(res, format_error_response( + "extra_args not allowed; start server with --models-allow-extra-args to enable", + ERROR_TYPE_INVALID_REQUEST)); + return res; + } + for (const auto & arg : body["extra_args"]) { + if (arg.is_string()) { + extra_args.push_back(arg.get<std::string>()); + } + } + } + models.load(name, extra_args); res_ok(res, {{"success", true}}); return res; }; diff --git a/tools/server/server-models.h b/tools/server/server-models.h index 3e1868c27cc..b85d32fbde6 100644 --- a/tools/server/server-models.h +++ b/tools/server/server-models.h @@ -114,7 +114,7 @@ struct server_models { // load and unload model instances // these functions are thread-safe - void load(const std::string & name); + void load(const std::string & name, const std::vector<std::string> & extra_args = {}); void unload(const std::string & name); void unload_all(); From 0f55dd575f0e757256fd8c3338ffcd1ec6db2b28 Mon Sep 17 00:00:00 
2001 From: Christopher Haen Date: Sat, 20 Dec 2025 23:04:22 -0500 Subject: [PATCH 2/2] ci: disable s390x and ppc64le builds (requires special runners) --- .github/workflows/build.yml | 9 +++++---- .github/workflows/release.yml | 5 +++-- 2 files changed, 8 insertions(+), 6 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index de3ad060656..090cfad4b61 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -181,10 +181,11 @@ jobs: os: ubuntu-22.04 - build: 'arm64' os: ubuntu-22.04-arm - - build: 's390x' - os: ubuntu-24.04-s390x - - build: 'ppc64le' - os: ubuntu-24.04-ppc64le + # Disabled - requires special GitHub runners not available on forks + # - build: 's390x' + # os: ubuntu-24.04-s390x + # - build: 'ppc64le' + # os: ubuntu-24.04-ppc64le runs-on: ${{ matrix.os }} diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 11f850511f5..f543ed49ed0 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -134,8 +134,9 @@ jobs: include: - build: 'x64' os: ubuntu-22.04 - - build: 's390x' - os: ubuntu-24.04-s390x + # Disabled - requires special GitHub runners not available on forks + # - build: 's390x' + # os: ubuntu-24.04-s390x # GGML_BACKEND_DL and GGML_CPU_ALL_VARIANTS are not currently supported on arm # - build: 'arm64' # os: ubuntu-22.04-arm