Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 5 additions & 4 deletions .github/workflows/build.yml
Original file line number Diff line number Diff line change
Expand Up @@ -181,10 +181,11 @@ jobs:
os: ubuntu-22.04
- build: 'arm64'
os: ubuntu-22.04-arm
- build: 's390x'
os: ubuntu-24.04-s390x
- build: 'ppc64le'
os: ubuntu-24.04-ppc64le
# Disabled - requires special GitHub runners not available on forks
# - build: 's390x'
# os: ubuntu-24.04-s390x
# - build: 'ppc64le'
# os: ubuntu-24.04-ppc64le
Comment on lines -184 to +188
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Restore please.


runs-on: ${{ matrix.os }}

Expand Down
5 changes: 3 additions & 2 deletions .github/workflows/release.yml
Original file line number Diff line number Diff line change
Expand Up @@ -134,8 +134,9 @@ jobs:
include:
- build: 'x64'
os: ubuntu-22.04
- build: 's390x'
os: ubuntu-24.04-s390x
# Disabled - requires special GitHub runners not available on forks
# - build: 's390x'
# os: ubuntu-24.04-s390x
Comment on lines -137 to +139
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Restore please.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I created a new PR for this: #18261

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Not necessary, but ok...

# GGML_BACKEND_DL and GGML_CPU_ALL_VARIANTS are not currently supported on arm
# - build: 'arm64'
# os: ubuntu-22.04-arm
Expand Down
8 changes: 8 additions & 0 deletions common/arg.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2811,6 +2811,14 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
params.models_autoload = value;
}
).set_examples({LLAMA_EXAMPLE_SERVER}).set_env("LLAMA_ARG_MODELS_AUTOLOAD"));
add_opt(common_arg(
{"--models-allow-extra-args"},
{"--no-models-allow-extra-args"},
string_format("for router server, whether to allow extra_args in /models/load endpoint (default: %s)", params.models_allow_extra_args ? "enabled" : "disabled"),
[](common_params & params, bool value) {
params.models_allow_extra_args = value;
}
).set_examples({LLAMA_EXAMPLE_SERVER}).set_env("LLAMA_ARG_MODELS_ALLOW_EXTRA_ARGS"));
add_opt(common_arg(
{"--jinja"},
{"--no-jinja"},
Expand Down
1 change: 1 addition & 0 deletions common/common.h
Original file line number Diff line number Diff line change
Expand Up @@ -498,6 +498,7 @@ struct common_params {
std::string models_preset = ""; // directory containing model presets for the router server
int models_max = 4; // maximum number of models to load simultaneously
bool models_autoload = true; // automatically load models when requested via the router server
bool models_allow_extra_args = false; // allow extra_args in /models/load endpoint

bool log_json = false;

Expand Down
23 changes: 21 additions & 2 deletions tools/server/server-models.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -379,7 +379,7 @@ void server_models::unload_lru() {
}
}

void server_models::load(const std::string & name) {
void server_models::load(const std::string & name, const std::vector<std::string> & extra_args) {
if (!has_model(name)) {
throw std::runtime_error("model name=" + name + " is not found");
}
Expand Down Expand Up @@ -411,6 +411,10 @@ void server_models::load(const std::string & name) {
inst.meta.update_args(ctx_preset, bin_path); // render args

std::vector<std::string> child_args = inst.meta.args; // copy
// append extra_args if provided (the /models/load route rejects them unless --models-allow-extra-args is set)
if (!extra_args.empty()) {
child_args.insert(child_args.end(), extra_args.begin(), extra_args.end());
}
std::vector<std::string> child_env = base_env; // copy
child_env.push_back("LLAMA_SERVER_ROUTER_PORT=" + std::to_string(base_params.port));

Expand Down Expand Up @@ -743,7 +747,22 @@ void server_models_routes::init_routes() {
res_err(res, format_error_response("model is already loaded", ERROR_TYPE_INVALID_REQUEST));
return res;
}
models.load(name);
// parse extra_args if provided and allowed
std::vector<std::string> extra_args;
if (body.contains("extra_args") && body["extra_args"].is_array()) {
if (!params.models_allow_extra_args) {
res_err(res, format_error_response(
"extra_args not allowed; start server with --models-allow-extra-args to enable",
ERROR_TYPE_INVALID_REQUEST));
return res;
}
for (const auto & arg : body["extra_args"]) {
if (arg.is_string()) {
extra_args.push_back(arg.get<std::string>());
}
}
}
models.load(name, extra_args);
res_ok(res, {{"success", true}});
return res;
};
Expand Down
2 changes: 1 addition & 1 deletion tools/server/server-models.h
Original file line number Diff line number Diff line change
Expand Up @@ -114,7 +114,7 @@ struct server_models {

// load and unload model instances
// these functions are thread-safe
void load(const std::string & name);
void load(const std::string & name, const std::vector<std::string> & extra_args = {});
void unload(const std::string & name);
void unload_all();

Expand Down