149 changes: 126 additions & 23 deletions codeflash/api/aiservice.py
@@ -1,5 +1,6 @@
from __future__ import annotations

import concurrent.futures
import json
import os
import platform
@@ -12,7 +13,6 @@
from codeflash.cli_cmds.console import console, logger
from codeflash.code_utils.code_replacer import is_zero_diff
from codeflash.code_utils.code_utils import unified_diff_strings
from codeflash.code_utils.config_consts import N_CANDIDATES_EFFECTIVE, N_CANDIDATES_LP_EFFECTIVE
from codeflash.code_utils.env_utils import get_codeflash_api_key
from codeflash.code_utils.git_utils import get_last_commit_author_if_pr_exists, get_repo_owner_and_name
from codeflash.code_utils.time_utils import humanize_runtime
@@ -35,6 +35,8 @@
from codeflash.models.models import AIServiceCodeRepairRequest, AIServiceRefinerRequest
from codeflash.result.explanation import Explanation

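# Shared worker pool for fanning out per-model optimization requests; max_workers=10 bounds the number of concurrent backend calls.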
multi_model_executor = concurrent.futures.ThreadPoolExecutor(max_workers=10, thread_name_prefix="multi_model")


class AiServiceClient:
def __init__(self) -> None:
@@ -92,7 +94,7 @@ def make_ai_service_request(
return response

def _get_valid_candidates(
self, optimizations_json: list[dict[str, Any]], source: OptimizedCandidateSource
self, optimizations_json: list[dict[str, Any]], source: OptimizedCandidateSource, model: str | None = None
) -> list[OptimizedCandidate]:
candidates: list[OptimizedCandidate] = []
for opt in optimizations_json:
@@ -106,6 +108,7 @@ def _get_valid_candidates(
optimization_id=opt["optimization_id"],
source=source,
parent_id=opt.get("parent_id", None),
model=model,
)
)
return candidates
@@ -115,10 +118,11 @@ def optimize_python_code( # noqa: D417
source_code: str,
dependency_code: str,
trace_id: str,
num_candidates: int = 10,
experiment_metadata: ExperimentMetadata | None = None,
*,
is_async: bool = False,
model: str | None = None,
call_sequence: int | None = None,
) -> list[OptimizedCandidate]:
"""Optimize the given python code for performance by making a request to the Django endpoint.

@@ -127,8 +131,9 @@
- source_code (str): The python code to optimize.
- dependency_code (str): The dependency code used as read-only context for the optimization
- trace_id (str): Trace id of optimization run
- num_candidates (int): Number of optimization variants to generate. Default is 10.
- experiment_metadata (ExperimentMetadata | None): Any available experiment metadata for this optimization
- model (str | None): Model name to use ("gpt-4.1" or "claude-sonnet-4-5"). Default is None (server default).
- call_sequence (int | None): Sequence number for multi-model calls (1, 2, 3...). Default is None.

Returns
-------
@@ -141,20 +146,19 @@
payload = {
"source_code": source_code,
"dependency_code": dependency_code,
"num_variants": num_candidates,
"trace_id": trace_id,
"python_version": platform.python_version(),
"experiment_metadata": experiment_metadata,
"codeflash_version": codeflash_version,
"current_username": get_last_commit_author_if_pr_exists(None),
"repo_owner": git_repo_owner,
"repo_name": git_repo_name,
"n_candidates": N_CANDIDATES_EFFECTIVE,
"is_async": is_async,
"model": model,
"call_sequence": call_sequence,
}
logger.debug(f"Sending optimize request: model={model}, trace_id={trace_id}, call_sequence={call_sequence}")

logger.info("!lsp|Generating optimized candidates…")
console.rule()
try:
response = self.make_ai_service_request("/optimize", payload=payload, timeout=60)
except requests.exceptions.RequestException as e:
@@ -164,17 +168,16 @@

if response.status_code == 200:
optimizations_json = response.json()["optimizations"]
console.rule()
end_time = time.perf_counter()
logger.debug(f"!lsp|Generating possible optimizations took {end_time - start_time:.2f} seconds.")
return self._get_valid_candidates(optimizations_json, OptimizedCandidateSource.OPTIMIZE)
logger.debug(f"Backend returned {len(optimizations_json)} optimization(s)")
return self._get_valid_candidates(optimizations_json, OptimizedCandidateSource.OPTIMIZE, model=model)
try:
error = response.json()["error"]
except Exception:
error = response.text
logger.error(f"Error generating optimized candidates: {response.status_code} - {error}")
ph("cli-optimize-error-response", {"response_status_code": response.status_code, "error": error})
console.rule()
return []

def optimize_python_code_line_profiler( # noqa: D417
@@ -183,8 +186,9 @@
dependency_code: str,
trace_id: str,
line_profiler_results: str,
num_candidates: int = 10,
experiment_metadata: ExperimentMetadata | None = None,
model: str | None = None,
call_sequence: int | None = None,
) -> list[OptimizedCandidate]:
"""Optimize the given python code for performance by making a request to the Django endpoint.

@@ -193,8 +197,9 @@
- source_code (str): The python code to optimize.
- dependency_code (str): The dependency code used as read-only context for the optimization
- trace_id (str): Trace id of optimization run
- num_candidates (int): Number of optimization variants to generate. Default is 10.
- experiment_metadata (ExperimentMetadata | None): Any available experiment metadata for this optimization
- model (str | None): Model name to use ("gpt-4.1" or "claude-sonnet-4-5"). Default is None (server default).
- call_sequence (int | None): Sequence number for multi-model calls (1, 2, 3...). Default is None.

Returns
-------
@@ -204,20 +209,18 @@
payload = {
"source_code": source_code,
"dependency_code": dependency_code,
"num_variants": num_candidates,
"line_profiler_results": line_profiler_results,
"trace_id": trace_id,
"python_version": platform.python_version(),
"experiment_metadata": experiment_metadata,
"codeflash_version": codeflash_version,
"lsp_mode": is_LSP_enabled(),
"n_candidates_lp": N_CANDIDATES_LP_EFFECTIVE,
"model": model,
"call_sequence": call_sequence,
}

console.rule()
if line_profiler_results == "":
logger.info("No LineProfiler results were provided, Skipping optimization.")
console.rule()
return []
try:
response = self.make_ai_service_request("/optimize-line-profiler", payload=payload, timeout=60)
@@ -228,20 +231,113 @@

if response.status_code == 200:
optimizations_json = response.json()["optimizations"]
logger.info(
f"!lsp|Generated {len(optimizations_json)} candidate optimizations using line profiler information."
)
console.rule()
return self._get_valid_candidates(optimizations_json, OptimizedCandidateSource.OPTIMIZE_LP)
logger.debug(f"Backend returned {len(optimizations_json)} LP optimization(s)")
return self._get_valid_candidates(optimizations_json, OptimizedCandidateSource.OPTIMIZE_LP, model=model)
try:
error = response.json()["error"]
except Exception:
error = response.text
logger.error(f"Error generating optimized candidates: {response.status_code} - {error}")
ph("cli-optimize-error-response", {"response_status_code": response.status_code, "error": error})
console.rule()
return []

def optimize_python_code_multi_model(
self,
source_code: str,
dependency_code: str,
base_trace_id: str,
model_distribution: list[tuple[str, int]],
experiment_metadata: ExperimentMetadata | None = None,
*,
is_async: bool = False,
sequence_offset: int = 0,
) -> tuple[list[OptimizedCandidate], int]:
"""Generate optimizations using multiple models in parallel."""
logger.info("Generating optimized candidates…")
console.rule()

futures: list[tuple[concurrent.futures.Future[list[OptimizedCandidate]], str]] = []

call_index = 0
for model_name, num_calls in model_distribution:
for _ in range(num_calls):
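# Each call gets its own trace id: the last three characters of the base id are replaced with a '0' marker plus a two-digit hex call index, while call_sequence numbers the calls consecutively (offset by sequence_offset).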
call_trace_id = f"{base_trace_id[:-3]}0{call_index:02x}"
call_sequence = sequence_offset + call_index + 1
call_index += 1
future = multi_model_executor.submit(
self.optimize_python_code,
source_code,
dependency_code,
call_trace_id,
experiment_metadata,
is_async=is_async,
model=model_name,
call_sequence=call_sequence,
)
futures.append((future, model_name))

concurrent.futures.wait([f for f, _ in futures])

all_candidates: list[OptimizedCandidate] = []
for future, model_name in futures:
try:
candidates = future.result()
all_candidates.extend(candidates)
except Exception as e:
logger.warning(f"Model {model_name} call failed: {e}")
continue

console.rule()
return all_candidates, call_index
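
A minimal usage sketch (illustrative only, not part of this diff): assuming an AiServiceClient instance `client` and strings `code`, `deps`, and `trace_id`, the effective distribution from config_consts can be fanned out like this:

# Hypothetical caller code; `client`, `code`, `deps`, and `trace_id` are stand-ins.
from codeflash.code_utils.config_consts import MODEL_DISTRIBUTION_EFFECTIVE

candidates, calls_made = client.optimize_python_code_multi_model(
    source_code=code,
    dependency_code=deps,
    base_trace_id=trace_id,
    model_distribution=MODEL_DISTRIBUTION_EFFECTIVE,  # e.g. [("gpt-4.1", 3), ("claude-sonnet-4-5", 2)]
    is_async=False,
)
# `calls_made` is the number of model calls issued; it can seed sequence_offset for a later pass.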

def optimize_python_code_line_profiler_multi_model(
self,
source_code: str,
dependency_code: str,
base_trace_id: str,
line_profiler_results: str,
model_distribution: list[tuple[str, int]],
experiment_metadata: ExperimentMetadata | None = None,
sequence_offset: int = 0,
) -> tuple[list[OptimizedCandidate], int]:
"""Generate line profiler optimizations using multiple models in parallel."""
logger.info("Generating optimized candidates with line profiler…")
console.rule()

futures: list[tuple[concurrent.futures.Future[list[OptimizedCandidate]], str]] = []

call_index = 0
for model_name, num_calls in model_distribution:
for _ in range(num_calls):
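# Same per-call trace-id scheme as the standard pass, but with a '1' marker so line-profiler calls stay distinguishable.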
call_trace_id = f"{base_trace_id[:-3]}1{call_index:02x}"
call_sequence = sequence_offset + call_index + 1
call_index += 1
future = multi_model_executor.submit(
self.optimize_python_code_line_profiler,
source_code,
dependency_code,
call_trace_id,
line_profiler_results,
experiment_metadata,
model_name,
call_sequence,
)
futures.append((future, model_name))

concurrent.futures.wait([f for f, _ in futures])

all_candidates: list[OptimizedCandidate] = []
for future, model_name in futures:
try:
candidates = future.result()
all_candidates.extend(candidates)
except Exception as e:
logger.warning(f"Line profiler model {model_name} call failed: {e}")
continue

console.rule()
return all_candidates, call_index
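
To keep call_sequence values unique across both passes, a follow-up line-profiler pass can reuse the call count returned above (another illustrative sketch, same assumed names plus `lp_results` for the line-profiler output):

# Hypothetical follow-up pass reusing `calls_made` from the standard pass.
from codeflash.code_utils.config_consts import MODEL_DISTRIBUTION_LP_EFFECTIVE

lp_candidates, _ = client.optimize_python_code_line_profiler_multi_model(
    source_code=code,
    dependency_code=deps,
    base_trace_id=trace_id,
    line_profiler_results=lp_results,
    model_distribution=MODEL_DISTRIBUTION_LP_EFFECTIVE,
    sequence_offset=calls_made,
)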

def optimize_python_code_refinement(self, request: list[AIServiceRefinerRequest]) -> list[OptimizedCandidate]:
"""Optimize the given python code for performance by making a request to the Django endpoint.

@@ -268,6 +364,7 @@ def optimize_python_code_refinement(self, request: list[AIServiceRefinerRequest]
"trace_id": opt.trace_id,
"function_references": opt.function_references,
"python_version": platform.python_version(),
"call_sequence": opt.call_sequence,
}
for opt in request
]
@@ -357,6 +454,7 @@ def get_new_explanation( # noqa: D417
throughput_improvement: str | None = None,
function_references: str | None = None,
codeflash_version: str = codeflash_version,
call_sequence: int | None = None,
) -> str:
"""Optimize the given python code for performance by making a request to the Django endpoint.

@@ -402,6 +500,7 @@
"throughput_improvement": throughput_improvement,
"function_references": function_references,
"codeflash_version": codeflash_version,
"call_sequence": call_sequence,
}
logger.info("loading|Generating explanation")
console.rule()
@@ -529,6 +628,7 @@ def generate_regression_tests( # noqa: D417
test_timeout: int,
trace_id: str,
test_index: int,
call_sequence: int | None = None,
) -> tuple[str, str, str] | None:
"""Generate regression tests for the given function by making a request to the Django endpoint.

@@ -564,6 +664,7 @@
"python_version": platform.python_version(),
"codeflash_version": codeflash_version,
"is_async": function_to_optimize.is_async,
"call_sequence": call_sequence,
}
try:
response = self.make_ai_service_request("/testgen", payload=payload, timeout=90)
@@ -604,6 +705,7 @@ def get_optimization_review(
replay_tests: str,
concolic_tests: str, # noqa: ARG002
calling_fn_details: str,
call_sequence: int | None = None,
) -> str:
"""Compute the optimization review of current Pull Request.

@@ -650,6 +752,7 @@
"codeflash_version": codeflash_version,
"calling_fn_details": calling_fn_details,
"python_version": platform.python_version(),
"call_sequence": call_sequence,
}
console.rule()
try:
16 changes: 16 additions & 0 deletions codeflash/code_utils/config_consts.py
@@ -32,6 +32,20 @@
MAX_N_CANDIDATES = 5
MAX_N_CANDIDATES_LP = 6

# Multi-model diversity configuration
# Each tuple is (model_name, num_calls) where each call returns 1 candidate
# Standard mode: 3 GPT-4.1 + 2 Claude Sonnet = 5 candidates
MODEL_DISTRIBUTION: list[tuple[str, int]] = [("gpt-4.1", 3), ("claude-sonnet-4-5", 2)]

# LSP mode: fewer candidates for faster response
MODEL_DISTRIBUTION_LSP: list[tuple[str, int]] = [("gpt-4.1", 2), ("claude-sonnet-4-5", 1)]

# Line profiler mode: 6 candidates total
MODEL_DISTRIBUTION_LP: list[tuple[str, int]] = [("gpt-4.1", 4), ("claude-sonnet-4-5", 2)]

# Line profiler LSP mode
MODEL_DISTRIBUTION_LP_LSP: list[tuple[str, int]] = [("gpt-4.1", 2), ("claude-sonnet-4-5", 1)]
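
For a quick sanity check of how many candidates each distribution yields (illustrative helper, not part of the module):

# Each (model, num_calls) pair contributes num_calls single-candidate requests,
# so the candidate count for a distribution is just the sum of its call counts.
def total_candidates(distribution: list[tuple[str, int]]) -> int:
    return sum(calls for _, calls in distribution)

# total_candidates(MODEL_DISTRIBUTION) == 5; total_candidates(MODEL_DISTRIBUTION_LP) == 6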

try:
from codeflash.lsp.helpers import is_LSP_enabled

@@ -43,5 +57,7 @@
N_CANDIDATES_LP_EFFECTIVE = min(N_CANDIDATES_LP_LSP if _IS_LSP_ENABLED else N_CANDIDATES_LP, MAX_N_CANDIDATES_LP)
N_TESTS_TO_GENERATE_EFFECTIVE = N_TESTS_TO_GENERATE_LSP if _IS_LSP_ENABLED else N_TESTS_TO_GENERATE
TOTAL_LOOPING_TIME_EFFECTIVE = TOTAL_LOOPING_TIME_LSP if _IS_LSP_ENABLED else TOTAL_LOOPING_TIME
MODEL_DISTRIBUTION_EFFECTIVE = MODEL_DISTRIBUTION_LSP if _IS_LSP_ENABLED else MODEL_DISTRIBUTION
MODEL_DISTRIBUTION_LP_EFFECTIVE = MODEL_DISTRIBUTION_LP_LSP if _IS_LSP_ENABLED else MODEL_DISTRIBUTION_LP

MAX_CONTEXT_LEN_REVIEW = 1000
2 changes: 2 additions & 0 deletions codeflash/models/models.py
@@ -46,6 +46,7 @@ class AIServiceRefinerRequest:
original_line_profiler_results: str
optimized_line_profiler_results: str
function_references: str | None = None
call_sequence: int | None = None


class TestDiffScope(str, Enum):
@@ -464,6 +465,7 @@ class OptimizedCandidate:
optimization_id: str
source: OptimizedCandidateSource
parent_id: str | None = None
model: str | None = None # Which LLM model generated this candidate


@dataclass(frozen=True)