pydantic · Danipulok · Dec 14, 2025 · DouweM · Dec 16, 2025 · DouweM
diff --git a/docs/output.md b/docs/output.md
@@ -315,6 +315,12 @@ When the model calls other tools in parallel with an output tool, you can contro
 
 The `'exhaustive'` strategy is useful when tools have important side effects (like logging, sending notifications, or updating metrics) that should always execute.
 
+!!! warning "Streaming vs Sync Behavior Difference"
+    `run_stream()` behaves differently from `run()` and `run_sync()` when choosing the final result:
+
+    - **`run_stream()`**: The first tool call (in response order) that **can** produce a final result, whether from an output tool or a deferred tool, becomes the final result.
+    - **`run()` / `run_sync()`**: The first **output** tool call becomes the final result. If no output tool is called, all **deferred** tool calls together become the final result, returned as `DeferredToolRequests`.
 yield _messages.FunctionToolCallEvent(call) 
 output_parts.append(e.tool_retry) 
 yield _messages.FunctionToolResultEvent(e.tool_retry) 
 yield _messages.FunctionToolCallEvent(call) 
 output_parts.append(e.tool_retry) 
 yield _messages.FunctionToolResultEvent(e.tool_retry) 
+
 #### Native Output
 
 Native Output mode uses a model's native "Structured Outputs" feature (aka "JSON Schema response format"), where the model is forced to only output text matching the provided JSON schema. Note that this is not supported by all models, and sometimes comes with restrictions. For example, Gemini cannot use tools at the same time as structured output, and attempting to do so will result in an error.

diff --git a/tests/test_agent.py b/tests/test_agent.py
@@ -3291,7 +3291,14 @@ def deferred_tool(x: int) -> int:  # pragma: no cover
         )
 
     def test_early_strategy_with_external_tool_call(self):
-        """Test that early strategy handles external tool calls correctly."""
+        """Test that early strategy handles external tool calls correctly.
+
+        Streaming and sync modes differ in how they choose the final result:
+        - Streaming: First tool call (in response order) that can produce a final result (output or deferred)
+        - Sync: First output tool call (if no output tool is called, all deferred tool calls become the final result)
+
+        See https://github.com/pydantic/pydantic-ai/issues/3636#issuecomment-3618800480 for details.
+        """
         tool_called: list[str] = []
 
         def return_model(_: list[ModelMessage], info: AgentInfo) -> ModelResponse:

diff --git a/tests/test_streaming.py b/tests/test_streaming.py
@@ -1110,9 +1110,11 @@ def deferred_tool(x: int) -> int:  # pragma: no cover
     async def test_early_strategy_with_external_tool_call(self):
         """Test that early strategy handles external tool calls correctly.
 
-        Streaming mode expects the first output tool call to be the final result,
-        and has different behavior from sync mode in this regard.
-        See https://github.com/pydantic/pydantic-ai/issues/3636 for details.
+        Streaming and sync modes differ in how they choose the final result:
+        - Streaming: First tool call (in response order) that can produce a final result (output or deferred)
+        - Sync: First output tool call (if no output tool is called, all deferred tool calls become the final result)
+
+        See https://github.com/pydantic/pydantic-ai/issues/3636#issuecomment-3618800480 for details.
         """
         tool_called: list[str] = []