diff --git a/docs/builtin-tools.md b/docs/builtin-tools.md index 85acb7dd37..881f9d3ce2 100644 --- a/docs/builtin-tools.md +++ b/docs/builtin-tools.md @@ -353,6 +353,23 @@ assert isinstance(result.output, BinaryImage) _(This example is complete, it can be run "as is")_ +To control the image resolution with Google image generation models (starting with Gemini 3 Pro Image), use the `size` parameter: + +```py {title="image_generation_google_resolution.py"} +from pydantic_ai import Agent, BinaryImage, ImageGenerationTool + +agent = Agent( + 'google-gla:gemini-3-pro-image-preview', + builtin_tools=[ImageGenerationTool(aspect_ratio='16:9', size='4K')], + output_type=BinaryImage, +) + +result = agent.run_sync('Generate a high-resolution wide landscape illustration of an axolotl.') +assert isinstance(result.output, BinaryImage) +``` + +_(This example is complete, it can be run "as is")_ + For more details, check the [API documentation][pydantic_ai.builtin_tools.ImageGenerationTool]. #### Provider Support @@ -366,8 +383,10 @@ For more details, check the [API documentation][pydantic_ai.builtin_tools.ImageG | `output_format` | ✅ | ❌ | | `partial_images` | ✅ | ❌ | | `quality` | ✅ | ❌ | -| `size` | ✅ | ❌ | -| `aspect_ratio` | ✅ (1:1, 2:3, 3:2) | ✅ | +| `size` | ✅ (auto (default), 1024x1024, 1024x1536, 1536x1024) | ✅ (1K (default), 2K, 4K) | +| `aspect_ratio` | ✅ (1:1, 2:3, 3:2) | ✅ (1:1, 2:3, 3:2, 3:4, 4:3, 4:5, 5:4, 9:16, 16:9, 21:9) | + +**Note:** For OpenAI, `size='auto'` lets the model select the image size based on the prompt.
## Web Fetch Tool diff --git a/pydantic_ai_slim/pydantic_ai/builtin_tools.py b/pydantic_ai_slim/pydantic_ai/builtin_tools.py index 76aeb5dccc..21a1ae0dc5 100644 --- a/pydantic_ai_slim/pydantic_ai/builtin_tools.py +++ b/pydantic_ai_slim/pydantic_ai/builtin_tools.py @@ -326,12 +326,11 @@ class ImageGenerationTool(AbstractBuiltinTool): * OpenAI Responses """ - size: Literal['1024x1024', '1024x1536', '1536x1024', 'auto'] = 'auto' + size: Literal['auto', '1024x1024', '1024x1536', '1536x1024', '1K', '2K', '4K'] | None = None """The size of the generated image. - Supported by: - - * OpenAI Responses + * OpenAI Responses: 'auto' (default: model selects the size based on the prompt), '1024x1024', '1024x1536', '1536x1024' + * Google (Gemini 3 Pro Image and later): '1K' (default), '2K', '4K' """ aspect_ratio: ImageAspectRatio | None = None diff --git a/pydantic_ai_slim/pydantic_ai/models/google.py b/pydantic_ai_slim/pydantic_ai/models/google.py index 847f36e99b..485763f9c1 100644 --- a/pydantic_ai_slim/pydantic_ai/models/google.py +++ b/pydantic_ai_slim/pydantic_ai/models/google.py @@ -137,6 +137,9 @@ GoogleFinishReason.NO_IMAGE: 'error', } +_GOOGLE_IMAGE_SIZE = Literal['1K', '2K', '4K'] +_GOOGLE_IMAGE_SIZES: tuple[_GOOGLE_IMAGE_SIZE, ...] = _utils.get_args(_GOOGLE_IMAGE_SIZE) + class GoogleModelSettings(ModelSettings, total=False): """Settings used for a Gemini model request.""" @@ -367,8 +370,17 @@ def _get_tools( raise UserError( "`ImageGenerationTool` is not supported by this model. Use a model with 'image' in the name instead." ) - if tool.aspect_ratio: - image_config = ImageConfigDict(aspect_ratio=tool.aspect_ratio) + + image_config = ImageConfigDict() + if tool.aspect_ratio is not None: + image_config['aspect_ratio'] = tool.aspect_ratio + if tool.size is not None: + if tool.size not in _GOOGLE_IMAGE_SIZES: + raise UserError( + f'Google image generation only supports `size` values: {_GOOGLE_IMAGE_SIZES}. ' + f'Got: {tool.size!r}. Omit `size` to use the default (1K).' 
+ ) + image_config['image_size'] = tool.size else: # pragma: no cover raise UserError( f'`{tool.__class__.__name__}` is not supported by `GoogleModel`. If it should be, please file an issue.' diff --git a/pydantic_ai_slim/pydantic_ai/models/openai.py b/pydantic_ai_slim/pydantic_ai/models/openai.py index 4bef917b4d..2816b723de 100644 --- a/pydantic_ai_slim/pydantic_ai/models/openai.py +++ b/pydantic_ai_slim/pydantic_ai/models/openai.py @@ -159,24 +159,33 @@ '3:2': '1536x1024', } +_OPENAI_IMAGE_SIZE = Literal['auto', '1024x1024', '1024x1536', '1536x1024'] +_OPENAI_IMAGE_SIZES: tuple[_OPENAI_IMAGE_SIZE, ...] = _utils.get_args(_OPENAI_IMAGE_SIZE) + def _resolve_openai_image_generation_size( tool: ImageGenerationTool, -) -> Literal['auto', '1024x1024', '1024x1536', '1536x1024']: +) -> _OPENAI_IMAGE_SIZE: """Map `ImageGenerationTool.aspect_ratio` to an OpenAI size string when provided.""" aspect_ratio = tool.aspect_ratio if aspect_ratio is None: + if tool.size is None: + return 'auto' # default + if tool.size not in _OPENAI_IMAGE_SIZES: + raise UserError( + f'OpenAI image generation only supports `size` values: {_OPENAI_IMAGE_SIZES}. ' + f'Got: {tool.size}. Omit `size` to use the default (auto).' + ) return tool.size mapped_size = _OPENAI_ASPECT_RATIO_TO_SIZE.get(aspect_ratio) if mapped_size is None: supported = ', '.join(_OPENAI_ASPECT_RATIO_TO_SIZE) raise UserError( - f'OpenAI image generation only supports `aspect_ratio` values: {supported}. ' - 'Specify one of those values or omit `aspect_ratio`.' + f'OpenAI image generation only supports `aspect_ratio` values: {supported}. Specify one of those values or omit `aspect_ratio`.' ) - - if tool.size not in ('auto', mapped_size): + # When aspect_ratio is set, size must be None, 'auto', or match the mapped size + if tool.size not in (None, 'auto', mapped_size): raise UserError( '`ImageGenerationTool` cannot combine `aspect_ratio` with a conflicting `size` when using OpenAI.' 
) diff --git a/tests/models/test_google.py b/tests/models/test_google.py index e87a51214f..752c6c7ac3 100644 --- a/tests/models/test_google.py +++ b/tests/models/test_google.py @@ -3388,7 +3388,7 @@ async def test_google_image_generation_with_text(allow_model_requests: None, goo async def test_google_image_or_text_output(allow_model_requests: None, google_provider: GoogleProvider): m = GoogleModel('gemini-2.5-flash-image', provider=google_provider) # ImageGenerationTool is listed here to indicate just that it doesn't cause any issues, even though it's not necessary with an image model. - agent = Agent(m, output_type=str | BinaryImage, builtin_tools=[ImageGenerationTool()]) + agent = Agent(m, output_type=str | BinaryImage, builtin_tools=[ImageGenerationTool(size='1K')]) result = await agent.run('Tell me a two-sentence story about an axolotl, no image please.') assert result.output == snapshot( @@ -3653,6 +3653,44 @@ async def test_google_image_generation_tool_aspect_ratio(google_provider: Google assert image_config == {'aspect_ratio': '16:9'} +async def test_google_image_generation_resolution(google_provider: GoogleProvider) -> None: + """Test that resolution parameter from ImageGenerationTool is added to image_config.""" + model = GoogleModel('gemini-3-pro-image-preview', provider=google_provider) + params = ModelRequestParameters(builtin_tools=[ImageGenerationTool(size='2K')]) + + tools, image_config = model._get_tools(params) # pyright: ignore[reportPrivateUsage] + assert tools is None + assert image_config == {'image_size': '2K'} + + +async def test_google_image_generation_resolution_with_aspect_ratio(google_provider: GoogleProvider) -> None: + """Test that resolution and aspect_ratio from ImageGenerationTool work together.""" + model = GoogleModel('gemini-3-pro-image-preview', provider=google_provider) + params = ModelRequestParameters(builtin_tools=[ImageGenerationTool(aspect_ratio='16:9', size='4K')]) + + tools, image_config = model._get_tools(params) # 
pyright: ignore[reportPrivateUsage] + assert tools is None + assert image_config == {'aspect_ratio': '16:9', 'image_size': '4K'} + + +async def test_google_image_generation_unsupported_size_raises_error(google_provider: GoogleProvider) -> None: + """Test that unsupported size values raise an error.""" + model = GoogleModel('gemini-3-pro-image-preview', provider=google_provider) + params = ModelRequestParameters(builtin_tools=[ImageGenerationTool(size='1024x1024')]) + + with pytest.raises(UserError, match='Google image generation only supports `size` values'): + model._get_tools(params) # pyright: ignore[reportPrivateUsage] + + +async def test_google_image_generation_auto_size_raises_error(google_provider: GoogleProvider) -> None: + """Test that 'auto' size raises an error for Google since it doesn't support intelligent size selection.""" + model = GoogleModel('gemini-3-pro-image-preview', provider=google_provider) + params = ModelRequestParameters(builtin_tools=[ImageGenerationTool(size='auto')]) + + with pytest.raises(UserError, match='Google image generation only supports `size` values'): + model._get_tools(params) # pyright: ignore[reportPrivateUsage] + + async def test_google_vertexai_image_generation(allow_model_requests: None, vertex_provider: GoogleProvider): model = GoogleModel('gemini-2.5-flash-image', provider=vertex_provider) diff --git a/tests/models/test_openai.py b/tests/models/test_openai.py index 40cd5bdb22..6181be2e88 100644 --- a/tests/models/test_openai.py +++ b/tests/models/test_openai.py @@ -37,7 +37,7 @@ UserPromptPart, ) from pydantic_ai._json_schema import InlineDefsJsonSchemaTransformer -from pydantic_ai.builtin_tools import WebSearchTool +from pydantic_ai.builtin_tools import ImageGenerationTool, WebSearchTool from pydantic_ai.models import ModelRequestParameters from pydantic_ai.output import NativeOutput, PromptedOutput, TextOutput, ToolOutput from pydantic_ai.profiles.openai import OpenAIModelProfile, openai_model_profile @@ -79,6 +79,7 
@@ OpenAIResponsesModel, OpenAIResponsesModelSettings, OpenAISystemPromptRole, + _resolve_openai_image_generation_size, # pyright: ignore[reportPrivateUsage] ) from pydantic_ai.profiles.openai import OpenAIJsonSchemaTransformer from pydantic_ai.providers.cerebras import CerebrasProvider @@ -103,6 +104,43 @@ def test_init(): assert m.model_name == 'gpt-4o' +@pytest.mark.parametrize( + 'aspect_ratio,size,expected', + [ + # aspect_ratio is None, various sizes + (None, None, 'auto'), + (None, 'auto', 'auto'), + (None, '1024x1024', '1024x1024'), + (None, '1024x1536', '1024x1536'), + (None, '1536x1024', '1536x1024'), + # Valid aspect_ratios with no size + ('1:1', None, '1024x1024'), + ('2:3', None, '1024x1536'), + ('3:2', None, '1536x1024'), + # Valid aspect_ratios with compatible sizes + ('1:1', 'auto', '1024x1024'), + ('1:1', '1024x1024', '1024x1024'), + ('2:3', '1024x1536', '1024x1536'), + ('3:2', '1536x1024', '1536x1024'), + ], +) +def test_openai_image_generation_size_valid_combinations( + aspect_ratio: Literal['1:1', '2:3', '3:2'] | None, + size: Literal['auto', '1024x1024', '1024x1536', '1536x1024'] | None, + expected: Literal['auto', '1024x1024', '1024x1536', '1536x1024'], +) -> None: + """Test valid combinations of aspect_ratio and size for OpenAI image generation.""" + tool = ImageGenerationTool(aspect_ratio=aspect_ratio, size=size) + assert _resolve_openai_image_generation_size(tool) == expected + + +def test_openai_image_generation_tool_aspect_ratio_invalid() -> None: + """Test that invalid aspect_ratio raises UserError.""" + tool = ImageGenerationTool(aspect_ratio='16:9') + with pytest.raises(UserError, match='OpenAI image generation only supports `aspect_ratio` values'): + _resolve_openai_image_generation_size(tool) + + async def test_request_simple_success(allow_model_requests: None): c = completion_message( ChatCompletionMessage(content='world', role='assistant'), diff --git a/tests/models/test_openai_responses.py b/tests/models/test_openai_responses.py 
index af82923a09..1c7ec1f53c 100644 --- a/tests/models/test_openai_responses.py +++ b/tests/models/test_openai_responses.py @@ -163,6 +163,12 @@ def test_openai_responses_image_generation_tool_aspect_ratio_conflicts_with_size _resolve_openai_image_generation_size(tool) +def test_openai_responses_image_generation_tool_unsupported_size_raises_error() -> None: + tool = ImageGenerationTool(size='2K') + with pytest.raises(UserError, match='OpenAI image generation only supports `size` values'): + _resolve_openai_image_generation_size(tool) + + async def test_openai_responses_model_simple_response_with_tool_call(allow_model_requests: None, openai_api_key: str): model = OpenAIResponsesModel('gpt-4o', provider=OpenAIProvider(api_key=openai_api_key)) diff --git a/tests/test_examples.py b/tests/test_examples.py index 8ed0828250..bc887504d2 100644 --- a/tests/test_examples.py +++ b/tests/test_examples.py @@ -698,6 +698,12 @@ async def model_logic( # noqa: C901 FilePart(content=BinaryImage(data=b'fake', media_type='image/png', identifier='wide-axolotl-city')), ] ) + elif m.content == 'Generate a high-resolution wide landscape illustration of an axolotl.': + return ModelResponse( + parts=[ + FilePart(content=BinaryImage(data=b'fake', media_type='image/png', identifier='high-res-axolotl')), + ] + ) elif m.content == 'Generate a chart of y=x^2 for x=-5 to 5.': return ModelResponse( parts=[