Skip to content

Commit d26d526

Browse files
mochow13 and Motta Kin authored
Pass s3:// file URLs directly to API in BedrockConverseModel (#3663)
Co-authored-by: Motta Kin <[email protected]>
1 parent ff7a5aa commit d26d526

File tree

5 files changed

+160
-6
lines changed

5 files changed

+160
-6
lines changed

docs/input.md

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -112,7 +112,7 @@ Some model APIs do not support file URLs at all or for specific file types. In t
112112
- [`OpenAIResponsesModel`][pydantic_ai.models.openai.OpenAIResponsesModel]: All URLs
113113
- [`AnthropicModel`][pydantic_ai.models.anthropic.AnthropicModel]: `DocumentUrl` with media type `text/plain`
114114
- [`GoogleModel`][pydantic_ai.models.google.GoogleModel] using GLA (Gemini Developer API): All URLs except YouTube video URLs and files uploaded to the [Files API](https://ai.google.dev/gemini-api/docs/files).
115-
- [`BedrockConverseModel`][pydantic_ai.models.bedrock.BedrockConverseModel]: All URLs
115+
- [`BedrockConverseModel`][pydantic_ai.models.bedrock.BedrockConverseModel]: All URLs except S3 URIs (those starting with `s3://`), which are passed to the API directly.
116116

117117
If the model API supports file URLs but may not be able to download a file because of crawling or access restrictions, you can instruct Pydantic AI to download the file content and send that instead of the URL by enabling the `force_download` flag on the URL object. For example, [`GoogleModel`][pydantic_ai.models.google.GoogleModel] on Vertex AI limits YouTube video URLs to one URL per request.
118118

@@ -138,3 +138,5 @@ result = agent.run_sync(
138138
)
139139
print(result.output)
140140
```
141+
142+
`BedrockConverseModel` supports `s3://<bucket-name>/<object-key>` URIs, provided that the assumed role has the `s3:GetObject` permission. The `bucketOwner` query parameter is optional, but it must be specified if the bucket is not owned by the account making the request. For example: `s3://my-bucket/my-file.png?bucketOwner=123456789012`.

pydantic_ai_slim/pydantic_ai/models/__init__.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1172,6 +1172,8 @@ async def download_item(
11721172
"""
11731173
if item.url.startswith('gs://'):
11741174
raise UserError('Downloading from protocol "gs://" is not supported.')
1175+
elif item.url.startswith('s3://'):
1176+
raise UserError('Downloading from protocol "s3://" is not supported.')
11751177
elif isinstance(item, VideoUrl) and item.is_youtube:
11761178
raise UserError('Downloading YouTube videos is not supported.')
11771179

pydantic_ai_slim/pydantic_ai/models/bedrock.py

Lines changed: 17 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
from datetime import datetime
99
from itertools import count
1010
from typing import TYPE_CHECKING, Any, Generic, Literal, cast, overload
11+
from urllib.parse import parse_qs, urlparse
1112

1213
import anyio.to_thread
1314
from botocore.exceptions import ClientError
@@ -62,13 +63,15 @@
6263
ConverseStreamResponseTypeDef,
6364
CountTokensRequestTypeDef,
6465
DocumentBlockTypeDef,
66+
DocumentSourceTypeDef,
6567
GuardrailConfigurationTypeDef,
6668
ImageBlockTypeDef,
6769
InferenceConfigurationTypeDef,
6870
MessageUnionTypeDef,
6971
PerformanceConfigurationTypeDef,
7072
PromptVariableValuesTypeDef,
7173
ReasoningContentBlockOutputTypeDef,
74+
S3LocationTypeDef,
7275
SystemContentBlockTypeDef,
7376
ToolChoiceTypeDef,
7477
ToolConfigurationTypeDef,
@@ -733,20 +736,29 @@ async def _map_user_prompt( # noqa: C901
733736
else:
734737
raise NotImplementedError('Binary content is not supported yet.')
735738
elif isinstance(item, ImageUrl | DocumentUrl | VideoUrl):
736-
downloaded_item = await download_item(item, data_format='bytes', type_format='extension')
737-
format = downloaded_item['data_type']
739+
source: DocumentSourceTypeDef
740+
if item.url.startswith('s3://'):
741+
parsed = urlparse(item.url)
742+
s3_location: S3LocationTypeDef = {'uri': f'{parsed.scheme}://{parsed.netloc}{parsed.path}'}
743+
if bucket_owner := parse_qs(parsed.query).get('bucketOwner', [None])[0]:
744+
s3_location['bucketOwner'] = bucket_owner
745+
source = {'s3Location': s3_location}
746+
else:
747+
downloaded_item = await download_item(item, data_format='bytes', type_format='extension')
748+
source = {'bytes': downloaded_item['data']}
749+
738750
if item.kind == 'image-url':
739751
format = item.media_type.split('/')[1]
740752
assert format in ('jpeg', 'png', 'gif', 'webp'), f'Unsupported image format: {format}'
741-
image: ImageBlockTypeDef = {'format': format, 'source': {'bytes': downloaded_item['data']}}
753+
image: ImageBlockTypeDef = {'format': format, 'source': source}
742754
content.append({'image': image})
743755

744756
elif item.kind == 'document-url':
745757
name = f'Document {next(document_count)}'
746758
document: DocumentBlockTypeDef = {
747759
'name': name,
748760
'format': item.format,
749-
'source': {'bytes': downloaded_item['data']},
761+
'source': source,
750762
}
751763
content.append({'document': document})
752764

@@ -763,7 +775,7 @@ async def _map_user_prompt( # noqa: C901
763775
'wmv',
764776
'three_gp',
765777
), f'Unsupported video format: {format}'
766-
video: VideoBlockTypeDef = {'format': format, 'source': {'bytes': downloaded_item['data']}}
778+
video: VideoBlockTypeDef = {'format': format, 'source': source}
767779
content.append({'video': video})
768780
elif isinstance(item, AudioUrl): # pragma: no cover
769781
raise NotImplementedError('Audio is not supported yet.')

tests/models/test_bedrock.py

Lines changed: 122 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -739,6 +739,128 @@ async def test_text_document_url_input(allow_model_requests: None, bedrock_provi
739739
)
740740

741741

742+
async def test_s3_image_url_input(bedrock_provider: BedrockProvider):
743+
"""Test that s3:// image URLs are passed directly to Bedrock API without downloading."""
744+
model = BedrockConverseModel('us.amazon.nova-pro-v1:0', provider=bedrock_provider)
745+
image_url = ImageUrl(url='s3://my-bucket/images/test-image.jpg', media_type='image/jpeg')
746+
747+
req = [
748+
ModelRequest(parts=[UserPromptPart(content=['What is in this image?', image_url])]),
749+
]
750+
751+
_, bedrock_messages = await model._map_messages(req, ModelRequestParameters(), None) # type: ignore[reportPrivateUsage]
752+
753+
assert bedrock_messages == snapshot(
754+
[
755+
{
756+
'role': 'user',
757+
'content': [
758+
{'text': 'What is in this image?'},
759+
{
760+
'image': {
761+
'format': 'jpeg',
762+
'source': {'s3Location': {'uri': 's3://my-bucket/images/test-image.jpg'}},
763+
}
764+
},
765+
],
766+
}
767+
]
768+
)
769+
770+
771+
async def test_s3_video_url_input(bedrock_provider: BedrockProvider):
772+
"""Test that s3:// video URLs are passed directly to Bedrock API."""
773+
model = BedrockConverseModel('us.amazon.nova-pro-v1:0', provider=bedrock_provider)
774+
video_url = VideoUrl(url='s3://my-bucket/videos/test-video.mp4', media_type='video/mp4')
775+
776+
req = [
777+
ModelRequest(parts=[UserPromptPart(content=['Describe this video', video_url])]),
778+
]
779+
780+
_, bedrock_messages = await model._map_messages(req, ModelRequestParameters(), None) # type: ignore[reportPrivateUsage]
781+
782+
assert bedrock_messages == snapshot(
783+
[
784+
{
785+
'role': 'user',
786+
'content': [
787+
{'text': 'Describe this video'},
788+
{
789+
'video': {
790+
'format': 'mp4',
791+
'source': {'s3Location': {'uri': 's3://my-bucket/videos/test-video.mp4'}},
792+
}
793+
},
794+
],
795+
}
796+
]
797+
)
798+
799+
800+
async def test_s3_document_url_input(bedrock_provider: BedrockProvider):
801+
"""Test that s3:// document URLs are passed directly to Bedrock API."""
802+
model = BedrockConverseModel('anthropic.claude-v2', provider=bedrock_provider)
803+
document_url = DocumentUrl(url='s3://my-bucket/documents/test-doc.pdf', media_type='application/pdf')
804+
805+
req = [
806+
ModelRequest(parts=[UserPromptPart(content=['What is the main content on this document?', document_url])]),
807+
]
808+
809+
_, bedrock_messages = await model._map_messages(req, ModelRequestParameters(), None) # type: ignore[reportPrivateUsage]
810+
811+
assert bedrock_messages == snapshot(
812+
[
813+
{
814+
'role': 'user',
815+
'content': [
816+
{'text': 'What is the main content on this document?'},
817+
{
818+
'document': {
819+
'format': 'pdf',
820+
'name': 'Document 1',
821+
'source': {'s3Location': {'uri': 's3://my-bucket/documents/test-doc.pdf'}},
822+
}
823+
},
824+
],
825+
}
826+
]
827+
)
828+
829+
830+
async def test_s3_url_with_bucket_owner(bedrock_provider: BedrockProvider):
831+
"""Test that s3:// URLs with bucketOwner parameter are parsed correctly."""
832+
model = BedrockConverseModel('us.amazon.nova-pro-v1:0', provider=bedrock_provider)
833+
image_url = ImageUrl(url='s3://my-bucket/images/test-image.jpg?bucketOwner=123456789012', media_type='image/jpeg')
834+
835+
req = [
836+
ModelRequest(parts=[UserPromptPart(content=['What is in this image?', image_url])]),
837+
]
838+
839+
_, bedrock_messages = await model._map_messages(req, ModelRequestParameters(), None) # type: ignore[reportPrivateUsage]
840+
841+
assert bedrock_messages == snapshot(
842+
[
843+
{
844+
'role': 'user',
845+
'content': [
846+
{'text': 'What is in this image?'},
847+
{
848+
'image': {
849+
'format': 'jpeg',
850+
'source': {
851+
's3Location': {
852+
'uri': 's3://my-bucket/images/test-image.jpg',
853+
'bucketOwner': '123456789012',
854+
}
855+
},
856+
}
857+
},
858+
],
859+
}
860+
]
861+
)
862+
863+
742864
@pytest.mark.vcr()
743865
async def test_text_as_binary_content_input(allow_model_requests: None, bedrock_provider: BedrockProvider):
744866
m = BedrockConverseModel('us.amazon.nova-pro-v1:0', provider=bedrock_provider)

tests/models/test_download_item.py

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,22 @@ async def test_download_item_raises_user_error_with_gs_uri(
2424
_ = await download_item(url, data_format='bytes')
2525

2626

27+
@pytest.mark.parametrize(
28+
'url',
29+
(
30+
pytest.param(AudioUrl(url='s3://my-bucket/audio.wav')),
31+
pytest.param(DocumentUrl(url='s3://my-bucket/document.pdf')),
32+
pytest.param(ImageUrl(url='s3://my-bucket/image.png')),
33+
pytest.param(VideoUrl(url='s3://my-bucket/video.mp4')),
34+
),
35+
)
36+
async def test_download_item_raises_user_error_with_s3_uri(
37+
url: AudioUrl | DocumentUrl | ImageUrl | VideoUrl,
38+
) -> None:
39+
with pytest.raises(UserError, match='Downloading from protocol "s3://" is not supported.'):
40+
_ = await download_item(url, data_format='bytes')
41+
42+
2743
async def test_download_item_raises_user_error_with_youtube_url() -> None:
2844
with pytest.raises(UserError, match='Downloading YouTube videos is not supported.'):
2945
_ = await download_item(VideoUrl(url='https://youtu.be/lCdaVNyHtjU'), data_format='bytes')

0 commit comments

Comments
 (0)