diff --git a/src/google/adk/models/anthropic_llm.py b/src/google/adk/models/anthropic_llm.py
index f965a9906d..21ca25cf2b 100644
--- a/src/google/adk/models/anthropic_llm.py
+++ b/src/google/adk/models/anthropic_llm.py
@@ -76,6 +76,15 @@ def _is_image_part(part: types.Part) -> bool:
   )
 
 
+def _is_pdf_part(part: types.Part) -> bool:
+  """Check if the part contains PDF data."""
+  return (
+      part.inline_data
+      and part.inline_data.mime_type
+      and part.inline_data.mime_type == "application/pdf"
+  )
+
+
 def part_to_message_block(
     part: types.Part,
 ) -> Union[
@@ -83,6 +92,7 @@ def part_to_message_block(
     anthropic_types.ImageBlockParam,
     anthropic_types.ToolUseBlockParam,
     anthropic_types.ToolResultBlockParam,
+    anthropic_types.DocumentBlockParam,  # For PDF document blocks
 ]:
   if part.text:
     return anthropic_types.TextBlockParam(text=part.text, type="text")
@@ -134,6 +144,18 @@ def part_to_message_block(
             type="base64", media_type=part.inline_data.mime_type, data=data
         ),
     )
+  elif _is_pdf_part(part):
+    # Handle PDF documents - Anthropic supports PDFs as document blocks
+    # PDFs are encoded as base64 and sent with document type
+    data = base64.b64encode(part.inline_data.data).decode()
+    return anthropic_types.DocumentBlockParam(
+        type="document",
+        source={
+            "type": "base64",
+            "media_type": part.inline_data.mime_type,
+            "data": data,
+        },
+    )
   elif part.executable_code:
     return anthropic_types.TextBlockParam(
         type="text",
diff --git a/tests/unittests/models/test_anthropic_llm.py b/tests/unittests/models/test_anthropic_llm.py
index 13d615bc32..8c93b71bb7 100644
--- a/tests/unittests/models/test_anthropic_llm.py
+++ b/tests/unittests/models/test_anthropic_llm.py
@@ -465,6 +465,39 @@ def test_part_to_message_block_with_multiple_content_items():
   assert result["content"] == "First part\nSecond part"
 
 
+def test_part_to_message_block_with_pdf():
+  """Test that part_to_message_block handles PDF documents."""
+  import base64
+
+  from anthropic import types as anthropic_types
+  from google.adk.models.anthropic_llm import part_to_message_block
+
+  # Create a PDF part with inline data
+  pdf_data = (
+      b"%PDF-1.4\n1 0 obj\n<<\n/Type /Catalog\n>>\nendobj\nxref\n0"
+      b" 1\ntrailer\n<<\n/Root 1 0 R\n>>\n%%EOF"
+  )
+  pdf_part = types.Part(
+      inline_data=types.Blob(
+          mime_type="application/pdf",
+          data=pdf_data,
+      )
+  )
+
+  result = part_to_message_block(pdf_part)
+
+  # PDF should be returned as DocumentBlockParam (TypedDict, which is a dict)
+  assert isinstance(result, dict)
+  # Verify it matches DocumentBlockParam structure
+  assert result["type"] == "document"
+  assert "source" in result
+  assert result["source"]["type"] == "base64"
+  assert result["source"]["media_type"] == "application/pdf"
+  # Verify the data is base64 encoded and can be decoded back
+  decoded_data = base64.b64decode(result["source"]["data"])
+  assert decoded_data == pdf_data
+
+
 content_to_message_param_test_cases = [
     (
         "user_role_with_text_and_image",