diff --git a/pkg/model/provider/anthropic/beta_client.go b/pkg/model/provider/anthropic/beta_client.go index 86815418b..fa412817f 100644 --- a/pkg/model/provider/anthropic/beta_client.go +++ b/pkg/model/provider/anthropic/beta_client.go @@ -151,112 +151,33 @@ func (c *Client) createBetaStream( // validateAnthropicSequencingBeta performs the same validation as standard API but for Beta payloads func validateAnthropicSequencingBeta(msgs []anthropic.BetaMessageParam) error { - for i := range msgs { - m, ok := marshalToMapBeta(msgs[i]) - if !ok || m["role"] != "assistant" { - continue - } - - toolUseIDs := collectToolUseIDs(contentArrayBeta(m)) - if len(toolUseIDs) == 0 { - continue - } - - if i+1 >= len(msgs) { - slog.Warn("Anthropic (beta) sequencing invalid: assistant tool_use present but no next user tool_result message", "assistant_index", i) - return errors.New("assistant tool_use present but no subsequent user message with tool_result blocks (beta)") - } - - next, ok := marshalToMapBeta(msgs[i+1]) - if !ok || next["role"] != "user" { - slog.Warn("Anthropic (beta) sequencing invalid: next message after assistant tool_use is not user", "assistant_index", i, "next_role", next["role"]) - return errors.New("assistant tool_use must be followed by a user message containing corresponding tool_result blocks (beta)") - } - - toolResultIDs := collectToolResultIDs(contentArrayBeta(next)) - missing := differenceIDs(toolUseIDs, toolResultIDs) - if len(missing) > 0 { - slog.Warn("Anthropic (beta) sequencing invalid: missing tool_result for tool_use id in next user message", "assistant_index", i, "tool_use_id", missing[0], "missing_count", len(missing)) - return fmt.Errorf("missing tool_result for tool_use id %s in the next user message (beta)", missing[0]) - } - } - return nil + return validateSequencing(msgs) } // repairAnthropicSequencingBeta inserts a synthetic user message with tool_result blocks // for any assistant tool_use blocks that don't have corresponding tool_result blocks // in the immediate next user message. func repairAnthropicSequencingBeta(msgs []anthropic.BetaMessageParam) []anthropic.BetaMessageParam { - if len(msgs) == 0 { - return msgs - } - repaired := make([]anthropic.BetaMessageParam, 0, len(msgs)+2) - for i := range msgs { - m, ok := marshalToMapBeta(msgs[i]) - if !ok || m["role"] != "assistant" { - repaired = append(repaired, msgs[i]) - continue - } - - toolUseIDs := collectToolUseIDs(contentArrayBeta(m)) - if len(toolUseIDs) == 0 { - repaired = append(repaired, msgs[i]) - continue - } - - // Check if the next message is a user message with tool_results - needsSyntheticMessage := true - if i+1 < len(msgs) { - if next, ok := marshalToMapBeta(msgs[i+1]); ok && next["role"] == "user" { - toolResultIDs := collectToolResultIDs(contentArrayBeta(next)) - // Remove tool_use IDs that have corresponding tool_results - for id := range toolResultIDs { - delete(toolUseIDs, id) - } - // If all tool_use IDs have results, no synthetic message needed - if len(toolUseIDs) == 0 { - needsSyntheticMessage = false - } - } - } - - // Append the assistant message first - repaired = append(repaired, msgs[i]) - - // If there are missing tool_results, insert a synthetic user message immediately after - if needsSyntheticMessage && len(toolUseIDs) > 0 { - slog.Debug("Inserting synthetic user message for missing tool_results", - "assistant_index", i, - "missing_count", len(toolUseIDs)) - - blocks := make([]anthropic.BetaContentBlockParamUnion, 0, len(toolUseIDs)) - for id := range toolUseIDs { - slog.Debug("Creating synthetic tool_result", "tool_use_id", id) - blocks = append(blocks, anthropic.BetaContentBlockParamUnion{ - OfToolResult: &anthropic.BetaToolResultBlockParam{ - ToolUseID: id, - Content: []anthropic.BetaToolResultBlockParamContentUnion{ - {OfText: &anthropic.BetaTextBlockParam{Text: "(tool execution failed)"}}, - }, + return repairSequencing(msgs, func(toolUseIDs map[string]struct{}) anthropic.BetaMessageParam { + blocks := make([]anthropic.BetaContentBlockParamUnion, 0, len(toolUseIDs)) + for id := range toolUseIDs { + slog.Debug("Creating synthetic tool_result", "tool_use_id", id) + blocks = append(blocks, anthropic.BetaContentBlockParamUnion{ + OfToolResult: &anthropic.BetaToolResultBlockParam{ + ToolUseID: id, + Content: []anthropic.BetaToolResultBlockParamContentUnion{ + {OfText: &anthropic.BetaTextBlockParam{Text: "(tool execution failed)"}}, }, - }) - } - repaired = append(repaired, anthropic.BetaMessageParam{ - Role: anthropic.BetaMessageParamRoleUser, - Content: blocks, + }, }) } - } - return repaired + return anthropic.BetaMessageParam{ + Role: anthropic.BetaMessageParamRoleUser, + Content: blocks, + } + }) } -// marshalToMapBeta is an alias for marshalToMap - shared with standard API. -// Kept as separate function for clarity in Beta-specific code paths. -var marshalToMapBeta = marshalToMap - -// contentArrayBeta is an alias for contentArray - shared with standard API. -var contentArrayBeta = contentArray - // countAnthropicTokensBeta calls Anthropic's Count Tokens API for the provided Beta API payload // and returns the number of input tokens. func countAnthropicTokensBeta( diff --git a/pkg/model/provider/anthropic/beta_converter.go b/pkg/model/provider/anthropic/beta_converter.go index 772b21882..84ac4bed5 100644 --- a/pkg/model/provider/anthropic/beta_converter.go +++ b/pkg/model/provider/anthropic/beta_converter.go @@ -207,23 +207,9 @@ func (c *Client) convertBetaUserMultiContent(ctx context.Context, parts []chat.M if strings.HasPrefix(part.ImageURL.URL, "data:") { urlParts := strings.SplitN(part.ImageURL.URL, ",", 2) if len(urlParts) == 2 { - mediaTypePart := urlParts[0] + mediaType := extractMediaType(urlParts[0]) base64Data := urlParts[1] - var mediaType string - switch { - case strings.Contains(mediaTypePart, "image/jpeg"): - mediaType = "image/jpeg" - case strings.Contains(mediaTypePart, "image/png"): - mediaType = "image/png" - case strings.Contains(mediaTypePart, "image/gif"): - mediaType = "image/gif" - case strings.Contains(mediaTypePart, "image/webp"): - mediaType = "image/webp" - default: - mediaType = "image/jpeg" - } - contentBlocks = append(contentBlocks, anthropic.BetaContentBlockParamUnion{ OfImage: &anthropic.BetaImageBlockParam{ Source: anthropic.BetaImageBlockParamSourceUnion{ diff --git a/pkg/model/provider/anthropic/beta_converter_test.go b/pkg/model/provider/anthropic/beta_converter_test.go index 21dff3b5c..5a8474112 100644 --- a/pkg/model/provider/anthropic/beta_converter_test.go +++ b/pkg/model/provider/anthropic/beta_converter_test.go @@ -65,18 +65,18 @@ func TestConvertBetaMessages_MergesConsecutiveToolMessages(t *testing.T) { require.Len(t, betaMessages, 4, "Should have 4 messages after conversion") - msg0Map, _ := marshalToMapBeta(betaMessages[0]) - msg1Map, _ := marshalToMapBeta(betaMessages[1]) - msg2Map, _ := marshalToMapBeta(betaMessages[2]) - msg3Map, _ := marshalToMapBeta(betaMessages[3]) + msg0Map, _ := marshalToMap(betaMessages[0]) + msg1Map, _ := marshalToMap(betaMessages[1]) + msg2Map, _ := marshalToMap(betaMessages[2]) + msg3Map, _ := marshalToMap(betaMessages[3]) assert.Equal(t, "user", msg0Map["role"]) assert.Equal(t, "assistant", msg1Map["role"]) assert.Equal(t, "user", msg2Map["role"]) assert.Equal(t, "assistant", msg3Map["role"]) - userMsg2Map, ok := marshalToMapBeta(betaMessages[2]) + userMsg2Map, ok := marshalToMap(betaMessages[2]) require.True(t, ok) - content := contentArrayBeta(userMsg2Map) + content := contentArray(userMsg2Map) require.Len(t, content, 2, "User message should have 2 tool_result blocks") toolResultIDs := collectToolResultIDs(content) diff --git a/pkg/model/provider/anthropic/client.go b/pkg/model/provider/anthropic/client.go index f856d17e0..8bafe1cec 100644 --- a/pkg/model/provider/anthropic/client.go +++ b/pkg/model/provider/anthropic/client.go @@ -583,23 +583,9 @@ func (c *Client) convertUserMultiContent(_ context.Context, parts []chat.Message if strings.HasPrefix(part.ImageURL.URL, "data:") { urlParts := strings.SplitN(part.ImageURL.URL, ",", 2) if len(urlParts) == 2 { - mediaTypePart := urlParts[0] + mediaType := extractMediaType(urlParts[0]) base64Data := urlParts[1] - var mediaType string - switch { - case strings.Contains(mediaTypePart, "image/jpeg"): - mediaType = "image/jpeg" - case strings.Contains(mediaTypePart, "image/png"): - mediaType = "image/png" - case strings.Contains(mediaTypePart, "image/gif"): - mediaType = "image/gif" - case strings.Contains(mediaTypePart, "image/webp"): - mediaType = "image/webp" - default: - mediaType = "image/jpeg" - } - contentBlocks = append(contentBlocks, anthropic.NewImageBlock(anthropic.Base64ImageSourceParam{ Data: base64Data, MediaType: anthropic.Base64ImageSourceMediaType(mediaType), @@ -748,7 +734,52 @@ func (c *Client) FileManager() *FileManager { // one or more tool_use blocks, the immediately following message is a user message // that includes tool_result blocks for all those tool_use IDs (grouped into that single message). func validateAnthropicSequencing(msgs []anthropic.MessageParam) error { - // Marshal-based inspection to avoid depending on SDK internals of union types + return validateSequencing(msgs) +} + +// repairAnthropicSequencing inserts a synthetic user message containing tool_result blocks +// immediately after any assistant message that has tool_use blocks missing a corresponding +// tool_result in the next user message. This is a best-effort local repair to keep the +// conversation valid for Anthropic while preserving original messages, to keep the agent loop running. +func repairAnthropicSequencing(msgs []anthropic.MessageParam) []anthropic.MessageParam { + return repairSequencing(msgs, func(toolUseIDs map[string]struct{}) anthropic.MessageParam { + blocks := make([]anthropic.ContentBlockParamUnion, 0, len(toolUseIDs)) + for id := range toolUseIDs { + blocks = append(blocks, anthropic.NewToolResultBlock(id, "(tool execution failed)", false)) + } + return anthropic.NewUserMessage(blocks...) + }) +} + +// marshalToMap is a helper that converts any value to a map[string]any via JSON marshaling. +// This is used to inspect SDK union types without depending on their internal structure. +// It's shared by both standard and Beta API validation/repair code. +func marshalToMap(v any) (map[string]any, bool) { + b, err := json.Marshal(v) + if err != nil { + return nil, false + } + var m map[string]any + if json.Unmarshal(b, &m) != nil { + return nil, false + } + return m, true +} + +// contentArray extracts the content array from a marshaled message map. +// Used by both standard and Beta API validation/repair code. +func contentArray(m map[string]any) []any { + if a, ok := m["content"].([]any); ok { + return a + } + return nil +} + +// validateSequencing generically validates that every assistant message with tool_use blocks +// is immediately followed by a user message with corresponding tool_result blocks. +// It works on both standard (MessageParam) and Beta (BetaMessageParam) types by +// marshaling to map[string]any for inspection. +func validateSequencing[T any](msgs []T) error { for i := range msgs { m, ok := marshalToMap(msgs[i]) if !ok || m["role"] != "assistant" { @@ -781,15 +812,14 @@ func validateAnthropicSequencing(msgs []anthropic.MessageParam) error { return nil } -// repairAnthropicSequencing inserts a synthetic user message containing tool_result blocks -// immediately after any assistant message that has tool_use blocks missing a corresponding -// tool_result in the next user message. This is a best-effort local repair to keep the -// conversation valid for Anthropic while preserving original messages, to keep the agent loop running. -func repairAnthropicSequencing(msgs []anthropic.MessageParam) []anthropic.MessageParam { +// repairSequencing generically inserts a synthetic user message after any assistant +// tool_use message that is missing corresponding tool_result blocks. The makeSynthetic +// callback builds the appropriate user message type for the remaining tool_use IDs. +func repairSequencing[T any](msgs []T, makeSynthetic func(toolUseIDs map[string]struct{}) T) []T { if len(msgs) == 0 { return msgs } - repaired := make([]anthropic.MessageParam, 0, len(msgs)+2) + repaired := make([]T, 0, len(msgs)+2) for i := range msgs { repaired = append(repaired, msgs[i]) @@ -814,40 +844,15 @@ func repairAnthropicSequencing(msgs []anthropic.MessageParam) []anthropic.Messag } if len(toolUseIDs) > 0 { - blocks := make([]anthropic.ContentBlockParamUnion, 0, len(toolUseIDs)) - for id := range toolUseIDs { - blocks = append(blocks, anthropic.NewToolResultBlock(id, "(tool execution failed)", false)) - } - repaired = append(repaired, anthropic.NewUserMessage(blocks...)) + slog.Debug("Inserting synthetic user message for missing tool_results", + "assistant_index", i, + "missing_count", len(toolUseIDs)) + repaired = append(repaired, makeSynthetic(toolUseIDs)) } } return repaired } -// marshalToMap is a helper that converts any value to a map[string]any via JSON marshaling. -// This is used to inspect SDK union types without depending on their internal structure. -// It's shared by both standard and Beta API validation/repair code. -func marshalToMap(v any) (map[string]any, bool) { - b, err := json.Marshal(v) - if err != nil { - return nil, false - } - var m map[string]any - if json.Unmarshal(b, &m) != nil { - return nil, false - } - return m, true -} - -// contentArray extracts the content array from a marshaled message map. -// Used by both standard and Beta API validation/repair code. -func contentArray(m map[string]any) []any { - if a, ok := m["content"].([]any); ok { - return a - } - return nil -} - func collectToolUseIDs(content []any) map[string]struct{} { ids := make(map[string]struct{}) for _, c := range content { diff --git a/pkg/model/provider/anthropic/files.go b/pkg/model/provider/anthropic/files.go index 933139390..98417abd4 100644 --- a/pkg/model/provider/anthropic/files.go +++ b/pkg/model/provider/anthropic/files.go @@ -368,24 +368,6 @@ func (fm *FileManager) CachedCount() int { return len(fm.uploads) } -// hashFile computes the SHA256 hash of a file's contents. -// Note: This function is only used for testing and legacy code paths. -// The main GetOrUpload path computes the hash inline to avoid opening the file twice. -func hashFile(filePath string) (string, error) { - file, err := os.Open(filePath) - if err != nil { - return "", err - } - defer file.Close() - - h := sha256.New() - if _, err := io.Copy(h, file); err != nil { - return "", err - } - - return hex.EncodeToString(h.Sum(nil)), nil -} - // IsImageMime returns true if the MIME type is an image type supported by Anthropic. func IsImageMime(mimeType string) bool { switch mimeType { diff --git a/pkg/model/provider/anthropic/files_test.go b/pkg/model/provider/anthropic/files_test.go index 6d9fe186b..4fd3ac7fc 100644 --- a/pkg/model/provider/anthropic/files_test.go +++ b/pkg/model/provider/anthropic/files_test.go @@ -1,6 +1,9 @@ package anthropic import ( + "crypto/sha256" + "encoding/hex" + "io" "os" "path/filepath" "testing" @@ -12,6 +15,22 @@ import ( "github.com/docker/docker-agent/pkg/chat" ) +// hashFile computes the SHA256 hash of a file's contents. +func hashFile(filePath string) (string, error) { + file, err := os.Open(filePath) + if err != nil { + return "", err + } + defer file.Close() + + h := sha256.New() + if _, err := io.Copy(h, file); err != nil { + return "", err + } + + return hex.EncodeToString(h.Sum(nil)), nil +} + func TestDetectMimeType(t *testing.T) { tests := []struct { path string