fix: filter base64 image from screenshot result

Grant Burry · Grant Burry · commit a7e4b0d9066c · 2025-12-09T13:04:47.000-08:00
- Filter out base64 image data from the `screenshot` result to reduce context usage.
- Extract the saved screenshot's `.png` path from the text content and append a text block telling the reader to open that file with the "read" tool.
diff --git a/src/tools/browser-eval.ts b/src/tools/browser-eval.ts
@@ -149,6 +149,45 @@ type BrowserEvalArgs = {
   files?: string[]
 }
 
+/**
+ * Filter out base64 image data from the `screenshot` result to reduce context usage.
+ * If the first text block names the saved `.png` path, append a text block explaining how to open that file with the "read" tool.
+ */
+export function filterImageDataFromResult(result: unknown): unknown {
+  if (typeof result !== "object" || result === null) {
+    return result
+  }
+
+  const typedResult = result as { content?: { type: string; text?: string; data?: string }[] }
+
+  if (!Array.isArray(typedResult.content)) {
+    return result
+  }
+
+  const filteredContent = typedResult.content.filter((block) => block.type !== "image")
+
+  const textBlock = filteredContent.find((block) => block.type === "text")
+  let screenshotPath: string | null = null
+
+  if (textBlock?.text) {
+    // Extract file path from text like "saved it as /path/to/screenshot.png"
+    const pathMatch = textBlock.text.match(/(?:saved (?:it )?as|saved to) (.+\.png)/i)
+    if (pathMatch) {
+      screenshotPath = pathMatch[1]
+    }
+  }
+
+  // Add helpful message about reading the screenshot
+  if (screenshotPath && textBlock) {
+    filteredContent.push({
+      type: "text",
+      text: `\n\nTo view this screenshot, use the "read" tool with the file path: ${screenshotPath}`,
+    })
+  }
+
+  return { ...typedResult, content: filteredContent }
+}
+
 export async function handler(args: BrowserEvalArgs): Promise<string> {
   try {
     if (args.action === "start") {
@@ -288,10 +327,13 @@ export async function handler(args: BrowserEvalArgs): Promise<string> {
 
     const result = await callServerTool(connection, toolName, toolArgs)
 
+    const formattedResult =
+      args.action === "screenshot" ? filterImageDataFromResult(result) : result
+
     return JSON.stringify({
       success: true,
       action: args.action,
-      result,
+      result: formattedResult,
     })
   } catch (error) {
     const errorMessage = error instanceof Error ? error.message : String(error)
diff --git a/test/unit/browser-eval-filter.test.ts b/test/unit/browser-eval-filter.test.ts
@@ -0,0 +1,77 @@
+import { describe, it, expect } from "vitest"
+import { filterImageDataFromResult } from "../../src/tools/browser-eval.js"
+
+describe("browser-eval screenshot filter", () => {
+  it("should filter out base64 image data from screenshot result", () => {
+    const mockResponse = {
+      content: [
+        {
+          type: "text",
+          text: "### Result\nTook the viewport screenshot and saved it as /tmp/screenshot.png",
+        },
+        {
+          type: "image",
+          data: "iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR42mNk+M9QDwADhgGAWjR9awAAAABJRU5ErkJggg==",
+        },
+      ],
+    }
+
+    const formattedResult = filterImageDataFromResult(mockResponse) as {
+      content: { type: string; text?: string }[]
+    }
+
+    // Should remove the image block
+    expect(formattedResult.content.length).toBe(2) // Original text + new guidance text
+    expect(formattedResult.content.every((block) => block.type !== "image")).toBe(true)
+
+    // Should contain guidance about reading the file
+    const guidanceBlock = formattedResult.content.find((block) =>
+      block.text?.includes('To view this screenshot, use the "read" tool with the file path')
+    )
+    expect(guidanceBlock).toBeDefined()
+    expect(guidanceBlock?.text).toContain("/tmp/screenshot.png")
+  })
+
+  it("should extract file path from various text formats", () => {
+    const formats = [
+      "saved it as /path/to/screenshot.png",
+      "saved as /path/to/screenshot.png",
+      "Took the viewport screenshot and saved it as /var/folders/temp/screenshot.png",
+    ]
+
+    formats.forEach((text) => {
+      const mockResult = { content: [{ type: "text", text }] }
+
+      const formattedResult = filterImageDataFromResult(mockResult) as {
+        content: { type: string; text?: string }[]
+      }
+
+      const guidanceBlock = formattedResult.content.find((block) =>
+        block.text?.includes("To view this screenshot")
+      )
+      expect(guidanceBlock).toBeDefined()
+    })
+  })
+
+  it("should handle results without image data", () => {
+    const mockResult = { content: [{ type: "text", text: "Some other result" }] }
+
+    const formattedResult = filterImageDataFromResult(mockResult) as {
+      content: { type: string; text?: string }[]
+    }
+
+    // Should not add guidance if no screenshot path found
+    expect(formattedResult.content.length).toBe(1)
+  })
+
+  it("should handle non-object results", () => {
+    expect(filterImageDataFromResult(null)).toBe(null)
+    expect(filterImageDataFromResult("string")).toBe("string")
+    expect(filterImageDataFromResult(123)).toBe(123)
+  })
+
+  it("should handle responses without content array", () => {
+    const mockResult = { other: "data" }
+    expect(filterImageDataFromResult(mockResult)).toEqual(mockResult)
+  })
+})