Skip to content

Commit ee4fff6

Browse files
pranaygp and claude authored
benchmarking: make steps simulate real work (+ misc improvements) (#565)
* perf: add 5s delay to benchmark steps to simulate real work

* perf: add realistic workloads to benchmark steps

  Add realistic workloads to benchmark step functions:
  - doWork() - 1 second delay to simulate real computation
  - stressTestStep() - 1 second delay to simulate real computation
  - genBenchStream() - generates ~5KB of data in 50 chunks
  - transformStream() - uppercases stream content (renamed from doubleNumbers)

  Add "Slurp Time" metric to stream benchmarks measuring time from first byte to complete stream consumption, complementing TTFB.

  🤖 Generated with [Claude Code](https://claude.com/claude-code)

  Co-Authored-By: Claude <[email protected]>

* fix(e2e): increase dev test timeouts for Windows

  Windows file watching and rebuilding are slower than on macOS/Linux, causing the 10s timeouts to fail. Increased to 30s to accommodate.

  🤖 Generated with [Claude Code](https://claude.com/claude-code)

  Co-Authored-By: Claude <[email protected]>

* fix(ci): treat community world failures as warnings, not errors

  Community world tests/benchmarks are now non-blocking:
  - Removed from has_failures check in both tests.yml and benchmarks.yml
  - Added separate has_warnings output for community failures
  - Split PR comment notices: ❌ for failures, ⚠️ for community warnings

  🤖 Generated with [Claude Code](https://claude.com/claude-code)

  Co-Authored-By: Claude <[email protected]>

* fix: apply PR review suggestions

  - Fix chunk size calculation: chunkSize - 11 for ~100 bytes per chunk
  - Remove redundant metadata check (always truthy)

  🤖 Generated with [Claude Code](https://claude.com/claude-code)

  Co-Authored-By: Claude <[email protected]>

---------

Co-authored-by: Claude <[email protected]>
1 parent 516f16f commit ee4fff6

File tree

8 files changed

+259
-38
lines changed

8 files changed

+259
-38
lines changed

.github/scripts/aggregate-benchmarks.js

Lines changed: 17 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -176,6 +176,7 @@ function collectBenchmarkData(resultFiles) {
176176
// Get workflow timing if available
177177
let workflowTimeMs = null;
178178
let firstByteTimeMs = null;
179+
let slurpTimeMs = null;
179180
let lastRunId = null;
180181
let observabilityUrl = null;
181182
if (timings?.summary?.[benchName]) {
@@ -184,6 +185,10 @@ function collectBenchmarkData(resultFiles) {
184185
if (timings.summary[benchName].avgFirstByteTimeMs !== undefined) {
185186
firstByteTimeMs = timings.summary[benchName].avgFirstByteTimeMs;
186187
}
188+
// Get slurp time for stream benchmarks (time from first byte to completion)
189+
if (timings.summary[benchName].avgSlurpTimeMs !== undefined) {
190+
slurpTimeMs = timings.summary[benchName].avgSlurpTimeMs;
191+
}
187192
}
188193
// Get the last runId for observability link (Vercel only)
189194
if (timings?.timings?.[benchName]?.length > 0) {
@@ -209,6 +214,7 @@ function collectBenchmarkData(resultFiles) {
209214
max: bench.max,
210215
samples: bench.sampleCount,
211216
firstByteTime: firstByteTimeMs,
217+
slurpTime: slurpTimeMs,
212218
runId: lastRunId,
213219
observabilityUrl: observabilityUrl,
214220
};
@@ -329,10 +335,10 @@ function renderBenchmarkTable(
329335
// Render table - different columns for stream vs regular benchmarks
330336
if (isStream) {
331337
console.log(
332-
'| World | Framework | Workflow Time | TTFB | Wall Time | Overhead | Samples | vs Fastest |'
338+
'| World | Framework | Workflow Time | TTFB | Slurp | Wall Time | Overhead | Samples | vs Fastest |'
333339
);
334340
console.log(
335-
'|:------|:----------|--------------:|-----:|----------:|---------:|--------:|-----------:|'
341+
'|:------|:----------|--------------:|-----:|------:|----------:|---------:|--------:|-----------:|'
336342
);
337343
} else {
338344
console.log(
@@ -391,6 +397,11 @@ function renderBenchmarkTable(
391397
baseline?.firstByteTime
392398
);
393399

400+
// Format slurp time with delta for stream benchmarks (time from first byte to completion)
401+
const slurpSec =
402+
metrics.slurpTime !== null ? formatSec(metrics.slurpTime) : '-';
403+
const slurpDelta = formatDelta(metrics.slurpTime, baseline?.slurpTime);
404+
394405
// Format samples count
395406
const samplesCount = metrics.samples ?? '-';
396407

@@ -401,7 +412,7 @@ function renderBenchmarkTable(
401412

402413
if (isStream) {
403414
console.log(
404-
`| ${worldInfo.emoji} ${worldInfo.label} | ${medal}${frameworkInfo.label} | ${workflowTimeSec}s${workflowDelta} | ${firstByteSec}s${ttfbDelta} | ${wallTimeSec}s${wallDelta} | ${overheadSec}s | ${samplesCount} | ${factor} |`
415+
`| ${worldInfo.emoji} ${worldInfo.label} | ${medal}${frameworkInfo.label} | ${workflowTimeSec}s${workflowDelta} | ${firstByteSec}s${ttfbDelta} | ${slurpSec}s${slurpDelta} | ${wallTimeSec}s${wallDelta} | ${overheadSec}s | ${samplesCount} | ${factor} |`
405416
);
406417
} else {
407418
console.log(
@@ -680,6 +691,9 @@ function renderComparison(data, baselineData) {
680691
console.log(
681692
'- **TTFB**: Time to First Byte - time from workflow start until first stream byte received (stream benchmarks only)'
682693
);
694+
console.log(
695+
'- **Slurp**: Time from first byte to complete stream consumption (stream benchmarks only)'
696+
);
683697
console.log(
684698
'- **Wall Time**: Total testbench time (trigger workflow + poll for result)'
685699
);

.github/scripts/aggregate-e2e-results.js

Lines changed: 76 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,7 @@ function findResultFiles(dir) {
3636
files.push(...findResultFiles(fullPath));
3737
} else if (
3838
entry.name.startsWith('e2e-') &&
39+
!entry.name.startsWith('e2e-metadata-') &&
3940
entry.name.endsWith('.json')
4041
) {
4142
files.push(fullPath);
@@ -47,6 +48,67 @@ function findResultFiles(dir) {
4748
return files;
4849
}
4950

51+
/**
 * Recursively collect every e2e metadata JSON file beneath a directory.
 *
 * A file qualifies when its name starts with `e2e-metadata-` and ends with
 * `.json`. Subdirectories are walked depth-first in the order `readdirSync`
 * reports them.
 *
 * @param {string} dir - Directory to walk.
 * @returns {string[]} Paths of the matching files, each built by joining
 *   `dir` with the entry name. Empty when the directory cannot be read.
 */
function findMetadataFiles(dir) {
  const collected = [];
  try {
    for (const dirent of fs.readdirSync(dir, { withFileTypes: true })) {
      const entryPath = path.join(dir, dirent.name);
      if (dirent.isDirectory()) {
        collected.push(...findMetadataFiles(entryPath));
        continue;
      }
      const isMetadataJson =
        dirent.name.startsWith('e2e-metadata-') &&
        dirent.name.endsWith('.json');
      if (isMetadataJson) {
        collected.push(entryPath);
      }
    }
  } catch (e) {
    // Best-effort: a missing or unreadable directory contributes no files
  }
  return collected;
}
72+
73+
/**
 * Build a lookup of e2e metadata keyed by app name.
 *
 * Every file returned by `findMetadataFiles(dir)` is parsed as JSON. A file
 * is kept only when its basename has the form `e2e-metadata-{app}-vercel`
 * and the parsed content has a truthy `vercel` property. Files that cannot
 * be read or parsed are skipped silently.
 *
 * @param {string} dir - Root directory to search for metadata files.
 * @returns {Map<string, object>} App name -> parsed metadata content.
 */
function loadMetadata(dir) {
  const byApp = new Map(); // app -> { runIds, vercel }

  for (const filePath of findMetadataFiles(dir)) {
    try {
      const parsed = JSON.parse(fs.readFileSync(filePath, 'utf-8'));
      // The app name is encoded in the filename: e2e-metadata-{app}-vercel.json
      const nameMatch = path
        .basename(filePath, '.json')
        .match(/^e2e-metadata-(.+)-vercel$/);
      if (!nameMatch || !parsed.vercel) {
        continue;
      }
      byApp.set(nameMatch[1], parsed);
    } catch (e) {
      // Skip invalid metadata files
    }
  }

  return byApp;
}
95+
96+
/**
 * Build the Vercel observability URL for the workflow run behind one test.
 *
 * @param {Map<string, object>} metadata - App name -> metadata object with
 *   `vercel` ({ teamSlug, projectSlug, environment }) and `runIds`
 *   (array of { testName, runId }). May be missing, in which case no link
 *   is produced.
 * @param {string} appName - Key to look up in `metadata`.
 * @param {string} testName - Compared with strict equality against each
 *   `runIds[].testName`.
 * @returns {string|null} The run URL, or `null` when any piece needed to
 *   build a usable link is absent.
 */
function getObservabilityUrl(metadata, appName, testName) {
  const appMetadata = metadata?.get(appName);
  if (!appMetadata?.vercel) return null;

  const { vercel, runIds } = appMetadata;
  if (!vercel.teamSlug || !vercel.projectSlug) return null;

  // Malformed metadata (runIds missing or not an array) yields no link
  // instead of throwing while the summary is being rendered.
  if (!Array.isArray(runIds)) return null;

  // Require a runId on the matched entry; otherwise the link would end in
  // ".../runs/undefined".
  const runInfo = runIds.find((r) => r?.testName === testName && r.runId);
  if (!runInfo) return null;

  const env = vercel.environment === 'production' ? 'production' : 'preview';

  // Encode each dynamic path segment so unexpected characters cannot break
  // the URL (or the markdown link it is embedded in). Ordinary slugs and
  // run IDs pass through unchanged.
  const teamSlug = encodeURIComponent(String(vercel.teamSlug));
  const projectSlug = encodeURIComponent(String(vercel.projectSlug));
  const runId = encodeURIComponent(String(runInfo.runId));

  return `https://vercel.com/${teamSlug}/${projectSlug}/observability/workflows/runs/${runId}?environment=${env}`;
}
111+
50112
// Parse vitest JSON output
51113
function parseVitestResults(file) {
52114
try {
@@ -291,7 +353,7 @@ const categoryOrder = [
291353
];
292354

293355
// Render aggregated PR comment summary
294-
function renderAggregatedSummary(categories, overallSummary) {
356+
function renderAggregatedSummary(categories, overallSummary, metadata) {
295357
const total =
296358
overallSummary.totalPassed +
297359
overallSummary.totalFailed +
@@ -371,7 +433,17 @@ function renderAggregatedSummary(categories, overallSummary) {
371433
for (const test of tests) {
372434
// Extract just the test name without "e2e " prefix if present
373435
const testName = test.name.replace(/^e2e\s+/, '');
374-
console.log(`- \`${testName}\``);
436+
// Add observability link for vercel-prod tests
437+
if (catName === 'vercel-prod') {
438+
const obsUrl = getObservabilityUrl(metadata, appName, test.name);
439+
if (obsUrl) {
440+
console.log(`- \`${testName}\` ([🔍 observability](${obsUrl}))`);
441+
} else {
442+
console.log(`- \`${testName}\``);
443+
}
444+
} else {
445+
console.log(`- \`${testName}\``);
446+
}
375447
}
376448
console.log('');
377449
}
@@ -425,7 +497,8 @@ if (resultFiles.length === 0) {
425497

426498
if (mode === 'aggregate') {
427499
const { categories, overallSummary } = aggregateByCategory(resultFiles);
428-
renderAggregatedSummary(categories, overallSummary);
500+
const metadata = loadMetadata(resultsDir);
501+
renderAggregatedSummary(categories, overallSummary, metadata);
429502

430503
// Exit with non-zero if any tests failed
431504
if (overallSummary.totalFailed > 0) {

.github/workflows/benchmarks.yml

Lines changed: 22 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -495,12 +495,19 @@ jobs:
495495
echo "vercel=$VERCEL_STATUS" >> $GITHUB_OUTPUT
496496
echo "community=$COMMUNITY_STATUS" >> $GITHUB_OUTPUT
497497
498-
if [[ "$LOCAL_STATUS" == "failure" || "$POSTGRES_STATUS" == "failure" || "$VERCEL_STATUS" == "failure" || "$COMMUNITY_STATUS" == "failure" ]]; then
498+
# Community world failures are warnings, not errors
499+
if [[ "$LOCAL_STATUS" == "failure" || "$POSTGRES_STATUS" == "failure" || "$VERCEL_STATUS" == "failure" ]]; then
499500
echo "has_failures=true" >> $GITHUB_OUTPUT
500501
else
501502
echo "has_failures=false" >> $GITHUB_OUTPUT
502503
fi
503504
505+
if [[ "$COMMUNITY_STATUS" == "failure" ]]; then
506+
echo "has_warnings=true" >> $GITHUB_OUTPUT
507+
else
508+
echo "has_warnings=false" >> $GITHUB_OUTPUT
509+
fi
510+
504511
- name: Update PR comment with results
505512
if: github.event_name == 'pull_request'
506513
uses: marocchino/sticky-pull-request-comment@v2
@@ -517,10 +524,23 @@ jobs:
517524
message: |
518525
519526
---
520-
⚠️ **Some benchmark jobs failed:**
527+
**Some benchmark jobs failed:**
521528
- Local: ${{ needs.benchmark-local.result }}
522529
- Postgres: ${{ needs.benchmark-postgres.result }}
523530
- Vercel: ${{ needs.benchmark-vercel.result }}
531+
532+
Check the [workflow run](${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}) for details.
533+
534+
- name: Append community warning to PR comment
535+
if: github.event_name == 'pull_request' && steps.check-status.outputs.has_warnings == 'true'
536+
uses: marocchino/sticky-pull-request-comment@v2
537+
with:
538+
header: benchmark-results
539+
append: true
540+
message: |
541+
542+
---
543+
⚠️ **Community world benchmarks failed** (non-blocking):
524544
- Community Worlds: ${{ needs.benchmark-community.result }}
525545
526546
Check the [workflow run](${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}) for details.

.github/workflows/tests.yml

Lines changed: 37 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -181,26 +181,37 @@ jobs:
181181
app:
182182
- name: "example"
183183
project-id: "prj_xWq20Dd860HHAfzMjK2Mb6TPVxMa"
184+
project-slug: "example-workflow"
184185
- name: "nextjs-turbopack"
185186
project-id: "prj_yjkM7UdHliv8bfxZ1sMJQf1pMpdi"
187+
project-slug: "example-nextjs-workflow-turbopack"
186188
- name: "nextjs-webpack"
187189
project-id: "prj_avRPBF3eWjh6iDNQgmhH4VOg27h0"
190+
project-slug: "example-nextjs-workflow-webpack"
188191
- name: "nitro"
189192
project-id: "prj_e7DZirYdLrQKXNrlxg7KmA6ABx8r"
193+
project-slug: "workbench-nitro-workflow"
190194
- name: "vite"
191195
project-id: "prj_uLIcNZNDmETulAvj5h0IcDHi5432"
196+
project-slug: "workbench-vite-workflow"
192197
- name: "nuxt"
193198
project-id: "prj_oTgiz3SGX2fpZuM6E0P38Ts8de6d"
199+
project-slug: "workbench-nuxt-workflow"
194200
- name: "sveltekit"
195201
project-id: "prj_MqnBLm71ceXGSnm3Fs8i8gBnI23G"
202+
project-slug: "workbench-sveltekit-workflow"
196203
- name: "hono"
197204
project-id: "prj_p0GIEsfl53L7IwVbosPvi9rPSOYW"
205+
project-slug: "workbench-hono-workflow"
198206
- name: "express"
199207
project-id: "prj_cCZjpBy92VRbKHHbarDMhOHtkuIr"
208+
project-slug: "workbench-express-workflow"
200209
- name: "fastify"
201210
project-id: "prj_5Yap0VDQ633v998iqQ3L3aQ25Cck"
211+
project-slug: "workbench-fastify-workflow"
202212
- name: "astro"
203213
project-id: "prj_YDAXj3K8LM0hgejuIMhioz2yLgTI"
214+
project-slug: "workbench-astro-workflow"
204215
env:
205216
TURBO_TOKEN: ${{ secrets.TURBO_TOKEN }}
206217
TURBO_TEAM: ${{ vars.TURBO_TEAM }}
@@ -236,6 +247,7 @@ jobs:
236247
WORKFLOW_VERCEL_AUTH_TOKEN: ${{ secrets.VERCEL_LABS_TOKEN }}
237248
WORKFLOW_VERCEL_TEAM: "team_nO2mCG4W8IxPIeKoSsqwAxxB"
238249
WORKFLOW_VERCEL_PROJECT: ${{ matrix.app.project-id }}
250+
WORKFLOW_VERCEL_PROJECT_SLUG: ${{ matrix.app.project-slug }}
239251

240252
- name: Generate E2E summary
241253
if: always()
@@ -246,7 +258,9 @@ jobs:
246258
uses: actions/upload-artifact@v4
247259
with:
248260
name: e2e-results-vercel-prod-${{ matrix.app.name }}
249-
path: e2e-vercel-prod-${{ matrix.app.name }}.json
261+
path: |
262+
e2e-vercel-prod-${{ matrix.app.name }}.json
263+
e2e-metadata-${{ matrix.app.name }}-vercel.json
250264
retention-days: 7
251265
if-no-files-found: ignore
252266

@@ -621,12 +635,19 @@ jobs:
621635
echo "windows=$WINDOWS_STATUS" >> $GITHUB_OUTPUT
622636
echo "community=$COMMUNITY_STATUS" >> $GITHUB_OUTPUT
623637
624-
if [[ "$VERCEL_STATUS" == "failure" || "$LOCAL_DEV_STATUS" == "failure" || "$LOCAL_PROD_STATUS" == "failure" || "$POSTGRES_STATUS" == "failure" || "$WINDOWS_STATUS" == "failure" || "$COMMUNITY_STATUS" == "failure" ]]; then
638+
# Community world failures are warnings, not errors
639+
if [[ "$VERCEL_STATUS" == "failure" || "$LOCAL_DEV_STATUS" == "failure" || "$LOCAL_PROD_STATUS" == "failure" || "$POSTGRES_STATUS" == "failure" || "$WINDOWS_STATUS" == "failure" ]]; then
625640
echo "has_failures=true" >> $GITHUB_OUTPUT
626641
else
627642
echo "has_failures=false" >> $GITHUB_OUTPUT
628643
fi
629644
645+
if [[ "$COMMUNITY_STATUS" == "failure" ]]; then
646+
echo "has_warnings=true" >> $GITHUB_OUTPUT
647+
else
648+
echo "has_warnings=false" >> $GITHUB_OUTPUT
649+
fi
650+
630651
- name: Update PR comment with results
631652
if: github.event_name == 'pull_request'
632653
uses: marocchino/sticky-pull-request-comment@v2
@@ -643,12 +664,25 @@ jobs:
643664
message: |
644665
645666
---
646-
⚠️ **Some E2E test jobs failed:**
667+
**Some E2E test jobs failed:**
647668
- Vercel Prod: ${{ needs.e2e-vercel-prod.result }}
648669
- Local Dev: ${{ needs.e2e-local-dev.result }}
649670
- Local Prod: ${{ needs.e2e-local-prod.result }}
650671
- Local Postgres: ${{ needs.e2e-local-postgres.result }}
651672
- Windows: ${{ needs.e2e-windows.result }}
673+
674+
Check the [workflow run](${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}) for details.
675+
676+
- name: Append community warning to PR comment
677+
if: github.event_name == 'pull_request' && steps.check-status.outputs.has_warnings == 'true'
678+
uses: marocchino/sticky-pull-request-comment@v2
679+
with:
680+
header: e2e-test-results
681+
append: true
682+
message: |
683+
684+
---
685+
⚠️ **Community world tests failed** (non-blocking):
652686
- Community Worlds: ${{ needs.e2e-community.result }}
653687
654688
Check the [workflow run](${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}) for details.

0 commit comments

Comments
 (0)