Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
134 changes: 134 additions & 0 deletions modules/standard-site-sync.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,134 @@
import { readFileSync } from 'node:fs'
import { createHash } from 'node:crypto'
import { defineNuxtModule, useNuxt, createResolver } from 'nuxt/kit'
import { safeParse } from 'valibot'
import * as site from '../shared/types/lexicons/site'
import { BlogPostSchema } from '../shared/schemas/blog'
import { NPMX_SITE } from '../shared/utils/constants'
import { parseBasicFrontmatter } from '../shared/utils/parse-basic-frontmatter'
import { TID } from '@atproto/common'
import { Client } from '@atproto/lex'

const syncedDocuments = new Map<string, string>()
const CLOCK_ID_THREE = 3
const DATE_TO_MICROSECONDS = 1000

// TODO: Currently logging quite a lot, can remove some later if we want
export default defineNuxtModule({
meta: { name: 'standard-site-sync' },
async setup() {
const nuxt = useNuxt()
const { resolve } = createResolver(import.meta.url)
const contentDir = resolve('../app/pages/blog')

// Authentication with PDS using an app password
const pdsUrl = process.env.NPMX_PDS_URL
if (!pdsUrl) {
console.warn('[standard-site-sync] NPMX_PDS_URL not set, skipping sync')
return
}
// Instantiate a single new client instance that is reused for every file
const client = new Client(pdsUrl)

if (nuxt.options._prepare) return

nuxt.hook('build:before', async () => {
const { glob } = await import('tinyglobby')
const files: string[] = await glob(`${contentDir}/**/*.md`)

// INFO: Arbitrarily chosen concurrency limit, can be changed if needed
const concurrencyLimit = 5
for (let i = 0; i < files.length; i += concurrencyLimit) {
const batch = files.slice(i, i + concurrencyLimit)
// Process files in parallel
await Promise.all(
batch.map(file =>
syncFile(file, NPMX_SITE, client).catch(error =>
console.error(`[standard-site-sync] Error in ${file}:` + error),
),
Comment on lines +46 to +48
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟡 Minor

Error logging uses string concatenation with error object.

Using + error concatenates the error object as a string, which may lose stack trace information. Use a comma or structured logging for better error visibility.

🔧 Proposed fix
           syncFile(file, NPMX_SITE, client).catch(error =>
-              console.error(`[standard-site-sync] Error in ${file}:` + error),
+              console.error(`[standard-site-sync] Error in ${file}:`, error),
           ),
📝 Committable suggestion

‼️ IMPORTANT
Carefully review the code before committing. Ensure that it accurately replaces the highlighted code, contains no missing lines, and has no issues with indentation. Thoroughly test & benchmark the code to ensure it meets the requirements.

Suggested change
syncFile(file, NPMX_SITE, client).catch(error =>
console.error(`[standard-site-sync] Error in ${file}:` + error),
),
syncFile(file, NPMX_SITE, client).catch(error =>
console.error(`[standard-site-sync] Error in ${file}:`, error),
),

),
)
}
})

nuxt.hook('builder:watch', async (event, path) => {
if (!path.endsWith('.md')) return

// Ignore deleted files
if (event === 'unlink') {
console.log(`[standard-site-sync] File deleted: ${path}`)
return
}

// Process add/change events only
await syncFile(resolve(nuxt.options.rootDir, path), NPMX_SITE, client).catch(err =>
console.error(`[standard-site-sync] Failed ${path}:`, err),
)
})
},
})

/*
* INFO: Loads record to atproto and ensures uniqueness by checking the date the article is published
* publishedAt is an id that does not change
* Atomicity is enforced with upsert using publishedAt so we always update existing records instead of creating new ones
* Clock id(3) provides a deterministic ID
* WARN: DOES NOT CATCH ERRORS, THIS MUST BE HANDLED
*/
const syncFile = async (filePath: string, siteUrl: string, client: Client) => {
const fileContent = readFileSync(filePath, 'utf-8')
const frontmatter = parseBasicFrontmatter(fileContent)

// Schema expects 'path' & frontmatter provides 'slug'
const normalizedFrontmatter = {
...frontmatter,
path: typeof frontmatter.slug === 'string' ? `/blog/${frontmatter.slug}` : frontmatter.path,
}

const result = safeParse(BlogPostSchema, normalizedFrontmatter)
if (!result.success) {
console.warn(`[standard-site-sync] Validation failed for ${filePath}`, result.issues)
return
}

const data = result.output

// filter drafts
if (data.draft) {
if (process.env.DEBUG === 'true') {
console.debug(`[standard-site-sync] Skipping draft: ${data.path}`)
}
return
}

// Keys are sorted to provide a more stable hash
const hash = createHash('sha256')
.update(JSON.stringify(data, Object.keys(data).sort()))
.digest('hex')

if (syncedDocuments.get(data.path) === hash) {
return
}

const document = site.standard.document.$build({
site: siteUrl as `${string}:${string}`,
path: data.path,
title: data.title,
description: data.description ?? data.excerpt,
tags: data.tags,
// This can be extended to update the site.standard.document .updatedAt if it is changed and use the posts date here
publishedAt: new Date(data.date).toISOString(),
})
Comment on lines +113 to +121
Copy link
Contributor

@coderabbitai coderabbitai bot Feb 3, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟡 Minor

🧩 Analysis chain

🏁 Script executed:

rg -n 'NPMX_SITE' --type ts -A 2 -B 2

Repository: npmx-dev/npmx.dev

Length of output: 1595


🏁 Script executed:

cd modules && head -c 5000 standard-site-sync.ts | cat -n

Repository: npmx-dev/npmx.dev

Length of output: 5648


🏁 Script executed:

rg -n 'siteUrl' modules/standard-site-sync.ts -B 3 -A 3

Repository: npmx-dev/npmx.dev

Length of output: 599


🏁 Script executed:

fd -t ts -e ts . -x grep -l 'NPMX_SITE' {}

Repository: npmx-dev/npmx.dev

Length of output: 230


Improve type safety for the siteUrl parameter.

The type assertion `siteUrl as \`${string}:${string}\`` assumes the URL contains a colon, but the parameter type `siteUrl: string` provides no enforcement. Whilst `NPMX_SITE` is hardcoded to `'https://npmx.dev'`, the parameter accepts any string, creating potential for runtime issues if the function is called with an improperly formatted URL.

Consider either constraining the parameter type using a template literal type (${string}:${string}) or adding runtime validation.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@Kai-ros is this comment valid?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Seems like the humans are having a chat. I'll hop back into my burrow for now. If you need me again, just tag @coderabbitai in a new comment, and I'll come hopping out!


const dateInMicroSeconds = new Date(result.output.date).getTime() * DATE_TO_MICROSECONDS

// Clock id(3) needs to be the same everytime to get the same TID from a timestamp
const tid = TID.fromTime(dateInMicroSeconds, CLOCK_ID_THREE)

// client.put is async and needs to be awaited
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Will need to make sure the client is logged into as well. Also want to only login when we know we're going do writes so we don't hit session rate limits

https://npmx.dev/package/@atproto/lex#user-content-authenticated-client-with-password

await client.put(site.standard.document, document, {
rkey: tid.str,
Comment on lines +123 to +130
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟠 Major

🧩 Analysis chain

🏁 Script executed:

# First, locate and inspect the file in question
git ls-files | grep -E "standard-site-sync\.ts"

Repository: npmx-dev/npmx.dev

Length of output: 90


🏁 Script executed:

# Get the full context around lines 123-130
if [ -f "modules/standard-site-sync.ts" ]; then
  wc -l modules/standard-site-sync.ts
  echo "=== Lines 110-145 ===" 
  sed -n '110,145p' modules/standard-site-sync.ts
fi

Repository: npmx-dev/npmx.dev

Length of output: 933


🏁 Script executed:

# Search for TID implementation
rg -t ts "class TID|function TID|fromTime" --context 3

Repository: npmx-dev/npmx.dev

Length of output: 615


🏁 Script executed:

# Check for imports and CLOCK_ID_THREE definition
rg -t ts "CLOCK_ID_THREE|DATE_TO_MICROSECONDS" --context 2

Repository: npmx-dev/npmx.dev

Length of output: 963


🌐 Web query:

AT Protocol rkey uniqueness requirements TID collision same timestamp

💡 Result:

  • Uniqueness scope for rkey: Record keys are only required to be unique within a single repo + collection. In other words, (did, collection, rkey) is the unique tuple; (did, rkey) is not unique, and the same rkey (including the same TID string) may be reused in different collections in the same repo. rkey values are ultimately user-controlled strings, so you must not assume any global uniqueness. [1]
  • What a TID is (and why “same timestamp” can still differ): A TID is a 64-bit value where 53 bits are “microseconds since UNIX epoch” and the low 10 bits are a random “clock identifier”. That means multiple TIDs can share the same microsecond timestamp while still being different due to the clock-id bits. [2]
  • Collision expectations / requirements: atproto explicitly says global uniqueness of TIDs cannot be guaranteed (decentralized + potentially adversarial). However, TID generators should ensure their own output is monotonically increasing and never repeats, even when generating multiple IDs in the same microsecond or during clock adjustments (“clock smear” / sync incidents). A common strategy is “last_ts = max(now_us, last_ts+1)”. [2] [3]
  • If two records end up with the same TID rkey: within the same repo+collection that would be an rkey conflict because that tuple must be unique; the practical fix is to regenerate a new TID (or otherwise choose a different rkey) and retry. [1] [2]

Sources:
[1] AT Protocol — Record Keys (rkey), incl. (did, collection, rkey) uniqueness and non-global TID uniqueness notes: (atproto.com)
[2] AT Protocol — TID spec (53-bit microseconds + 10-bit clock id; monotonic/non-repeating guidance; no global guarantee): (atproto.com)
[3] Example implementation enforcing monotonic microsecond ticks (max(now_us, last+1)): (arroba.readthedocs.io)

Citations:


🌐 Web query:

AT Protocol lexicon rkey format constraints and TID fromTime stability

💡 Result:

Lexicon rkey / record-key format constraints

In Lexicon, record keys are strings with format record-key, and each record schema declares a key type (tid, nsid, literal:<value>, or any). [1]

Baseline record-key syntax (applies regardless of key type): [1]

  • Length: 1–512 characters
  • Allowed characters (ASCII subset): alphanumeric (A–Z a–z 0–9) plus . - _ : ~
  • Forbidden exact values: . and ..
  • Case-sensitive

TID (Timestamp Identifier) format constraints

A TID is a 13-character string using the base32-sortable alphabet 234567abcdefghijklmnopqrstuvwxyz. [2]

Structure / semantics: [2]

  • Encodes a 64-bit integer (big-endian)
  • Top bit is 0
  • Next 53 bits: microseconds since UNIX epoch
  • Last 10 bits: random “clock identifier”
  • Designed to be lexicographically sortable

Syntax / parsing rules: [2]

  • Length always 13 ASCII chars
  • First char must be one of 234567abcdefghij
  • Reference regex: ^[234567abcdefghij][234567abcdefghijklmnopqrstuvwxyz]{12}$ [2]

TID.fromTime(...) stability (what you can and can’t assume)

  • The spec guarantees that a TID contains a timestamp (µs) plus a 10-bit clock id field. Because that clock id is not determined by time alone (it’s intended to be random for generators), there is no spec-level guarantee that “time → TID” is unique or stable unless the clock id is fixed by the method. [2]
  • Therefore, stability of fromTime is an implementation detail: it’s typically used to create a sortable boundary (e.g., the lowest or highest possible TID for a given timestamp by choosing a fixed clock id like 0 or 1023), not a globally unique ID. This follows from the TID bit layout in the spec, but the exact choice (clock id, rounding to microseconds, etc.) depends on the library. [2]

Practical implication: if you need a deterministic cursor/boundary, fromTime can be stable only if the implementation sets the clock id deterministically and normalizes the timestamp deterministically; the TID spec itself doesn’t require any particular fromTime behavior. [2]

Sources

[1] AT Protocol — Record Key spec: record-key types + baseline syntax constraints
[2] AT Protocol — TID spec: base32-sortable encoding, 53-bit timestamp + 10-bit clock id, regex/syntax rules
[3] AT Protocol — Lexicon spec: string formats include record-key and tid (context for schema validation) (atproto.com)

Citations:


Genuine collision risk: fixed clock id causes same-day posts to overwrite each other.

Posts sharing the same date will generate identical rkeys because TID.fromTime(dateInMicroSeconds, CLOCK_ID_THREE) produces the same TID when the timestamp is identical and the clock id is constant. Within a single repo + collection, the tuple (did, collection, rkey) must be unique; two records with the same rkey will cause the second to overwrite the first.

Derive the clock id deterministically from data.path (which is unique per post) to ensure distinct TIDs:

Example fix
  const dateInMicroSeconds = new Date(result.output.date).getTime() * DATE_TO_MICROSECONDS

+  const clockId =
+    Number.parseInt(createHash('sha256').update(data.path).digest('hex').slice(0, 2), 16) % 1024
-  const tid = TID.fromTime(dateInMicroSeconds, CLOCK_ID_THREE)
+  const tid = TID.fromTime(dateInMicroSeconds, clockId)
📝 Committable suggestion

‼️ IMPORTANT
Carefully review the code before committing. Ensure that it accurately replaces the highlighted code, contains no missing lines, and has no issues with indentation. Thoroughly test & benchmark the code to ensure it meets the requirements.

Suggested change
const dateInMicroSeconds = new Date(result.output.date).getTime() * DATE_TO_MICROSECONDS
// Clock id(3) needs to be the same everytime to get the same TID from a timestamp
const tid = TID.fromTime(dateInMicroSeconds, CLOCK_ID_THREE)
// client.put is async and needs to be awaited
await client.put(site.standard.document, document, {
rkey: tid.str,
const dateInMicroSeconds = new Date(result.output.date).getTime() * DATE_TO_MICROSECONDS
const clockId =
Number.parseInt(createHash('sha256').update(data.path).digest('hex').slice(0, 2), 16) % 1024
const tid = TID.fromTime(dateInMicroSeconds, clockId)
// client.put is async and needs to be awaited
await client.put(site.standard.document, document, {
rkey: tid.str,

})

syncedDocuments.set(data.path, hash)
}
6 changes: 4 additions & 2 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@
"npmx-connector": "pnpm --filter npmx-connector dev",
"generate-pwa-icons": "pwa-assets-generator",
"preview": "nuxt preview",
"postinstall": "nuxt prepare && simple-git-hooks && pnpm generate:lexicons",
"postinstall": "pnpm generate:lexicons && nuxt prepare && simple-git-hooks",
"generate:fixtures": "node scripts/generate-fixtures.ts",
"generate:lexicons": "lex build --lexicons lexicons --out shared/types/lexicons --clear",
"test": "vite test",
Expand All @@ -34,12 +34,13 @@
"test:browser:ui": "pnpm build:playwright && pnpm test:browser:prebuilt --ui",
"test:browser:update": "pnpm build:playwright && pnpm test:browser:prebuilt --update-snapshots",
"test:nuxt": "vite test --project nuxt",
"test:types": "nuxt prepare && vue-tsc -b --noEmit && pnpm --filter npmx-connector test:types",
"test:types": "pnpm generate:lexicons && nuxt prepare && vue-tsc -b --noEmit && pnpm --filter npmx-connector test:types",
"test:unit": "vite test --project unit",
"start:playwright:webserver": "NODE_ENV=test pnpm preview --port 5678"
},
"dependencies": {
"@atproto/api": "^0.18.17",
"@atproto/common": "0.5.10",
"@atproto/lex": "0.0.13",
"@atproto/oauth-client-node": "^0.3.15",
"@deno/doc": "jsr:^0.189.1",
Expand Down Expand Up @@ -75,6 +76,7 @@
"defu": "6.1.4",
"fast-npm-meta": "1.0.0",
"focus-trap": "^7.8.0",
"tinyglobby": "0.2.15",
"marked": "17.0.1",
"module-replacements": "2.11.0",
"nuxt": "4.3.0",
Expand Down
27 changes: 21 additions & 6 deletions pnpm-lock.yaml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

87 changes: 87 additions & 0 deletions server/api/atproto/author-profiles.get.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,87 @@
import * as v from 'valibot'
import { CACHE_MAX_AGE_ONE_DAY, BLUESKY_API } from '#shared/utils/constants'
import { AuthorSchema } from '#shared/schemas/blog'
import type { Author, ResolvedAuthor } from '#shared/schemas/blog'

// Minimal shape of the `app.bsky.actor.getProfiles` response that this
// handler consumes. The handler only reads `handle` and `avatar`;
// `did`/`displayName` are declared for completeness of the subset —
// presumably the live endpoint returns additional fields (not verified here).
type ProfilesResponse = {
  profiles: Array<{
    did: string
    handle: string
    displayName?: string
    avatar?: string
  }>
}

/**
 * Resolves Bluesky avatars and profile URLs for a list of blog authors.
 *
 * Expects an `authors` query parameter containing a JSON array that
 * validates against `AuthorSchema`. For each author with a
 * `blueskyHandle`, the handler looks up the avatar via
 * `app.bsky.actor.getProfiles`; authors without a handle (or whose
 * profile lookup yields no avatar) get `avatar: null` / `profileUrl: null`.
 * Responses are cached for one day, keyed by the raw `authors` parameter.
 *
 * Throws a 400 error when the parameter is missing, is not valid JSON,
 * or fails schema validation. Network failures against the Bluesky API
 * are swallowed (treated as "no profiles"), never surfaced to the client.
 */
export default defineCachedEventHandler(
  async event => {
    const rawAuthors = getQuery(event).authors

    if (!rawAuthors || typeof rawAuthors !== 'string') {
      throw createError({
        statusCode: 400,
        statusMessage: 'authors query parameter is required (JSON array)',
      })
    }

    // Parse then validate; distinguish schema failures from malformed JSON
    // so the client gets an actionable message.
    let authors: Author[]
    try {
      authors = v.parse(v.array(AuthorSchema), JSON.parse(rawAuthors))
    } catch (error) {
      const statusMessage =
        error instanceof v.ValiError
          ? `Invalid authors format: ${error.message}`
          : 'authors must be valid JSON'
      throw createError({ statusCode: 400, statusMessage })
    }

    if (!Array.isArray(authors) || authors.length === 0) return { authors: [] }

    // Collect the handles we can actually resolve.
    const handles: string[] = []
    for (const { blueskyHandle } of authors) {
      if (blueskyHandle) handles.push(blueskyHandle)
    }

    // Nobody has a Bluesky handle: short-circuit with null profile data.
    if (handles.length === 0) {
      return {
        authors: authors.map(author => ({
          ...author,
          avatar: null,
          profileUrl: null,
        })),
      }
    }

    // Best-effort profile fetch — a failed request degrades to "no avatars"
    // rather than failing the whole endpoint.
    const response = await $fetch<ProfilesResponse>(`${BLUESKY_API}app.bsky.actor.getProfiles`, {
      query: { actors: handles },
    }).catch(() => ({ profiles: [] }))

    const avatarByHandle = new Map<string, string>()
    for (const { handle, avatar } of response.profiles) {
      if (avatar) avatarByHandle.set(handle, avatar)
    }

    const resolved: ResolvedAuthor[] = authors.map(author => {
      const { blueskyHandle } = author
      return {
        ...author,
        avatar: blueskyHandle ? (avatarByHandle.get(blueskyHandle) ?? null) : null,
        profileUrl: blueskyHandle ? `https://bsky.app/profile/${blueskyHandle}` : null,
      }
    })

    return { authors: resolved }
  },
  {
    name: 'author-profiles',
    maxAge: CACHE_MAX_AGE_ONE_DAY,
    getKey: event => {
      const { authors } = getQuery(event)
      return `author-profiles:${authors ?? 'npmx.dev'}`
    },
  },
)
Loading
Loading