diff --git a/.github/scripts/sync-crowdin-distribution.js b/.github/scripts/sync-crowdin-distribution.js new file mode 100644 index 0000000..9682427 --- /dev/null +++ b/.github/scripts/sync-crowdin-distribution.js @@ -0,0 +1,173 @@ +#!/usr/bin/env node +/** + * Syncs Crowdin distribution files from distributions.crowdin.net to a local directory. + * Designed to be run from GitHub Actions and produce a static-file artifact for GitHub Pages. + * + * Usage: + * node sync-crowdin-distribution.js + * + * Environment variables: + * OUTPUT_DIR - Directory to write files into (default: dist-pages/crowdin-dist) + */ + +'use strict'; + +const https = require('node:https'); +const fs = require('node:fs'); +const path = require('node:path'); + +const BASE_CDN = 'https://distributions.crowdin.net'; +const OUTPUT_DIR = path.resolve(process.env.OUTPUT_DIR || 'dist-pages/crowdin-dist'); + +/** Number of simultaneous downloads per batch. */ +const CONCURRENCY = 8; + +/** + * Distribution hashes to sync. + * Read from the CROWDIN_DISTRIBUTION_IDS environment variable as a + * comma-separated list (e.g. "hash1,hash2"). Store the value in GitHub + * project variables under the name CROWDIN_DISTRIBUTION_IDS. + */ +const DISTRIBUTIONS = (process.env.CROWDIN_DISTRIBUTION_IDS || '') + .split(',') + .map((s) => s.trim()) + .filter(Boolean); + +if (DISTRIBUTIONS.length === 0) { + console.error('ERROR: CROWDIN_DISTRIBUTION_IDS environment variable is not set or empty.'); + process.exit(1); +} + +/** + * Fetches a URL, following redirects, and returns the body as a Buffer. 
+ * @param {string} url + * @returns {Promise<Buffer>} + */ +function fetchUrl(url) { + return new Promise((resolve, reject) => { + https.get(url, (res) => { + // Follow redirects + if (res.statusCode >= 300 && res.statusCode < 400 && res.headers.location) { + return fetchUrl(res.headers.location).then(resolve).catch(reject); + } + const chunks = []; + res.on('data', (chunk) => chunks.push(chunk)); + res.on('end', () => { + if (res.statusCode >= 400) { + return reject(new Error(`HTTP ${res.statusCode} for ${url}`)); + } + resolve(Buffer.concat(chunks)); + }); + res.on('error', reject); + }).on('error', reject); + }); +} + +/** + * Writes data to a file, creating parent directories as needed. + * @param {string} filePath + * @param {Buffer|string} data + */ +function saveFile(filePath, data) { + fs.mkdirSync(path.dirname(filePath), { recursive: true }); + fs.writeFileSync(filePath, data); +} + +/** + * Processes an array of items in fixed-size concurrent batches. + * @template T + * @param {T[]} items + * @param {number} batchSize + * @param {(item: T) => Promise<void>} fn + */ +async function processInBatches(items, batchSize, fn) { + for (let i = 0; i < items.length; i += batchSize) { + await Promise.all(items.slice(i, i + batchSize).map(fn)); + } +} + +/** + * Downloads all distribution files for a single hash. + * @param {string} hash Distribution hash. + * @returns {Promise<boolean>} true if all files were fetched without errors. 
+ */ +async function syncDistribution(hash) { + console.log(`\n=== Syncing distribution: ${hash} ===`); + const hashDir = path.join(OUTPUT_DIR, hash); + + // manifest.json + console.log(' Fetching manifest.json...'); + const manifestBuf = await fetchUrl(`${BASE_CDN}/${hash}/manifest.json`); + saveFile(path.join(hashDir, 'manifest.json'), manifestBuf); + const manifest = JSON.parse(manifestBuf.toString('utf8')); + + console.log(` Timestamp : ${manifest.timestamp}`); + console.log(` Languages : ${(manifest.languages || []).length}`); + + // languages.json + console.log(' Fetching languages.json...'); + const langsBuf = await fetchUrl(`${BASE_CDN}/${hash}/languages.json`); + saveFile(path.join(hashDir, 'languages.json'), langsBuf); + + // content files + const contentPaths = new Set(); + if (manifest.content) { + for (const paths of Object.values(manifest.content)) { + for (const p of paths) { + contentPaths.add(p); + } + } + } + + const pathList = [...contentPaths]; + console.log(` Content files: ${pathList.length} (concurrency=${CONCURRENCY})`); + + let fetched = 0; + let failed = 0; + + await processInBatches(pathList, CONCURRENCY, async (contentPath) => { + const url = `${BASE_CDN}/${hash}${contentPath}`; + const localPath = path.join(hashDir, contentPath); + try { + const data = await fetchUrl(url); + saveFile(localPath, data); + fetched++; + if ((fetched + failed) % 50 === 0) { + console.log(` Progress: ${fetched + failed}/${pathList.length}`); + } + } catch (err) { + failed++; + console.warn(` WARN: failed to fetch ${contentPath}: ${err.message}`); + } + }); + + console.log(` Result: ${fetched} fetched, ${failed} failed`); + return failed === 0; +} + +async function main() { + console.log('Crowdin Distribution Sync'); + console.log(`Output dir: ${OUTPUT_DIR}`); + console.log(`Distributions: ${DISTRIBUTIONS.length}`); + + fs.mkdirSync(OUTPUT_DIR, { recursive: true }); + + let allOk = true; + for (const hash of DISTRIBUTIONS) { + try { + const ok = await 
syncDistribution(hash); + if (!ok) allOk = false; + } catch (err) { + console.error(`\nFATAL: Failed to sync ${hash}:`, err.message); + allOk = false; + } + } + + if (!allOk) { + console.error('\nSync completed with errors.'); + process.exit(1); + } + console.log('\nSync complete!'); +} + +main(); diff --git a/.github/workflows/crowdin-distribution-sync.yml b/.github/workflows/crowdin-distribution-sync.yml new file mode 100644 index 0000000..7c4d2ce --- /dev/null +++ b/.github/workflows/crowdin-distribution-sync.yml @@ -0,0 +1,88 @@ +--- +# Syncs Crowdin distribution files from distributions.crowdin.net to a +# dedicated git branch (crowdin-dist) served via jsDelivr CDN. +# +# proxy-translator.js fetches manifest.json, languages.json and all translation +# JSON files from https://distributions.crowdin.net. Those requests count +# against the LizardByte Crowdin free-tier quota, so we mirror the content +# here (refreshed daily) and redirect browser fetch() calls to jsDelivr via +# the interceptor in src/js/crowdin.js. +# +# jsDelivr CDN URL pattern: +# https://cdn.jsdelivr.net/gh/LizardByte/shared-web@crowdin-dist/<hash>/… +# +# jsDelivr guarantees Access-Control-Allow-Origin: * on all responses, which +# means no CORS plugin is required in consumer pages. + +name: Sync Crowdin Distribution +permissions: {} + +on: + schedule: + # Run daily at 02:00 UTC so translations are fresh at the start of each day. + - cron: '0 2 * * *' + workflow_dispatch: # Allow ad-hoc manual runs + +# Only one deployment at a time; do not cancel an in-progress run. 
+concurrency: + group: crowdin-dist-sync + cancel-in-progress: false + +jobs: + sync: + name: Sync distributions to crowdin-dist branch + runs-on: ubuntu-latest + permissions: + contents: write + environment: + name: crowdin-dist + url: ${{ github.server_url }}/${{ github.repository }}/tree/crowdin-dist + if: github.repository_owner == 'LizardByte' # don't run for forks + + steps: + - name: Checkout + uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 + with: + token: ${{ secrets.GH_BOT_TOKEN }} + + - name: Set up Node.js + uses: actions/setup-node@53b83947a5a98c8d113130e565377fae1a50d02f # v6.3.0 + with: + node-version: 'node' + + - name: Download Crowdin distribution files + env: + CROWDIN_DISTRIBUTION_IDS: ${{ vars.CROWDIN_DISTRIBUTION_IDS }} + OUTPUT_DIR: /tmp/crowdin-dist + run: node .github/scripts/sync-crowdin-distribution.js + + - name: Commit and push to crowdin-dist branch + env: + GH_BOT_NAME: ${{ vars.GH_BOT_NAME }} + GH_BOT_EMAIL: ${{ secrets.GH_BOT_EMAIL }} + run: | + git config user.name "${GH_BOT_NAME}" + git config user.email "${GH_BOT_EMAIL}" + + # Create an orphan branch so the branch contains only distribution + # files with no history from main (keeps the branch lean). + git checkout --orphan crowdin-dist + + # Remove every file that was inherited from the main checkout. + git rm -rf . --quiet + + # Clean up any remaining untracked files / directories. + git clean -fdx + + # Populate the branch with the freshly downloaded distribution files. + cp -r /tmp/crowdin-dist/. . + + git add . + + # Only commit when there are actual changes. + if git diff --staged --quiet; then + echo "No changes – distribution files are already up to date." 
+ else + git commit -m "chore: sync Crowdin distributions" + git push origin crowdin-dist --force + fi diff --git a/eslint.config.mjs b/eslint.config.mjs index 872ac3b..0b121f1 100644 --- a/eslint.config.mjs +++ b/eslint.config.mjs @@ -5,8 +5,10 @@ export default [ pluginJs.configs.recommended, { ignores: [ + "_readthedocs/**", // generated ReadTheDocs HTML "coverage/**", "dist/**", + "docs/**", // generated JSDoc output ], }, { diff --git a/src/js/crowdin.js b/src/js/crowdin.js index 9403d1b..0cee09c 100644 --- a/src/js/crowdin.js +++ b/src/js/crowdin.js @@ -1,5 +1,51 @@ const loadScript = require('./load-script'); +/** + * jsDelivr CDN URL serving Crowdin distribution files from the crowdin-dist + * git branch. jsDelivr unconditionally sets Access-Control-Allow-Origin: *, + * so cross-origin fetch() calls succeed without any browser plugin. + * The branch is refreshed daily by the "Sync Crowdin Distribution" workflow. + * Structure mirrors https://distributions.crowdin.net/<hash>/… exactly. + * @type {string} + */ +const CROWDIN_DIST_MIRROR = 'https://cdn.jsdelivr.net/gh/LizardByte/shared-web@crowdin-dist'; + +/** + * Monkey-patches globalThis.fetch to redirect Crowdin distribution requests to + * the jsDelivr mirror (CROWDIN_DIST_MIRROR). + * + * Must be called BEFORE proxy-translator.js is loaded so that every fetch() + * the script makes is already intercepted. + * + * Idempotent – installs the interceptor at most once per page. + */ +function _installCrowdinFetchInterceptor() { + if (typeof globalThis.fetch !== 'function') return; + if (globalThis._crowdinMirrorInstalled) return; + globalThis._crowdinMirrorInstalled = true; + + const _origFetch = globalThis.fetch.bind(globalThis); + + globalThis.fetch = function crowdinMirrorFetch(url, options) { + if (typeof url === 'string') { + let parsed; + try { + parsed = new URL(url); + } catch { + // Not a valid absolute URL – pass through unchanged. + } + // Use exact hostname comparison to avoid prefix-match bypasses + // (e.g. 
distributions.crowdin.net.evil.com) that would be flagged + // by incomplete URL sanitisation checks. + if (parsed?.protocol === 'https:' && parsed.hostname === 'distributions.crowdin.net') { + const mirroredUrl = CROWDIN_DIST_MIRROR + parsed.pathname + parsed.search + parsed.hash; + return _origFetch(mirroredUrl, options); + } + } + return _origFetch(url, options); + }; +} + /** * Initializes Crowdin translation widget based on project and UI platform. * @param {string} project - Project name ('LizardByte' or 'LizardByte-docs'). @@ -16,6 +62,10 @@ function initCrowdIn(project = 'LizardByte', platform = null) { return; } + // Redirect distribution CDN requests to our jsDelivr mirror + // before the script is even loaded so every fetch() it makes is intercepted. + _installCrowdinFetchInterceptor(); + loadScript('https://website-translator.app.crowdin.net/assets/proxy-translator.js', function() { // Configure base settings based on project const projectSettings = { diff --git a/tests/crowdin.test.js b/tests/crowdin.test.js index f274a47..7446b11 100644 --- a/tests/crowdin.test.js +++ b/tests/crowdin.test.js @@ -41,12 +41,16 @@ describe('initCrowdIn', () => { // Use fake timers to control setTimeout jest.useFakeTimers(); + + // Reset fetch interceptor flag so each test starts clean + delete globalThis._crowdinMirrorInstalled; }); afterEach(() => { jest.clearAllMocks(); jest.useRealTimers(); delete global.window.proxyTranslator; + delete globalThis._crowdinMirrorInstalled; }); it('should validate project parameter', () => { @@ -114,3 +118,100 @@ describe('initCrowdIn', () => { expect(sidebar.contains(container)).toBe(true); }); }); + +describe('Crowdin fetch interceptor', () => { + const CROWDIN_CDN = 'https://distributions.crowdin.net'; + const MIRROR = 'https://cdn.jsdelivr.net/gh/LizardByte/shared-web@crowdin-dist'; + + beforeEach(() => { + // Reset interceptor state so each test gets a fresh install + delete globalThis._crowdinMirrorInstalled; + 
// Install a controllable mock fetch + globalThis.fetch = jest.fn().mockResolvedValue({ ok: true }); + + // Minimal DOM + proxyTranslator stub so initCrowdIn callback doesn't crash + globalThis.window.proxyTranslator = { init: jest.fn() }; + globalThis.document.body.innerHTML = ''; + + jest.useFakeTimers(); + }); + + afterEach(() => { + jest.clearAllMocks(); + jest.useRealTimers(); + delete globalThis._crowdinMirrorInstalled; + delete globalThis.window.proxyTranslator; + }); + + it('should redirect Crowdin CDN fetch calls to the GitHub Pages mirror', async () => { + // Save mock before initCrowdIn wraps globalThis.fetch + const mockFetch = globalThis.fetch; + + initCrowdIn(); + jest.runAllTimers(); + + const testUrl = `${CROWDIN_CDN}/458f881791aebba1d4dde491bw4/manifest.json`; + await globalThis.fetch(testUrl); + + // The wrapper should have called the underlying mock with the mirrored URL. + // The interceptor uses URL parsing, so the path is taken from parsed.pathname. + expect(mockFetch).toHaveBeenCalled(); + const calledUrl = mockFetch.mock.calls[0][0]; + expect(calledUrl).toBe(`${MIRROR}/458f881791aebba1d4dde491bw4/manifest.json`); + }); + + it('should preserve query string when redirecting', async () => { + const mockFetch = globalThis.fetch; + + initCrowdIn(); + jest.runAllTimers(); + + const testUrl = `${CROWDIN_CDN}/458f881791aebba1d4dde491bw4/languages.json?timestamp=1234`; + await globalThis.fetch(testUrl); + + const calledUrl = mockFetch.mock.calls[0][0]; + expect(calledUrl).toBe(`${MIRROR}/458f881791aebba1d4dde491bw4/languages.json?timestamp=1234`); + }); + + it('should not redirect non-Crowdin fetch calls', async () => { + const mockFetch = globalThis.fetch; + + initCrowdIn(); + jest.runAllTimers(); + + const externalUrl = 'https://example.com/data.json'; + await globalThis.fetch(externalUrl); + + const calledUrl = mockFetch.mock.calls[0][0]; + expect(calledUrl).toBe(externalUrl); + }); + + it('should not redirect a lookalike hostname 
(incomplete-URL-sanitisation guard)', async () => { + // A URL whose hostname merely starts with the CDN hostname must NOT be + // redirected – this is the exact bypass that CodeQL flags when using + // startsWith() instead of exact hostname comparison. + const mockFetch = globalThis.fetch; + + initCrowdIn(); + jest.runAllTimers(); + + const lookalike = 'https://distributions.crowdin.net.evil.com/steal'; + await globalThis.fetch(lookalike); + + const calledUrl = mockFetch.mock.calls[0][0]; + expect(calledUrl).toBe(lookalike); + }); + + it('should install the interceptor only once (idempotent)', () => { + initCrowdIn(); + const fetchAfterFirst = globalThis.fetch; + jest.runAllTimers(); + + // Calling initCrowdIn again must NOT wrap fetch a second time + initCrowdIn(); + jest.runAllTimers(); + + expect(globalThis.fetch).toBe(fetchAfterFirst); + }); +});