diff --git a/.github/copilot-instructions.md b/.github/copilot-instructions.md index e5a438ee1..aeb7ed884 100644 --- a/.github/copilot-instructions.md +++ b/.github/copilot-instructions.md @@ -1,6 +1,6 @@ # GitHub Copilot Instructions for vscode-documentdb -VS Code Extension for Azure Cosmos DB and MongoDB. TypeScript (strict mode), React webviews, Jest testing. +VS Code Extension for Azure Cosmos DB and the MongoDB API. TypeScript (strict mode), React webviews, Jest testing. ## Critical Build Commands @@ -32,6 +32,12 @@ Before finishing work on a PR, agents **must** run the following steps in order: > ⚠️ **An agent must not finish or terminate until all three steps above have been run and pass successfully.** Skipping these steps leads to CI failures. +## Git Safety + +- **Never use `git add -f`** to force-add files. If `git add` refuses a file, it is likely in `.gitignore` for a reason (e.g., `docs/plan/`, `docs/analysis/`, build outputs). Do NOT override this with `-f`. +- When `git add` warns that a path is ignored, **stop and inform the user** instead of force-adding. +- Files in `docs/plan/` and `docs/analysis/` are **local planning documents** that must not be committed to the repository. + ## Project Structure | Folder | Purpose | @@ -178,6 +184,32 @@ For Discovery View, both `treeId` and `clusterId` are sanitized (all `/` replace See `src/tree/models/BaseClusterModel.ts` and `docs/analysis/08-cluster-model-simplification-plan.md` for details. +## Terminology + +This is a **DocumentDB** extension that uses the **MongoDB-compatible wire protocol**. + +- Use **"DocumentDB"** when referring to the database service itself. +- Use **"MongoDB API"** or **"DocumentDB API"** when referring to the wire protocol, query language, or API compatibility layer. +- **Never use "MongoDB" alone** as a product name in code, comments, docs, or user-facing strings. 
+ +| ✅ Do | ❌ Don't | +| ---------------------------------------------------- | -------------------------------- | +| `// Query operators supported by the DocumentDB API` | `// MongoDB query operators` | +| `// BSON types per the MongoDB API spec` | `// Uses MongoDB's $match stage` | +| `documentdbQuery` (variable name) | `mongoQuery` | + +This applies to: code comments, JSDoc/TSDoc, naming (prefer `documentdb` prefix), user-facing strings, docs, and test descriptions. + +## TDD Contract Tests + +Test suites prefixed with `TDD:` (e.g., `describe('TDD: Completion Behavior', ...)`) are **behavior contracts** written before the implementation. If a `TDD:` test fails after a code change: + +1. **Do NOT automatically fix the test.** +2. **Stop and ask the user** whether the behavior change is intentional. +3. The user decides: update the contract (test) or fix the implementation. + +This applies to any test whose name starts with `TDD:`, regardless of folder location. + ## Additional Patterns For detailed patterns, see: diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index ac02668fb..7ea04b6ef 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -55,6 +55,9 @@ jobs: - name: 📦 Install Dependencies (npm ci) run: npm ci --prefer-offline --no-audit --no-fund --progress=false --verbose + - name: 🔨 Build Workspace Packages + run: npm run build --workspaces --if-present + - name: 🌐 Check Localization Files run: npm run l10n:check diff --git a/.gitignore b/.gitignore index e0bf99748..eddc516fd 100644 --- a/.gitignore +++ b/.gitignore @@ -1,6 +1,9 @@ ## Ignore Visual Studio temporary files, build results, and ## files generated by popular Visual Studio add-ons. +/docs/analysis/ +/docs/plan/ + # User-specific files *.suo *.user @@ -157,6 +160,9 @@ PublishScripts/ **/packages/* # except build/, which is used as an MSBuild target. 
!**/packages/build/ +# Include our monorepo packages at the root +!/packages/ +!/packages/** # Uncomment if necessary however generally it will be regenerated when needed #!**/packages/repositories.config # NuGet v3's project.json files produces more ignoreable files @@ -268,6 +274,7 @@ dist stats.json *.tgz *.zip +*.tsbuildinfo # Scrapbooks *.mongo diff --git a/.vscode/settings.json b/.vscode/settings.json index 9bcd06e4a..a6a027735 100644 --- a/.vscode/settings.json +++ b/.vscode/settings.json @@ -1,33 +1,35 @@ { - "editor.codeActionsOnSave": { - "source.fixAll.eslint": "explicit", - "source.organizeImports": "explicit" - }, - "editor.detectIndentation": false, - "editor.formatOnSave": true, - "editor.formatOnPaste": false, - "editor.insertSpaces": true, - "editor.tabSize": 4, - "editor.defaultFormatter": "esbenp.prettier-vscode", - "files.insertFinalNewline": true, - "files.trimTrailingWhitespace": true, - "search.exclude": { - "out": true, - "**/node_modules": true, - ".vscode-test": true - }, - "typescript.preferences.importModuleSpecifier": "relative", - "typescript.tsdk": "node_modules/typescript/lib", - "antlr4.generation": { - // Settings for "ANTLR4 grammar syntax support" extension - "mode": "internal", - "listeners": true, - "visitors": false - }, - "vscode-nmake-tools.workspaceBuildDirectories": ["."], - "vscode-nmake-tools.installOsRepoRustHelperExtension": false, - "sarif-viewer.connectToGithubCodeScanning": "off" - // "eslint.workingDirectories": [ - // ".", "src" - // ] + "editor.codeActionsOnSave": { + "source.fixAll.eslint": "explicit", + "source.organizeImports": "explicit" + }, + "editor.detectIndentation": false, + "editor.formatOnSave": true, + "editor.formatOnPaste": false, + "editor.insertSpaces": true, + "editor.tabSize": 4, + "editor.defaultFormatter": "esbenp.prettier-vscode", + "files.insertFinalNewline": true, + "files.trimTrailingWhitespace": true, + "search.exclude": { + "out": true, + "**/node_modules": true, + ".vscode-test": true 
+ }, + "typescript.preferences.importModuleSpecifier": "relative", + "typescript.tsdk": "node_modules/typescript/lib", + "antlr4.generation": { + // Settings for "ANTLR4 grammar syntax support" extension + "mode": "internal", + "listeners": true, + "visitors": false + }, + "vscode-nmake-tools.workspaceBuildDirectories": ["."], + "vscode-nmake-tools.installOsRepoRustHelperExtension": false, + "sarif-viewer.connectToGithubCodeScanning": "off", + "jest.runMode": "deferred", + "testing.automaticallyOpenTestResults": "neverOpen" + // "eslint.workingDirectories": [ + // ".", "src" + // ] } diff --git a/jest.config.js b/jest.config.js index 7ad26361a..ca22ee433 100644 --- a/jest.config.js +++ b/jest.config.js @@ -1,11 +1,18 @@ /** @type {import('ts-jest').JestConfigWithTsJest} **/ module.exports = { - testEnvironment: 'node', - testMatch: ['<rootDir>/src/**/*.test.ts'], - transform: { - '^.+.tsx?$': ['ts-jest', {}], - }, // Limit workers to avoid OOM kills on machines with many cores. // Each ts-jest worker loads the TypeScript compiler and consumes ~500MB+. 
maxWorkers: '50%', + projects: [ + { + displayName: 'extension', + testEnvironment: 'node', + testMatch: ['<rootDir>/src/**/*.test.ts'], + transform: { + '^.+\\.tsx?$': ['ts-jest', {}], + }, + }, + '<rootDir>/packages/schema-analyzer', + '<rootDir>/packages/documentdb-constants', + ], }; diff --git a/l10n/bundle.l10n.json b/l10n/bundle.l10n.json index 3e9519d4e..59a69f54b 100644 --- a/l10n/bundle.l10n.json +++ b/l10n/bundle.l10n.json @@ -415,7 +415,6 @@ "Error running process: ": "Error running process: ", "Error saving the document": "Error saving the document", "Error validating collection name availability: {0}": "Error validating collection name availability: {0}", - "Error while loading the autocompletion data": "Error while loading the autocompletion data", "Error while loading the data": "Error while loading the data", "Error while loading the document": "Error while loading the document", "Error while refreshing the document": "Error while refreshing the document", @@ -521,7 +520,7 @@ "Failed to validate source collection: {0}": "Failed to validate source collection: {0}", "Failed with code \"{0}\".": "Failed with code \"{0}\".", "Fair": "Fair", - "Filter: Enter the DocumentDB query filter in JSON format": "Filter: Enter the DocumentDB query filter in JSON format", + "Filter: Enter the DocumentDB query filter": "Filter: Enter the DocumentDB query filter", "Find Query": "Find Query", "Finished importing": "Finished importing", "Folder name cannot be empty": "Folder name cannot be empty", @@ -616,7 +615,7 @@ "Invalid Connection String: {error}": "Invalid Connection String: {error}", "Invalid connection type selected.": "Invalid connection type selected.", "Invalid document ID: {0}": "Invalid document ID: {0}", - "Invalid filter syntax: {0}. Please use valid JSON, for example: { \"name\": \"value\" }": "Invalid filter syntax: {0}. Please use valid JSON, for example: { \"name\": \"value\" }", + "Invalid filter syntax: {0}. 
Please use valid JSON or a DocumentDB API expression, for example: { name: \"value\" }": "Invalid filter syntax: {0}. Please use valid JSON or a DocumentDB API expression, for example: { name: \"value\" }", "Invalid folder type.": "Invalid folder type.", "Invalid mongoShell command format": "Invalid mongoShell command format", "Invalid node type.": "Invalid node type.", @@ -624,10 +623,10 @@ "Invalid payload for drop index action": "Invalid payload for drop index action", "Invalid payload for modify index action": "Invalid payload for modify index action", "Invalid projection syntax: {0}": "Invalid projection syntax: {0}", - "Invalid projection syntax: {0}. Please use valid JSON, for example: { \"fieldName\": 1 }": "Invalid projection syntax: {0}. Please use valid JSON, for example: { \"fieldName\": 1 }", + "Invalid projection syntax: {0}. Please use valid JSON or a DocumentDB API expression, for example: { fieldName: 1 }": "Invalid projection syntax: {0}. Please use valid JSON or a DocumentDB API expression, for example: { fieldName: 1 }", "Invalid semver \"{0}\".": "Invalid semver \"{0}\".", "Invalid sort syntax: {0}": "Invalid sort syntax: {0}", - "Invalid sort syntax: {0}. Please use valid JSON, for example: { \"fieldName\": 1 }": "Invalid sort syntax: {0}. Please use valid JSON, for example: { \"fieldName\": 1 }", + "Invalid sort syntax: {0}. Please use valid JSON or a DocumentDB API expression, for example: { fieldName: 1 }": "Invalid sort syntax: {0}. 
Please use valid JSON or a DocumentDB API expression, for example: { fieldName: 1 }", "It could be better": "It could be better", "It looks like there aren't any other folders to move these items into.\nYou might want to create a new folder first.\n\nNote: You can't move items between 'DocumentDB Local' and regular connections.": "It looks like there aren't any other folders to move these items into.\nYou might want to create a new folder first.\n\nNote: You can't move items between 'DocumentDB Local' and regular connections.", "item": "item", @@ -721,7 +720,6 @@ "No matching resources found.": "No matching resources found.", "No node selected.": "No node selected.", "No parent folder selected.": "No parent folder selected.", - "No properties found in the schema at path \"{0}\"": "No properties found in the schema at path \"{0}\"", "No public connectivity": "No public connectivity", "No result returned from the MongoDB shell.": "No result returned from the MongoDB shell.", "No results found": "No results found", diff --git a/package-lock.json b/package-lock.json index ade59941e..8a951e8fb 100644 --- a/package-lock.json +++ b/package-lock.json @@ -8,6 +8,9 @@ "name": "vscode-documentdb", "version": "0.7.2", "license": "SEE LICENSE IN LICENSE.md", + "workspaces": [ + "packages/*" + ], "dependencies": { "@azure/arm-compute": "^22.4.0", "@azure/arm-cosmosdb": "~16.4.0", @@ -24,9 +27,14 @@ "@microsoft/vscode-azureresources-api": "~2.5.0", "@monaco-editor/react": "~4.7.0", "@mongodb-js/explain-plan-helper": "1.4.24", + "@mongodb-js/shell-bson-parser": "^1.5.6", "@trpc/client": "~11.10.0", "@trpc/server": "~11.10.0", + "@vscode-documentdb/documentdb-constants": "*", + "@vscode-documentdb/schema-analyzer": "*", "@vscode/l10n": "~0.0.18", + "acorn": "^8.16.0", + "acorn-walk": "^8.3.5", "antlr4ts": "^0.5.0-alpha.4", "bson": "~7.0.0", "denque": "~2.1.0", @@ -4199,28 +4207,6 @@ "mongodb-explain-compat": "^3.3.23" } }, - 
"node_modules/@mongodb-js/explain-plan-helper/node_modules/@mongodb-js/shell-bson-parser": { - "version": "1.4.0", - "resolved": "https://registry.npmjs.org/@mongodb-js/shell-bson-parser/-/shell-bson-parser-1.4.0.tgz", - "integrity": "sha512-3HO90liE6pmEuUMi7SWR1HooVk23/jfx5iaBZHo250iYyF5uaqssepBGRF7J/14pmgTSwIGrrDd5rQtBYrY7wA==", - "license": "Apache-2.0", - "dependencies": { - "acorn": "^8.14.1" - }, - "peerDependencies": { - "bson": "^4.6.3 || ^5 || ^6" - } - }, - "node_modules/@mongodb-js/explain-plan-helper/node_modules/bson": { - "version": "6.10.4", - "resolved": "https://registry.npmjs.org/bson/-/bson-6.10.4.tgz", - "integrity": "sha512-WIsKqkSC0ABoBJuT1LEX+2HEvNmNKKgnTAyd0fL8qzK4SH2i9NXg+t08YtdZp/V9IZ33cxe3iV4yM0qg8lMQng==", - "license": "Apache-2.0", - "peer": true, - "engines": { - "node": ">=16.20.1" - } - }, "node_modules/@mongodb-js/saslprep": { "version": "1.3.2", "resolved": "https://registry.npmjs.org/@mongodb-js/saslprep/-/saslprep-1.3.2.tgz", @@ -4230,6 +4216,18 @@ "sparse-bitfield": "^3.0.3" } }, + "node_modules/@mongodb-js/shell-bson-parser": { + "version": "1.5.6", + "resolved": "https://registry.npmjs.org/@mongodb-js/shell-bson-parser/-/shell-bson-parser-1.5.6.tgz", + "integrity": "sha512-yzVLeOkRSE+r8scrDMJjL9zTSzypU/TLxF+INQLs3yQX9a2R6IfBDDqdSVFyHVWv1FhZN0lVeqEWTsX+Iz5BaA==", + "license": "Apache-2.0", + "dependencies": { + "acorn": "^8.14.1" + }, + "peerDependencies": { + "bson": "^4.6.3 || ^5 || ^6.10.3 || ^7.0.0" + } + }, "node_modules/@napi-rs/nice": { "version": "1.1.1", "resolved": "https://registry.npmjs.org/@napi-rs/nice/-/nice-1.1.1.tgz", @@ -7284,6 +7282,14 @@ "win32" ] }, + "node_modules/@vscode-documentdb/documentdb-constants": { + "resolved": "packages/documentdb-constants", + "link": true + }, + "node_modules/@vscode-documentdb/schema-analyzer": { + "resolved": "packages/schema-analyzer", + "link": true + }, "node_modules/@vscode/extension-telemetry": { "version": "0.9.9", "resolved": 
"https://registry.npmjs.org/@vscode/extension-telemetry/-/extension-telemetry-0.9.9.tgz", @@ -8049,9 +8055,9 @@ } }, "node_modules/acorn": { - "version": "8.15.0", - "resolved": "https://registry.npmjs.org/acorn/-/acorn-8.15.0.tgz", - "integrity": "sha512-NZyJarBfL7nWwIq+FDL6Zp/yHEhePMNnnJ0y3qfieCrmNvYct8uvtiV41UvlSe6apAfk0fY1FbWx+NwfmpvtTg==", + "version": "8.16.0", + "resolved": "https://registry.npmjs.org/acorn/-/acorn-8.16.0.tgz", + "integrity": "sha512-UVJyE9MttOsBQIDKw1skb9nAwQuR5wuGD3+82K6JgJlm/Y+KI92oNsMNGZCYdDsVtRHSak0pcV5Dno5+4jh9sw==", "license": "MIT", "bin": { "acorn": "bin/acorn" @@ -8084,10 +8090,9 @@ } }, "node_modules/acorn-walk": { - "version": "8.3.4", - "resolved": "https://registry.npmjs.org/acorn-walk/-/acorn-walk-8.3.4.tgz", - "integrity": "sha512-ueEepnujpqee2o5aIYnvHU6C0A42MNdsIDeqy5BydrkuC5R1ZuUFnm27EeFJGoEHJQgn3uleRvmTXaJgfXbt4g==", - "dev": true, + "version": "8.3.5", + "resolved": "https://registry.npmjs.org/acorn-walk/-/acorn-walk-8.3.5.tgz", + "integrity": "sha512-HEHNfbars9v4pgpW6SO1KSPkfoS0xVOM/9UzkJltjlsHZmJasxg8aXkuZa7SMf8vKGIBhpUsPluQSqhJFCqebw==", "license": "MIT", "dependencies": { "acorn": "^8.11.0" @@ -22232,6 +22237,22 @@ "type": "github", "url": "https://github.com/sponsors/wooorm" } + }, + "packages/documentdb-constants": { + "name": "@vscode-documentdb/documentdb-constants", + "version": "1.0.0", + "license": "MIT" + }, + "packages/schema-analyzer": { + "name": "@vscode-documentdb/schema-analyzer", + "version": "1.0.0", + "license": "MIT", + "dependencies": { + "denque": "~2.1.0" + }, + "peerDependencies": { + "mongodb": ">=6.0.0" + } } } } diff --git a/package.json b/package.json index e02878042..2577d0f69 100644 --- a/package.json +++ b/package.json @@ -46,6 +46,9 @@ "type": "git", "url": "https://github.com/microsoft/vscode-documentdb" }, + "workspaces": [ + "packages/*" + ], "main": "./main", "l10n": "./l10n", "activationEvents": [ @@ -55,8 +58,9 @@ "onUri" ], "scripts": { + "prebuild": "npm run build --workspaces 
--if-present", "build": "tsc", - "clean": "git clean -dfx", + "clean": "rimraf out dist coverage && npm run clean --workspaces --if-present", "compile": "tsc -watch", "package": "run-script-os", "package:win32": "npm run webpack-prod && cd dist && npm pkg delete \"scripts.vscode:prepublish\" && npx vsce package --no-dependencies --out ../%npm_package_name%-%npm_package_version%.vsix", @@ -66,10 +70,11 @@ "package-prerelease:default": "npm run webpack-prod && cd dist && npm pkg delete \"scripts.vscode:prepublish\" && npx vsce package --pre-release --no-dependencies --out ../${npm_package_name}-${npm_package_version}-pre-release.vsix", "lint": "eslint --quiet .", "lint-fix": "eslint . --fix", - "prettier": "prettier -c \"(src|test|l10n|grammar|docs)/**/*.@(js|ts|jsx|tsx|json)\" \"./*.@(js|ts|jsx|tsx|json)\"", - "prettier-fix": "prettier -w \"(src|test|l10n|grammar|docs)/**/*.@(js|ts|jsx|tsx|json)\" \"./*.@(js|ts|jsx|tsx|json)\"", + "prettier": "prettier -c \"(src|test|l10n|grammar|docs|packages)/**/*.@(js|ts|jsx|tsx|json)\" \"./*.@(js|ts|jsx|tsx|json)\"", + "prettier-fix": "prettier -w \"(src|test|l10n|grammar|docs|packages)/**/*.@(js|ts|jsx|tsx|json)\" \"./*.@(js|ts|jsx|tsx|json)\"", "pretest": "npm run build", "test": "vscode-test", + "prejesttest": "npm run build --workspaces --if-present", "jesttest": "jest", "update-grammar": "antlr4ts -visitor ./grammar/mongo.g4 -o src/documentdb/grammar", "webpack-dev": "rimraf ./dist && npm run webpack-dev-ext && npm run webpack-dev-wv", @@ -162,9 +167,14 @@ "@microsoft/vscode-azureresources-api": "~2.5.0", "@monaco-editor/react": "~4.7.0", "@mongodb-js/explain-plan-helper": "1.4.24", + "@mongodb-js/shell-bson-parser": "^1.5.6", "@trpc/client": "~11.10.0", "@trpc/server": "~11.10.0", + "@vscode-documentdb/documentdb-constants": "*", + "@vscode-documentdb/schema-analyzer": "*", "@vscode/l10n": "~0.0.18", + "acorn": "^8.16.0", + "acorn-walk": "^8.3.5", "antlr4ts": "^0.5.0-alpha.4", "bson": "~7.0.0", "denque": "~2.1.0", diff 
--git a/packages/documentdb-constants/README.md b/packages/documentdb-constants/README.md new file mode 100644 index 000000000..4835cdcc2 --- /dev/null +++ b/packages/documentdb-constants/README.md @@ -0,0 +1,85 @@ +# @vscode-documentdb/documentdb-constants + +Static operator metadata for all DocumentDB-supported operators, aggregation stages, accumulators, update operators, BSON type constructors, and system variables. + +> **Monorepo package** — this package is part of the `vscode-documentdb` workspace. +> Dev dependencies (Jest, ts-jest, Prettier, ts-node, etc.) are provided by the +> root `package.json`. Always install from the repository root: +> +> ```bash +> cd <repo-root> +> npm install +> ``` + +## Purpose + +This package is the **single source of truth** for operator metadata when the connected database is DocumentDB. It provides: + +- `OperatorEntry` objects with value, description, snippet, documentation link, and type metadata +- Meta-tag based filtering (`getFilteredCompletions()`) for context-aware autocompletion +- Convenience presets for common completion contexts (filter bar, aggregation pipeline, etc.) +- Documentation URL generation (`getDocLink()`) + +## Data Source + +All operator data is derived from the official DocumentDB documentation: + +- **Compatibility reference:** [DocumentDB Query Language Compatibility](https://learn.microsoft.com/en-us/azure/documentdb/compatibility-query-language) — lists every operator with its support status across DocumentDB versions 5.0–8.0. +- **Per-operator docs:** [DocumentDB Operators](https://learn.microsoft.com/en-us/azure/documentdb/operators/) — individual pages with descriptions and syntax for each operator. +- **Source repository:** [MicrosoftDocs/azure-databases-docs](https://github.com/MicrosoftDocs/azure-databases-docs) — the GitHub repo containing the raw Markdown source for all documentation pages above (under `articles/documentdb/`). 
+ +The scraper (`scripts/scrape-operator-docs.ts`) fetches data from these sources and generates the `resources/scraped/operator-reference.md` dump file that serves as the contract between the documentation and the TypeScript implementation. + +## Usage + +```typescript +import { + getFilteredCompletions, + getAllCompletions, + FILTER_COMPLETION_META, + STAGE_COMPLETION_META, +} from '@vscode-documentdb/documentdb-constants'; + +// Get operators for a filter/query context +const filterOps = getFilteredCompletions({ meta: FILTER_COMPLETION_META }); + +// Get operators for a specific BSON type +const stringOps = getFilteredCompletions({ + meta: FILTER_COMPLETION_META, + bsonTypes: ['string'], +}); + +// Get all stage names +const stages = getFilteredCompletions({ meta: STAGE_COMPLETION_META }); +``` + +## Scraper + +The operator data is sourced from the official DocumentDB documentation. To re-scrape: + +```bash +npm run scrape --workspace=@vscode-documentdb/documentdb-constants +``` + +This runs the scraper and then formats the output with Prettier. The scraper: + +1. **Verifies** upstream doc structure (early fail-fast) +2. **Extracts** all operators from the [compatibility page](https://learn.microsoft.com/en-us/azure/documentdb/compatibility-query-language) +3. **Fetches** per-operator documentation (descriptions, syntax) with a global file index fallback for operators filed in unexpected directories +4. **Generates** `resources/scraped/operator-reference.md` in a structured heading format (`### $operator` with description, syntax, and doc link) + +The dump serves as the authoritative reference for the TypeScript implementation. A Jest test (`src/operatorReference.test.ts`) validates that the implementation matches the dump. 
+ +## Structure + +| File | Purpose | +| ------------------------------------------- | -------------------------------------------- | +| `src/types.ts` | `OperatorEntry` interface and `MetaTag` type | +| `src/metaTags.ts` | Meta tag constants and completion presets | +| `src/docLinks.ts` | Documentation URL generation | +| `src/getFilteredCompletions.ts` | Primary consumer API: filter by meta tags | +| `src/index.ts` | Barrel exports for all public API | +| `resources/scraped/operator-reference.md` | Auto-generated scraped operator dump | +| `resources/overrides/operator-overrides.md` | Hand-maintained overrides | +| `resources/overrides/operator-snippets.md` | Snippet templates per category | +| `scripts/scrape-operator-docs.ts` | Scraper script | diff --git a/packages/documentdb-constants/jest.config.js b/packages/documentdb-constants/jest.config.js new file mode 100644 index 000000000..a39810b1f --- /dev/null +++ b/packages/documentdb-constants/jest.config.js @@ -0,0 +1,11 @@ +/** @type {import('ts-jest').JestConfigWithTsJest} **/ +module.exports = { + // Limit workers to avoid OOM kills on machines with many cores. + // Each ts-jest worker loads the TypeScript compiler and consumes ~500MB+. + maxWorkers: '50%', + testEnvironment: 'node', + testMatch: ['<rootDir>/src/**/*.test.ts'], + transform: { + '^.+\\.tsx?$': ['ts-jest', {}], + }, +}; diff --git a/packages/documentdb-constants/package.json b/packages/documentdb-constants/package.json new file mode 100644 index 000000000..99f3fb017 --- /dev/null +++ b/packages/documentdb-constants/package.json @@ -0,0 +1,25 @@ +{ + "name": "@vscode-documentdb/documentdb-constants", + "version": "1.0.0", + "description": "Static operator metadata for DocumentDB-supported operators, stages, accumulators, and BSON constructors", + "main": "dist/index.js", + "types": "dist/index.d.ts", + "files": [ + "dist" + ], + "scripts": { + "build": "tsc -p . 
&& tsc -p tsconfig.scripts.json --noEmit", + "clean": "rimraf dist tsconfig.tsbuildinfo", + "test": "jest --config jest.config.js", + "prettier-fix": "prettier -w \"(scripts|src)/**/*.@(js|ts|jsx|tsx|json|md)\" \"./*.@(js|ts|jsx|tsx|json|md)\"", + "scrape": "ts-node scripts/scrape-operator-docs.ts && prettier --write resources/scraped/operator-reference.md", + "generate": "ts-node scripts/generate-from-reference.ts", + "evaluate": "ts-node scripts/evaluate-overrides.ts" + }, + "repository": { + "type": "git", + "url": "https://github.com/microsoft/vscode-documentdb", + "directory": "packages/documentdb-constants" + }, + "license": "MIT" +} diff --git a/packages/documentdb-constants/resources/overrides/operator-overrides.md b/packages/documentdb-constants/resources/overrides/operator-overrides.md new file mode 100644 index 000000000..2fe988737 --- /dev/null +++ b/packages/documentdb-constants/resources/overrides/operator-overrides.md @@ -0,0 +1,321 @@ +# DocumentDB Operator Reference — Overrides + + + + + + + + + + + + + + + + + + + + + + + +--- + +## String Expression Operators + +### $concat + +- **Description:** Concatenates two or more strings and returns the resulting string. + +### $indexOfBytes + +- **Description:** Returns the byte index of the first occurrence of a substring within a string. + +### $indexOfCP + +- **Description:** Returns the code point index of the first occurrence of a substring within a string. + +### $ltrim + +- **Description:** Removes whitespace or specified characters from the beginning of a string. + +### $regexFind + +- **Description:** Applies a regular expression to a string and returns the first match. + +### $regexFindAll + +- **Description:** Applies a regular expression to a string and returns all matches as an array. + +### $regexMatch + +- **Description:** Applies a regular expression to a string and returns a boolean indicating if a match was found. 
+ +### $replaceOne + +- **Description:** Replaces the first occurrence of a search string with a replacement string. + +### $replaceAll + +- **Description:** Replaces all occurrences of a search string with a replacement string. + +### $rtrim + +- **Description:** Removes whitespace or specified characters from the end of a string. + +### $split + +- **Description:** Splits a string by a delimiter and returns an array of substrings. + +### $strLenBytes + +- **Description:** Returns the number of UTF-8 encoded bytes in the specified string. + +### $strLenCP + +- **Description:** Returns the number of UTF-8 code points in the specified string. + +### $strcasecmp + +- **Description:** Performs a case-insensitive comparison of two strings and returns an integer. + +### $substr + +- **Description:** Returns a substring of a string, starting at a specified index for a specified length. Deprecated — use $substrBytes or $substrCP. + +### $substrBytes + +- **Description:** Returns a substring of a string by byte index, starting at a specified index for a specified number of bytes. + +### $substrCP + +- **Description:** Returns a substring of a string by code point index, starting at a specified index for a specified number of code points. + +### $toLower + +- **Description:** Converts a string to lowercase and returns the result. + +### $toUpper + +- **Description:** Converts a string to uppercase and returns the result. + +### $trim + +- **Description:** Removes whitespace or specified characters from both ends of a string. + +## Trigonometry Expression Operators + +### $sin + +- **Description:** Returns the sine of a value measured in radians. + +### $cos + +- **Description:** Returns the cosine of a value measured in radians. + +### $tan + +- **Description:** Returns the tangent of a value measured in radians. + +### $asin + +- **Description:** Returns the arcsine (inverse sine) of a value in radians. 
+ +### $acos + +- **Description:** Returns the arccosine (inverse cosine) of a value in radians. + +### $atan + +- **Description:** Returns the arctangent (inverse tangent) of a value in radians. + +### $atan2 + +- **Description:** Returns the arctangent of the quotient of two values, using the signs to determine the quadrant. + +### $asinh + +- **Description:** Returns the inverse hyperbolic sine of a value. + +### $acosh + +- **Description:** Returns the inverse hyperbolic cosine of a value. + +### $atanh + +- **Description:** Returns the inverse hyperbolic tangent of a value. + +### $sinh + +- **Description:** Returns the hyperbolic sine of a value. + +### $cosh + +- **Description:** Returns the hyperbolic cosine of a value. + +### $tanh + +- **Description:** Returns the hyperbolic tangent of a value. + +### $degreesToRadians + +- **Description:** Converts a value from degrees to radians. + +### $radiansToDegrees + +- **Description:** Converts a value from radians to degrees. + +## Aggregation Pipeline Stages + +### $bucketAuto + +- **Description:** Categorizes documents into a specified number of groups based on a given expression, automatically determining bucket boundaries. + +### $graphLookup + +- **Description:** Performs a recursive search on a collection to return documents connected by a specified field relationship. + +### $limit + +- **Description:** Restricts the number of documents passed to the next stage in the pipeline. + +### $project + +- **Description:** Reshapes documents by including, excluding, or computing new fields. + +### $replaceRoot + +- **Description:** Replaces the input document with a specified embedded document, promoting it to the top level. + +### $search + +- **Description:** Performs full-text search on string fields using Atlas Search or compatible search indexes. + +### $searchMeta + +- **Description:** Returns metadata about an Atlas Search query without returning the matching documents. 
+ +### $setWindowFields + +- **Description:** Adds computed fields to documents using window functions over a specified partition and sort order. + +### $unionWith + +- **Description:** Combines the results of two collections into a single result set, similar to SQL UNION ALL. + +### $currentOp + +- **Description:** Returns information on active and queued operations for the database instance. + +## Array Update Operators + +### $[] + +- **Description:** Positional all operator. Acts as a placeholder to update all elements in an array field. + +### $[identifier] + +- **Description:** Filtered positional operator. Acts as a placeholder to update elements that match an arrayFilters condition. + +### $position + +- **Description:** Specifies the position in the array at which the $push operator inserts elements. Used with $each. + +## Array Expression Operators + +### $objectToArray + +- **Description:** Converts an object into an array of key-value pair documents. + +## Variables in Aggregation Expressions + +### $$NOW + +- **Description:** Returns the current datetime as a Date object. Constant throughout a single aggregation pipeline. + +### $$ROOT + +- **Description:** References the root document — the top-level document currently being processed in the pipeline stage. + +### $$REMOVE + +- **Description:** Removes a field from the output document. Used with $project or $addFields to conditionally exclude fields. + +### $$CURRENT + +- **Description:** References the current document in the pipeline stage. Equivalent to $$ROOT at the start of the pipeline. + +### $$DESCEND + +- **Description:** Used with $redact. Returns the document fields at the current level and continues descending into subdocuments. + +### $$PRUNE + +- **Description:** Used with $redact. Excludes all fields at the current document level and stops descending into subdocuments. + +### $$KEEP + +- **Description:** Used with $redact. 
Keeps all fields at the current document level without further descending into subdocuments. + +## Array Expression Operators + +### $minN + +- **Doc Link:** https://learn.microsoft.com/en-us/azure/documentdb/operators/accumulators/$minn + +## Comparison Expression Operators + +### $cmp + +- **Doc Link:** https://learn.microsoft.com/en-us/azure/documentdb/operators/comparison-query/$cmp + +## Window Operators + +### $minN + +- **Doc Link:** https://learn.microsoft.com/en-us/azure/documentdb/operators/accumulators/$minn + +## Geospatial Operators + +### $box + +- **Standalone:** false + +### $center + +- **Standalone:** false + +### $centerSphere + +- **Standalone:** false + +### $geometry + +- **Standalone:** false + +### $maxDistance + +- **Standalone:** false + +### $minDistance + +- **Standalone:** false + +### $polygon + +- **Standalone:** false + +## Projection Operators + +### $ + +- **Standalone:** false + +## Miscellaneous Query Operators + +### $natural + +- **Standalone:** false diff --git a/packages/documentdb-constants/resources/overrides/operator-snippets.md b/packages/documentdb-constants/resources/overrides/operator-snippets.md new file mode 100644 index 000000000..9b3adf63e --- /dev/null +++ b/packages/documentdb-constants/resources/overrides/operator-snippets.md @@ -0,0 +1,810 @@ +# Operator Snippets + + + +## Aggregation Pipeline Stages + +### DEFAULT + +- **Snippet:** `{ {{VALUE}}: { ${1} } }` + +### $match + +- **Snippet:** `{ $match: { ${1:query} } }` + +### $group + +- **Snippet:** `{ $group: { _id: "${1:\$field}", ${2:accumulator}: { ${3:\$sum}: 1 } } }` + +### $project + +- **Snippet:** `{ $project: { ${1:field}: 1 } }` + +### $sort + +- **Snippet:** `{ $sort: { ${1:field}: ${2:1} } }` + +### $limit + +- **Snippet:** `{ $limit: ${1:number} }` + +### $skip + +- **Snippet:** `{ $skip: ${1:number} }` + +### $unwind + +- **Snippet:** `{ $unwind: "${1:\$arrayField}" }` + +### $lookup + +- **Snippet:** `{ $lookup: { from: "${1:collection}", 
localField: "${2:field}", foreignField: "${3:field}", as: "${4:result}" } }` + +### $addFields + +- **Snippet:** `{ $addFields: { ${1:newField}: ${2:expression} } }` + +### $set + +- **Snippet:** `{ $set: { ${1:field}: ${2:expression} } }` + +### $unset + +- **Snippet:** `{ $unset: "${1:field}" }` + +### $replaceRoot + +- **Snippet:** `{ $replaceRoot: { newRoot: "${1:\$field}" } }` + +### $replaceWith + +- **Snippet:** `{ $replaceWith: "${1:\$field}" }` + +### $count + +- **Snippet:** `{ $count: "${1:countField}" }` + +### $out + +- **Snippet:** `{ $out: "${1:collection}" }` + +### $merge + +- **Snippet:** `{ $merge: { into: "${1:collection}" } }` + +### $bucket + +- **Snippet:** `{ $bucket: { groupBy: "${1:\$field}", boundaries: [${2:values}], default: "${3:Other}" } }` + +### $bucketAuto + +- **Snippet:** `{ $bucketAuto: { groupBy: "${1:\$field}", buckets: ${2:number} } }` + +### $facet + +- **Snippet:** `{ $facet: { ${1:outputField}: [{ ${2:stage} }] } }` + +### $graphLookup + +- **Snippet:** `{ $graphLookup: { from: "${1:collection}", startWith: "${2:\$field}", connectFromField: "${3:field}", connectToField: "${4:field}", as: "${5:result}" } }` + +### $sample + +- **Snippet:** `{ $sample: { size: ${1:number} } }` + +### $sortByCount + +- **Snippet:** `{ $sortByCount: "${1:\$field}" }` + +### $redact + +- **Snippet:** `{ $redact: { \$cond: { if: { ${1:expression} }, then: "${2:\$\$DESCEND}", else: "${3:\$\$PRUNE}" } } }` + +### $unionWith + +- **Snippet:** `{ $unionWith: { coll: "${1:collection}", pipeline: [${2}] } }` + +### $setWindowFields + +- **Snippet:** `{ $setWindowFields: { partitionBy: "${1:\$field}", sortBy: { ${2:field}: ${3:1} }, output: { ${4:newField}: { ${5:windowFunc} } } } }` + +### $densify + +- **Snippet:** `{ $densify: { field: "${1:field}", range: { step: ${2:1}, bounds: "full" } } }` + +### $fill + +- **Snippet:** `{ $fill: { output: { ${1:field}: { method: "${2:linear}" } } } }` + +### $documents + +- **Snippet:** `{ $documents: 
[${1:documents}] }` + +### $changeStream + +- **Snippet:** `{ $changeStream: {} }` + +### $collStats + +- **Snippet:** `{ $collStats: { storageStats: {} } }` + +### $currentOp + +- **Snippet:** `{ $currentOp: { allUsers: true } }` + +### $indexStats + +- **Snippet:** `{ $indexStats: {} }` + +### $listLocalSessions + +- **Snippet:** `{ $listLocalSessions: { allUsers: true } }` + +### $geoNear + +- **Snippet:** `{ $geoNear: { near: { type: "Point", coordinates: [${1:lng}, ${2:lat}] }, distanceField: "${3:distance}" } }` + +## Comparison Query Operators + +### DEFAULT + +- **Snippet:** `{ {{VALUE}}: ${1:value} }` + +### $in + +- **Snippet:** `{ $in: [${1:value}] }` + +### $nin + +- **Snippet:** `{ $nin: [${1:value}] }` + +## Logical Query Operators + +### DEFAULT + +- **Snippet:** `{ {{VALUE}}: [{ ${1:expression} }] }` + +### $not + +- **Snippet:** `{ $not: { ${1:expression} } }` + +## Element Query Operators + +### $exists + +- **Snippet:** `{ $exists: ${1:true} }` + +### $type + +- **Snippet:** `{ $type: "${1:type}" }` + +## Evaluation Query Operators + +### $expr + +- **Snippet:** `{ $expr: { ${1:expression} } }` + +### $regex + +- **Snippet:** `{ $regex: /${1:pattern}/ }` + +### $mod + +- **Snippet:** `{ $mod: [${1:divisor}, ${2:remainder}] }` + +### $text + +- **Snippet:** `{ $text: { \$search: "${1:text}" } }` + +### $jsonSchema + +- **Snippet:** `{ $jsonSchema: { bsonType: "${1:object}" } }` + +## Array Query Operators + +### $all + +- **Snippet:** `{ $all: [${1:value}] }` + +### $elemMatch + +- **Snippet:** `{ $elemMatch: { ${1:query} } }` + +### $size + +- **Snippet:** `{ $size: ${1:number} }` + +## Bitwise Query Operators + +### DEFAULT + +- **Snippet:** `{ {{VALUE}}: ${1:bitmask} }` + +## Geospatial Operators + +### $near + +- **Snippet:** `{ $near: { \$geometry: { type: "Point", coordinates: [${1:lng}, ${2:lat}] }, \$maxDistance: ${3:distance} } }` + +### $nearSphere + +- **Snippet:** `{ $nearSphere: { \$geometry: { type: "Point", coordinates: [${1:lng}, 
${2:lat}] }, \$maxDistance: ${3:distance} } }` + +### $geoIntersects + +- **Snippet:** `{ $geoIntersects: { \$geometry: { type: "${1:GeoJSON type}", coordinates: ${2:coordinates} } } }` + +### $geoWithin + +- **Snippet:** `{ $geoWithin: { \$geometry: { type: "${1:GeoJSON type}", coordinates: ${2:coordinates} } } }` + +### $box + +- **Snippet:** `[[${1:bottomLeftX}, ${2:bottomLeftY}], [${3:upperRightX}, ${4:upperRightY}]]` + +### $center + +- **Snippet:** `[[${1:x}, ${2:y}], ${3:radius}]` + +### $centerSphere + +- **Snippet:** `[[${1:x}, ${2:y}], ${3:radiusInRadians}]` + +### $geometry + +- **Snippet:** `{ type: "${1:Point}", coordinates: [${2:coordinates}] }` + +### $maxDistance + +- **Snippet:** `${1:distance}` + +### $minDistance + +- **Snippet:** `${1:distance}` + +### $polygon + +- **Snippet:** `[[${1:x1}, ${2:y1}], [${3:x2}, ${4:y2}], [${5:x3}, ${6:y3}]]` + +## Projection Operators + +### $elemMatch + +- **Snippet:** `{ $elemMatch: { ${1:query} } }` + +### $slice + +- **Snippet:** `{ $slice: ${1:number} }` + +## Miscellaneous Query Operators + +### $comment + +- **Snippet:** `{ $comment: "${1:comment}" }` + +### $rand + +- **Snippet:** `{ $rand: {} }` + +### $natural + +- **Snippet:** `{ $natural: ${1:1} }` + +## Field Update Operators + +### DEFAULT + +- **Snippet:** `{ {{VALUE}}: { "${1:field}": ${2:value} } }` + +### $rename + +- **Snippet:** `{ $rename: { "${1:oldField}": "${2:newField}" } }` + +### $currentDate + +- **Snippet:** `{ $currentDate: { "${1:field}": true } }` + +## Array Update Operators + +### $addToSet + +- **Snippet:** `{ $addToSet: { "${1:field}": ${2:value} } }` + +### $pop + +- **Snippet:** `{ $pop: { "${1:field}": ${2:1} } }` + +### $pull + +- **Snippet:** `{ $pull: { "${1:field}": ${2:condition} } }` + +### $push + +- **Snippet:** `{ $push: { "${1:field}": ${2:value} } }` + +### $pullAll + +- **Snippet:** `{ $pullAll: { "${1:field}": [${2:values}] } }` + +### $each + +- **Snippet:** `{ $each: [${1:values}] }` + +### $position + +- 
**Snippet:** `{ $position: ${1:index} }` + +### $slice + +- **Snippet:** `{ $slice: ${1:number} }` + +### $sort + +- **Snippet:** `{ $sort: { "${1:field}": ${2:1} } }` + +## Bitwise Update Operators + +### DEFAULT + +- **Snippet:** `{ {{VALUE}}: { "${1:field}": { "${2:and|or|xor}": ${3:value} } } }` + +## Accumulators + +### DEFAULT + +- **Snippet:** `{ {{VALUE}}: "${1:\$field}" }` + +### $count + +- **Snippet:** `{ $count: {} }` + +### $bottom + +- **Snippet:** `{ $bottom: { sortBy: { ${1:field}: ${2:1} }, output: "${3:\$field}" } }` + +### $top + +- **Snippet:** `{ $top: { sortBy: { ${1:field}: ${2:1} }, output: "${3:\$field}" } }` + +### $bottomN + +- **Snippet:** `{ $bottomN: { n: ${1:number}, sortBy: { ${2:field}: ${3:1} }, output: "${4:\$field}" } }` + +### $topN + +- **Snippet:** `{ $topN: { n: ${1:number}, sortBy: { ${2:field}: ${3:1} }, output: "${4:\$field}" } }` + +### $firstN + +- **Snippet:** `{ $firstN: { input: "${1:\$field}", n: ${2:number} } }` + +### $lastN + +- **Snippet:** `{ $lastN: { input: "${1:\$field}", n: ${2:number} } }` + +### $maxN + +- **Snippet:** `{ $maxN: { input: "${1:\$field}", n: ${2:number} } }` + +### $minN + +- **Snippet:** `{ $minN: { input: "${1:\$field}", n: ${2:number} } }` + +### $percentile + +- **Snippet:** `{ $percentile: { input: "${1:\$field}", p: [${2:0.5}], method: "approximate" } }` + +### $median + +- **Snippet:** `{ $median: { input: "${1:\$field}", method: "approximate" } }` + +## Window Operators + +### DEFAULT + +- **Snippet:** `{ {{VALUE}}: "${1:\$field}" }` + +### $shift + +- **Snippet:** `{ $shift: { output: "${1:\$field}", by: ${2:1}, default: ${3:null} } }` + +### $rank + +- **Snippet:** `{ $rank: {} }` + +### $denseRank + +- **Snippet:** `{ $denseRank: {} }` + +### $documentNumber + +- **Snippet:** `{ $documentNumber: {} }` + +### $expMovingAvg + +- **Snippet:** `{ $expMovingAvg: { input: "${1:\$field}", N: ${2:number} } }` + +### $derivative + +- **Snippet:** `{ $derivative: { input: "${1:\$field}", 
unit: "${2:hour}" } }` + +### $integral + +- **Snippet:** `{ $integral: { input: "${1:\$field}", unit: "${2:hour}" } }` + +## Arithmetic Expression Operators + +### DEFAULT + +- **Snippet:** `{ {{VALUE}}: "${1:\$field}" }` + +### $add + +- **Snippet:** `{ $add: ["${1:\$field1}", "${2:\$field2}"] }` + +### $subtract + +- **Snippet:** `{ $subtract: ["${1:\$field1}", "${2:\$field2}"] }` + +### $multiply + +- **Snippet:** `{ $multiply: ["${1:\$field1}", "${2:\$field2}"] }` + +### $divide + +- **Snippet:** `{ $divide: ["${1:\$field1}", "${2:\$field2}"] }` + +### $mod + +- **Snippet:** `{ $mod: ["${1:\$field1}", "${2:\$field2}"] }` + +### $pow + +- **Snippet:** `{ $pow: ["${1:\$field1}", "${2:\$field2}"] }` + +### $log + +- **Snippet:** `{ $log: ["${1:\$number}", ${2:base}] }` + +### $round + +- **Snippet:** `{ $round: ["${1:\$field}", ${2:place}] }` + +## Array Expression Operators + +### DEFAULT + +- **Snippet:** `{ {{VALUE}}: "${1:\$array}" }` + +### $arrayElemAt + +- **Snippet:** `{ $arrayElemAt: ["${1:\$array}", ${2:index}] }` + +### $concatArrays + +- **Snippet:** `{ $concatArrays: ["${1:\$array1}", "${2:\$array2}"] }` + +### $filter + +- **Snippet:** `{ $filter: { input: "${1:\$array}", as: "${2:item}", cond: { ${3:expression} } } }` + +### $in + +- **Snippet:** `{ $in: ["${1:\$field}", "${2:\$array}"] }` + +### $indexOfArray + +- **Snippet:** `{ $indexOfArray: ["${1:\$array}", "${2:value}"] }` + +### $isArray + +- **Snippet:** `{ $isArray: "${1:\$field}" }` + +### $map + +- **Snippet:** `{ $map: { input: "${1:\$array}", as: "${2:item}", in: { ${3:expression} } } }` + +### $objectToArray + +- **Snippet:** `{ $objectToArray: "${1:\$object}" }` + +### $range + +- **Snippet:** `{ $range: [${1:start}, ${2:end}, ${3:step}] }` + +### $reduce + +- **Snippet:** `{ $reduce: { input: "${1:\$array}", initialValue: ${2:0}, in: { ${3:expression} } } }` + +### $slice + +- **Snippet:** `{ $slice: ["${1:\$array}", ${2:n}] }` + +### $sortArray + +- **Snippet:** `{ $sortArray: { 
input: "${1:\$array}", sortBy: { ${2:field}: ${3:1} } } }` + +### $zip + +- **Snippet:** `{ $zip: { inputs: ["${1:\$array1}", "${2:\$array2}"] } }` + +### $maxN + +- **Snippet:** `{ $maxN: { input: "${1:\$array}", n: ${2:number} } }` + +### $minN + +- **Snippet:** `{ $minN: { input: "${1:\$array}", n: ${2:number} } }` + +### $firstN + +- **Snippet:** `{ $firstN: { input: "${1:\$array}", n: ${2:number} } }` + +### $lastN + +- **Snippet:** `{ $lastN: { input: "${1:\$array}", n: ${2:number} } }` + +## Boolean Expression Operators + +### DEFAULT + +- **Snippet:** `{ {{VALUE}}: ["${1:expression1}", "${2:expression2}"] }` + +### $not + +- **Snippet:** `{ $not: ["${1:expression}"] }` + +## Comparison Expression Operators + +### DEFAULT + +- **Snippet:** `{ {{VALUE}}: ["${1:\$field1}", "${2:\$field2}"] }` + +## Conditional Expression Operators + +### $cond + +- **Snippet:** `{ $cond: { if: { ${1:expression} }, then: ${2:trueValue}, else: ${3:falseValue} } }` + +### $ifNull + +- **Snippet:** `{ $ifNull: ["${1:\$field}", ${2:replacement}] }` + +### $switch + +- **Snippet:** `{ $switch: { branches: [{ case: { ${1:expression} }, then: ${2:value} }], default: ${3:defaultValue} } }` + +## Date Expression Operators + +### DEFAULT + +- **Snippet:** `{ {{VALUE}}: "${1:\$dateField}" }` + +### $dateAdd + +- **Snippet:** `{ $dateAdd: { startDate: "${1:\$dateField}", unit: "${2:day}", amount: ${3:1} } }` + +### $dateSubtract + +- **Snippet:** `{ $dateSubtract: { startDate: "${1:\$dateField}", unit: "${2:day}", amount: ${3:1} } }` + +### $dateDiff + +- **Snippet:** `{ $dateDiff: { startDate: "${1:\$startDate}", endDate: "${2:\$endDate}", unit: "${3:day}" } }` + +### $dateFromParts + +- **Snippet:** `{ $dateFromParts: { year: ${1:2024}, month: ${2:1}, day: ${3:1} } }` + +### $dateToParts + +- **Snippet:** `{ $dateToParts: { date: "${1:\$dateField}" } }` + +### $dateFromString + +- **Snippet:** `{ $dateFromString: { dateString: "${1:dateString}" } }` + +### $dateToString + +- **Snippet:** 
`{ $dateToString: { format: "${1:%Y-%m-%d}", date: "${2:\$dateField}" } }` + +### $dateTrunc + +- **Snippet:** `{ $dateTrunc: { date: "${1:\$dateField}", unit: "${2:day}" } }` + +### $toDate + +- **Snippet:** `{ $toDate: "${1:\$field}" }` + +## Object Expression Operators + +### DEFAULT + +- **Snippet:** `{ {{VALUE}}: "${1:\$object}" }` + +### $mergeObjects + +- **Snippet:** `{ $mergeObjects: ["${1:\$object1}", "${2:\$object2}"] }` + +### $setField + +- **Snippet:** `{ $setField: { field: "${1:fieldName}", input: "${2:\$object}", value: ${3:value} } }` + +## Set Expression Operators + +### DEFAULT + +- **Snippet:** `{ {{VALUE}}: ["${1:\$set1}", "${2:\$set2}"] }` + +### $anyElementTrue + +- **Snippet:** `{ $anyElementTrue: ["${1:\$array}"] }` + +### $allElementsTrue + +- **Snippet:** `{ $allElementsTrue: ["${1:\$array}"] }` + +## String Expression Operators + +### DEFAULT + +- **Snippet:** `{ {{VALUE}}: "${1:\$string}" }` + +### $concat + +- **Snippet:** `{ $concat: ["${1:\$string1}", "${2:\$string2}"] }` + +### $indexOfBytes + +- **Snippet:** `{ $indexOfBytes: ["${1:\$string}", "${2:substring}"] }` + +### $indexOfCP + +- **Snippet:** `{ $indexOfCP: ["${1:\$string}", "${2:substring}"] }` + +### $regexFind + +- **Snippet:** `{ $regexFind: { input: "${1:\$string}", regex: "${2:pattern}" } }` + +### $regexFindAll + +- **Snippet:** `{ $regexFindAll: { input: "${1:\$string}", regex: "${2:pattern}" } }` + +### $regexMatch + +- **Snippet:** `{ $regexMatch: { input: "${1:\$string}", regex: "${2:pattern}" } }` + +### $replaceOne + +- **Snippet:** `{ $replaceOne: { input: "${1:\$string}", find: "${2:find}", replacement: "${3:replacement}" } }` + +### $replaceAll + +- **Snippet:** `{ $replaceAll: { input: "${1:\$string}", find: "${2:find}", replacement: "${3:replacement}" } }` + +### $split + +- **Snippet:** `{ $split: ["${1:\$string}", "${2:delimiter}"] }` + +### $substr + +- **Snippet:** `{ $substr: ["${1:\$string}", ${2:start}, ${3:length}] }` + +### $substrBytes + +- 
**Snippet:** `{ $substrBytes: ["${1:\$string}", ${2:start}, ${3:length}] }` + +### $substrCP + +- **Snippet:** `{ $substrCP: ["${1:\$string}", ${2:start}, ${3:length}] }` + +### $strcasecmp + +- **Snippet:** `{ $strcasecmp: ["${1:\$string1}", "${2:\$string2}"] }` + +### $trim + +- **Snippet:** `{ $trim: { input: "${1:\$string}" } }` + +### $ltrim + +- **Snippet:** `{ $ltrim: { input: "${1:\$string}" } }` + +### $rtrim + +- **Snippet:** `{ $rtrim: { input: "${1:\$string}" } }` + +## Trigonometry Expression Operators + +### DEFAULT + +- **Snippet:** `{ {{VALUE}}: "${1:\$value}" }` + +### $degreesToRadians + +- **Snippet:** `{ $degreesToRadians: "${1:\$angle}" }` + +### $radiansToDegrees + +- **Snippet:** `{ $radiansToDegrees: "${1:\$angle}" }` + +## Type Expression Operators + +### DEFAULT + +- **Snippet:** `{ {{VALUE}}: "${1:\$field}" }` + +### $convert + +- **Snippet:** `{ $convert: { input: "${1:\$field}", to: "${2:type}" } }` + +## Data Size Operators + +### DEFAULT + +- **Snippet:** `{ {{VALUE}}: "${1:\$field}" }` + +## Literal Expression Operator + +### DEFAULT + +- **Snippet:** `{ {{VALUE}}: ${1:value} }` + +## Miscellaneous Operators + +### DEFAULT + +- **Snippet:** `{ {{VALUE}}: ${1:value} }` + +### $getField + +- **Snippet:** `{ $getField: { field: "${1:fieldName}", input: "${2:\$object}" } }` + +### $rand + +- **Snippet:** `{ $rand: {} }` + +### $sampleRate + +- **Snippet:** `{ $sampleRate: ${1:0.5} }` + +## Bitwise Operators + +### DEFAULT + +- **Snippet:** `{ {{VALUE}}: [${1:value1}, ${2:value2}] }` + +### $bitNot + +- **Snippet:** `{ $bitNot: "${1:\$field}" }` + +## Timestamp Expression Operators + +### DEFAULT + +- **Snippet:** `{ {{VALUE}}: "${1:\$timestampField}" }` + +## Variable Expression Operators + +### $let + +- **Snippet:** `{ $let: { vars: { ${1:var}: ${2:expression} }, in: ${3:expression} } }` diff --git a/packages/documentdb-constants/resources/scraped/operator-reference.md 
b/packages/documentdb-constants/resources/scraped/operator-reference.md new file mode 100644 index 000000000..6ba385cfb --- /dev/null +++ b/packages/documentdb-constants/resources/scraped/operator-reference.md @@ -0,0 +1,4113 @@ +# DocumentDB Operator Reference + + + + + +## Summary + +| Category | Listed | Total | +| ------------------------------------------------------------- | ------- | ------- | +| Comparison Query Operators | 8 | 8 | +| Logical Query Operators | 4 | 4 | +| Element Query Operators | 2 | 2 | +| Evaluation Query Operators | 5 | 6 | +| Geospatial Operators | 11 | 11 | +| Array Query Operators | 3 | 3 | +| Bitwise Query Operators | 4 | 4 | +| Projection Operators | 3 | 4 | +| Miscellaneous Query Operators | 3 | 3 | +| Field Update Operators | 9 | 9 | +| Array Update Operators | 12 | 12 | +| Bitwise Update Operators | 1 | 1 | +| Arithmetic Expression Operators | 16 | 16 | +| Array Expression Operators | 20 | 20 | +| Bitwise Operators | 4 | 4 | +| Boolean Expression Operators | 3 | 3 | +| Comparison Expression Operators | 7 | 7 | +| Custom Aggregation Expression Operators | 0 | 2 | +| Data Size Operators | 2 | 2 | +| Date Expression Operators | 22 | 22 | +| Literal Expression Operator | 1 | 1 | +| Miscellaneous Operators | 3 | 3 | +| Object Expression Operators | 3 | 3 | +| Set Expression Operators | 7 | 7 | +| String Expression Operators | 23 | 23 | +| Text Expression Operator | 0 | 1 | +| Timestamp Expression Operators | 2 | 2 | +| Trigonometry Expression Operators | 15 | 15 | +| Type Expression Operators | 11 | 11 | +| Accumulators ($group, $bucket, $bucketAuto, $setWindowFields) | 21 | 22 | +| Accumulators (in Other Stages) | 10 | 10 | +| Variable Expression Operators | 1 | 1 | +| Window Operators | 27 | 27 | +| Conditional Expression Operators | 3 | 3 | +| Aggregation Pipeline Stages | 35 | 42 | +| Variables in Aggregation Expressions | 7 | 10 | +| **Total** | **308** | **324** | + +## Comparison Query Operators + +### $eq + +- **Description:** 
The $eq query operator compares the value of a field to a specified value +- **Syntax:** + +```javascript +{ + field: { + $eq: + } +} +``` + +- **Doc Link:** https://learn.microsoft.com/en-us/azure/documentdb/operators/comparison-query/$eq + +### $gt + +- **Description:** The $gt query operator retrieves documents where the value of a field is greater than a specified value +- **Syntax:** + +```javascript +{ + field: { + $gt: value; + } +} +``` + +- **Doc Link:** https://learn.microsoft.com/en-us/azure/documentdb/operators/comparison-query/$gt + +### $gte + +- **Description:** The $gte operator retrieves documents where the value of a field is greater than or equal to a specified value +- **Syntax:** + +```javascript +{ + field: { + $gte: + } +} +``` + +- **Doc Link:** https://learn.microsoft.com/en-us/azure/documentdb/operators/comparison-query/$gte + +### $in + +- **Description:** The $in operator matches the value of a field against an array of specified values +- **Syntax:** + +```javascript +{ + field: { + $in: [listOfValues]; + } +} +``` + +- **Doc Link:** https://learn.microsoft.com/en-us/azure/documentdb/operators/comparison-query/$in + +### $lt + +- **Description:** The $lt operator retrieves documents where the value of a field is less than a specified value +- **Syntax:** + +```javascript +{ + field: { + $lt: value; + } +} +``` + +- **Doc Link:** https://learn.microsoft.com/en-us/azure/documentdb/operators/comparison-query/$lt + +### $lte + +- **Description:** The $lte operator retrieves documents where the value of a field is less than or equal to a specified value +- **Syntax:** + +```javascript +{ + field: { + $lte: + } +} +``` + +- **Doc Link:** https://learn.microsoft.com/en-us/azure/documentdb/operators/comparison-query/$lte + +### $ne + +- **Description:** The $ne operator retrieves documents where the value of a field doesn't equal a specified value +- **Syntax:** + +```javascript +{ + field: { + $ne: value; + } +} +``` + +- **Doc Link:** 
https://learn.microsoft.com/en-us/azure/documentdb/operators/comparison-query/$ne + +### $nin + +- **Description:** The $nin operator retrieves documents where the value of a field doesn't match a list of values +- **Syntax:** + +```javascript +{ + field: { + $nin: [ < listOfValues > ] + } +} +``` + +- **Doc Link:** https://learn.microsoft.com/en-us/azure/documentdb/operators/comparison-query/$nin + +## Logical Query Operators + +### $and + +- **Description:** The $and operator joins multiple query clauses and returns documents that match all specified conditions. +- **Syntax:** + +```javascript +{ + $and: [{ + < expression1 > + }, { + < expression2 > + }, ..., { + < expressionN > + }] +} +``` + +- **Doc Link:** https://learn.microsoft.com/en-us/azure/documentdb/operators/logical-query/$and + +### $not + +- **Description:** The $not operator performs a logical NOT operation on a specified expression, selecting documents that don't match the expression. +- **Syntax:** + +```javascript +{ + field: { + $not: { + < operator - expression > + } + } +} +``` + +- **Doc Link:** https://learn.microsoft.com/en-us/azure/documentdb/operators/logical-query/$not + +### $nor + +- **Description:** The $nor operator performs a logical NOR on an array of expressions and retrieves documents that fail all the conditions. +- **Syntax:** + +```javascript +{ + $nor: [{ + < expression1 > + }, { + < expression2 > + }, ..., { + < expressionN > + }] +} +``` + +- **Doc Link:** https://learn.microsoft.com/en-us/azure/documentdb/operators/logical-query/$nor + +### $or + +- **Description:** The $or operator joins query clauses with a logical OR and returns documents that match at least one of the specified conditions. 
+- **Syntax:** + +```javascript +{ + $or: [{ + < expression1 > + }, { + < expression2 > + }, ..., { + < expressionN > + }] +} +``` + +- **Doc Link:** https://learn.microsoft.com/en-us/azure/documentdb/operators/logical-query/$or + +## Element Query Operators + +### $exists + +- **Description:** The $exists operator retrieves documents that contain the specified field in their document structure. +- **Syntax:** + +```javascript +{ + : { $exists: } +} +``` + +- **Doc Link:** https://learn.microsoft.com/en-us/azure/documentdb/operators/element-query/$exists + +### $type + +- **Description:** The $type operator retrieves documents if the chosen field is of the specified type. +- **Syntax:** + +```javascript +{ + : { $type: | } +} +``` + +- **Doc Link:** https://learn.microsoft.com/en-us/azure/documentdb/operators/element-query/$type + +## Evaluation Query Operators + +### $expr + +- **Description:** The $expr operator allows the use of aggregation expressions within the query language, enabling complex field comparisons and calculations. +- **Syntax:** + +```javascript +{ + $expr: { } +} +``` + +- **Doc Link:** https://learn.microsoft.com/en-us/azure/documentdb/operators/evaluation-query/$expr + +### $jsonSchema + +- **Description:** The $jsonSchema operator validates documents against a JSON Schema definition for data validation and structure enforcement. Discover supported features and limitations. 
+- **Syntax:** + +```javascript +db.createCollection('collectionName', { + validator: { + $jsonSchema: { + bsonType: 'object', + required: ['field1', 'field2'], + properties: { + field1: { + bsonType: 'string', + }, + field2: { + bsonType: 'int', + minimum: 0, + description: 'Description of field2 requirements', + }, + }, + }, + }, + validationLevel: 'strict', // Optional: "strict" or "moderate" + validationAction: 'error', // Optional: "error" or "warn" +}); +``` + +- **Doc Link:** https://learn.microsoft.com/en-us/azure/documentdb/operators/evaluation-query/$jsonschema + +### $mod + +- **Description:** The $mod operator performs a modulo operation on the value of a field and selects documents with a specified result. +- **Syntax:** + +```javascript +{ + : { $mod: [ , ] } +} +``` + +- **Doc Link:** https://learn.microsoft.com/en-us/azure/documentdb/operators/evaluation-query/$mod + +### $regex + +- **Description:** The $regex operator provides regular expression capabilities for pattern matching in queries, allowing flexible string matching and searching. +- **Syntax:** + +```javascript +{ + : { $regex: , $options: } +} +``` + +- **Doc Link:** https://learn.microsoft.com/en-us/azure/documentdb/operators/evaluation-query/$regex + +### $text + +- **Description:** The $text operator performs text search on the content of indexed string fields, enabling full-text search capabilities. +- **Syntax:** + +```javascript +{ + $text: { + $search: , + $language: , + $caseSensitive: , + $diacriticSensitive: + } +} +``` + +- **Doc Link:** https://learn.microsoft.com/en-us/azure/documentdb/operators/evaluation-query/$text + +## Geospatial Operators + +### $geoIntersects + +- **Description:** The $geoIntersects operator selects documents whose location field intersects with a specified GeoJSON object. 
+- **Syntax:** + +```javascript +{ + : { + $geoIntersects: { + $geometry: { + type: , + coordinates: + } + } + } +} +``` + +- **Doc Link:** https://learn.microsoft.com/en-us/azure/documentdb/operators/geospatial/$geointersects + +### $geoWithin + +- **Description:** The $geoWithin operator selects documents whose location field is completely within a specified geometry. +- **Syntax:** + +```javascript +// Using $box +{ + : { + $geoWithin: { + $box: [ [ ], [ ] ] + } + } +} + +// Using $center +{ + : { + $geoWithin: { + $center: [ [ , ], ] + } + } +} + +// Using $geometry +{ + : { + $geoWithin: { + $geometry: { + type: , + coordinates: + } + } + } +} +``` + +- **Doc Link:** https://learn.microsoft.com/en-us/azure/documentdb/operators/geospatial/$geowithin + +### $box + +- **Description:** The $box operator defines a rectangular area for geospatial queries using coordinate pairs. +- **Syntax:** + +```javascript +{ + : { + $geoWithin: { + $box: [ + [, ], + [, ] + ] + } + } +} +``` + +- **Doc Link:** https://learn.microsoft.com/en-us/azure/documentdb/operators/geospatial/$box + +### $center + +- **Description:** The $center operator specifies a circle using legacy coordinate pairs for $geoWithin queries. +- **Syntax:** + +```javascript +{ + $geoWithin: { + $center: [ [ , ], ] + } +} +``` + +- **Doc Link:** https://learn.microsoft.com/en-us/azure/documentdb/operators/geospatial/$center + +### $centerSphere + +- **Description:** The $centerSphere operator specifies a circle using spherical geometry for $geoWithin queries. +- **Syntax:** + +```javascript +{ + $geoWithin: { + $centerSphere: [ [ , ], ] + } +} +``` + +- **Doc Link:** https://learn.microsoft.com/en-us/azure/documentdb/operators/geospatial/$centersphere + +### $geometry + +- **Description:** The $geometry operator specifies a GeoJSON geometry for geospatial queries. 
+- **Syntax:** + +```javascript +{ + $geometry: { + type: , + coordinates: + } +} +``` + +- **Doc Link:** https://learn.microsoft.com/en-us/azure/documentdb/operators/geospatial/$geometry + +### $maxDistance + +- **Description:** The $maxDistance operator specifies the maximum distance that can exist between two points in a geospatial query. +- **Syntax:** + +```javascript +{ + : { + $near: { + $geometry: { + type: "Point", + coordinates: [, ] + }, + $maxDistance: + } + } +} +``` + +- **Doc Link:** https://learn.microsoft.com/en-us/azure/documentdb/operators/geospatial/$maxdistance + +### $minDistance + +- **Description:** The $minDistance operator specifies the minimum distance that must exist between two points in a geospatial query. +- **Syntax:** + +```javascript +{ + : { + $near: { + $geometry: { + type: "Point", + coordinates: [, ] + }, + $minDistance: + } + } +} +``` + +- **Doc Link:** https://learn.microsoft.com/en-us/azure/documentdb/operators/geospatial/$mindistance + +### $polygon + +- **Description:** The $polygon operator defines a polygon for geospatial queries, allowing you to find locations within an irregular shape. +- **Syntax:** + +```javascript +{ + : { + $geoWithin: { + $geometry: { + type: "Polygon", + coordinates: [ + [[, ], ..., [, ], [, ]] + ] + } + } + } +} +``` + +- **Doc Link:** https://learn.microsoft.com/en-us/azure/documentdb/operators/geospatial/$polygon + +### $near + +- **Description:** The $near operator returns documents with location fields that are near a specified point, sorted by distance. +- **Syntax:** + +```javascript +{ + : { + $near: { + $geometry: { + type: "Point", + coordinates: [, ] + }, + $maxDistance: , + $minDistance: + } + } +} +``` + +- **Doc Link:** https://learn.microsoft.com/en-us/azure/documentdb/operators/geospatial/$near + +### $nearSphere + +- **Description:** The $nearSphere operator returns documents whose location fields are near a specified point on a sphere, sorted by distance on a spherical surface. 
+- **Syntax:** + +```javascript +{ + : { + $nearSphere: { + $geometry: { + type: "Point", + coordinates: [, ] + }, + $maxDistance: , + $minDistance: + } + } +} +``` + +- **Doc Link:** https://learn.microsoft.com/en-us/azure/documentdb/operators/geospatial/$nearsphere + +## Array Query Operators + +### $all + +- **Description:** The $all operator matches documents whose array field contains all the specified elements. +- **Syntax:** + +```javascript +db.collection.find({ + field : { + $all: [ < value1 > , < value2 > ] + } +}) +``` + +- **Doc Link:** https://learn.microsoft.com/en-us/azure/documentdb/operators/array-query/$all + +### $elemMatch + +- **Description:** The $elemMatch operator matches documents in which at least one array element satisfies all the specified criteria, and returns the complete array. +- **Syntax:** + +```javascript +db.collection.find({ : { $elemMatch: { , , ... } } }) +``` + +- **Doc Link:** https://learn.microsoft.com/en-us/azure/documentdb/operators/array-query/$elemmatch + +### $size + +- **Description:** The $size operator is used to query documents where an array field has a specified number of elements. +- **Syntax:** + +```javascript +db.collection.find({ : { $size: } }) +``` + +- **Doc Link:** https://learn.microsoft.com/en-us/azure/documentdb/operators/array-query/$size + +## Bitwise Query Operators + +### $bitsAllClear + +- **Description:** The $bitsAllClear operator is used to match documents where all the bit positions specified in a bitmask are clear. +- **Syntax:** + +```javascript +{ + : { $bitsAllClear: } +} +``` + +- **Doc Link:** https://learn.microsoft.com/en-us/azure/documentdb/operators/bitwise-query/$bitsallclear + +### $bitsAllSet + +- **Description:** The $bitsAllSet operator is used to match documents where all the specified bit positions are set. 
+- **Syntax:** + +```javascript +{ + : { $bitsAllSet: } +} +``` + +- **Doc Link:** https://learn.microsoft.com/en-us/azure/documentdb/operators/bitwise-query/$bitsallset + +### $bitsAnyClear + +- **Description:** The $bitsAnyClear operator matches documents where any of the specified bit positions in a bitmask are clear. +- **Syntax:** + +```javascript +{ + : { $bitsAnyClear: } +} +``` + +- **Doc Link:** https://learn.microsoft.com/en-us/azure/documentdb/operators/bitwise-query/$bitsanyclear + +### $bitsAnySet + +- **Description:** The $bitsAnySet operator returns documents where any of the specified bit positions are set to 1. +- **Syntax:** + +```javascript +{ + : { $bitsAnySet: [ ] } +} +``` + +- **Doc Link:** https://learn.microsoft.com/en-us/azure/documentdb/operators/bitwise-query/$bitsanyset + +## Projection Operators + +### $ + +- **Description:** The $ positional operator identifies an element in an array to update without explicitly specifying the position of the element in the array. +- **Syntax:** + +```javascript +db.collection.updateOne( + { : }, + { : { ".$": } } +) +``` + +- **Doc Link:** none +- **Scraper Comment:** Doc page not found in expected directory 'projection/'. Content scraped from 'array-update/'. + +### $elemMatch + +- **Description:** The $elemMatch operator matches documents in which at least one array element satisfies all the specified criteria, and returns the complete array. +- **Syntax:** + +```javascript +db.collection.find({ : { $elemMatch: { , , ... } } }) +``` + +- **Doc Link:** none +- **Scraper Comment:** Doc page not found in expected directory 'projection/'. Content scraped from 'array-query/'. + +### $slice + +- **Description:** The $slice operator returns a subset of an array from any element onwards in the array. +- **Syntax:** + +```javascript +{ + $slice: [ , ] +} +``` + +- **Doc Link:** none +- **Scraper Comment:** Doc page not found in expected directory 'projection/'. Content scraped from 'array-expression/'. 
+ +## Miscellaneous Query Operators + +### $comment + +- **Description:** The $comment operator adds a comment to a query to help identify the query in logs and profiler output. +- **Syntax:** + +```javascript +{ + $comment: +} +``` + +- **Doc Link:** https://learn.microsoft.com/en-us/azure/documentdb/operators/miscellaneous-query/$comment + +### $rand + +- **Description:** The $rand operator generates a random float value between 0 and 1. +- **Syntax:** + +```javascript +{ + $rand: { + } +} +``` + +- **Doc Link:** https://learn.microsoft.com/en-us/azure/documentdb/operators/miscellaneous-query/$rand + +### $natural + +- **Description:** The $natural operator forces the query to use the natural order of documents in a collection, providing control over document ordering and retrieval. +- **Syntax:** + +```javascript +{ + $natural: <1 | -1> +} +``` + +- **Doc Link:** https://learn.microsoft.com/en-us/azure/documentdb/operators/miscellaneous-query/$natural + +## Field Update Operators + +### $currentDate + +- **Description:** The $currentDate operator sets the value of a field to the current date, either as a Date or a timestamp. +- **Syntax:** + +```javascript +{ + $currentDate: { + : , + : , + ... + } +} +``` + +- **Doc Link:** https://learn.microsoft.com/en-us/azure/documentdb/operators/field-update/$currentdate + +### $inc + +- **Description:** The $inc operator increments the value of a field by a specified amount. +- **Syntax:** + +```javascript +{ + $inc: { + : , + : , + ... + } +} +``` + +- **Doc Link:** https://learn.microsoft.com/en-us/azure/documentdb/operators/field-update/$inc + +### $min + +- **Description:** Retrieves the minimum value for a specified field +- **Syntax:** + +```javascript +$min: +``` + +- **Doc Link:** none +- **Scraper Comment:** Doc page not found in expected directory 'field-update/'. Content scraped from 'accumulators/'. + +### $max + +- **Description:** The $max operator returns the maximum value from a set of input values. 
+- **Syntax:** + +```javascript +$max: +``` + +- **Doc Link:** none +- **Scraper Comment:** Doc page not found in expected directory 'field-update/'. Content scraped from 'accumulators/'. + +### $mul + +- **Description:** The $mul operator multiplies the value of a field by a specified number. +- **Syntax:** + +```javascript +{ + $mul: { + : , + : , + ... + } +} +``` + +- **Doc Link:** https://learn.microsoft.com/en-us/azure/documentdb/operators/field-update/$mul + +### $rename + +- **Description:** The $rename operator allows renaming fields in documents during update operations. +- **Syntax:** + +```javascript +{ + $rename: { + : , + : , + ... + } +} +``` + +- **Doc Link:** https://learn.microsoft.com/en-us/azure/documentdb/operators/field-update/$rename + +### $set + +- **Description:** The $set operator in Azure DocumentDB updates or creates a new field with a specified value +- **Syntax:** + +```javascript +{ + $set: { + newField: + } +} +``` + +- **Doc Link:** none +- **Scraper Comment:** Doc page not found in expected directory 'field-update/'. Content scraped from 'aggregation/'. + +### $setOnInsert + +- **Description:** The $setOnInsert operator sets field values only when an upsert operation results in an insert of a new document. +- **Syntax:** + +```javascript +{ + $setOnInsert: { + : , + : , + ... + } +} +``` + +- **Doc Link:** https://learn.microsoft.com/en-us/azure/documentdb/operators/field-update/$setoninsert + +### $unset + +- **Description:** The $unset stage in the aggregation pipeline is used to remove specified fields from documents. +- **Syntax:** + +```javascript +{ + $unset: "" | ["", "", ...] +} +``` + +- **Doc Link:** none +- **Scraper Comment:** Doc page not found in expected directory 'field-update/'. Content scraped from 'aggregation/'. + +## Array Update Operators + +### $ + +- **Description:** The $ positional operator identifies an element in an array to update without explicitly specifying the position of the element in the array. 
+- **Syntax:** + +```javascript +db.collection.updateOne( + { <query selector>: <condition> }, + { <update operator>: { "<array field>.$": <value> } } +) +``` + +- **Doc Link:** https://learn.microsoft.com/en-us/azure/documentdb/operators/array-update/$ + +### $[] + +### $[identifier] + +### $addToSet + +- **Description:** The $addToSet operator adds elements to an array if they don't already exist, while ensuring uniqueness of elements within the set. +- **Syntax:** + +```javascript +{ + $addToSet: { <field>: <value> } +} +``` + +- **Doc Link:** https://learn.microsoft.com/en-us/azure/documentdb/operators/array-update/$addtoset + +### $pop + +- **Description:** Removes the first or last element of an array. +- **Syntax:** + +```javascript +{ + $pop: { + <field>: <-1 | 1> + } +} +``` + +- **Doc Link:** https://learn.microsoft.com/en-us/azure/documentdb/operators/array-update/$pop + +### $pull + +- **Description:** Removes all instances of a value from an array. +- **Syntax:** + +```javascript +{ + $pull: { <field>: <value | condition> } +} +``` + +- **Doc Link:** https://learn.microsoft.com/en-us/azure/documentdb/operators/array-update/$pull + +### $push + +- **Description:** The $push operator adds a specified value to an array within a document. +- **Syntax:** + +```javascript +db.collection.update({ + < query > +}, { + $push: { + < field >: < value > + } +}, { + < options > +}) +``` + +- **Doc Link:** https://learn.microsoft.com/en-us/azure/documentdb/operators/array-update/$push + +### $pullAll + +- **Description:** The $pullAll operator is used to remove all instances of the specified values from an array. +- **Syntax:** + +```javascript +{ + $pullAll: { <field>: [ <value1>, <value2> ] } +} +``` + +- **Doc Link:** https://learn.microsoft.com/en-us/azure/documentdb/operators/array-update/$pullall + +### $each + +- **Description:** The $each operator is used within an `$addToSet` or `$push` operation to add multiple elements to an array field in a single update operation.
+- **Syntax:** + +```javascript +{ + $push: { + : { + $each: [ , ], + : , + : + } + } +} +``` + +- **Doc Link:** https://learn.microsoft.com/en-us/azure/documentdb/operators/array-update/$each + +### $position + +### $slice + +- **Description:** The $slice operator returns a subset of an array from any element onwards in the array. +- **Syntax:** + +```javascript +{ + $slice: [ , ] +} +``` + +- **Doc Link:** none +- **Scraper Comment:** Doc page not found in expected directory 'array-update/'. Content scraped from 'array-expression/'. + +### $sort + +- **Description:** The $sort stage in the aggregation pipeline is used to order the documents in the pipeline by a specified field or fields. +- **Syntax:** + +```javascript +{ + $sort: { + < field1 >: < sort order > , + < field2 >: < sort order > + } +} +``` + +- **Doc Link:** none +- **Scraper Comment:** Doc page not found in expected directory 'array-update/'. Content scraped from 'aggregation/'. + +## Bitwise Update Operators + +### $bit + +- **Description:** The `$bit` operator is used to perform bitwise operations on integer values. +- **Syntax:** + +```javascript +{ + $bit: { + < field >: { + < operator >: < number > + } + } +} +``` + +- **Doc Link:** https://learn.microsoft.com/en-us/azure/documentdb/operators/bitwise-update/$bit + +## Arithmetic Expression Operators + +### $abs + +- **Description:** The $abs operator returns the absolute value of a number. +- **Syntax:** + +```javascript +{ + $abs: +} +``` + +- **Doc Link:** https://learn.microsoft.com/en-us/azure/documentdb/operators/arithmetic-expression/$abs + +### $add + +- **Description:** The $add operator returns the sum of two numbers or the sum of a date and numbers. 
+- **Syntax:** + +```javascript +{ + $add: [ ] +} +``` + +- **Doc Link:** https://learn.microsoft.com/en-us/azure/documentdb/operators/arithmetic-expression/$add + +### $ceil + +- **Description:** The $ceil operator returns the smallest integer greater than or equal to the specified number. +- **Syntax:** + +```javascript +{ + $ceil: +} +``` + +- **Doc Link:** https://learn.microsoft.com/en-us/azure/documentdb/operators/arithmetic-expression/$ceil + +### $divide + +- **Description:** The $divide operator divides two numbers and returns the quotient. +- **Syntax:** + +```javascript +{ + $divide: [ , ] +} +``` + +- **Doc Link:** https://learn.microsoft.com/en-us/azure/documentdb/operators/arithmetic-expression/$divide + +### $exp + +- **Description:** The $exp operator raises e to the specified exponent and returns the result +- **Syntax:** + +```javascript +{ + $exp: +} +``` + +- **Doc Link:** https://learn.microsoft.com/en-us/azure/documentdb/operators/arithmetic-expression/$exp + +### $floor + +- **Description:** The $floor operator returns the largest integer less than or equal to the specified number +- **Syntax:** + +```javascript +{ + $floor: +} +``` + +- **Doc Link:** https://learn.microsoft.com/en-us/azure/documentdb/operators/arithmetic-expression/$floor + +### $ln + +- **Description:** The $ln operator calculates the natural logarithm of the input +- **Syntax:** + +```javascript +{ + $ln: +} +``` + +- **Doc Link:** https://learn.microsoft.com/en-us/azure/documentdb/operators/arithmetic-expression/$ln + +### $log + +- **Description:** The $log operator calculates the logarithm of a number in the specified base +- **Syntax:** + +```javascript +{ + $log: [ , ] +} +``` + +- **Doc Link:** https://learn.microsoft.com/en-us/azure/documentdb/operators/arithmetic-expression/$log + +### $log10 + +- **Description:** The $log10 operator calculates the log of a specified number in base 10 +- **Syntax:** + +```javascript +{ + $log10: +} +``` + +- **Doc Link:** 
https://learn.microsoft.com/en-us/azure/documentdb/operators/arithmetic-expression/$log10 + +### $mod + +- **Description:** The $mod operator performs a modulo operation on the value of a field and selects documents with a specified result. +- **Syntax:** + +```javascript +{ + : { $mod: [ , ] } +} +``` + +- **Doc Link:** none +- **Scraper Comment:** Doc page not found in expected directory 'arithmetic-expression/'. Content scraped from 'evaluation-query/'. + +### $multiply + +- **Description:** The $multiply operator multiplies the input numerical values +- **Syntax:** + +```javascript +{ + $multiply: [ ] +} +``` + +- **Doc Link:** https://learn.microsoft.com/en-us/azure/documentdb/operators/arithmetic-expression/$multiply + +### $pow + +- **Description:** The `$pow` operator calculates the value of a numerical value raised to the power of a specified exponent. +- **Syntax:** + +```javascript +{ + $pow: [ , ] +} +``` + +- **Doc Link:** https://learn.microsoft.com/en-us/azure/documentdb/operators/arithmetic-expression/$pow + +### $round + +- **Description:** The $round operator rounds a number to a specified decimal place. +- **Syntax:** + +```javascript +{ + $round: [ , ] +} +``` + +- **Doc Link:** https://learn.microsoft.com/en-us/azure/documentdb/operators/arithmetic-expression/$round + +### $sqrt + +- **Description:** The $sqrt operator calculates and returns the square root of an input number +- **Syntax:** + +```javascript +{ + $sqrt: +} +``` + +- **Doc Link:** https://learn.microsoft.com/en-us/azure/documentdb/operators/arithmetic-expression/$sqrt + +### $subtract + +- **Description:** The $subtract operator subtracts two numbers and returns the result. +- **Syntax:** + +```javascript +{ + $subtract: [ , ] +} +``` + +- **Doc Link:** https://learn.microsoft.com/en-us/azure/documentdb/operators/arithmetic-expression/$subtract + +### $trunc + +- **Description:** The $trunc operator truncates a number to a specified decimal place. 
+- **Syntax:** + +```javascript +{ + $trunc: [ , ] +} +``` + +- **Doc Link:** https://learn.microsoft.com/en-us/azure/documentdb/operators/arithmetic-expression/$trunc + +## Array Expression Operators + +### $arrayElemAt + +- **Description:** The $arrayElemAt returns the element at the specified array index. +- **Syntax:** + +```javascript +{ + $arrayElemAt: ["", ] +} +``` + +- **Doc Link:** https://learn.microsoft.com/en-us/azure/documentdb/operators/array-expression/$arrayelemat + +### $arrayToObject + +- **Description:** The $arrayToObject allows converting an array into a single document. +- **Syntax:** + +```javascript +{ + $arrayToObject: ''; +} +``` + +- **Doc Link:** https://learn.microsoft.com/en-us/azure/documentdb/operators/array-expression/$arraytoobject + +### $concatArrays + +- **Description:** The $concatArrays is used to combine multiple arrays into a single array. +- **Syntax:** + +```javascript +{ + $concatArrays: ['', '']; +} +``` + +- **Doc Link:** https://learn.microsoft.com/en-us/azure/documentdb/operators/array-expression/$concatarrays + +### $filter + +- **Description:** The $filter operator filters for elements from an array based on a specified condition. +- **Syntax:** + +```javascript +{ + $filter: { + input: "", + as: "", + cond: "" + } +} +``` + +- **Doc Link:** https://learn.microsoft.com/en-us/azure/documentdb/operators/array-expression/$filter + +### $firstN + +- **Description:** The $firstN operator sorts documents on one or more fields specified by the query and returns the first N document matching the filtering criteria +- **Syntax:** + +```javascript +{ + $firstN: { + input: [listOfFields], + sortBy: { + : + }, + n: + } +} +``` + +- **Doc Link:** none +- **Scraper Comment:** Doc page not found in expected directory 'array-expression/'. Content scraped from 'accumulators/'. 
+ +### $in + +- **Description:** The $in operator matches value of a field against an array of specified values +- **Syntax:** + +```javascript +{ + field: { + $in: [listOfValues]; + } +} +``` + +- **Doc Link:** none +- **Scraper Comment:** Doc page not found in expected directory 'array-expression/'. Content scraped from 'comparison-query/'. + +### $indexOfArray + +- **Description:** The $indexOfArray operator is used to search for an element in an array and return the index of the first occurrence of the element. +- **Syntax:** + +```javascript +{ + $indexOfArray: [ < array > , < searchElement > , < start > , < end > ] +} +``` + +- **Doc Link:** https://learn.microsoft.com/en-us/azure/documentdb/operators/array-expression/$indexofarray + +### $isArray + +- **Description:** The $isArray operator is used to determine if a specified value is an array. +- **Syntax:** + +```javascript +{ + $isArray: +} +``` + +- **Doc Link:** https://learn.microsoft.com/en-us/azure/documentdb/operators/array-expression/$isarray + +### $lastN + +- **Description:** The $lastN accumulator operator returns the last N values in a group of documents. +- **Syntax:** + +```javascript +{ + $group: { + _id: < expression > , + < field >: { + $lastN: { + n: < number >, + input: < expression > + } + } + } +} +``` + +- **Doc Link:** none +- **Scraper Comment:** Doc page not found in expected directory 'array-expression/'. Content scraped from 'accumulators/'. + +### $map + +- **Description:** The $map operator allows applying an expression to each element in an array. 
+- **Syntax:** + +```javascript +{ + $map: { + input: <array>, + as: <string>, + in: <expression> + } +} +``` + +- **Doc Link:** https://learn.microsoft.com/en-us/azure/documentdb/operators/array-expression/$map + +### $maxN + +- **Description:** Retrieves the top N values based on a specified filtering criteria +- **Syntax:** + +```javascript +$maxN: { + input: < field or expression > , + n: < number of values to retrieve > +} +``` + +- **Doc Link:** none +- **Scraper Comment:** Doc page not found in expected directory 'array-expression/'. Content scraped from 'accumulators/'. + +### $minN + +- **Description:** Retrieves the bottom N values based on a specified filtering criteria +- **Syntax:** + +```javascript +$minN: { + input: < field or expression > , + n: < number of values to retrieve > +} +``` + +- **Doc Link:** none +- **Scraper Comment:** Doc page not found in expected directory 'array-expression/'. Content scraped from 'accumulators/'. + +### $objectToArray + +### $range + +- **Description:** The $range operator allows generating an array of sequential integers. +- **Syntax:** + +```javascript +{ + $range: [ <start>, <end>, <step> ] +} +``` + +- **Doc Link:** https://learn.microsoft.com/en-us/azure/documentdb/operators/array-expression/$range + +### $reduce + +- **Description:** The $reduce operator applies an expression to each element in an array and accumulates the result into a single value. +- **Syntax:** + +```javascript +$reduce: { + input: <array>, + initialValue: <expression>, + in: <expression> +} +``` + +- **Doc Link:** https://learn.microsoft.com/en-us/azure/documentdb/operators/array-expression/$reduce + +### $reverseArray + +- **Description:** The $reverseArray operator is used to reverse the order of elements in an array. +- **Syntax:** + +```javascript +{ + $reverseArray: <array> +} +``` + +- **Doc Link:** https://learn.microsoft.com/en-us/azure/documentdb/operators/array-expression/$reversearray + +### $size + +- **Description:** The $size operator is used to query documents where an array field has a specified number of elements.
+- **Syntax:** + +```javascript +db.collection.find({ : { $size: } }) +``` + +- **Doc Link:** none +- **Scraper Comment:** Doc page not found in expected directory 'array-expression/'. Content scraped from 'array-query/'. + +### $slice + +- **Description:** The $slice operator returns a subset of an array from any element onwards in the array. +- **Syntax:** + +```javascript +{ + $slice: [ , ] +} +``` + +- **Doc Link:** https://learn.microsoft.com/en-us/azure/documentdb/operators/array-expression/$slice + +### $sortArray + +- **Description:** The $sortArray operator helps in sorting the elements in an array. +- **Syntax:** + +```javascript +{ + $sortArray: { + input: , + sortBy: + } +} +``` + +- **Doc Link:** https://learn.microsoft.com/en-us/azure/documentdb/operators/array-expression/$sortarray + +### $zip + +- **Description:** The $zip operator allows merging two or more arrays element-wise into a single array or arrays. +- **Syntax:** + +```javascript +{ + $zip: { + inputs: [ , , ... ], + useLongestLength: , // Optional + defaults: // Optional + } +} +``` + +- **Doc Link:** https://learn.microsoft.com/en-us/azure/documentdb/operators/array-expression/$zip + +## Bitwise Operators + +### $bitAnd + +- **Description:** The $bitAnd operator performs a bitwise AND operation on integer values and returns the result as an integer. +- **Syntax:** + +```javascript +{ + $bitAnd: [ , , ... ] +} +``` + +- **Doc Link:** https://learn.microsoft.com/en-us/azure/documentdb/operators/bitwise/$bitand + +### $bitNot + +- **Description:** The $bitNot operator performs a bitwise NOT operation on integer values and returns the result as an integer. +- **Syntax:** + +```javascript +{ + $bitNot: +} +``` + +- **Doc Link:** https://learn.microsoft.com/en-us/azure/documentdb/operators/bitwise/$bitnot + +### $bitOr + +- **Description:** The $bitOr operator performs a bitwise OR operation on integer values and returns the result as an integer. 
+- **Syntax:** + +```javascript +{ + $bitOr: [ , , ... ] +} +``` + +- **Doc Link:** https://learn.microsoft.com/en-us/azure/documentdb/operators/bitwise/$bitor + +### $bitXor + +- **Description:** The $bitXor operator performs a bitwise XOR operation on integer values. +- **Syntax:** + +```javascript +{ + $bitXor: [ , , ... ] +} +``` + +- **Doc Link:** https://learn.microsoft.com/en-us/azure/documentdb/operators/bitwise/$bitxor + +## Boolean Expression Operators + +### $and + +- **Description:** The $and operator joins multiple query clauses and returns documents that match all specified conditions. +- **Syntax:** + +```javascript +{ + $and: [{ + < expression1 > + }, { + < expression2 > + }, ..., { + < expressionN > + }] +} +``` + +- **Doc Link:** none +- **Scraper Comment:** Doc page not found in expected directory 'boolean-expression/'. Content scraped from 'logical-query/'. + +### $not + +- **Description:** The $not operator performs a logical NOT operation on a specified expression, selecting documents that don't match the expression. +- **Syntax:** + +```javascript +{ + field: { + $not: { + < operator - expression > + } + } +} +``` + +- **Doc Link:** none +- **Scraper Comment:** Doc page not found in expected directory 'boolean-expression/'. Content scraped from 'logical-query/'. + +### $or + +- **Description:** The $or operator joins query clauses with a logical OR and returns documents that match at least one of the specified conditions. +- **Syntax:** + +```javascript +{ + $or: [{ + < expression1 > + }, { + < expression2 > + }, ..., { + < expressionN > + }] +} +``` + +- **Doc Link:** none +- **Scraper Comment:** Doc page not found in expected directory 'boolean-expression/'. Content scraped from 'logical-query/'. 
+ +## Comparison Expression Operators + +### $cmp + +- **Description:** The $cmp operator compares two values +- **Syntax:** + +```javascript +{ + $cmp: [, ] +} +``` + +- **Doc Link:** none +- **Scraper Comment:** Doc page not found in expected directory 'comparison-expression/'. Content scraped from 'comparison-query/'. + +### $eq + +- **Description:** The $eq query operator compares the value of a field to a specified value +- **Syntax:** + +```javascript +{ + field: { + $eq: + } +} +``` + +- **Doc Link:** none +- **Scraper Comment:** Doc page not found in expected directory 'comparison-expression/'. Content scraped from 'comparison-query/'. + +### $gt + +- **Description:** The $gt query operator retrieves documents where the value of a field is greater than a specified value +- **Syntax:** + +```javascript +{ + field: { + $gt: value; + } +} +``` + +- **Doc Link:** none +- **Scraper Comment:** Doc page not found in expected directory 'comparison-expression/'. Content scraped from 'comparison-query/'. + +### $gte + +- **Description:** The $gte operator retrieves documents where the value of a field is greater than or equal to a specified value +- **Syntax:** + +```javascript +{ + field: { + $gte: + } +} +``` + +- **Doc Link:** none +- **Scraper Comment:** Doc page not found in expected directory 'comparison-expression/'. Content scraped from 'comparison-query/'. + +### $lt + +- **Description:** The $lt operator retrieves documents where the value of field is less than a specified value +- **Syntax:** + +```javascript +{ + field: { + $lt: value; + } +} +``` + +- **Doc Link:** none +- **Scraper Comment:** Doc page not found in expected directory 'comparison-expression/'. Content scraped from 'comparison-query/'. 
+ +### $lte + +- **Description:** The $lte operator retrieves documents where the value of a field is less than or equal to a specified value +- **Syntax:** + +```javascript +{ + field: { + $lte: + } +} +``` + +- **Doc Link:** none +- **Scraper Comment:** Doc page not found in expected directory 'comparison-expression/'. Content scraped from 'comparison-query/'. + +### $ne + +- **Description:** The $ne operator retrieves documents where the value of a field doesn't equal a specified value +- **Syntax:** + +```javascript +{ + field: { + $ne: value; + } +} +``` + +- **Doc Link:** none +- **Scraper Comment:** Doc page not found in expected directory 'comparison-expression/'. Content scraped from 'comparison-query/'. + +## Data Size Operators + +### $bsonSize + +- **Description:** The $bsonSize operator returns the size of a document in bytes when encoded as BSON. +- **Syntax:** + +```javascript +{ + $bsonSize: +} +``` + +- **Doc Link:** https://learn.microsoft.com/en-us/azure/documentdb/operators/data-size/$bsonsize + +### $binarySize + +- **Description:** The $binarySize operator is used to return the size of a binary data field. +- **Syntax:** + +```javascript +{ + $binarySize: ''; +} +``` + +- **Doc Link:** https://learn.microsoft.com/en-us/azure/documentdb/operators/data-size/$binarysize + +## Date Expression Operators + +### $dateAdd + +- **Description:** The $dateAdd operator adds a specified number of time units (day, hour, month etc) to a date. +- **Syntax:** + +```javascript +$dateAdd: { + startDate: , + unit: , + amount: , + timezone: // Optional +} +``` + +- **Doc Link:** https://learn.microsoft.com/en-us/azure/documentdb/operators/date-expression/$dateadd + +### $dateDiff + +- **Description:** The $dateDiff operator calculates the difference between two dates in various units such as years, months, days, etc. 
+- **Syntax:** + +```javascript +$dateDiff: { + startDate: <dateExpression>, + endDate: <dateExpression>, + unit: <string>, + timezone: <string>, // Optional + startOfWeek: <string> // Optional +} +``` + +- **Doc Link:** https://learn.microsoft.com/en-us/azure/documentdb/operators/date-expression/$datediff + +### $dateFromParts + +- **Description:** The $dateFromParts operator constructs a date from individual components. +- **Syntax:** + +```javascript +{ + $dateFromParts: { + year: < year > , + month: < month > , + day: < day > , + hour: < hour > , + minute: < minute > , + second: < second > , + millisecond: < millisecond > , + timezone: < timezone > + } +} +``` + +- **Doc Link:** https://learn.microsoft.com/en-us/azure/documentdb/operators/date-expression/$datefromparts + +### $dateFromString + +- **Description:** The $dateFromString operator converts a date/time string to a date object. +- **Syntax:** + +```javascript +{ + $dateFromString: { + dateString: < string > , + format: < string > , + timezone: < string > , + onError: < expression > , + onNull: < expression > + } +} +``` + +- **Doc Link:** https://learn.microsoft.com/en-us/azure/documentdb/operators/date-expression/$datefromstring + +### $dateSubtract + +- **Description:** The $dateSubtract operator subtracts a specified amount of time from a date. +- **Syntax:** + +```javascript +{ + $dateSubtract: { + startDate: <dateExpression>, + unit: "<unit>", + amount: <number>, + timezone: "<timezone>" // optional + } +} +``` + +- **Doc Link:** https://learn.microsoft.com/en-us/azure/documentdb/operators/date-expression/$datesubtract + +### $dateToParts + +- **Description:** The $dateToParts operator decomposes a date into its individual parts such as year, month, day, and more.
+- **Syntax:** + +```javascript +$dateToParts: { + date: , + timezone: , // optional + iso8601: // optional +} +``` + +- **Doc Link:** https://learn.microsoft.com/en-us/azure/documentdb/operators/date-expression/$datetoparts + +### $dateToString + +- **Description:** The $dateToString operator converts a date object into a formatted string. +- **Syntax:** + +```javascript +{ + $dateToString: { + format: "", + date: , + timezone: "", + onNull: "" + } +} +``` + +- **Doc Link:** https://learn.microsoft.com/en-us/azure/documentdb/operators/date-expression/$datetostring + +### $dateTrunc + +- **Description:** The $dateTrunc operator truncates a date to a specified unit. +- **Syntax:** + +```javascript +$dateTrunc: { + date: , + unit: "", + binSize: , // optional + timezone: "", // optional + startOfWeek: "" // optional (used when unit is "week") + } +``` + +- **Doc Link:** https://learn.microsoft.com/en-us/azure/documentdb/operators/date-expression/$datetrunc + +### $dayOfMonth + +- **Description:** The $dayOfMonth operator extracts the day of the month from a date. +- **Syntax:** + +```javascript +{ + $dayOfMonth: +} +``` + +- **Doc Link:** https://learn.microsoft.com/en-us/azure/documentdb/operators/date-expression/$dayofmonth + +### $dayOfWeek + +- **Description:** The $dayOfWeek operator extracts the day of the week from a date. +- **Syntax:** + +```javascript +{ + $dayOfWeek: +} +``` + +- **Doc Link:** https://learn.microsoft.com/en-us/azure/documentdb/operators/date-expression/$dayofweek + +### $dayOfYear + +- **Description:** The $dayOfYear operator extracts the day of the year from a date. +- **Syntax:** + +```javascript +{ + $dayOfYear: +} +``` + +- **Doc Link:** https://learn.microsoft.com/en-us/azure/documentdb/operators/date-expression/$dayofyear + +### $hour + +- **Description:** The $hour operator returns the hour portion of a date as a number between 0 and 23. 
+- **Syntax:** + +```javascript +{ + $hour: +} +``` + +- **Doc Link:** https://learn.microsoft.com/en-us/azure/documentdb/operators/date-expression/$hour + +### $isoDayOfWeek + +- **Description:** The $isoDayOfWeek operator returns the weekday number in ISO 8601 format, ranging from 1 (Monday) to 7 (Sunday). +- **Syntax:** + +```javascript +{ + $isoDayOfWeek: +} +``` + +- **Doc Link:** https://learn.microsoft.com/en-us/azure/documentdb/operators/date-expression/$isodayofweek + +### $isoWeek + +- **Description:** The $isoWeek operator returns the week number of the year in ISO 8601 format, ranging from 1 to 53. +- **Syntax:** + +```javascript +{ + $isoWeek: +} +``` + +- **Doc Link:** https://learn.microsoft.com/en-us/azure/documentdb/operators/date-expression/$isoweek + +### $isoWeekYear + +- **Description:** The $isoWeekYear operator returns the year number in ISO 8601 format, which can differ from the calendar year for dates at the beginning or end of the year. +- **Syntax:** + +```javascript +{ + $isoWeekYear: +} +``` + +- **Doc Link:** https://learn.microsoft.com/en-us/azure/documentdb/operators/date-expression/$isoweekyear + +### $millisecond + +- **Description:** The $millisecond operator extracts the milliseconds portion from a date value. +- **Syntax:** + +```javascript +{ + $millisecond: +} +``` + +- **Doc Link:** https://learn.microsoft.com/en-us/azure/documentdb/operators/date-expression/$millisecond + +### $minute + +- **Description:** The $minute operator extracts the minute portion from a date value. +- **Syntax:** + +```javascript +{ + $minute: +} +``` + +- **Doc Link:** https://learn.microsoft.com/en-us/azure/documentdb/operators/date-expression/$minute + +### $month + +- **Description:** The $month operator extracts the month portion from a date value. 
+- **Syntax:** + +```javascript +{ + $month: +} +``` + +- **Doc Link:** https://learn.microsoft.com/en-us/azure/documentdb/operators/date-expression/$month + +### $second + +- **Description:** The $second operator extracts the seconds portion from a date value. +- **Syntax:** + +```javascript +{ + $second: +} +``` + +- **Doc Link:** https://learn.microsoft.com/en-us/azure/documentdb/operators/date-expression/$second + +### $toDate + +- **Description:** The $toDate operator converts supported types to a proper Date object. +- **Syntax:** + +```javascript +{ + $toDate: +} +``` + +- **Doc Link:** none +- **Scraper Comment:** Doc page not found in expected directory 'date-expression/'. Content scraped from 'aggregation/type-expression/'. + +### $week + +- **Description:** The $week operator returns the week number for a date as a value between 0 and 53. +- **Syntax:** + +```javascript +{ + $week: +} +``` + +- **Doc Link:** https://learn.microsoft.com/en-us/azure/documentdb/operators/date-expression/$week + +### $year + +- **Description:** The $year operator returns the year for a date as a four-digit number. +- **Syntax:** + +```javascript +{ + $year: +} +``` + +- **Doc Link:** https://learn.microsoft.com/en-us/azure/documentdb/operators/date-expression/$year + +## Literal Expression Operator + +### $literal + +- **Description:** The $literal operator returns the specified value without parsing it as an expression, allowing literal values to be used in aggregation pipelines. +- **Syntax:** + +```javascript +{ + $literal: +} +``` + +- **Doc Link:** https://learn.microsoft.com/en-us/azure/documentdb/operators/literal-expression/$literal + +## Miscellaneous Operators + +### $getField + +- **Description:** The $getField operator allows retrieving the value of a specified field from a document. 
+- **Syntax:** + +```javascript +{ + $getField: { + field: <fieldName>, + input: <expression> + } +} +``` + +- **Doc Link:** https://learn.microsoft.com/en-us/azure/documentdb/operators/miscellaneous/$getfield + +### $rand + +- **Description:** The $rand operator generates a random float value between 0 and 1. +- **Syntax:** + +```javascript +{ + $rand: { + } +} +``` + +- **Doc Link:** none +- **Scraper Comment:** Doc page not found in expected directory 'miscellaneous/'. Content scraped from 'miscellaneous-query/'. + +### $sampleRate + +- **Description:** The $sampleRate operator randomly samples documents from a collection based on a specified probability rate, useful for statistical analysis and testing. +- **Syntax:** + +```javascript +{ + $match: { + $sampleRate: <number between 0 and 1> + } +} +``` + +- **Doc Link:** https://learn.microsoft.com/en-us/azure/documentdb/operators/miscellaneous/$samplerate + +## Object Expression Operators + +### $mergeObjects + +- **Description:** The $mergeObjects operator merges multiple documents into a single document. +- **Syntax:** + +```javascript +{ + $mergeObjects: [ < document1 > , < document2 > , ...] +} +``` + +- **Doc Link:** https://learn.microsoft.com/en-us/azure/documentdb/operators/object-expression/$mergeobjects + +### $objectToArray + +- **Description:** The $objectToArray operator is used to transform a document (object) into an array of key-value pairs. +- **Syntax:** + +```javascript +{ + $objectToArray: <object> +} +``` + +- **Doc Link:** https://learn.microsoft.com/en-us/azure/documentdb/operators/object-expression/$objecttoarray + +### $setField + +- **Description:** The $setField operator is used to add, update, or remove fields in embedded documents.
+- **Syntax:** + +```javascript +{ + $setField: { + field: , + input: , + value: + } +} +``` + +- **Doc Link:** https://learn.microsoft.com/en-us/azure/documentdb/operators/object-expression/$setfield + +## Set Expression Operators + +### $allElementsTrue + +- **Description:** The $allElementsTrue operator returns true if all elements in an array evaluate to true. +- **Syntax:** + +```javascript +{ + $allElementsTrue: [ ] +} +``` + +- **Doc Link:** https://learn.microsoft.com/en-us/azure/documentdb/operators/set-expression/$allelementstrue + +### $anyElementTrue + +- **Description:** The $anyElementTrue operator returns true if any element in an array evaluates to a value of true. +- **Syntax:** + +```javascript +{ + $anyElementTrue: [ ] +} +``` + +- **Doc Link:** https://learn.microsoft.com/en-us/azure/documentdb/operators/set-expression/$anyelementtrue + +### $setDifference + +- **Description:** The $setDifference operator returns a set with elements that exist in one set but not in a second set. +- **Syntax:** + +```javascript +{ + $setDifference: [ , ] +} +``` + +- **Doc Link:** https://learn.microsoft.com/en-us/azure/documentdb/operators/set-expression/$setdifference + +### $setEquals + +- **Description:** The $setEquals operator returns true if two sets have the same distinct elements. +- **Syntax:** + +```javascript +{ + $setEquals: [ , , ... ] +} +``` + +- **Doc Link:** https://learn.microsoft.com/en-us/azure/documentdb/operators/set-expression/$setequals + +### $setIntersection + +- **Description:** The $setIntersection operator returns the common elements that appear in all input arrays. +- **Syntax:** + +```javascript +{ + $setIntersection: [ , , ... ] +} +``` + +- **Doc Link:** https://learn.microsoft.com/en-us/azure/documentdb/operators/set-expression/$setintersection + +### $setIsSubset + +- **Description:** The $setIsSubset operator determines if one array is a subset of a second array. 
+- **Syntax:** + +```javascript +{ + $setIsSubset: [ , ] +} +``` + +- **Doc Link:** https://learn.microsoft.com/en-us/azure/documentdb/operators/set-expression/$setissubset + +### $setUnion + +- **Description:** The $setUnion operator returns an array that contains all the unique elements from the input arrays. +- **Syntax:** + +```javascript +{ + $setUnion: [ , , ... ] +} +``` + +- **Doc Link:** https://learn.microsoft.com/en-us/azure/documentdb/operators/set-expression/$setunion + +## String Expression Operators + +### $concat + +### $dateFromString + +- **Description:** The $dateFromString operator converts a date/time string to a date object. +- **Syntax:** + +```javascript +{ + $dateFromString: { + dateString: < string > , + format: < string > , + timezone: < string > , + onError: < expression > , + onNull: < expression > + } +} +``` + +- **Doc Link:** none +- **Scraper Comment:** Doc page not found in expected directory 'string-expression/'. Content scraped from 'date-expression/'. + +### $dateToString + +- **Description:** The $dateToString operator converts a date object into a formatted string. +- **Syntax:** + +```javascript +{ + $dateToString: { + format: "", + date: , + timezone: "", + onNull: "" + } +} +``` + +- **Doc Link:** none +- **Scraper Comment:** Doc page not found in expected directory 'string-expression/'. Content scraped from 'date-expression/'. + +### $indexOfBytes + +### $indexOfCP + +### $ltrim + +### $regexFind + +### $regexFindAll + +### $regexMatch + +### $replaceOne + +### $replaceAll + +### $rtrim + +### $split + +### $strLenBytes + +### $strLenCP + +### $strcasecmp + +### $substr + +### $substrBytes + +### $substrCP + +### $toLower + +### $toString + +- **Description:** The $toString operator converts an expression into a String +- **Syntax:** + +```javascript +{ + $toString: < expression > +} +``` + +- **Doc Link:** none +- **Scraper Comment:** Doc page not found in expected directory 'string-expression/'. 
Content scraped from 'aggregation/type-expression/'. + +### $trim + +### $toUpper + +## Timestamp Expression Operators + +### $tsIncrement + +- **Description:** The $tsIncrement operator extracts the increment portion from a timestamp value. +- **Syntax:** + +```javascript +{ + $tsIncrement: +} +``` + +- **Doc Link:** https://learn.microsoft.com/en-us/azure/documentdb/operators/timestamp-expression/$tsincrement + +### $tsSecond + +- **Description:** The $tsSecond operator extracts the seconds portion from a timestamp value. +- **Syntax:** + +```javascript +{ + $tsSecond: +} +``` + +- **Doc Link:** https://learn.microsoft.com/en-us/azure/documentdb/operators/timestamp-expression/$tssecond + +## Trigonometry Expression Operators + +### $sin + +### $cos + +### $tan + +### $asin + +### $acos + +### $atan + +### $atan2 + +### $asinh + +### $acosh + +### $atanh + +### $sinh + +### $cosh + +### $tanh + +### $degreesToRadians + +### $radiansToDegrees + +## Type Expression Operators + +### $convert + +- **Description:** The $convert operator converts an expression into the specified type +- **Syntax:** + +```javascript +{ + $convert: { + input: < expression > , + to: < type > , + format: < binData format > , + onError: < value to return on error > , + onNull: < value to return on null > + } +} +``` + +- **Doc Link:** https://learn.microsoft.com/en-us/azure/documentdb/operators/aggregation/type-expression/$convert + +### $isNumber + +- **Description:** The $isNumber operator checks if a specified expression is a numerical type +- **Syntax:** + +```javascript +{ + $isNumber: < expression > +} +``` + +- **Doc Link:** https://learn.microsoft.com/en-us/azure/documentdb/operators/aggregation/type-expression/$isnumber + +### $toBool + +- **Description:** The $toBool operator converts an expression into a Boolean type +- **Syntax:** + +```javascript +{ + $toBool: < expression > +} +``` + +- **Doc Link:** 
https://learn.microsoft.com/en-us/azure/documentdb/operators/aggregation/type-expression/$tobool + +### $toDate + +- **Description:** The $toDate operator converts supported types to a proper Date object. +- **Syntax:** + +```javascript +{ + $toDate: +} +``` + +- **Doc Link:** https://learn.microsoft.com/en-us/azure/documentdb/operators/aggregation/type-expression/$todate + +### $toDecimal + +- **Description:** The $toDecimal operator converts an expression into a Decimal type +- **Syntax:** + +```javascript +{ + $toDecimal: < expression > +} +``` + +- **Doc Link:** https://learn.microsoft.com/en-us/azure/documentdb/operators/aggregation/type-expression/$todecimal + +### $toDouble + +- **Description:** The $toDouble operator converts an expression into a Double value +- **Syntax:** + +```javascript +{ + $toDouble: < expression > +} +``` + +- **Doc Link:** https://learn.microsoft.com/en-us/azure/documentdb/operators/aggregation/type-expression/$todouble + +### $toInt + +- **Description:** The $toInt operator converts an expression into an Integer +- **Syntax:** + +```javascript +{ + $toInt: < expression > +} +``` + +- **Doc Link:** https://learn.microsoft.com/en-us/azure/documentdb/operators/aggregation/type-expression/$toint + +### $toLong + +- **Description:** The $toLong operator converts an expression into a Long value +- **Syntax:** + +```javascript +{ + $toLong: < expression > +} +``` + +- **Doc Link:** https://learn.microsoft.com/en-us/azure/documentdb/operators/aggregation/type-expression/$tolong + +### $toObjectId + +- **Description:** The $toObjectId operator converts an expression into an ObjectId +- **Syntax:** + +```javascript +{ + $toObjectId: < expression > +} +``` + +- **Doc Link:** https://learn.microsoft.com/en-us/azure/documentdb/operators/aggregation/type-expression/$toobjectid + +### $toString + +- **Description:** The $toString operator converts an expression into a String +- **Syntax:** + +```javascript +{ + $toString: < expression > +} +``` + 
+- **Doc Link:** https://learn.microsoft.com/en-us/azure/documentdb/operators/aggregation/type-expression/$tostring + +### $type + +- **Description:** The $type operator retrieves documents if the chosen field is of the specified type. +- **Syntax:** + +```javascript +{ + : { $type: | } +} +``` + +- **Doc Link:** none +- **Scraper Comment:** Doc page not found in expected directory 'aggregation/type-expression/'. Content scraped from 'element-query/'. + +## Accumulators ($group, $bucket, $bucketAuto, $setWindowFields) + +### $addToSet + +- **Description:** The addToSet operator adds elements to an array if they don't already exist, while ensuring uniqueness of elements within the set. +- **Syntax:** + +```javascript +{ + $addToSet: { : } +} +``` + +- **Doc Link:** none +- **Scraper Comment:** Doc page not found in expected directory 'accumulators/'. Content scraped from 'array-update/'. + +### $avg + +- **Description:** Computes the average of numeric values for documents in a group, bucket, or window. 
+- **Syntax:** + +```javascript +$avg: +``` + +- **Doc Link:** https://learn.microsoft.com/en-us/azure/documentdb/operators/accumulators/$avg + +### $bottom + +- **Description:** The $bottom operator returns the last document from the query's result set sorted by one or more fields +- **Syntax:** + +```javascript +{ + $bottom: { + output: [listOfFields], + sortBy: { + : < sortOrder > + } + } +} +``` + +- **Doc Link:** https://learn.microsoft.com/en-us/azure/documentdb/operators/accumulators/$bottom + +### $bottomN + +- **Description:** The $bottomN operator returns the last N documents from the result sorted by one or more fields +- **Syntax:** + +```javascript +{ + $bottomN: { + output: [listOfFields], + sortBy: { + : < sortOrder > + }, + n: < numDocumentsToReturn > + } +} +``` + +- **Doc Link:** https://learn.microsoft.com/en-us/azure/documentdb/operators/accumulators/$bottomn + +### $count + +- **Description:** The `$count` operator is used to count the number of documents that match a query filtering criteria. +- **Syntax:** + +```javascript +{ + $count: ''; +} +``` + +- **Doc Link:** https://learn.microsoft.com/en-us/azure/documentdb/operators/accumulators/$count + +### $first + +- **Description:** The $first operator returns the first value in a group according to the group's sorting order. 
+- **Syntax:** + +```javascript +{ + $first: +} +``` + +- **Doc Link:** https://learn.microsoft.com/en-us/azure/documentdb/operators/accumulators/$first + +### $firstN + +- **Description:** The $firstN operator sorts documents on one or more fields specified by the query and returns the first N document matching the filtering criteria +- **Syntax:** + +```javascript +{ + $firstN: { + input: [listOfFields], + sortBy: { + : + }, + n: + } +} +``` + +- **Doc Link:** https://learn.microsoft.com/en-us/azure/documentdb/operators/accumulators/$firstn + +### $last + +- **Description:** The $last operator returns the last document from the result sorted by one or more fields +- **Syntax:** + +```javascript +{ + "$last": +} +``` + +- **Doc Link:** https://learn.microsoft.com/en-us/azure/documentdb/operators/accumulators/$last + +### $lastN + +- **Description:** The $lastN accumulator operator returns the last N values in a group of documents. +- **Syntax:** + +```javascript +{ + $group: { + _id: < expression > , + < field >: { + $lastN: { + n: < number >, + input: < expression > + } + } + } +} +``` + +- **Doc Link:** https://learn.microsoft.com/en-us/azure/documentdb/operators/accumulators/$lastn + +### $max + +- **Description:** The $max operator returns the maximum value from a set of input values. +- **Syntax:** + +```javascript +$max: +``` + +- **Doc Link:** https://learn.microsoft.com/en-us/azure/documentdb/operators/accumulators/$max + +### $maxN + +- **Description:** Retrieves the top N values based on a specified filtering criteria +- **Syntax:** + +```javascript +$maxN: { + input: < field or expression > , + n: < number of values to retrieve > +} +``` + +- **Doc Link:** https://learn.microsoft.com/en-us/azure/documentdb/operators/accumulators/$maxn + +### $median + +- **Description:** The $median operator calculates the median value of a numeric field in a group of documents. 
+- **Syntax:** + +```javascript +{ + $group: { + _id: < expression > , + medianValue: { + $median: { + input: < field or expression > , + method: < > + } + } + } +} +``` + +- **Doc Link:** https://learn.microsoft.com/en-us/azure/documentdb/operators/accumulators/$median + +### $mergeObjects + +- **Description:** The $mergeObjects operator merges multiple documents into a single document +- **Syntax:** + +```javascript +{ + $mergeObjects: [ < document1 > , < document2 > , ...] +} +``` + +- **Doc Link:** none +- **Scraper Comment:** Doc page not found in expected directory 'accumulators/'. Content scraped from 'object-expression/'. + +### $min + +- **Description:** Retrieves the minimum value for a specified field +- **Syntax:** + +```javascript +$min: +``` + +- **Doc Link:** https://learn.microsoft.com/en-us/azure/documentdb/operators/accumulators/$min + +### $percentile + +- **Description:** The $percentile operator calculates the percentile of numerical values that match a filtering criteria +- **Syntax:** + +```javascript +$percentile: { + input: < field or expression > , + p: [ < percentile values > ], + method: < method > +} +``` + +- **Doc Link:** https://learn.microsoft.com/en-us/azure/documentdb/operators/accumulators/$percentile + +### $push + +- **Description:** The $push operator adds a specified value to an array within a document. +- **Syntax:** + +```javascript +db.collection.update({ + < query > +}, { + $push: { + < field >: < value > + } +}, { + < options > +}) +``` + +- **Doc Link:** none +- **Scraper Comment:** Doc page not found in expected directory 'accumulators/'. Content scraped from 'array-update/'. 
+ +### $stdDevPop + +- **Description:** The $stdDevPop operator calculates the standard deviation of the specified values +- **Syntax:** + +```javascript +{ + $stdDevPop: < fieldName > +} +``` + +- **Doc Link:** https://learn.microsoft.com/en-us/azure/documentdb/operators/accumulators/$stddevpop + +### $stdDevSamp + +- **Description:** The $stdDevSamp operator calculates the standard deviation of a specified sample of values and not the entire population +- **Syntax:** + +```javascript +{ + $stdDevSamp: < fieldName > +} +``` + +- **Doc Link:** https://learn.microsoft.com/en-us/azure/documentdb/operators/accumulators/$stddevsamp + +### $sum + +- **Description:** The $sum operator calculates the sum of the values of a field based on a filtering criteria +- **Syntax:** + +```javascript +{ + $sum: +} +``` + +- **Doc Link:** https://learn.microsoft.com/en-us/azure/documentdb/operators/accumulators/$sum + +### $top + +- **Description:** The $top operator returns the first document from the result set sorted by one or more fields +- **Syntax:** + +```javascript +{ + $top: { + output: [listOfFields], + sortBy: { + < fieldName >: < sortOrder > + } + } +} +``` + +- **Doc Link:** https://learn.microsoft.com/en-us/azure/documentdb/operators/accumulators/$top + +### $topN + +- **Description:** The $topN operator returns the first N documents from the result sorted by one or more fields +- **Syntax:** + +```javascript +{ + $topN: { + output: [listOfFields], + sortBy: { + : < sortOrder > + }, + n: < numDocumentsToReturn > + } +} +``` + +- **Doc Link:** https://learn.microsoft.com/en-us/azure/documentdb/operators/accumulators/$topn + +## Accumulators (in Other Stages) + +### $avg + +- **Description:** Computes the average of numeric values for documents in a group, bucket, or window. 
+- **Syntax:** + +```javascript +$avg: +``` + +- **Doc Link:** https://learn.microsoft.com/en-us/azure/documentdb/operators/accumulators/$avg + +### $first + +- **Description:** The $first operator returns the first value in a group according to the group's sorting order. +- **Syntax:** + +```javascript +{ + $first: +} +``` + +- **Doc Link:** https://learn.microsoft.com/en-us/azure/documentdb/operators/accumulators/$first + +### $last + +- **Description:** The $last operator returns the last document from the result sorted by one or more fields +- **Syntax:** + +```javascript +{ + "$last": +} +``` + +- **Doc Link:** https://learn.microsoft.com/en-us/azure/documentdb/operators/accumulators/$last + +### $max + +- **Description:** The $max operator returns the maximum value from a set of input values. +- **Syntax:** + +```javascript +$max: +``` + +- **Doc Link:** https://learn.microsoft.com/en-us/azure/documentdb/operators/accumulators/$max + +### $median + +- **Description:** The $median operator calculates the median value of a numeric field in a group of documents. 
+- **Syntax:** + +```javascript +{ + $group: { + _id: < expression > , + medianValue: { + $median: { + input: < field or expression > , + method: < > + } + } + } +} +``` + +- **Doc Link:** https://learn.microsoft.com/en-us/azure/documentdb/operators/accumulators/$median + +### $min + +- **Description:** Retrieves the minimum value for a specified field +- **Syntax:** + +```javascript +$min: +``` + +- **Doc Link:** https://learn.microsoft.com/en-us/azure/documentdb/operators/accumulators/$min + +### $percentile + +- **Description:** The $percentile operator calculates the percentile of numerical values that match a filtering criteria +- **Syntax:** + +```javascript +$percentile: { + input: < field or expression > , + p: [ < percentile values > ], + method: < method > +} +``` + +- **Doc Link:** https://learn.microsoft.com/en-us/azure/documentdb/operators/accumulators/$percentile + +### $stdDevPop + +- **Description:** The $stdDevPop operator calculates the standard deviation of the specified values +- **Syntax:** + +```javascript +{ + $stdDevPop: < fieldName > +} +``` + +- **Doc Link:** https://learn.microsoft.com/en-us/azure/documentdb/operators/accumulators/$stddevpop + +### $stdDevSamp + +- **Description:** The $stdDevSamp operator calculates the standard deviation of a specified sample of values and not the entire population +- **Syntax:** + +```javascript +{ + $stdDevSamp: < fieldName > +} +``` + +- **Doc Link:** https://learn.microsoft.com/en-us/azure/documentdb/operators/accumulators/$stddevsamp + +### $sum + +- **Description:** The $sum operator calculates the sum of the values of a field based on a filtering criteria +- **Syntax:** + +```javascript +{ + $sum: +} +``` + +- **Doc Link:** https://learn.microsoft.com/en-us/azure/documentdb/operators/accumulators/$sum + +## Variable Expression Operators + +### $let + +- **Description:** The $let operator allows defining variables for use in a specified expression, enabling complex calculations and 
reducing code repetition. +- **Syntax:** + +```javascript +{ + $let: { + vars: { + : , + : , + ... + }, + in: + } +} +``` + +- **Doc Link:** https://learn.microsoft.com/en-us/azure/documentdb/operators/variable-expression/$let + +## Window Operators + +### $sum + +- **Description:** The $sum operator calculates the sum of the values of a field based on a filtering criteria +- **Syntax:** + +```javascript +{ + $sum: +} +``` + +- **Doc Link:** none +- **Scraper Comment:** Doc page not found in expected directory 'window-operators/'. Content scraped from 'accumulators/'. + +### $push + +- **Description:** The $push operator adds a specified value to an array within a document. +- **Syntax:** + +```javascript +db.collection.update({ + < query > +}, { + $push: { + < field >: < value > + } +}, { + < options > +}) +``` + +- **Doc Link:** none +- **Scraper Comment:** Doc page not found in expected directory 'window-operators/'. Content scraped from 'array-update/'. + +### $addToSet + +- **Description:** The addToSet operator adds elements to an array if they don't already exist, while ensuring uniqueness of elements within the set. +- **Syntax:** + +```javascript +{ + $addToSet: { : } +} +``` + +- **Doc Link:** none +- **Scraper Comment:** Doc page not found in expected directory 'window-operators/'. Content scraped from 'array-update/'. + +### $count + +- **Description:** The `$count` operator is used to count the number of documents that match a query filtering criteria. +- **Syntax:** + +```javascript +{ + $count: ''; +} +``` + +- **Doc Link:** none +- **Scraper Comment:** Doc page not found in expected directory 'window-operators/'. Content scraped from 'accumulators/'. + +### $max + +- **Description:** The $max operator returns the maximum value from a set of input values. +- **Syntax:** + +```javascript +$max: +``` + +- **Doc Link:** none +- **Scraper Comment:** Doc page not found in expected directory 'window-operators/'. Content scraped from 'accumulators/'. 
+ +### $min + +- **Description:** Retrieves the minimum value for a specified field +- **Syntax:** + +```javascript +$min: +``` + +- **Doc Link:** none +- **Scraper Comment:** Doc page not found in expected directory 'window-operators/'. Content scraped from 'accumulators/'. + +### $avg + +- **Description:** Computes the average of numeric values for documents in a group, bucket, or window. +- **Syntax:** + +```javascript +$avg: +``` + +- **Doc Link:** none +- **Scraper Comment:** Doc page not found in expected directory 'window-operators/'. Content scraped from 'accumulators/'. + +### $stdDevPop + +- **Description:** The $stdDevPop operator calculates the standard deviation of the specified values +- **Syntax:** + +```javascript +{ + $stdDevPop: < fieldName > +} +``` + +- **Doc Link:** none +- **Scraper Comment:** Doc page not found in expected directory 'window-operators/'. Content scraped from 'accumulators/'. + +### $bottom + +- **Description:** The $bottom operator returns the last document from the query's result set sorted by one or more fields +- **Syntax:** + +```javascript +{ + $bottom: { + output: [listOfFields], + sortBy: { + : < sortOrder > + } + } +} +``` + +- **Doc Link:** none +- **Scraper Comment:** Doc page not found in expected directory 'window-operators/'. Content scraped from 'accumulators/'. + +### $bottomN + +- **Description:** The $bottomN operator returns the last N documents from the result sorted by one or more fields +- **Syntax:** + +```javascript +{ + $bottomN: { + output: [listOfFields], + sortBy: { + : < sortOrder > + }, + n: < numDocumentsToReturn > + } +} +``` + +- **Doc Link:** none +- **Scraper Comment:** Doc page not found in expected directory 'window-operators/'. Content scraped from 'accumulators/'. 
+ +### $covariancePop + +- **Description:** The $covariancePop operator returns the covariance of two numerical expressions +- **Syntax:** + +```javascript +{ + $covariancePop: [ < numericalExpression1 > , < numericalExpression2 > ] +} +``` + +- **Doc Link:** https://learn.microsoft.com/en-us/azure/documentdb/operators/window-operators/$covariancepop + +### $covarianceSamp + +- **Description:** The $covarianceSamp operator returns the covariance of a sample of two numerical expressions +- **Syntax:** + +```javascript +{ + $covarianceSamp: [ < numericalExpression1 > , < numericalExpression2 > ] +} +``` + +- **Doc Link:** https://learn.microsoft.com/en-us/azure/documentdb/operators/window-operators/$covariancesamp + +### $denseRank + +- **Description:** The $denseRank operator assigns and returns a positional ranking for each document within a partition based on a specified sort order +- **Syntax:** + +```javascript +{ + $denseRank: { + } +} +``` + +- **Doc Link:** https://learn.microsoft.com/en-us/azure/documentdb/operators/window-operators/$denserank + +### $derivative + +- **Description:** The $derivative operator calculates the average rate of change of the value of a field within a specified window. 
+- **Syntax:** + +```javascript +{ + $derivative: { + input: < expression >, + unit: < timeWindow > + } +} +``` + +- **Doc Link:** https://learn.microsoft.com/en-us/azure/documentdb/operators/window-operators/$derivative + +### $documentNumber + +- **Description:** The $documentNumber operator assigns and returns a position for each document within a partition based on a specified sort order +- **Syntax:** + +```javascript +{ + $documentNumber: { + } +} +``` + +- **Doc Link:** https://learn.microsoft.com/en-us/azure/documentdb/operators/window-operators/$documentnumber + +### $expMovingAvg + +- **Description:** The $expMovingAvg operator calculates the moving average of a field based on the specified number of documents to hold the highest weight +- **Syntax:** + +```javascript +{ + $expMovingAvg: { + input: < field to use for calculation >, + N: < number of recent documents with the highest weight > + } +} +``` + +- **Doc Link:** https://learn.microsoft.com/en-us/azure/documentdb/operators/window-operators/$expmovingavg + +### $first + +- **Description:** The $first operator returns the first value in a group according to the group's sorting order. +- **Syntax:** + +```javascript +{ + $first: +} +``` + +- **Doc Link:** none +- **Scraper Comment:** Doc page not found in expected directory 'window-operators/'. Content scraped from 'accumulators/'. + +### $integral + +- **Description:** The $integral operator calculates the area under a curve with the specified range of documents forming the adjacent documents for the calculation. 
+- **Syntax:** + +```javascript +{ + $integral: { + input: < expression > , + unit: < time window > + } +} +``` + +- **Doc Link:** https://learn.microsoft.com/en-us/azure/documentdb/operators/window-operators/$integral + +### $last + +- **Description:** The $last operator returns the last document from the result sorted by one or more fields +- **Syntax:** + +```javascript +{ + "$last": +} +``` + +- **Doc Link:** none +- **Scraper Comment:** Doc page not found in expected directory 'window-operators/'. Content scraped from 'accumulators/'. + +### $linearFill + +- **Description:** The $linearFill operator interpolates missing values in a sequence of documents using linear interpolation. +- **Syntax:** + +```javascript +{ + $linearFill: { + input: < expression > , + sortBy: { + < field >: < 1 or - 1 > + } + } +} +``` + +- **Doc Link:** https://learn.microsoft.com/en-us/azure/documentdb/operators/window-operators/$linearfill + +### $locf + +- **Description:** The $locf operator propagates the last observed non-null value forward within a partition in a windowed query. +- **Syntax:** + +```javascript +{ + $locf: { + input: , + sortBy: + } +} +``` + +- **Doc Link:** https://learn.microsoft.com/en-us/azure/documentdb/operators/window-operators/$locf + +### $minN + +- **Description:** Retrieves the bottom N values based on a specified filtering criteria +- **Syntax:** + +```javascript +$minN: { + input: < field or expression > , + n: < number of values to retrieve > +} +``` + +- **Doc Link:** none +- **Scraper Comment:** Doc page not found in expected directory 'window-operators/'. Content scraped from 'accumulators/'. + +### $rank + +- **Description:** The $rank operator ranks documents within a partition based on a specified sort order. 
+- **Syntax:** + +```javascript +{ + $setWindowFields: { + partitionBy: < expression > , + sortBy: { + < field >: < order > + }, + output: { + < outputField >: { + $rank: {} + } + } + } +} +``` + +- **Doc Link:** https://learn.microsoft.com/en-us/azure/documentdb/operators/window-operators/$rank + +### $shift + +- **Description:** A window operator that shifts values within a partition and returns the shifted value. +- **Syntax:** + +```javascript +{ + $shift: { + output: , + by: , + default: + } +} +``` + +- **Doc Link:** https://learn.microsoft.com/en-us/azure/documentdb/operators/window-operators/$shift + +### $stdDevSamp + +- **Description:** The $stdDevSamp operator calculates the standard deviation of a specified sample of values and not the entire population +- **Syntax:** + +```javascript +{ + $stdDevSamp: < fieldName > +} +``` + +- **Doc Link:** none +- **Scraper Comment:** Doc page not found in expected directory 'window-operators/'. Content scraped from 'accumulators/'. + +### $top + +- **Description:** The $top operator returns the first document from the result set sorted by one or more fields +- **Syntax:** + +```javascript +{ + $top: { + output: [listOfFields], + sortBy: { + < fieldName >: < sortOrder > + } + } +} +``` + +- **Doc Link:** none +- **Scraper Comment:** Doc page not found in expected directory 'window-operators/'. Content scraped from 'accumulators/'. + +### $topN + +- **Description:** The $topN operator returns the first N documents from the result sorted by one or more fields +- **Syntax:** + +```javascript +{ + $topN: { + output: [listOfFields], + sortBy: { + : < sortOrder > + }, + n: < numDocumentsToReturn > + } +} +``` + +- **Doc Link:** none +- **Scraper Comment:** Doc page not found in expected directory 'window-operators/'. Content scraped from 'accumulators/'. 
+ +## Conditional Expression Operators + +### $cond + +- **Description:** The $cond operator is used to evaluate a condition and return one of two expressions based on the result. +- **Syntax:** + +```javascript +{ + $cond: { + if: , + then: , + else: + } +} +``` + +- **Doc Link:** https://learn.microsoft.com/en-us/azure/documentdb/operators/conditional-expression/$cond + +### $ifNull + +- **Description:** The $ifNull operator is used to evaluate an expression and return a specified value if the expression resolves to null. +- **Syntax:** + +```javascript +{ + $ifNull: [ , ] +} +``` + +- **Doc Link:** https://learn.microsoft.com/en-us/azure/documentdb/operators/conditional-expression/$ifnull + +### $switch + +- **Description:** The $switch operator is used to evaluate a series of conditions and return a value based on the first condition that evaluates to true. +- **Syntax:** + +```javascript +{ + $switch: { + branches: [ + { case: , then: }, + { case: , then: } + ], + default: + } +} +``` + +- **Doc Link:** https://learn.microsoft.com/en-us/azure/documentdb/operators/conditional-expression/$switch + +## Aggregation Pipeline Stages + +### $addFields + +- **Description:** The $addFields stage in the aggregation pipeline is used to add new fields to documents. +- **Syntax:** + +```javascript +{ + $addFields: { + : , + : , + ... + } +} +``` + +- **Doc Link:** https://learn.microsoft.com/en-us/azure/documentdb/operators/aggregation/$addfields + +### $bucket + +- **Description:** Groups input documents into buckets based on specified boundaries. +- **Syntax:** + +```javascript +{ + $bucket: { + groupBy: , + boundaries: [ , , ... ], + default: , + output: { + : { }, + ... + } + } +} +``` + +- **Doc Link:** https://learn.microsoft.com/en-us/azure/documentdb/operators/aggregation/$bucket + +### $bucketAuto + +### $changeStream + +- **Description:** The $changeStream stage opens a change stream cursor to track data changes in real-time. 
+- **Syntax:** + +```javascript +{ + $changeStream: { + allChangesForCluster: , + fullDocument: , + fullDocumentBeforeChange: , + resumeAfter: , + startAfter: , + startAtOperationTime: , + showExpandedEvents: + } +} +``` + +- **Doc Link:** https://learn.microsoft.com/en-us/azure/documentdb/operators/aggregation/$changestream + +### $collStats + +- **Description:** The $collStats stage in the aggregation pipeline is used to return statistics about a collection. +- **Syntax:** + +```javascript +{ + $collStats: { + latencyStats: { histograms: }, + storageStats: { scale: }, + count: {} + } +} +``` + +- **Doc Link:** https://learn.microsoft.com/en-us/azure/documentdb/operators/aggregation/$collstats + +### $count + +- **Description:** The `$count` operator is used to count the number of documents that match a query filtering criteria. +- **Syntax:** + +```javascript +{ + $count: ''; +} +``` + +- **Doc Link:** none +- **Scraper Comment:** Doc page not found in expected directory 'aggregation/'. Content scraped from 'accumulators/'. + +### $densify + +- **Description:** Adds missing data points in a sequence of values within an array or collection. +- **Syntax:** + +```javascript +{ + $densify: { + field: , + range: { + step: , + unit: , // Optional, e.g., "hour", "day", "month", etc. + bounds: [, ] // Optional + }, + partitionByFields: [, , ...] // Optional + } +} +``` + +- **Doc Link:** https://learn.microsoft.com/en-us/azure/documentdb/operators/aggregation/$densify + +### $documents + +- **Description:** The $documents stage creates a pipeline from a set of provided documents. +- **Syntax:** + +```javascript +{ + $documents: [ + , + , + ... + ] +} +``` + +- **Doc Link:** https://learn.microsoft.com/en-us/azure/documentdb/operators/aggregation/$documents + +### $facet + +- **Description:** The $facet allows for multiple parallel aggregations to be executed within a single pipeline stage. 
+- **Syntax:** + +```javascript +{ + "$facet": { + "outputField1": [ { "stage1": {} }, { "stage2": {} } ], + "outputField2": [ { "stage1": {} }, { "stage2": {} } ] + } +} +``` + +- **Doc Link:** https://learn.microsoft.com/en-us/azure/documentdb/operators/aggregation/$facet + +### $fill + +- **Description:** The $fill stage allows filling missing values in documents based on specified methods and criteria. +- **Syntax:** + +```javascript +{ + $fill: { + sortBy: , + partitionBy: , + partitionByFields: , + output: { + : { value: }, + : { method: } + } + } +} +``` + +- **Doc Link:** https://learn.microsoft.com/en-us/azure/documentdb/operators/aggregation/$fill + +### $geoNear + +- **Description:** The $geoNear operator finds and sorts documents by their proximity to a geospatial point, returning distance information for each document. +- **Syntax:** + +```javascript +{ + $geoNear: { + near: { + type: "Point", + coordinates: [, ] + }, + distanceField: , + maxDistance: , + minDistance: , + query: , + includeLocs: , + distanceMultiplier: , + spherical: , + key: + } +} +``` + +- **Doc Link:** https://learn.microsoft.com/en-us/azure/documentdb/operators/aggregation/$geonear + +### $graphLookup + +### $group + +- **Description:** The $group stage groups documents by specified identifier expressions and applies accumulator expressions. +- **Syntax:** + +```javascript +{ + $group: { + _id: , + : { : }, + : { : } + } +} +``` + +- **Doc Link:** https://learn.microsoft.com/en-us/azure/documentdb/operators/aggregation/$group + +### $indexStats + +- **Description:** The $indexStats stage returns usage statistics for each index in the collection. +- **Syntax:** + +```javascript +{ + $indexStats: { + } +} +``` + +- **Doc Link:** https://learn.microsoft.com/en-us/azure/documentdb/operators/aggregation/$indexstats + +### $limit + +### $lookup + +- **Description:** The $lookup stage in the Aggregation Framework is used to perform left outer joins with other collections. 
+- **Syntax:** + +```javascript +{ + $lookup: { + from: , + localField: , + foreignField: , + as: + } +} +``` + +- **Doc Link:** https://learn.microsoft.com/en-us/azure/documentdb/operators/aggregation/$lookup + +### $match + +- **Description:** The $match stage in the aggregation pipeline is used to filter documents that match a specified condition. +- **Syntax:** + +```javascript +{ + $match: { + + } +} +``` + +- **Doc Link:** https://learn.microsoft.com/en-us/azure/documentdb/operators/aggregation/$match + +### $merge + +- **Description:** The $merge stage in an aggregation pipeline writes the results of the aggregation to a specified collection. +- **Syntax:** + +```javascript +{ + $merge: { + into: , + on: , + whenMatched: , + whenNotMatched: + } +} +``` + +- **Doc Link:** https://learn.microsoft.com/en-us/azure/documentdb/operators/aggregation/$merge + +### $out + +- **Description:** The `$out` stage in an aggregation pipeline writes the resulting documents to a specified collection. +- **Syntax:** + +```javascript +{ + $out: ''; +} +``` + +- **Doc Link:** https://learn.microsoft.com/en-us/azure/documentdb/operators/aggregation/$out + +### $project + +### $redact + +- **Description:** Filters the content of the documents based on access rights. 
+- **Syntax:** + +```javascript +{ + $redact: +} +``` + +- **Doc Link:** https://learn.microsoft.com/en-us/azure/documentdb/operators/aggregation/$redact + +### $replaceRoot + +### $replaceWith + +- **Description:** The $replaceWith operator in Azure DocumentDB returns a document after replacing a document with the specified document +- **Syntax:** + +```javascript +{ + "$replaceWith": +} +``` + +- **Doc Link:** https://learn.microsoft.com/en-us/azure/documentdb/operators/aggregation/$replacewith + +### $sample + +- **Description:** The $sample operator in Azure DocumentDB returns a randomly selected number of documents +- **Syntax:** + +```javascript +{ + $sample: { size: } +} +``` + +- **Doc Link:** https://learn.microsoft.com/en-us/azure/documentdb/operators/aggregation/$sample + +### $search + +### $searchMeta + +### $set + +- **Description:** The $set operator in Azure DocumentDB updates or creates a new field with a specified value +- **Syntax:** + +```javascript +{ + $set: { + newField: + } +} +``` + +- **Doc Link:** https://learn.microsoft.com/en-us/azure/documentdb/operators/aggregation/$set + +### $setWindowFields + +### $skip + +- **Description:** The $skip stage in the aggregation pipeline is used to skip a specified number of documents from the input and pass the remaining documents to the next stage in the pipeline. +- **Syntax:** + +```javascript +{ + $skip: +} +``` + +- **Doc Link:** https://learn.microsoft.com/en-us/azure/documentdb/operators/aggregation/$skip + +### $sort + +- **Description:** The $sort stage in the aggregation pipeline is used to order the documents in the pipeline by a specified field or fields. 
+- **Syntax:** + +```javascript +{ + $sort: { + < field1 >: < sort order > , + < field2 >: < sort order > + } +} +``` + +- **Doc Link:** https://learn.microsoft.com/en-us/azure/documentdb/operators/aggregation/$sort + +### $sortByCount + +- **Description:** The $sortByCount stage in the aggregation pipeline is used to group documents by a specified expression and then sort the count of documents in each group in descending order. +- **Syntax:** + +```javascript +{ + $sortByCount: +} +``` + +- **Doc Link:** https://learn.microsoft.com/en-us/azure/documentdb/operators/aggregation/$sortbycount + +### $unionWith + +### $unset + +- **Description:** The $unset stage in the aggregation pipeline is used to remove specified fields from documents. +- **Syntax:** + +```javascript +{ + $unset: "" | ["", "", ...] +} +``` + +- **Doc Link:** https://learn.microsoft.com/en-us/azure/documentdb/operators/aggregation/$unset + +### $unwind + +- **Description:** The $unwind stage in the aggregation framework is used to deconstruct an array field from the input documents to output a document for each element. +- **Syntax:** + +```javascript +{ + $unwind: { + path: , + includeArrayIndex: , // Optional + preserveNullAndEmptyArrays: // Optional + } +} +``` + +- **Doc Link:** https://learn.microsoft.com/en-us/azure/documentdb/operators/aggregation/$unwind + +### $currentOp + +## Variables in Aggregation Expressions + +### $$NOW + +### $$ROOT + +### $$REMOVE + +### $$CURRENT + +### $$DESCEND + +### $$PRUNE + +### $$KEEP + +## Not Listed + +Operators below are present on the compatibility page but are not in scope +for this package (deprecated or not available in DocumentDB). 
+ +- **$where** (Evaluation Query Operators) — Deprecated in Mongo version 8.0 +- **$meta** (Projection Operators) — Not in scope +- **$accumulator** (Custom Aggregation Expression Operators) — Deprecated in Mongo version 8.0 +- **$function** (Custom Aggregation Expression Operators) — Deprecated in Mongo version 8.0 +- **$meta** (Text Expression Operator) — Not in scope +- **$accumulator** (Accumulators ($group, $bucket, $bucketAuto, $setWindowFields)) — Deprecated in Mongo version 8.0 +- **$changeStreamSplitLargeEvent** (Aggregation Pipeline Stages) — Not in scope +- **$listSampledQueries** (Aggregation Pipeline Stages) — Not in scope +- **$listSearchIndexes** (Aggregation Pipeline Stages) — Not in scope +- **$listSessions** (Aggregation Pipeline Stages) — Not in scope +- **$planCacheStats** (Aggregation Pipeline Stages) — Not in scope +- **$shardedDataDistribution** (Aggregation Pipeline Stages) — Not in scope +- **$listLocalSessions** (Aggregation Pipeline Stages) — Not in scope +- **$$CLUSTER_TIME** (Variables in Aggregation Expressions) — Not in scope +- **$$SEARCH_META** (Variables in Aggregation Expressions) — Not in scope +- **$$USER_ROLES** (Variables in Aggregation Expressions) — Not in scope diff --git a/packages/documentdb-constants/scripts/README.md b/packages/documentdb-constants/scripts/README.md new file mode 100644 index 000000000..d642ecb08 --- /dev/null +++ b/packages/documentdb-constants/scripts/README.md @@ -0,0 +1,97 @@ +# Scripts + +Helper scripts for maintaining the `@vscode-documentdb/documentdb-constants` package. + +## scrape-operator-docs.ts + +Scrapes the DocumentDB compatibility page and per-operator documentation to produce `resources/scraped/operator-reference.md`. + +```bash +npm run scrape +``` + +**When to run:** When the upstream DocumentDB documentation changes (new operators, updated descriptions, etc.). This is infrequent — typically once per DocumentDB release. 
+ +**Output:** `resources/scraped/operator-reference.md` — a machine-generated Markdown dump of all supported operators, their descriptions, syntax blocks, and doc links. + +## generate-from-reference.ts + +Reads the scraped dump, hand-maintained overrides file, and snippet templates, then generates the TypeScript operator data files in `src/`. + +```bash +npm run generate +``` + +**When to run:** + +- After running the scraper (`npm run scrape`) +- After editing `resources/overrides/operator-overrides.md` +- After editing `resources/overrides/operator-snippets.md` + +**Inputs:** + +| File | Purpose | +| ------------------------------------------- | ---------------------------------- | +| `resources/scraped/operator-reference.md` | Primary data (machine-generated) | +| `resources/overrides/operator-overrides.md` | Manual overrides (hand-maintained) | +| `resources/overrides/operator-snippets.md` | Snippet templates per category | + +**Outputs:** Seven TypeScript files in `src/`: + +- `queryOperators.ts` — comparison, logical, element, evaluation, geospatial, array, bitwise, projection, misc query operators +- `updateOperators.ts` — field, array, and bitwise update operators +- `expressionOperators.ts` — arithmetic, array, bitwise, boolean, comparison, conditional, data-size, date, literal, misc, object, set, string, timestamp, trig, type, and variable expression operators +- `accumulators.ts` — group and other-stage accumulators +- `windowOperators.ts` — window function operators +- `stages.ts` — aggregation pipeline stages +- `systemVariables.ts` — system variables (`$$NOW`, `$$ROOT`, etc.) + +> **Do not edit the generated `src/` files by hand.** Put corrections in the overrides or snippets files instead. The generated files contain a header warning to this effect. + +## evaluate-overrides.ts + +Evaluates the relationship between scraped data, manual overrides, and snippet coverage. Produces a color-coded report. 
+ +```bash +npm run evaluate +``` + +**When to run:** + +- After re-scraping (`npm run scrape`) to see if previously-missing descriptions are now available +- Periodically, to check coverage and detect redundant overrides + +**Report sections:** + +1. **GAPS** — operators with empty scraped descriptions and no override (need attention) +2. **POTENTIALLY REDUNDANT** — operators that have **both** a scraped description and an override description; the override may no longer be needed +3. **ACTIVE OVERRIDES** — overrides filling real gaps, with both override and scraped values shown +4. **SNIPPET COVERAGE** — operators with/without snippet templates per category +5. **SUMMARY** — total counts and coverage percentage + +## Workflow + +``` + ┌──────────────────────┐ + │ Upstream docs change │ + └──────────┬───────────┘ + ▼ + npm run scrape + │ + ▼ + scraped/operator-reference.md + │ + ├──── npm run evaluate (check gaps, redundant overrides & snippet coverage) + │ + ├──── overrides/operator-overrides.md (manual) + ├──── overrides/operator-snippets.md (manual) + │ + ▼ + npm run generate + │ + ▼ + src/*.ts (generated) + │ + ▼ + npm run build +``` diff --git a/packages/documentdb-constants/scripts/evaluate-overrides.ts b/packages/documentdb-constants/scripts/evaluate-overrides.ts new file mode 100644 index 000000000..366bfc608 --- /dev/null +++ b/packages/documentdb-constants/scripts/evaluate-overrides.ts @@ -0,0 +1,598 @@ +/*--------------------------------------------------------------------------------------------- + * Copyright (c) Microsoft Corporation. All rights reserved. + * Licensed under the MIT License. See License.txt in the project root for license information. 
+ *--------------------------------------------------------------------------------------------*/ + +/** + * evaluate-overrides.ts + * + * Evaluates the relationship between scraped operator data and manual overrides. + * Produces a report showing: + * + * 1. Operators with empty descriptions in the scrape AND no override + * (gaps that still need attention) + * 2. Operators that have overrides β€” shows both the override value and the + * original scraped value so you can detect when an override is no longer + * needed (e.g. the upstream docs now have a description) + * 3. Summary statistics + * + * Usage: npm run evaluate + */ + +import * as fs from 'fs'; +import * as path from 'path'; + +// --------------------------------------------------------------------------- +// Types (lightweight β€” reuses the same Markdown format as the generator) +// --------------------------------------------------------------------------- + +interface ParsedEntry { + value: string; + description: string; + category: string; + docLink: string; +} + +interface OverrideEntry { + description?: string; + syntax?: string; + docLink?: string; + snippet?: string; +} + +// --------------------------------------------------------------------------- +// Parsers (simplified versions of the generator's parsers) +// --------------------------------------------------------------------------- + +function parseDump(content: string): ParsedEntry[] { + const lines = content.split('\n'); + const entries: ParsedEntry[] = []; + + let currentCategory = ''; + let currentOp: Partial | null = null; + let inCodeBlock = false; + + for (const line of lines) { + if (line.startsWith('```')) { + inCodeBlock = !inCodeBlock; + continue; + } + if (inCodeBlock) continue; + + const h2 = line.match(/^## (.+)$/); + if (h2) { + if (currentOp && currentCategory) { + entries.push({ + value: currentOp.value!, + description: currentOp.description || '', + category: currentCategory, + docLink: currentOp.docLink || '', + }); + 
} + currentOp = null; + const cat = h2[1].trim(); + if (cat === 'Summary' || cat === 'Not Listed') { + currentCategory = ''; + continue; + } + currentCategory = cat; + continue; + } + + const h3 = line.match(/^### (.+)$/); + if (h3 && currentCategory) { + if (currentOp) { + entries.push({ + value: currentOp.value!, + description: currentOp.description || '', + category: currentCategory, + docLink: currentOp.docLink || '', + }); + } + currentOp = { value: h3[1].trim(), description: '', docLink: '', category: currentCategory }; + continue; + } + + if (currentOp && line.startsWith('- **Description:**')) { + currentOp.description = line.replace('- **Description:**', '').trim(); + } + + // Parse doc link ('none' means scraper found no page at expected location) + if (currentOp && line.startsWith('- **Doc Link:**')) { + const rawLink = line.replace('- **Doc Link:**', '').trim(); + currentOp.docLink = rawLink === 'none' ? '' : rawLink; + } + } + + if (currentOp && currentCategory) { + entries.push({ + value: currentOp.value!, + description: currentOp.description || '', + category: currentCategory, + docLink: currentOp.docLink || '', + }); + } + + return entries; +} + +function parseOverrides(content: string): Map> { + const lines = content.split('\n'); + const result = new Map>(); + + let currentCategory = ''; + let currentOp: { value: string; entry: OverrideEntry } | null = null; + let inCodeBlock = false; + let syntaxLines: string[] = []; + + for (const line of lines) { + if (line.startsWith('```')) { + if (inCodeBlock) { + inCodeBlock = false; + if (currentOp) { + currentOp.entry.syntax = syntaxLines.join('\n').trim(); + } + syntaxLines = []; + continue; + } else { + inCodeBlock = true; + continue; + } + } + if (inCodeBlock) { + syntaxLines.push(line); + continue; + } + + const h2 = line.match(/^## (.+)$/); + if (h2) { + if (currentOp && currentCategory) { + saveOverride(result, currentCategory, currentOp); + } + currentOp = null; + currentCategory = h2[1].trim(); + 
continue; + } + + const h3 = line.match(/^### (.+)$/); + if (h3 && currentCategory) { + if (currentOp) { + saveOverride(result, currentCategory, currentOp); + } + currentOp = { value: h3[1].trim(), entry: {} }; + continue; + } + + if (currentOp) { + if (line.startsWith('- **Description:**')) { + currentOp.entry.description = line.replace('- **Description:**', '').trim(); + } + if (line.startsWith('- **Doc Link:**')) { + currentOp.entry.docLink = line.replace('- **Doc Link:**', '').trim(); + } + if (line.startsWith('- **Snippet:**')) { + let snippet = line.replace('- **Snippet:**', '').trim(); + if (snippet.startsWith('`') && snippet.endsWith('`')) { + snippet = snippet.slice(1, -1); + } + currentOp.entry.snippet = snippet; + } + } + } + + if (currentOp && currentCategory) { + saveOverride(result, currentCategory, currentOp); + } + + return result; +} + +function saveOverride( + map: Map>, + category: string, + op: { value: string; entry: OverrideEntry }, +): void { + if (!map.has(category)) map.set(category, new Map()); + map.get(category)!.set(op.value, op.entry); +} + +// --------------------------------------------------------------------------- +// Lookup helpers +// --------------------------------------------------------------------------- + +/** + * Find an override for a dump entry, mirroring how the generator resolves overrides. + * + * The generator's `applyOverrides` iterates override categories: + * 1. If the override category exists in the dump, it looks for the operator in that exact category. + * 2. If the override category does NOT exist in the dump, it falls back to cross-category search. + * + * So for a dump entry (operatorValue, category), an override matches only if: + * (a) The override is in the same category as the dump entry (exact match), OR + * (b) The override is in a category that doesn't exist in the dump at all, and no + * earlier dump category already claimed this operator via cross-category fallback. 
+ * + * We pass `dumpCategories` (all category names in the dump) to distinguish (a) from (b). + */ +function findOverride( + overrides: Map>, + operatorValue: string, + category: string, + dumpCategories: Set, +): { override: OverrideEntry; overrideCategory: string } | undefined { + // Exact category match: override category === dump entry category + const catOverrides = overrides.get(category); + if (catOverrides) { + const entry = catOverrides.get(operatorValue); + if (entry) return { override: entry, overrideCategory: category }; + } + + // Cross-category fallback: only if override category doesn't exist in the dump. + // This mirrors the generator, which only enters the cross-category path when + // `categorizedOps.get(category)` returns undefined. + for (const [overrideCat, opMap] of overrides) { + if (overrideCat === category) continue; + // If this override category exists in the dump, the generator would do an + // exact-category-only lookup there β€” it would NOT spill into other categories. 
+ if (dumpCategories.has(overrideCat)) continue; + const entry = opMap.get(operatorValue); + if (entry) return { override: entry, overrideCategory: overrideCat }; + } + + return undefined; +} + +// --------------------------------------------------------------------------- +// ANSI colors for terminal output +// --------------------------------------------------------------------------- + +const RED = '\x1b[31m'; +const GREEN = '\x1b[32m'; +const YELLOW = '\x1b[33m'; +const CYAN = '\x1b[36m'; +const DIM = '\x1b[2m'; +const BOLD = '\x1b[1m'; +const RESET = '\x1b[0m'; + +// --------------------------------------------------------------------------- +// Category β†’ meta tag mapping (mirrors generator's CATEGORY_TO_META) +// --------------------------------------------------------------------------- + +const CATEGORY_TO_META: Record = { + 'Comparison Query Operators': 'META_QUERY_COMPARISON', + 'Logical Query Operators': 'META_QUERY_LOGICAL', + 'Element Query Operators': 'META_QUERY_ELEMENT', + 'Evaluation Query Operators': 'META_QUERY_EVALUATION', + 'Geospatial Operators': 'META_QUERY_GEOSPATIAL', + 'Array Query Operators': 'META_QUERY_ARRAY', + 'Bitwise Query Operators': 'META_QUERY_BITWISE', + 'Projection Operators': 'META_QUERY_PROJECTION', + 'Miscellaneous Query Operators': 'META_QUERY_MISC', + 'Field Update Operators': 'META_UPDATE_FIELD', + 'Array Update Operators': 'META_UPDATE_ARRAY', + 'Bitwise Update Operators': 'META_UPDATE_BITWISE', + 'Arithmetic Expression Operators': 'META_EXPR_ARITH', + 'Array Expression Operators': 'META_EXPR_ARRAY', + 'Bitwise Operators': 'META_EXPR_BITWISE', + 'Boolean Expression Operators': 'META_EXPR_BOOL', + 'Comparison Expression Operators': 'META_EXPR_COMPARISON', + 'Data Size Operators': 'META_EXPR_DATASIZE', + 'Date Expression Operators': 'META_EXPR_DATE', + 'Literal Expression Operator': 'META_EXPR_LITERAL', + 'Miscellaneous Operators': 'META_EXPR_MISC', + 'Object Expression Operators': 'META_EXPR_OBJECT', + 'Set Expression 
Operators': 'META_EXPR_SET', + 'String Expression Operators': 'META_EXPR_STRING', + 'Timestamp Expression Operators': 'META_EXPR_TIMESTAMP', + 'Trigonometry Expression Operators': 'META_EXPR_TRIG', + 'Type Expression Operators': 'META_EXPR_TYPE', + 'Accumulators ($group, $bucket, $bucketAuto, $setWindowFields)': 'META_ACCUMULATOR', + 'Accumulators (in Other Stages)': 'META_ACCUMULATOR', + Accumulators: 'META_ACCUMULATOR', + 'Variable Expression Operators': 'META_EXPR_VARIABLE', + 'Window Operators': 'META_WINDOW', + 'Conditional Expression Operators': 'META_EXPR_CONDITIONAL', + 'Aggregation Pipeline Stages': 'META_STAGE', + 'Variables in Aggregation Expressions': 'META_VARIABLE', +}; + +// --------------------------------------------------------------------------- +// Snippet file parser +// --------------------------------------------------------------------------- + +function parseSnippetsFile(content: string): Map> { + const lines = content.split('\n'); + const result = new Map>(); + + let currentMeta = ''; + let currentOp = ''; + let inCodeBlock = false; + + for (const line of lines) { + if (line.startsWith('```')) { + inCodeBlock = !inCodeBlock; + continue; + } + if (inCodeBlock) continue; + + const h2 = line.match(/^## (.+)$/); + if (h2) { + const cat = h2[1].trim(); + const meta = CATEGORY_TO_META[cat]; + if (meta) { + currentMeta = meta; + if (!result.has(currentMeta)) { + result.set(currentMeta, new Map()); + } + } else { + currentMeta = ''; + } + currentOp = ''; + continue; + } + + const h3 = line.match(/^### (.+)$/); + if (h3 && currentMeta) { + currentOp = h3[1].trim(); + continue; + } + + if (currentMeta && currentOp && line.startsWith('- **Snippet:**')) { + let snippet = line.replace('- **Snippet:**', '').trim(); + if (snippet.startsWith('`') && snippet.endsWith('`')) { + snippet = snippet.slice(1, -1); + } + if (snippet) { + result.get(currentMeta)!.set(currentOp, snippet); + } + continue; + } + } + + return result; +} + +function operatorHasSnippet( 
+ snippets: Map>, + meta: string, + operatorValue: string, + overrideSnippet: string | undefined, +): boolean { + if (overrideSnippet) return true; + const catSnippets = snippets.get(meta); + if (!catSnippets) return false; + if (catSnippets.has(operatorValue)) return true; + if (catSnippets.has('DEFAULT')) return true; + return false; +} + +// --------------------------------------------------------------------------- +// Main +// --------------------------------------------------------------------------- + +function main(): void { + const dumpPath = path.join(__dirname, '..', 'resources', 'scraped', 'operator-reference.md'); + const overridePath = path.join(__dirname, '..', 'resources', 'overrides', 'operator-overrides.md'); + const snippetsPath = path.join(__dirname, '..', 'resources', 'overrides', 'operator-snippets.md'); + + if (!fs.existsSync(dumpPath)) { + console.error(`❌ Scraped dump not found: ${dumpPath}`); + process.exit(1); + } + + console.log(`${BOLD}πŸ“Š Evaluating operator overrides${RESET}\n`); + + // Parse both files + const dumpContent = fs.readFileSync(dumpPath, 'utf-8'); + const dumpEntries = parseDump(dumpContent); + + let overrides = new Map>(); + let totalOverrideCount = 0; + if (fs.existsSync(overridePath)) { + const overrideContent = fs.readFileSync(overridePath, 'utf-8'); + overrides = parseOverrides(overrideContent); + for (const [, catMap] of overrides) { + totalOverrideCount += catMap.size; + } + } + + // Categorize every scraped entry + const gaps: ParsedEntry[] = []; // empty description, no override + const overridden: { entry: ParsedEntry; override: OverrideEntry; overrideCategory: string }[] = []; + const redundantOverrides: { entry: ParsedEntry; override: OverrideEntry; overrideCategory: string }[] = []; + const docLinkOnlyOverrides: { entry: ParsedEntry; override: OverrideEntry; overrideCategory: string }[] = []; + const descriptionsOk: ParsedEntry[] = []; + + // Collect all dump category names so findOverride can distinguish 
exact vs cross-category + const dumpCategories = new Set(dumpEntries.map((e) => e.category)); + + for (const entry of dumpEntries) { + const match = findOverride(overrides, entry.value, entry.category, dumpCategories); + const hasScrapedDescription = entry.description.trim().length > 0; + + if (match) { + const hasDescOverride = !!match.override.description; + const hasDocLinkOverride = !!match.override.docLink; + const hasSnippetOverride = !!match.override.snippet; + + if (hasScrapedDescription && hasDescOverride) { + // Has both scraped description AND an override description + redundantOverrides.push({ entry, override: match.override, overrideCategory: match.overrideCategory }); + } else if (!hasDescOverride && hasDocLinkOverride && !hasSnippetOverride) { + // Override provides only a doc link (no description, no snippet) + docLinkOnlyOverrides.push({ + entry, + override: match.override, + overrideCategory: match.overrideCategory, + }); + } else { + // Override is filling a description gap (or overriding snippet) + overridden.push({ entry, override: match.override, overrideCategory: match.overrideCategory }); + } + } else if (!hasScrapedDescription) { + gaps.push(entry); + } else { + descriptionsOk.push(entry); + } + } + + // ----------------------------------------------------------------------- + // Section 1: Gaps β€” empty description, no override + // ----------------------------------------------------------------------- + console.log(`${BOLD}${RED}═══ GAPS: Empty description, no override (${gaps.length}) ═══${RESET}`); + if (gaps.length === 0) { + console.log(` ${GREEN}βœ… No gaps β€” all operators have descriptions or overrides.${RESET}\n`); + } else { + const byCategory = groupByCategory(gaps); + for (const [cat, ops] of byCategory) { + console.log(` ${CYAN}${cat}${RESET}`); + for (const op of ops) { + console.log(` ${RED}⚠${RESET} ${op.value}`); + } + } + console.log(''); + } + + // 
----------------------------------------------------------------------- + // Section 2: Potentially redundant overrides + // (scraped dump NOW has a description, but override also provides one) + // ----------------------------------------------------------------------- + console.log(`${BOLD}${YELLOW}═══ POTENTIALLY REDUNDANT OVERRIDES (${redundantOverrides.length}) ═══${RESET}`); + if (redundantOverrides.length === 0) { + console.log(` ${GREEN}βœ… No redundant overrides β€” all overrides are filling gaps.${RESET}\n`); + } else { + console.log( + ` ${DIM}These operators now have scraped descriptions. The override may no longer be needed.${RESET}`, + ); + console.log( + ` ${DIM}Compare the values below β€” if the scraped one is good enough, remove the override.${RESET}\n`, + ); + for (const { entry, override, overrideCategory } of redundantOverrides) { + console.log(` ${CYAN}${entry.value}${RESET} ${DIM}(${entry.category})${RESET}`); + console.log(` ${DIM}Override (${overrideCategory}):${RESET} ${override.description}`); + console.log(` ${DIM}Scraped:${RESET} ${entry.description}`); + console.log(''); + } + } + + // ----------------------------------------------------------------------- + // Section 3: Active overrides filling gaps + // ----------------------------------------------------------------------- + console.log(`${BOLD}${GREEN}═══ ACTIVE OVERRIDES FILLING GAPS (${overridden.length}) ═══${RESET}`); + if (overridden.length === 0) { + console.log(` ${DIM}No active overrides.${RESET}\n`); + } else { + const byCategory = new Map(); + for (const item of overridden) { + const cat = item.overrideCategory; + if (!byCategory.has(cat)) byCategory.set(cat, []); + byCategory.get(cat)!.push(item); + } + for (const [cat, items] of byCategory) { + console.log(` ${CYAN}${cat}${RESET} (${items.length} overrides)`); + for (const { entry, override } of items) { + const overrideDesc = override.description || '(no description override)'; + const scrapedDesc = entry.description 
|| '(empty)'; + console.log(` ${GREEN}βœ“${RESET} ${entry.value}`); + console.log(` ${DIM}Override:${RESET} ${overrideDesc}`); + if (scrapedDesc !== '(empty)') { + console.log(` ${DIM}Scraped:${RESET} ${scrapedDesc}`); + } + } + } + console.log(''); + } + + // ----------------------------------------------------------------------- + // Section 3b: Doc link overrides (operators with 'none' in dump, link provided via override) + // ----------------------------------------------------------------------- + console.log(`${BOLD}${GREEN}═══ DOC LINK OVERRIDES (${docLinkOnlyOverrides.length}) ═══${RESET}`); + if (docLinkOnlyOverrides.length === 0) { + console.log(` ${DIM}No doc-link-only overrides.${RESET}\n`); + } else { + console.log(` ${DIM}These operators have 'none' in the dump (doc page not at expected directory).${RESET}`); + console.log( + ` ${DIM}The override provides a doc link that the generator can't infer via cross-reference.${RESET}\n`, + ); + for (const { entry, override, overrideCategory } of docLinkOnlyOverrides) { + const dumpLink = entry.docLink || 'none'; + console.log(` ${CYAN}${entry.value}${RESET} ${DIM}(${entry.category})${RESET}`); + console.log(` ${DIM}Override (${overrideCategory}):${RESET} ${override.docLink}`); + console.log(` ${DIM}Dump link:${RESET} ${dumpLink}`); + console.log(''); + } + } + + // ----------------------------------------------------------------------- + // Section 4: Snippet coverage + // ----------------------------------------------------------------------- + let snippets = new Map>(); + if (fs.existsSync(snippetsPath)) { + const snippetsContent = fs.readFileSync(snippetsPath, 'utf-8'); + snippets = parseSnippetsFile(snippetsContent); + } + + const withSnippet: ParsedEntry[] = []; + const withoutSnippet: ParsedEntry[] = []; + + for (const entry of dumpEntries) { + const meta = CATEGORY_TO_META[entry.category]; + if (!meta) { + withoutSnippet.push(entry); + continue; + } + const match = findOverride(overrides, entry.value, 
entry.category, dumpCategories); + const overrideSnippet = match?.override.snippet; + if (operatorHasSnippet(snippets, meta, entry.value, overrideSnippet)) { + withSnippet.push(entry); + } else { + withoutSnippet.push(entry); + } + } + + console.log(`${BOLD}${CYAN}═══ SNIPPET COVERAGE (${withSnippet.length}/${dumpEntries.length}) ═══${RESET}`); + if (withoutSnippet.length === 0) { + console.log(` ${GREEN}βœ… All operators have snippet templates.${RESET}\n`); + } else { + console.log(` ${DIM}Operators without snippet templates (by category):${RESET}\n`); + const byCategory = groupByCategory(withoutSnippet); + for (const [cat, ops] of byCategory) { + console.log(` ${CYAN}${cat}${RESET}`); + for (const op of ops) { + console.log(` ${DIM}β€”${RESET} ${op.value}`); + } + } + console.log(''); + } + + // ----------------------------------------------------------------------- + // Section 5: Summary + // ----------------------------------------------------------------------- + console.log(`${BOLD}═══ SUMMARY ═══${RESET}`); + console.log(` Total scraped operators: ${dumpEntries.length}`); + console.log(` With scraped description: ${descriptionsOk.length + redundantOverrides.length}`); + console.log(` Filled by override: ${overridden.length}`); + console.log(` Doc-link-only overrides: ${docLinkOnlyOverrides.length}`); + console.log(` Potentially redundant: ${YELLOW}${redundantOverrides.length}${RESET}`); + console.log(` ${RED}Gaps remaining:${RESET} ${gaps.length}`); + console.log(` Total overrides in file: ${totalOverrideCount}`); + console.log(` With snippet template: ${withSnippet.length}`); + console.log(` Without snippet: ${withoutSnippet.length}`); + console.log(` Description coverage: ${((1 - gaps.length / dumpEntries.length) * 100).toFixed(1)}%`); + console.log(` Snippet coverage: ${((withSnippet.length / dumpEntries.length) * 100).toFixed(1)}%`); +} + +// --------------------------------------------------------------------------- +// Helpers +// 
/**
 * Buckets entries by their `category` field.
 *
 * Categories appear in the returned map in first-seen order, and entries keep
 * their input order inside each bucket.
 *
 * @param entries Entries to group.
 * @returns Map of category name to the entries that carry it.
 */
function groupByCategory(entries: ParsedEntry[]): Map<string, ParsedEntry[]> {
    const grouped = new Map<string, ParsedEntry[]>();
    for (const entry of entries) {
        const bucket = grouped.get(entry.category);
        if (bucket === undefined) {
            grouped.set(entry.category, [entry]);
        } else {
            bucket.push(entry);
        }
    }
    return grouped;
}
No snippet + * + * Usage: npm run generate + * Note: This script overwrites the generated src/ files. Do NOT edit + * those files by hand β€” put corrections in the overrides/snippets + * files instead. + */ + +import { execSync } from 'child_process'; +import * as fs from 'fs'; +import * as path from 'path'; +import { getDocLink } from '../src/docLinks'; +import * as MetaTags from '../src/metaTags'; + +// --------------------------------------------------------------------------- +// Types +// --------------------------------------------------------------------------- + +interface ParsedOperator { + value: string; + description: string; + syntax: string; + docLink: string; + category: string; + snippetOverride?: string; + standalone?: boolean; +} + +interface FileSpec { + fileName: string; + variableName: string; + metaImport: string; + metaValue: string; + operators: ParsedOperator[]; + extraImports?: string; +} + +// --------------------------------------------------------------------------- +// Category β†’ meta tag mapping +// --------------------------------------------------------------------------- + +const CATEGORY_TO_META: Record = { + 'Comparison Query Operators': 'META_QUERY_COMPARISON', + 'Logical Query Operators': 'META_QUERY_LOGICAL', + 'Element Query Operators': 'META_QUERY_ELEMENT', + 'Evaluation Query Operators': 'META_QUERY_EVALUATION', + 'Geospatial Operators': 'META_QUERY_GEOSPATIAL', + 'Array Query Operators': 'META_QUERY_ARRAY', + 'Bitwise Query Operators': 'META_QUERY_BITWISE', + 'Projection Operators': 'META_QUERY_PROJECTION', + 'Miscellaneous Query Operators': 'META_QUERY_MISC', + 'Field Update Operators': 'META_UPDATE_FIELD', + 'Array Update Operators': 'META_UPDATE_ARRAY', + 'Bitwise Update Operators': 'META_UPDATE_BITWISE', + 'Arithmetic Expression Operators': 'META_EXPR_ARITH', + 'Array Expression Operators': 'META_EXPR_ARRAY', + 'Bitwise Operators': 'META_EXPR_BITWISE', + 'Boolean Expression Operators': 'META_EXPR_BOOL', + 'Comparison 
Expression Operators': 'META_EXPR_COMPARISON', + 'Data Size Operators': 'META_EXPR_DATASIZE', + 'Date Expression Operators': 'META_EXPR_DATE', + 'Literal Expression Operator': 'META_EXPR_LITERAL', + 'Miscellaneous Operators': 'META_EXPR_MISC', + 'Object Expression Operators': 'META_EXPR_OBJECT', + 'Set Expression Operators': 'META_EXPR_SET', + 'String Expression Operators': 'META_EXPR_STRING', + 'Timestamp Expression Operators': 'META_EXPR_TIMESTAMP', + 'Trigonometry Expression Operators': 'META_EXPR_TRIG', + 'Type Expression Operators': 'META_EXPR_TYPE', + 'Accumulators ($group, $bucket, $bucketAuto, $setWindowFields)': 'META_ACCUMULATOR', + 'Accumulators (in Other Stages)': 'META_ACCUMULATOR', + Accumulators: 'META_ACCUMULATOR', + 'Variable Expression Operators': 'META_EXPR_VARIABLE', + 'Window Operators': 'META_WINDOW', + 'Conditional Expression Operators': 'META_EXPR_CONDITIONAL', + 'Aggregation Pipeline Stages': 'META_STAGE', + 'Variables in Aggregation Expressions': 'META_VARIABLE', +}; + +/** + * Maps META constant names (like 'META_EXPR_STRING') to their string values + * (like 'expr:string') so we can call getDocLink() at generation time to + * compare the computed URL against the dump's verified URL. 
+ */ +const META_CONST_TO_VALUE: Record = Object.fromEntries( + Object.entries(MetaTags) + .filter(([, v]) => typeof v === 'string') + .map(([k, v]) => [k, v as string]), +); + +// Category β†’ output file mapping +const CATEGORY_TO_FILE: Record = { + 'Comparison Query Operators': 'queryOperators', + 'Logical Query Operators': 'queryOperators', + 'Element Query Operators': 'queryOperators', + 'Evaluation Query Operators': 'queryOperators', + 'Geospatial Operators': 'queryOperators', + 'Array Query Operators': 'queryOperators', + 'Bitwise Query Operators': 'queryOperators', + 'Projection Operators': 'queryOperators', + 'Miscellaneous Query Operators': 'queryOperators', + 'Field Update Operators': 'updateOperators', + 'Array Update Operators': 'updateOperators', + 'Bitwise Update Operators': 'updateOperators', + 'Arithmetic Expression Operators': 'expressionOperators', + 'Array Expression Operators': 'expressionOperators', + 'Bitwise Operators': 'expressionOperators', + 'Boolean Expression Operators': 'expressionOperators', + 'Comparison Expression Operators': 'expressionOperators', + 'Data Size Operators': 'expressionOperators', + 'Date Expression Operators': 'expressionOperators', + 'Literal Expression Operator': 'expressionOperators', + 'Miscellaneous Operators': 'expressionOperators', + 'Object Expression Operators': 'expressionOperators', + 'Set Expression Operators': 'expressionOperators', + 'String Expression Operators': 'expressionOperators', + 'Timestamp Expression Operators': 'expressionOperators', + 'Trigonometry Expression Operators': 'expressionOperators', + 'Type Expression Operators': 'expressionOperators', + 'Conditional Expression Operators': 'expressionOperators', + 'Variable Expression Operators': 'expressionOperators', + 'Accumulators ($group, $bucket, $bucketAuto, $setWindowFields)': 'accumulators', + 'Accumulators (in Other Stages)': 'accumulators', + 'Window Operators': 'windowOperators', + 'Aggregation Pipeline Stages': 'stages', + 'Variables 
/**
 * Parses the scraped operator reference (Markdown) into operators grouped by category.
 *
 * Recognized structure:
 * - `## <category>` opens a category. The "Summary" and "Not Listed" sections are
 *   skipped: operators under them are ignored.
 * - `### <operator>` opens an operator inside the current category.
 * - `- **Description:** ...` and `- **Doc Link:** ...` fill the current operator.
 *   A doc link of `none` is stored as an empty string.
 * - The content of a fenced code block becomes the current operator's `syntax`.
 *
 * Both LF and CRLF line endings are accepted. Splitting on `\n` alone would leave a
 * trailing `\r` on every line, and because `.` never matches `\r` the heading
 * patterns below would then match nothing.
 *
 * @param content Full text of the dump file.
 * @returns Map of category name to its operators, in file order.
 */
function parseDump(content: string): Map<string, ParsedOperator[]> {
    const lines = content.split(/\r?\n/);
    const categorizedOps = new Map<string, ParsedOperator[]>();

    let currentCategory = '';
    let currentOp: Partial<ParsedOperator> | null = null;
    let inCodeBlock = false;
    let syntaxLines: string[] = [];

    for (const line of lines) {
        // Fence lines toggle code-block mode; a closing fence commits the collected syntax.
        if (line.startsWith('```')) {
            if (inCodeBlock) {
                if (currentOp) {
                    currentOp.syntax = syntaxLines.join('\n').trim();
                }
                syntaxLines = [];
            }
            inCodeBlock = !inCodeBlock;
            continue;
        }

        if (inCodeBlock) {
            syntaxLines.push(line);
            continue;
        }

        // H2 = category
        const h2Match = line.match(/^## (.+)$/);
        if (h2Match) {
            // Flush the operator that was being built under the previous category.
            if (currentOp && currentCategory) {
                saveOperator(categorizedOps, currentCategory, currentOp);
            }
            currentOp = null;

            const cat = h2Match[1].trim();
            if (cat === 'Summary' || cat === 'Not Listed') {
                // An empty category disables operator collection until the next real H2.
                currentCategory = '';
                continue;
            }
            currentCategory = cat;
            if (!categorizedOps.has(currentCategory)) {
                categorizedOps.set(currentCategory, []);
            }
            continue;
        }

        // H3 = operator
        const h3Match = line.match(/^### (.+)$/);
        if (h3Match && currentCategory) {
            if (currentOp) {
                saveOperator(categorizedOps, currentCategory, currentOp);
            }
            currentOp = {
                value: h3Match[1].trim(),
                description: '',
                syntax: '',
                docLink: '',
                category: currentCategory,
            };
            continue;
        }

        if (currentOp && line.startsWith('- **Description:**')) {
            currentOp.description = line.replace('- **Description:**', '').trim();
            continue;
        }

        // 'none' means the scraper found no page at the expected location.
        if (currentOp && line.startsWith('- **Doc Link:**')) {
            const rawLink = line.replace('- **Doc Link:**', '').trim();
            currentOp.docLink = rawLink === 'none' ? '' : rawLink;
            continue;
        }
    }

    // Flush the last operator of the file.
    if (currentOp && currentCategory) {
        saveOperator(categorizedOps, currentCategory, currentOp);
    }

    return categorizedOps;
}

/**
 * Appends a fully-populated copy of `op` to the list stored under `category`.
 *
 * Entries without a `value` are ignored. Missing string fields default to `''`,
 * and the stored `category` is always the `category` argument.
 */
function saveOperator(map: Map<string, ParsedOperator[]>, category: string, op: Partial<ParsedOperator>): void {
    if (!op.value) return;
    const list = map.get(category) || [];
    list.push({
        value: op.value || '',
        description: op.description || '',
        syntax: op.syntax || '',
        docLink: op.docLink || '',
        category: category,
        snippetOverride: op.snippetOverride,
    });
    map.set(category, list);
}
/**
 * Applies hand-written overrides onto the parsed operators, in place.
 *
 * For every override entry:
 * - When the override's category exists in `categorizedOps`, the operator is looked
 *   up in that category only.
 * - When the category does not exist, the operator is searched across all categories
 *   and the override goes to the first match. An informational line is logged for a
 *   single match and a warning for several.
 * - When no target is found, a warning is logged and the entry counts as missed.
 *
 * A summary line with the applied/missed counts is logged at the end.
 *
 * @param categorizedOps Parsed operators by category; matching entries are mutated.
 * @param overrides Override entries by category name, then by operator name.
 */
function applyOverrides(
    categorizedOps: Map<string, ParsedOperator[]>,
    overrides: Map<string, Map<string, OverrideEntry>>,
): void {
    let applied = 0;
    let missed = 0;

    for (const [category, opOverrides] of overrides) {
        const ops = categorizedOps.get(category);

        for (const [opName, override] of opOverrides) {
            let target: ParsedOperator | undefined;

            if (ops) {
                target = ops.find((o) => o.value === opName);
            } else {
                // The override category may not match the dump category exactly
                // (cross-category operators), so fall back to a global search.
                const matches: Array<{ category: string; op: ParsedOperator }> = [];
                for (const [cat, catOps] of categorizedOps) {
                    const op = catOps.find((o) => o.value === opName);
                    if (op) matches.push({ category: cat, op });
                }

                if (matches.length > 1) {
                    const catList = matches.map((m) => `"${m.category}"`).join(', ');
                    console.warn(
                        `⚠️ Ambiguous override fallback: "${opName}" — found in ${matches.length} categories: [${catList}]. Override from "${category}" applied to first match. Specify the correct category to disambiguate.`,
                    );
                } else if (matches.length === 1) {
                    console.log(
                        `ℹ️ Override fallback: "${opName}" not found in "${category}", applied to match in "${matches[0].category}".`,
                    );
                }
                target = matches[0]?.op;
            }

            if (!target) {
                console.warn(`⚠️ Override target not found: ${opName} in "${category}"`);
                missed++;
                continue;
            }

            mergeOverride(target, override);
            applied++;
        }
    }

    console.log(` Applied ${applied} overrides (${missed} missed)`);
}

/**
 * Copies the fields present in `override` onto `op`.
 *
 * Empty or missing string fields leave the scraped value untouched. The override's
 * `snippet` is stored as `snippetOverride`, and `standalone` is copied whenever it
 * is defined, including `false`.
 */
function mergeOverride(op: ParsedOperator, override: OverrideEntry): void {
    if (override.description) {
        op.description = override.description;
    }
    if (override.syntax) {
        op.syntax = override.syntax;
    }
    if (override.docLink) {
        op.docLink = override.docLink;
    }
    if (override.snippet) {
        op.snippetOverride = override.snippet;
    }
    if (override.standalone !== undefined) {
        op.standalone = override.standalone;
    }
}
/**
 * Parses the operator-snippets.md file into a map of meta tag to
 * (operator name or `DEFAULT`) to snippet text.
 *
 * - `## <category>` selects the meta tag via `CATEGORY_TO_META`. An unknown category
 *   is reported with a warning and its entries are ignored.
 * - `### <name>` selects the operator (or `DEFAULT`) within the current category.
 * - `- **Snippet:** ...` stores the snippet for the current operator. A value wrapped
 *   in backticks has them stripped, and empty snippets are not stored.
 * - Fenced code blocks are skipped entirely.
 *
 * Both LF and CRLF line endings are accepted. With a trailing `\r` left on each line
 * the heading patterns would never match, because `.` does not match `\r`.
 *
 * @param content Full text of the snippets file.
 * @returns Map of meta constant name to its snippets by operator name.
 */
function parseSnippets(content: string): Map<string, Map<string, string>> {
    const lines = content.split(/\r?\n/);
    const result = new Map<string, Map<string, string>>();

    let currentMeta = '';
    let currentOp = '';
    let inCodeBlock = false;

    for (const line of lines) {
        if (line.startsWith('```')) {
            inCodeBlock = !inCodeBlock;
            continue;
        }
        if (inCodeBlock) continue;

        // H2 = category
        const h2 = line.match(/^## (.+)$/);
        if (h2) {
            const cat = h2[1].trim();
            const meta = CATEGORY_TO_META[cat];
            if (meta) {
                currentMeta = meta;
                if (!result.has(currentMeta)) {
                    result.set(currentMeta, new Map<string, string>());
                }
            } else {
                currentMeta = '';
                console.warn(`⚠️ Unknown snippet category: "${cat}"`);
            }
            // A new category always resets the operator context.
            currentOp = '';
            continue;
        }

        // H3 = operator name or DEFAULT
        const h3 = line.match(/^### (.+)$/);
        if (h3 && currentMeta) {
            currentOp = h3[1].trim();
            continue;
        }

        // Snippet value (surrounding backticks are stripped if present)
        if (currentMeta && currentOp && line.startsWith('- **Snippet:**')) {
            let snippet = line.replace('- **Snippet:**', '').trim();
            if (snippet.startsWith('`') && snippet.endsWith('`')) {
                snippet = snippet.slice(1, -1);
            }
            if (snippet) {
                result.get(currentMeta)!.set(currentOp, snippet);
            }
            continue;
        }
    }

    return result;
}
/**
 * Looks up a snippet for an operator from the parsed snippets map.
 *
 * Resolution order:
 *   1. Exact operator match in the category.
 *   2. `DEFAULT` entry in the category, with every `{{VALUE}}` replaced by the
 *      operator name.
 *   3. `undefined` (no snippet).
 *
 * @param snippets Parsed snippets: meta tag to (operator or `DEFAULT`) to snippet.
 * @param meta Meta tag key of the operator's category.
 * @param operatorValue Operator name, inserted verbatim into the DEFAULT template.
 * @returns The resolved snippet, or `undefined` when none applies.
 */
function lookupSnippet(
    snippets: Map<string, Map<string, string>>,
    meta: string,
    operatorValue: string,
): string | undefined {
    const catSnippets = snippets.get(meta);
    if (!catSnippets) return undefined;

    // Exact operator match
    const exact = catSnippets.get(operatorValue);
    if (exact !== undefined) return exact;

    // Fall back to the category DEFAULT. A replacer function is used instead of a
    // replacement string because `$$`, `$&`, `$1`, ... are special patterns in a
    // replacement string, so a name such as `$$ROOT` would be emitted as `$ROOT`.
    const def = catSnippets.get('DEFAULT');
    if (def) return def.replace(/\{\{VALUE\}\}/g, () => operatorValue);

    return undefined;
}
/**
 * Fills in missing doc links using links known for the same operator name in any
 * category.
 *
 * A first pass records, per operator name, the first non-empty `docLink` seen in
 * iteration order. A second pass assigns that URL to every operator whose
 * `docLink` is empty, tags the entry with `inferredLink = true`, and logs the
 * inference.
 *
 * @param categorizedOps Parsed operators by category; entries are mutated in place.
 * @returns The number of operators whose links were inferred.
 */
function crossReferenceMissingLinks(categorizedOps: Map<string, ParsedOperator[]>): number {
    // Global lookup: operator name -> first known URL
    const urlLookup = new Map<string, string>();
    for (const ops of categorizedOps.values()) {
        for (const op of ops) {
            if (op.docLink && !urlLookup.has(op.value)) {
                urlLookup.set(op.value, op.docLink);
            }
        }
    }

    let inferred = 0;
    for (const [category, ops] of categorizedOps) {
        for (const op of ops) {
            if (op.docLink) continue;

            const altUrl = urlLookup.get(op.value);
            if (!altUrl) continue;

            op.docLink = altUrl;
            // `inferredLink` is not declared on ParsedOperator; it is attached ad hoc
            // so that later code can tell inferred links from scraped ones.
            (op as ParsedOperator & { inferredLink?: boolean }).inferredLink = true;
            inferred++;
            console.log(` Inferred link: ${op.value} (${category}) → ${altUrl}`);
        }
    }

    return inferred;
}
+`; + + // Collect all unique meta imports + const allMetaImports = new Set(); + for (const spec of specs) { + allMetaImports.add(spec.metaImport); + } + + const metaImportsList = [...allMetaImports].sort().join(',\n '); + + // Pre-generate all sections so we can detect whether getDocLink is used + const sections: string[] = []; + for (const spec of specs) { + sections.push(generateSection(spec, snippets)); + } + const sectionsStr = sections.join('\n'); + + // Only import getDocLink if at least one operator uses it in this file + const needsDocLink = sectionsStr.includes('getDocLink('); + const docLinkImport = needsDocLink ? `\nimport { getDocLink } from './docLinks';` : ''; + + let content = `${copyright} +import { type OperatorEntry } from './types'; +import { ${metaImportsList} } from './metaTags';${docLinkImport} +import { registerOperators } from './getFilteredCompletions'; + +`; + + content += sectionsStr; + + // Derive the exported load function name from the first spec's fileName + // e.g. "queryOperators" β†’ "loadQueryOperators" + const fileName = specs[0]?.fileName ?? 
'operators'; + const loadFnName = 'load' + fileName.charAt(0).toUpperCase() + fileName.slice(1); + + // Emit an explicit load function instead of a side-effect registration call + const allVarNames = specs.map((s) => `...${s.variableName}`).join(',\n '); + content += `// ---------------------------------------------------------------------------\n`; + content += `// Registration\n`; + content += `// ---------------------------------------------------------------------------\n\n`; + content += `export function ${loadFnName}(): void {\n`; + content += ` registerOperators([\n ${allVarNames},\n ]);\n`; + content += `}\n`; + + return content; +} + +function generateSection(spec: FileSpec, snippets: Map>): string { + let section = `// ---------------------------------------------------------------------------\n`; + section += `// ${spec.operators[0]?.category || spec.variableName}\n`; + section += `// ---------------------------------------------------------------------------\n\n`; + + section += `const ${spec.variableName}: readonly OperatorEntry[] = [\n`; + + // Resolve the meta tag's string value for runtime getDocLink comparison + const metaStringValue = META_CONST_TO_VALUE[spec.metaImport] || ''; + + for (const op of spec.operators) { + const snippet = op.snippetOverride || lookupSnippet(snippets, spec.metaImport, op.value); + const bsonTypes = getApplicableBsonTypes(op, spec.metaImport); + + // Determine the correct link emission strategy: + // - If dump has a URL that matches what getDocLink() would produce β†’ use getDocLink() (compact) + // - If the URL was inferred via cross-reference β†’ emit hardcoded string with comment + // - If dump has a URL that differs from getDocLink() β†’ emit hardcoded string + // - If dump has no URL β†’ omit the link property + const computedLink = getDocLink(op.value, metaStringValue); + const dumpLink = op.docLink || ''; + const isInferred = (op as ParsedOperator & { inferredLink?: boolean }).inferredLink === true; + let 
/**
 * Escapes a value so it can be embedded between single quotes in generated
 * TypeScript source.
 *
 * Backslashes and single quotes are escaped, and carriage returns and line feeds
 * are written as `\r` / `\n` escape sequences: a raw line break inside a
 * single-quoted literal would make the generated file unparsable.
 *
 * @param s Raw string value.
 * @returns The escaped text, without the surrounding quotes.
 */
function escapeString(s: string): string {
    return s
        .replace(/\\/g, '\\\\') // must run first so later escapes are not doubled
        .replace(/'/g, "\\'")
        .replace(/\r/g, '\\r')
        .replace(/\n/g, '\\n');
}
/**
 * Derives the camelCase variable name used for a category's operator array.
 *
 * Three categories have fixed names. Every other category has the characters
 * `(`, `)`, `$` and `,` removed and its whitespace-separated words joined in
 * camelCase, e.g. "Comparison Query Operators" → "comparisonQueryOperators".
 *
 * @param category Category heading as it appears in the dump.
 * @returns The variable name for that category.
 */
function categoryToVarName(category: string): string {
    switch (category) {
        case 'Accumulators ($group, $bucket, $bucketAuto, $setWindowFields)':
            return 'groupAccumulators';
        case 'Accumulators (in Other Stages)':
            return 'otherStageAccumulators';
        case 'Variables in Aggregation Expressions':
            return 'systemVariables';
        default:
            break;
    }

    let name = '';
    let wordIndex = 0;
    for (const word of category.replace(/[()$,]/g, '').split(/\s+/)) {
        // Leading/trailing whitespace yields empty fragments; they are not words.
        if (word.length === 0) continue;

        if (wordIndex === 0) {
            name += word.toLowerCase();
        } else {
            name += word.charAt(0).toUpperCase() + word.slice(1).toLowerCase();
        }
        wordIndex++;
    }
    return name;
}
snippetsMap) { + snippetCount += catMap.size; + } + console.log(` Loaded ${snippetCount} snippet entries across ${snippetsMap.size} categories\n`); + } else { + console.log('ℹ️ No snippets file found, skipping.\n'); + } + + console.log('πŸ“ Building file specs...'); + const fileGroups = buildFileSpecs(categorizedOps); + + for (const [fileName, specs] of fileGroups) { + const filePath = path.join(srcDir, `${fileName}.ts`); + console.log( + `✍️ Generating ${fileName}.ts (${specs.reduce((n, s) => n + s.operators.length, 0)} operators)...`, + ); + const fileContent = generateFileContent(specs, snippetsMap); + fs.writeFileSync(filePath, fileContent, 'utf-8'); + } + + // Format generated files with Prettier + const generatedFiles = [...fileGroups.keys()].map((f) => path.join(srcDir, `${f}.ts`)); + console.log('\n🎨 Formatting generated files with Prettier...'); + execSync(`npx prettier --write ${generatedFiles.map((f) => `"${f}"`).join(' ')}`, { + stdio: 'inherit', + }); + + console.log('\nβœ… Done! Generated files:'); + for (const [fileName, specs] of fileGroups) { + const count = specs.reduce((n, s) => n + s.operators.length, 0); + console.log(` src/${fileName}.ts β€” ${count} operators`); + } +} + +main(); diff --git a/packages/documentdb-constants/scripts/scrape-operator-docs.ts b/packages/documentdb-constants/scripts/scrape-operator-docs.ts new file mode 100644 index 000000000..a4780a1d8 --- /dev/null +++ b/packages/documentdb-constants/scripts/scrape-operator-docs.ts @@ -0,0 +1,964 @@ +/*--------------------------------------------------------------------------------------------- + * Copyright (c) Microsoft Corporation. All rights reserved. + * Licensed under the MIT License. See License.txt in the project root for license information. 
/**
 * scrape-operator-docs.ts
 *
 * Scrapes the DocumentDB compatibility page and per-operator documentation
 * to generate the resources/scraped/operator-reference.md dump file.
 *
 * Usage:
 *   npx ts-node packages/documentdb-constants/scripts/scrape-operator-docs.ts
 *
 * The scraper has three phases:
 *   Phase 1: Fetch and parse the compatibility page (operator list + support status)
 *   Phase 2: Fetch per-operator doc pages (descriptions + syntax)
 *   Phase 3: Generate the Markdown dump file
 *
 * Before doing real work, a verification step checks that the upstream
 * documentation structure is as expected by fetching a few known URLs.
 */

import * as fs from 'fs';
import * as path from 'path';

// ---------------------------------------------------------------------------
// Types
// ---------------------------------------------------------------------------

/** One operator row from the compatibility page, enriched with per-operator doc data. */
interface OperatorInfo {
    operator: string;
    category: string;
    listed: boolean;
    /** Human-readable reason if not listed */
    notListedReason?: string;
    /** Description from the per-operator doc page YAML frontmatter */
    description?: string;
    /** Syntax snippet from the per-operator doc page */
    syntax?: string;
    /** Documentation URL (derived from the directory where the .md file was found) */
    docLink?: string;
    /**
     * Human-readable note added when the scraper resolves a doc page from a
     * different directory than the operator's primary category, or when other
     * notable resolution decisions are made. Written to the dump as
     * `- **Scraper Comment:**` for traceability.
     */
    scraperComment?: string;
}

// ---------------------------------------------------------------------------
// Constants
// ---------------------------------------------------------------------------

const COMPAT_PAGE_URL =
    'https://raw.githubusercontent.com/MicrosoftDocs/azure-databases-docs/main/articles/documentdb/compatibility-query-language.md';

const OPERATOR_DOC_BASE =
    'https://raw.githubusercontent.com/MicrosoftDocs/azure-databases-docs/main/articles/documentdb/operators';

const DOC_LINK_BASE = 'https://learn.microsoft.com/en-us/azure/documentdb/operators';

/**
 * Maps category names (as they appear in column 1 of the compat page table)
 * to the docs directory used for per-operator doc pages.
 *
 * This mapping is derived from the operators TOC.yml in the azure-databases-docs repo.
 * Category names are trimmed before lookup, so leading/trailing spaces are OK.
 *
 * An empty-string value marks a category that has no per-operator doc pages.
 */
const CATEGORY_TO_DIR: Record<string, string> = {
    // Query operators
    'Comparison Query Operators': 'comparison-query',
    'Logical Query Operators': 'logical-query',
    'Element Query Operators': 'element-query',
    'Evaluation Query Operators': 'evaluation-query',
    'Array Query Operators': 'array-query',
    'Bitwise Query Operators': 'bitwise-query',
    'Geospatial Operators': 'geospatial',
    'Projection Operators': 'projection',
    'Miscellaneous Query Operators': 'miscellaneous-query',
    // Update operators
    'Field Update Operators': 'field-update',
    'Array Update Operators': 'array-update',
    'Bitwise Update Operators': 'bitwise-update',
    // Aggregation
    'Aggregation Pipeline Stages': 'aggregation',
    'Accumulators ($group, $bucket, $bucketAuto, $setWindowFields)': 'accumulators',
    'Accumulators (in Other Stages)': 'accumulators',
    // Expression operators
    'Arithmetic Expression Operators': 'arithmetic-expression',
    'Array Expression Operators': 'array-expression',
    'Bitwise Operators': 'bitwise',
    'Boolean Expression Operators': 'boolean-expression',
    'Comparison Expression Operators': 'comparison-expression',
    'Conditional Expression Operators': 'conditional-expression',
    'Data Size Operators': 'data-size',
    'Date Expression Operators': 'date-expression',
    'Literal Expression Operator': 'literal-expression',
    'Miscellaneous Operators': 'miscellaneous',
    'Object Expression Operators': 'object-expression',
    'Set Expression Operators': 'set-expression',
    'String Expression Operators': 'string-expression',
    'Trigonometry Expression Operators': 'trigonometry-expression',
    'Type Expression Operators': 'aggregation/type-expression',
    'Timestamp Expression Operators': 'timestamp-expression',
    'Variable Expression Operators': 'variable-expression',
    'Text Expression Operator': 'miscellaneous',
    'Custom Aggregation Expression Operators': 'miscellaneous',
    // Window
    'Window Operators': 'window-operators',
    // System variables — no per-operator doc pages
    'Variables in Aggregation Expressions': '',
};

/** Delay between batches of concurrent requests (ms) */
const BATCH_DELAY_MS = 200;

/** Number of concurrent requests per batch */
const BATCH_SIZE = 10;

/** Maximum number of retry attempts for transient HTTP errors */
const MAX_RETRIES = 3;

/** Base delay for exponential backoff (ms). Doubled on each retry. */
const BACKOFF_BASE_MS = 1000;

// ---------------------------------------------------------------------------
// Utilities
// ---------------------------------------------------------------------------

/** Outcome of a text fetch: either the body, or `null` plus the reason it failed. */
interface FetchResult {
    content: string | null;
    /** Non-null when content is null — e.g. "404 Not Found" or "NetworkError: ..." */
    failReason?: string;
}

/**
 * Returns true for HTTP status codes that are transient and worth retrying:
 * - 429 Too Many Requests
 * - 5xx Server errors
 *
 * @param status HTTP response status code.
 * @returns `true` when a retry may succeed, `false` otherwise (e.g. 404, 403).
 */
function isRetryableStatus(status: number): boolean {
    return status === 429 || status >= 500;
}
/** Upper bound (ms) applied to a delay requested through a Retry-After header. */
const MAX_RETRY_AFTER_MS = 60000;

/** Type of the response object produced by the global `fetch`. */
type FetchResponse = Awaited<ReturnType<typeof fetch>>;

/**
 * Computes how long to wait before the next retry attempt.
 *
 * A Retry-After header may carry either a number of seconds or an HTTP date.
 * When it is present and parseable, the requested delay is used, clamped to
 * the range [0, MAX_RETRY_AFTER_MS] so a large server value cannot stall the
 * scraper. Otherwise exponential backoff (`baseMs * 2 ** attempt`) is used.
 *
 * @param retryAfter Raw Retry-After header value, or `null` when absent.
 * @param attempt Zero-based index of the attempt that just failed.
 * @param baseMs Base delay for exponential backoff; defaults to BACKOFF_BASE_MS.
 * @returns Delay in milliseconds.
 */
function computeRetryDelayMs(retryAfter: string | null, attempt: number, baseMs: number = BACKOFF_BASE_MS): number {
    const backoffMs = baseMs * 2 ** attempt;
    const header = retryAfter?.trim() ?? '';
    if (header === '') {
        return backoffMs;
    }

    let requestedMs = Number(header) * 1000;
    if (!Number.isFinite(requestedMs)) {
        // Not a plain number of seconds: try the HTTP-date form.
        const retryAtMs = Date.parse(header);
        if (Number.isNaN(retryAtMs)) {
            return backoffMs;
        }
        requestedMs = retryAtMs - Date.now();
    }

    return Math.min(Math.max(requestedMs, 0), MAX_RETRY_AFTER_MS);
}

/**
 * Fetches a URL and decodes the body with `readBody`, retrying transient failures.
 *
 * Retries on 429 and 5xx responses and on thrown errors (including errors
 * thrown by `readBody`), up to MAX_RETRIES additional attempts. Non-retryable
 * statuses (e.g., 404) are returned immediately without retry.
 *
 * @param url URL to fetch.
 * @param readBody Decoder applied to a successful response.
 * @returns `{ ok: true, value }` on success, otherwise `{ ok: false, failReason }`.
 */
async function fetchWithRetry<T>(
    url: string,
    readBody: (response: FetchResponse) => Promise<T>,
): Promise<{ ok: true; value: T } | { ok: false; failReason?: string }> {
    let lastError: string | undefined;

    for (let attempt = 0; attempt <= MAX_RETRIES; attempt++) {
        let delayMs: number;
        let retryLabel: string;

        try {
            const response = await fetch(url);

            if (response.ok) {
                return { ok: true, value: await readBody(response) };
            }

            const reason = `${response.status} ${response.statusText}`;

            if (!isRetryableStatus(response.status)) {
                // Non-retryable (e.g., 404, 403) — fail immediately
                return { ok: false, failReason: reason };
            }

            lastError = reason;
            delayMs = computeRetryDelayMs(response.headers.get('Retry-After'), attempt);
            retryLabel = `${reason} for ${url}`;
        } catch (error) {
            const msg = error instanceof Error ? error.message : String(error);
            lastError = `NetworkError: ${msg}`;
            delayMs = computeRetryDelayMs(null, attempt);
            retryLabel = lastError;
        }

        // No point in waiting after the final attempt.
        if (attempt < MAX_RETRIES) {
            console.log(`\n ⏳ ${retryLabel} — retrying in ${delayMs}ms (attempt ${attempt + 1}/${MAX_RETRIES})`);
            await sleep(delayMs);
        }
    }

    return { ok: false, failReason: lastError };
}

/**
 * Fetches a URL as text with exponential backoff for transient errors.
 *
 * Retries on 429 (rate-limited) and 5xx (server errors). Respects
 * Retry-After headers when present. Non-retryable failures (e.g., 404)
 * are returned immediately without retry.
 *
 * @param url URL to fetch.
 * @returns The body in `content`, or `content: null` with a `failReason`.
 */
async function fetchText(url: string): Promise<FetchResult> {
    const result = await fetchWithRetry(url, (response) => response.text());
    return result.ok ? { content: result.value } : { content: null, failReason: result.failReason };
}

/** Outcome of a JSON fetch: either the parsed payload, or `null` plus the failure reason. */
interface FetchJsonResult<T> {
    data: T | null;
    failReason?: string;
}

/**
 * Fetches a URL as JSON with exponential backoff for transient errors.
 * Same retry semantics as {@link fetchText}.
 *
 * The parsed payload is cast to `T` without runtime validation.
 *
 * @param url URL to fetch.
 * @returns The parsed body in `data`, or `data: null` with a `failReason`.
 */
async function fetchJson<T>(url: string): Promise<FetchJsonResult<T>> {
    const result = await fetchWithRetry(url, async (response) => (await response.json()) as T);
    return result.ok ? { data: result.value } : { data: null, failReason: result.failReason };
}

/**
 * Resolves after `ms` milliseconds.
 */
function sleep(ms: number): Promise<void> {
    return new Promise((resolve) => setTimeout(resolve, ms));
}

/**
 * Resolves a category name to its docs directory.
 *
 * @param category Category name from the compatibility table; surrounding whitespace is ignored.
 * @returns The mapped directory ('' for categories without doc pages), or
 *          `undefined` when the category has no mapping.
 */
function getCategoryDir(category: string): string | undefined {
    const key = category.trim();
    // Own-property check so names such as "constructor" never resolve to Object.prototype members.
    return Object.prototype.hasOwnProperty.call(CATEGORY_TO_DIR, key) ? CATEGORY_TO_DIR[key] : undefined;
}
/**
 * Extracts the YAML frontmatter description from a docs Markdown file.
 * Normalizes CRLF line endings before parsing.
 *
 * Handles a single-line value (optionally wrapped in one matching pair of
 * quotes) and a YAML block scalar (`>` or `|`, optionally followed by `+`/`-`),
 * whose indented continuation lines are joined with single spaces.
 *
 * @param markdown Raw Markdown of a docs page.
 * @returns The description text, or `undefined` when there is no frontmatter,
 *          no `description` key, or the value is empty.
 */
function extractDescription(markdown: string): string | undefined {
    const normalized = markdown.replace(/\r\n?/g, '\n');
    const frontmatter = /^---\s*\n([\s\S]*?)\n---/.exec(normalized)?.[1];
    if (frontmatter === undefined) return undefined;

    // Look for description field — may be indented (e.g. " description: ...")
    const fmLines = frontmatter.split('\n');
    const keyIndex = fmLines.findIndex((line) => /^\s*description:/.test(line));
    if (keyIndex === -1) return undefined;

    const keyLine = fmLines[keyIndex] ?? '';
    // Only the remainder of the key's own line counts as an inline value.
    let value = keyLine.slice(keyLine.indexOf(':') + 1).trim();

    if (/^[>|][+-]?$/.test(value)) {
        // Block scalar: gather the lines indented deeper than the key.
        const keyIndent = keyLine.search(/\S/);
        const continuation: string[] = [];
        for (const line of fmLines.slice(keyIndex + 1)) {
            if (line.trim() === '') continue;
            if (line.search(/\S/) <= keyIndent) break;
            continuation.push(line.trim());
        }
        value = continuation.join(' ');
    } else {
        // Strip quotes only when the value is wrapped in one matching pair,
        // so a trailing apostrophe or quoted word inside the text survives.
        const quote = value.charAt(0);
        if (value.length >= 2 && (quote === '"' || quote === "'") && value.endsWith(quote)) {
            value = value.slice(1, -1);
        }
    }

    return value.length > 0 ? value : undefined;
}

/**
 * Extracts the first code block after a ## Syntax heading.
 * Normalizes CRLF line endings to LF.
 *
 * The Syntax section ends at the next `## ` heading or at the end of the
 * document, whether or not the document ends with a newline.
 *
 * @param markdown Raw Markdown of a docs page.
 * @returns The trimmed contents of the first fenced code block in the Syntax
 *          section, or `undefined` when the section or a code block is missing.
 */
function extractSyntax(markdown: string): string | undefined {
    const normalized = markdown.replace(/\r\n?/g, '\n');

    // Find ## Syntax (or ### Syntax) section
    const section = /##\s*Syntax\s*\n([\s\S]*?)(?=\n##\s|$)/i.exec(normalized)?.[1];
    if (section === undefined) return undefined;

    // Find first code block in this section
    const code = /```[\w]*\s*\n([\s\S]*?)```/.exec(section)?.[1];
    return code?.trim();
}
/**
 * Escape pipe characters and collapse whitespace in table cell content.
 * Handles both \n and \r\n line endings (GitHub raw content may use CRLF).
 *
 * Backslashes are doubled before pipes are escaped, so an existing backslash
 * cannot combine with the escape added for a pipe.
 *
 * @param text Raw cell text.
 * @returns Single-line text safe to place inside a Markdown table cell.
 */
function escapeTableCell(text: string): string {
    const escaped = text.replace(/\\/g, '\\\\').replace(/\|/g, '\\|');
    // Line breaks are whitespace too, so one pass folds them into single spaces.
    return escaped.replace(/\s+/g, ' ').trim();
}

// ---------------------------------------------------------------------------
// Phase 0: Verification
// ---------------------------------------------------------------------------

/** Aggregate outcome of the upstream-structure verification step. */
interface VerificationResult {
    passed: boolean;
    checks: Array<{ name: string; passed: boolean; detail: string }>;
}

/** A single entry of {@link VerificationResult.checks}. */
type VerificationCheck = VerificationResult['checks'][number];

/**
 * Checks that the compatibility page can be fetched and contains at least
 * one table row and one `$operator` token.
 */
async function verifyCompatibilityPage(): Promise<VerificationCheck> {
    const page = await fetchText(COMPAT_PAGE_URL);
    if (!page.content) {
        return {
            name: 'Compatibility page accessible',
            passed: false,
            detail: `FAIL — could not fetch ${COMPAT_PAGE_URL} (${page.failReason})`,
        };
    }

    const hasTable = /\|.*\|.*\|/.test(page.content);
    const hasOperators = /\$\w+/.test(page.content);
    const passed = hasTable && hasOperators;
    return {
        name: 'Compatibility page accessible & has tables + operators',
        passed,
        detail: passed
            ? `OK — ${(page.content.length / 1024).toFixed(1)} KB, tables found`
            : `FAIL — tables: ${hasTable}, operators: ${hasOperators}`,
    };
}

/**
 * Checks that a known operator doc page can be fetched and has a frontmatter description.
 *
 * @param operator Operator name used in the "accessible" check label (e.g. `$eq`).
 * @param url Raw Markdown URL of the operator's doc page.
 * @param checkName Label for the check when the page was fetched.
 * @param reportSyntax When true, the success detail also says whether a Syntax code block was found.
 */
async function verifyOperatorPage(
    operator: string,
    url: string,
    checkName: string,
    reportSyntax: boolean = false,
): Promise<VerificationCheck> {
    const page = await fetchText(url);
    if (!page.content) {
        return {
            name: `${operator} doc page accessible`,
            passed: false,
            detail: `FAIL — could not fetch ${url} (${page.failReason})`,
        };
    }

    const description = extractDescription(page.content);
    if (description === undefined) {
        return { name: checkName, passed: false, detail: 'FAIL — no description in frontmatter' };
    }

    const syntaxNote = reportSyntax ? `, syntax: ${extractSyntax(page.content) ? 'found' : 'not found'}` : '';
    return { name: checkName, passed: true, detail: `OK — description: "${description}"${syntaxNote}` };
}

/**
 * Verifies the upstream documentation layout before any scraping is done.
 *
 * Fetches the compatibility page and three known operator pages ($match,
 * $eq, $sum), prints a report, and returns the individual check results.
 *
 * @returns `passed` is true only when every check passed.
 */
async function runVerification(): Promise<VerificationResult> {
    console.log('');
    console.log('='.repeat(60));
    console.log(' VERIFICATION STEP');
    console.log(' Checking that upstream documentation structure is as expected');
    console.log('='.repeat(60));
    console.log('');

    const checks: VerificationResult['checks'] = [];

    // Check 1: Compatibility page is accessible and has expected structure
    console.log(' [1/4] Fetching compatibility page...');
    checks.push(await verifyCompatibilityPage());

    // Check 2: A known operator doc page exists ($match — aggregation stage)
    console.log(' [2/4] Fetching known operator page ($match)...');
    checks.push(
        await verifyOperatorPage(
            '$match',
            `${OPERATOR_DOC_BASE}/aggregation/$match.md`,
            '$match doc page has YAML frontmatter with description',
        ),
    );

    // Check 3: A known query operator doc page exists ($eq — comparison query)
    console.log(' [3/4] Fetching known operator page ($eq)...');
    checks.push(
        await verifyOperatorPage(
            '$eq',
            `${OPERATOR_DOC_BASE}/comparison-query/$eq.md`,
            '$eq doc page has frontmatter description',
            true,
        ),
    );

    // Check 4: A known accumulator doc page exists ($sum)
    console.log(' [4/4] Fetching known operator page ($sum)...');
    checks.push(
        await verifyOperatorPage(
            '$sum',
            `${OPERATOR_DOC_BASE}/accumulators/$sum.md`,
            '$sum doc page has frontmatter description',
        ),
    );

    // Print results
    console.log('');
    console.log('-'.repeat(60));
    console.log(' Verification Results:');
    console.log('-'.repeat(60));
    const allPassed = checks.every((c) => c.passed);
    for (const check of checks) {
        const icon = check.passed ? '✅' : '❌';
        console.log(` ${icon} ${check.name}`);
        console.log(` ${check.detail}`);
    }
    console.log('-'.repeat(60));
    if (allPassed) {
        console.log(' ✅ VERIFICATION PASSED — all checks succeeded');
    } else {
        console.log(' ❌ VERIFICATION FAILED — some checks did not pass');
        console.log(' The documentation structure may have changed.');
        console.log(' Review the failures above before proceeding.');
    }
    console.log('='.repeat(60));
    console.log('');

    return { passed: allPassed, checks };
}
// ---------------------------------------------------------------------------
// Phase 1: Compatibility table extraction
// ---------------------------------------------------------------------------

/**
 * Sections we explicitly skip (not operators — commands, indexes, etc.)
 */
const SKIP_SECTIONS = ['Database commands', 'Index types', 'Index properties', 'Related content'];

/**
 * Parses the operator tables of the compatibility page.
 *
 * The compatibility page has a single "## Operators" section with one big table:
 *   | Category | Operator | Supported (v5.0) | Supported (v6.0) | Supported (v7.0) | Supported (v8.0) |
 *   | --- | --- | --- | --- | --- | --- |
 *   | Comparison Query Operators | `$eq` | ✅ Yes | ✅ Yes | ✅ Yes | ✅ Yes |
 *
 * Rows under a heading listed in SKIP_SECTIONS are ignored, as are header
 * rows, summary rows (numeric "operator", "%" or "Total" category) and rows
 * whose operator cell is empty.
 *
 * @param markdown Raw Markdown of the compatibility page (LF or CRLF).
 * @returns One entry per operator row, in document order. `listed` is false
 *          for rows marked deprecated, or marked "No" without any "Yes".
 */
function parseCompatibilityTables(markdown: string): OperatorInfo[] {
    const operators: OperatorInfo[] = [];

    let currentSection = '';
    // True once the separator row of the current table has been seen.
    let inTableBody = false;

    for (const line of markdown.split(/\r?\n/)) {
        // Detect section headings
        const heading = /^##\s+(.+)/.exec(line);
        if (heading) {
            currentSection = (heading[1] ?? '').trim();
            inTableBody = false;
            continue;
        }

        // Skip sections we don't care about
        if (SKIP_SECTIONS.some((skipped) => currentSection.startsWith(skipped))) {
            continue;
        }

        // Only process lines that start with |; anything else ends the table
        if (!line.startsWith('|')) {
            inTableBody = false;
            continue;
        }

        // Drop only the empty strings produced by the outer pipes. Inner
        // empty cells are kept so the remaining columns do not shift left.
        const cells = line.split('|').map((cell) => cell.trim());
        cells.shift();
        if (cells[cells.length - 1] === '') {
            cells.pop();
        }

        if (cells.length < 2) continue;

        // Detect separator row (| --- | --- | ... |)
        if (cells.every((cell) => /^:?-+:?$/.test(cell))) {
            inTableBody = true;
            continue;
        }

        // Skip header row (before separator)
        if (!inTableBody) {
            continue;
        }

        // Data row: | Category | Operator | v5.0 | v6.0 | v7.0 | v8.0 |
        const category = (cells[0] ?? '').replace(/`/g, '').replace(/\*\*/g, '').trim();
        let operator = cells[1] ?? '';

        // Extract from markdown links like [`$eq`](...)
        const linkText = /\[([^\]]+)\]/.exec(operator)?.[1];
        if (linkText !== undefined) {
            operator = linkText;
        }
        operator = operator.replace(/`/g, '').replace(/\*+$/, '').trim();

        // Handle $[identifier] which may be parsed incorrectly
        // The compat page shows `$[identifier]` — square brackets get stripped by link parsing
        if (operator === 'identifier' && category.includes('Array Update')) {
            operator = '$[identifier]';
        }

        // Checked before the $$ prefix below so an empty cell cannot become "$$".
        if (!operator) {
            continue;
        }

        // For Variables in Aggregation Expressions, add $$ prefix
        if (category === 'Variables in Aggregation Expressions' && !operator.startsWith('$')) {
            operator = '$$' + operator;
        }

        if (operator === 'Operator' || operator === 'Command') {
            continue;
        }

        // Skip summary table rows where "operator" column contains numbers
        // (e.g., "| **Aggregation Stages** | 60 | 58 | 96.67% |")
        if (/^\d+$/.test(operator)) {
            continue;
        }

        // Skip rows where category contains percentage or "Total"
        if (category.includes('%') || category === 'Total') {
            continue;
        }

        // Check support status from version columns (cells 2+)
        const versionCells = cells.slice(2);
        const hasYes = versionCells.some((c) => c.includes('Yes') || c.includes('✅') || c.includes('✓'));
        const hasNo = versionCells.some((c) => c.includes('No') || c.includes('❌') || c.includes('✗'));
        const deprecatedCell = versionCells.find((c) => c.toLowerCase().includes('deprecated'));

        let listed = true;
        let notListedReason: string | undefined;

        if (deprecatedCell !== undefined) {
            listed = false;
            notListedReason = deprecatedCell.replace(/[*`]/g, '').trim() || 'Deprecated';
        } else if (hasNo && !hasYes) {
            listed = false;
            notListedReason = 'Not in scope';
        }

        operators.push({ operator, category, listed, notListedReason });
    }

    return operators;
}
// ---------------------------------------------------------------------------
// Phase 2: Per-operator doc fetching
// ---------------------------------------------------------------------------

/**
 * Builds a global index of all operator doc files in the docs repo
 * by crawling each known directory. Returns a map from lowercase filename
 * (e.g. "$eq.md") to the directory path it lives in.
 *
 * This allows the scraper to find operators that are filed in a different
 * directory than expected (e.g. $cmp is a comparison expression operator
 * but lives in comparison-query/).
 *
 * Directories are crawled one level below each top-level directory. When the
 * same filename occurs in several directories, the one crawled last wins.
 * Directories whose listing cannot be fetched are reported and skipped.
 *
 * @returns The filename-to-directory index; empty when the root listing fails.
 */
async function buildGlobalFileIndex(): Promise<Map<string, string>> {
    const GITHUB_API_BASE =
        'https://api.github.com/repos/MicrosoftDocs/azure-databases-docs/contents/articles/documentdb/operators';

    type GithubEntry = { name: string; type: string };
    const index = new Map<string, string>();

    const isMarkdownFile = (entry: GithubEntry): boolean => entry.type !== 'dir' && entry.name.endsWith('.md');

    // Lists one directory (path relative to the operators root) after a short pause.
    const listDirectory = async (relativePath: string): Promise<GithubEntry[] | undefined> => {
        await sleep(300); // Rate limit GitHub API

        const listing = await fetchJson<GithubEntry[]>(`${GITHUB_API_BASE}/${relativePath}`);
        if (!Array.isArray(listing.data)) {
            console.log(
                ` ⚠ Could not list '${relativePath}/' — skipping it in the global index (${listing.failReason ?? 'unexpected response'})`,
            );
            return undefined;
        }
        return listing.data;
    };

    const rootResult = await fetchJson<GithubEntry[]>(GITHUB_API_BASE);
    if (!Array.isArray(rootResult.data)) {
        console.log(
            ` ⚠ Could not fetch directory listing from GitHub API — skipping global index (${rootResult.failReason ?? 'unexpected response'})`,
        );
        return index;
    }

    const dirs = rootResult.data.filter((d) => d.type === 'dir' && d.name !== 'includes');

    for (const dir of dirs) {
        const entries = await listDirectory(dir.name);
        if (!entries) continue;

        for (const file of entries.filter(isMarkdownFile)) {
            index.set(file.name.toLowerCase(), dir.name);
        }

        // Also check subdirectories (e.g., aggregation/type-expression/)
        for (const sub of entries.filter((entry) => entry.type === 'dir')) {
            const subPath = `${dir.name}/${sub.name}`;
            const subEntries = await listDirectory(subPath);
            if (!subEntries) continue;

            for (const file of subEntries.filter(isMarkdownFile)) {
                index.set(file.name.toLowerCase(), subPath);
            }
        }
    }

    return index;
}

/** Result of the per-operator doc fetch phase. */
interface FetchOperatorDocsResult {
    failureDetails: { operator: string; category: string; reason: string }[];
}

/**
 * Locates and downloads the doc page for one operator.
 *
 * Strategy:
 *   1. Try primary directory (lowercase filename)
 *   2. Try primary directory (original casing)
 *   3. Try global index fallback directory (lowercase filename)
 *   4. Try global index fallback directory (original casing)
 *
 * @param operatorName Operator name as listed on the compatibility page.
 * @param primaryDir Directory mapped from the operator's category, if any.
 * @param globalIndex Lowercase-filename-to-directory index.
 * @returns The page content and the directory it came from, or `content: null`
 *          with the reason of the last failed attempt.
 */
async function resolveOperatorDoc(
    operatorName: string,
    primaryDir: string | undefined,
    globalIndex: Map<string, string>,
): Promise<{ content: string; dir: string } | { content: null; failReason?: string }> {
    const lowerName = operatorName.toLowerCase();
    const fileStems = lowerName === operatorName ? [lowerName] : [lowerName, operatorName];

    const candidateDirs: string[] = [];
    if (primaryDir) {
        candidateDirs.push(primaryDir);
    }
    const fallbackDir = globalIndex.get(`${lowerName}.md`);
    if (fallbackDir !== undefined && fallbackDir !== primaryDir) {
        candidateDirs.push(fallbackDir);
    }

    let failReason: string | undefined;
    for (const dir of candidateDirs) {
        for (const stem of fileStems) {
            const result = await fetchText(`${OPERATOR_DOC_BASE}/${dir}/${stem}.md`);
            if (result.content) {
                return { content: result.content, dir };
            }
            failReason = result.failReason;
        }
    }

    return { content: null, failReason };
}

/**
 * Fetches the doc page of every listed operator and fills in `description`,
 * `syntax`, `docLink` and `scraperComment` on the given objects in place.
 *
 * Operators are processed in batches of BATCH_SIZE concurrent requests with
 * BATCH_DELAY_MS between batches. Operators whose category maps to '' or that
 * have neither a directory mapping nor an index entry are skipped.
 *
 * @param operators Operators parsed from the compatibility page; mutated.
 * @returns Details of every operator whose doc page could not be fetched.
 */
async function fetchOperatorDocs(operators: OperatorInfo[]): Promise<FetchOperatorDocsResult> {
    // Build a global index of all doc files to use as fallback
    console.log(' Building global file index from GitHub API...');
    const globalIndex = await buildGlobalFileIndex();
    console.log(` Global index: ${globalIndex.size} files found across all directories`);
    console.log('');

    // Only fetch for listed operators that have a doc directory or are in global index
    const fetchable = operators.filter((op) => {
        if (!op.listed) return false;
        const dir = getCategoryDir(op.category);
        // Skip operators whose category maps to empty string (e.g. system variables)
        if (dir === '') return false;
        // Include if we have a directory mapping OR if the file exists in the global index
        return dir !== undefined || globalIndex.has(`${op.operator.toLowerCase()}.md`);
    });
    const total = fetchable.length;
    const skipped = operators.filter((op) => op.listed).length - total;
    let fetched = 0;
    let succeeded = 0;
    let failed = 0;

    const failureDetails: FetchOperatorDocsResult['failureDetails'] = [];

    console.log(` Phase 2: Fetching per-operator doc pages (${total} operators, ${skipped} skipped)...`);
    console.log('');

    // Process in batches
    for (let start = 0; start < total; start += BATCH_SIZE) {
        const batch = fetchable.slice(start, start + BATCH_SIZE);

        await Promise.all(
            batch.map(async (op) => {
                const primaryDir = getCategoryDir(op.category);
                const resolution = await resolveOperatorDoc(op.operator, primaryDir, globalIndex);

                if (resolution.content !== null) {
                    op.description = extractDescription(resolution.content);
                    op.syntax = extractSyntax(resolution.content);

                    if (primaryDir && resolution.dir !== primaryDir) {
                        // Doc page found in a different directory — emit 'none'
                        // so the generator can cross-reference alternative URLs.
                        // Description/syntax were still scraped from the fallback page.
                        op.docLink = 'none';
                        op.scraperComment =
                            `Doc page not found in expected directory '${primaryDir}/'. ` +
                            `Content scraped from '${resolution.dir}/'.`;
                    } else {
                        op.docLink = `${DOC_LINK_BASE}/${resolution.dir}/${op.operator.toLowerCase()}`;
                    }
                    succeeded++;
                } else {
                    failureDetails.push({
                        operator: op.operator,
                        category: op.category,
                        reason: resolution.failReason ?? 'Unknown',
                    });
                    failed++;
                }
                fetched++;
            }),
        );

        // Progress output
        const pct = ((fetched / total) * 100).toFixed(0);
        process.stdout.write(`\r Progress: ${fetched}/${total} (${pct}%) — ${succeeded} succeeded, ${failed} failed`);

        // Rate limiting between batches
        if (start + BATCH_SIZE < total) {
            await sleep(BATCH_DELAY_MS);
        }
    }

    console.log(''); // newline after progress
    console.log(` Phase 2 complete: ${succeeded}/${total} docs fetched successfully`);
    if (failed > 0) {
        console.log(` ⚠ ${failed} operators could not be fetched (will have empty descriptions)`);
        console.log('');

        // Group failures by reason for a clear summary
        const byReason = new Map<string, FetchOperatorDocsResult['failureDetails']>();
        for (const failure of failureDetails) {
            const group = byReason.get(failure.reason) ?? [];
            group.push(failure);
            byReason.set(failure.reason, group);
        }

        for (const [reason, group] of byReason) {
            console.log(` [${reason}] (${group.length} operators):`);
            for (const failure of group) {
                const dir = getCategoryDir(failure.category) || '???';
                const fallback = globalIndex.get(`${failure.operator.toLowerCase()}.md`);
                const extra = fallback && fallback !== dir ? ` (also tried ${fallback})` : '';
                console.log(` - ${failure.operator} (${failure.category} → ${dir}${extra})`);
            }
            console.log('');
        }
    }

    return { failureDetails };
}
// ---------------------------------------------------------------------------
// Phase 3: Dump generation
// ---------------------------------------------------------------------------

/**
 * Renders the scraped operator data as the Markdown reference dump.
 *
 * Layout: title, a per-category summary table, one `##` section per category
 * containing a `###` entry for each listed operator, and a final
 * "Not Listed" section for operators that are not listed.
 *
 * @param operators Operators in compatibility-page order; not modified.
 * @returns The complete Markdown document.
 */
function generateDump(operators: OperatorInfo[]): string {
    const now = new Date().toISOString().split('T')[0];
    const lines: string[] = [];

    lines.push('# DocumentDB Operator Reference');
    lines.push('');
    // NOTE(review): the next three pushes are empty in this copy of the file and
    // `now` is otherwise unused. They look like lines whose text was lost
    // (possibly HTML comments, one embedding `now`) — confirm against the repository.
    lines.push('');
    lines.push(``);
    lines.push('');
    lines.push('');

    // Summary table (compact — stays as a table)
    const categories = new Map<string, { listed: number; notListed: number }>();
    for (const op of operators) {
        let counts = categories.get(op.category);
        if (!counts) {
            counts = { listed: 0, notListed: 0 };
            categories.set(op.category, counts);
        }
        if (op.listed) {
            counts.listed++;
        } else {
            counts.notListed++;
        }
    }

    lines.push('## Summary');
    lines.push('');
    lines.push('| Category | Listed | Total |');
    lines.push('| --- | --- | --- |');
    let totalListed = 0;
    let totalAll = 0;
    for (const [cat, counts] of categories) {
        const total = counts.listed + counts.notListed;
        totalListed += counts.listed;
        totalAll += total;
        lines.push(`| ${escapeTableCell(cat)} | ${counts.listed} | ${total} |`);
    }
    lines.push(`| **Total** | **${totalListed}** | **${totalAll}** |`);
    lines.push('');

    // Per-category sections with structured operator entries
    for (const cat of categories.keys()) {
        const catOps = operators.filter((op) => op.category === cat && op.listed);
        if (catOps.length === 0) continue;

        lines.push(`## ${cat}`);
        lines.push('');

        for (const op of catOps) {
            lines.push(`### ${op.operator}`);
            lines.push('');
            if (op.description) {
                lines.push(`- **Description:** ${op.description}`);
            }
            if (op.syntax) {
                lines.push('- **Syntax:**');
                lines.push('');
                lines.push('```javascript');
                lines.push(op.syntax);
                lines.push('```');
                lines.push('');
            }
            if (op.docLink) {
                lines.push(`- **Doc Link:** ${op.docLink}`);
            }
            if (op.scraperComment) {
                lines.push(`- **Scraper Comment:** ${op.scraperComment}`);
            }
            lines.push('');
        }
    }

    // Not-listed operators section
    const notListed = operators.filter((op) => !op.listed);
    if (notListed.length > 0) {
        lines.push('## Not Listed');
        lines.push('');
        lines.push('Operators below are present on the compatibility page but are not in scope');
        lines.push('for this package (deprecated or not available in DocumentDB).');
        lines.push('');
        for (const op of notListed) {
            lines.push(`- **${op.operator}** (${op.category}) — ${op.notListedReason || 'Not in scope'}`);
        }
        lines.push('');
    }

    return lines.join('\n');
}

// ---------------------------------------------------------------------------
// Main
// ---------------------------------------------------------------------------

/**
 * Runs the scraper end to end: verification, compatibility-page parsing,
 * per-operator doc fetching, and writing resources/scraped/operator-reference.md.
 *
 * Exits the process with code 1 when verification fails, when the
 * compatibility page cannot be fetched, or when any operator page failed for
 * a transient reason (network error, or a 429/5xx status that survived all
 * retries). Other failures such as 404 only leave the operator without a
 * description.
 */
async function main(): Promise<void> {
    console.log('DocumentDB Operator Documentation Scraper');
    console.log('=========================================');
    console.log('');

    // Phase 0: Verification
    const verification = await runVerification();
    if (!verification.passed) {
        console.error('Aborting due to verification failure.');
        console.error('If the documentation structure has changed, update the scraper accordingly.');
        process.exit(1);
    }

    // Phase 1: Fetch and parse compatibility page
    console.log(' Phase 1: Fetching compatibility page...');
    const compatResult = await fetchText(COMPAT_PAGE_URL);
    if (!compatResult.content) {
        console.error(`ERROR: Could not fetch compatibility page (${compatResult.failReason})`);
        process.exit(1);
    }
    console.log(` Fetched ${(compatResult.content.length / 1024).toFixed(1)} KB`);

    const operators = parseCompatibilityTables(compatResult.content);
    const listedCount = operators.filter((op) => op.listed).length;
    const notListedCount = operators.length - listedCount;
    console.log(` Parsed ${operators.length} operators (${listedCount} listed, ${notListedCount} not listed)`);
    console.log('');

    // Phase 2: Fetch per-operator docs
    const { failureDetails } = await fetchOperatorDocs(operators);
    console.log('');

    // Fail on transient failures that exhausted all retries: network errors
    // and retryable HTTP statuses (429 / 5xx). Continuing would write a dump
    // with missing descriptions. 404s are expected for operators without
    // dedicated doc pages and do not abort the run.
    const transientFailures = failureDetails.filter(
        (f) => f.reason.startsWith('NetworkError:') || isRetryableStatus(Number.parseInt(f.reason, 10)),
    );
    if (transientFailures.length > 0) {
        console.error(
            `ERROR: ${transientFailures.length} operator(s) failed due to network errors (not 404). Aborting.`,
        );
        for (const f of transientFailures) {
            console.error(` - ${f.operator} (${f.category}): ${f.reason}`);
        }
        process.exit(1);
    }

    // Phase 3: Generate dump
    console.log(' Phase 3: Generating scraped/operator-reference.md...');
    const dump = generateDump(operators);

    // A recursive mkdir succeeds when the directory already exists.
    const outputDir = path.join(__dirname, '..', 'resources', 'scraped');
    fs.mkdirSync(outputDir, { recursive: true });

    const outputPath = path.join(outputDir, 'operator-reference.md');
    fs.writeFileSync(outputPath, dump, 'utf-8');

    console.log(` Written to: ${outputPath}`);
    // Bytes on disk, not UTF-16 code units: the dump contains non-ASCII characters.
    console.log(` File size: ${(Buffer.byteLength(dump, 'utf-8') / 1024).toFixed(1)} KB`);
    console.log('');
    console.log('Done! Review the generated file and commit it to the repo.');
}

main().catch((err) => {
    console.error('Scraper failed:', err);
    process.exit(1);
});
+ +import { getDocLink } from './docLinks'; +import { registerOperators } from './getFilteredCompletions'; +import { META_ACCUMULATOR } from './metaTags'; +import { type OperatorEntry } from './types'; + +// --------------------------------------------------------------------------- +// Accumulators ($group, $bucket, $bucketAuto, $setWindowFields) +// --------------------------------------------------------------------------- + +const groupAccumulators: readonly OperatorEntry[] = [ + { + value: '$addToSet', + meta: META_ACCUMULATOR, + description: + "The addToSet operator adds elements to an array if they don't already exist, while ensuring uniqueness of elements within the set.", + snippet: '{ $addToSet: "${1:\\$field}" }', + link: 'https://learn.microsoft.com/en-us/azure/documentdb/operators/array-update/$addtoset', // inferred from another category + }, + { + value: '$avg', + meta: META_ACCUMULATOR, + description: 'Computes the average of numeric values for documents in a group, bucket, or window.', + snippet: '{ $avg: "${1:\\$field}" }', + link: getDocLink('$avg', META_ACCUMULATOR), + }, + { + value: '$bottom', + meta: META_ACCUMULATOR, + description: + "The $bottom operator returns the last document from the query's result set sorted by one or more fields", + snippet: '{ $bottom: { sortBy: { ${1:field}: ${2:1} }, output: "${3:\\$field}" } }', + link: getDocLink('$bottom', META_ACCUMULATOR), + }, + { + value: '$bottomN', + meta: META_ACCUMULATOR, + description: 'The $bottomN operator returns the last N documents from the result sorted by one or more fields', + snippet: '{ $bottomN: { n: ${1:number}, sortBy: { ${2:field}: ${3:1} }, output: "${4:\\$field}" } }', + link: getDocLink('$bottomN', META_ACCUMULATOR), + }, + { + value: '$count', + meta: META_ACCUMULATOR, + description: + 'The `$count` operator is used to count the number of documents that match a query filtering criteria.', + snippet: '{ $count: {} }', + link: getDocLink('$count', META_ACCUMULATOR), + 
}, + { + value: '$first', + meta: META_ACCUMULATOR, + description: "The $first operator returns the first value in a group according to the group's sorting order.", + snippet: '{ $first: "${1:\\$field}" }', + link: getDocLink('$first', META_ACCUMULATOR), + }, + { + value: '$firstN', + meta: META_ACCUMULATOR, + description: + 'The $firstN operator sorts documents on one or more fields specified by the query and returns the first N document matching the filtering criteria', + snippet: '{ $firstN: { input: "${1:\\$field}", n: ${2:number} } }', + link: getDocLink('$firstN', META_ACCUMULATOR), + }, + { + value: '$last', + meta: META_ACCUMULATOR, + description: 'The $last operator returns the last document from the result sorted by one or more fields', + snippet: '{ $last: "${1:\\$field}" }', + link: getDocLink('$last', META_ACCUMULATOR), + }, + { + value: '$lastN', + meta: META_ACCUMULATOR, + description: 'The $lastN accumulator operator returns the last N values in a group of documents.', + snippet: '{ $lastN: { input: "${1:\\$field}", n: ${2:number} } }', + link: getDocLink('$lastN', META_ACCUMULATOR), + }, + { + value: '$max', + meta: META_ACCUMULATOR, + description: 'The $max operator returns the maximum value from a set of input values.', + snippet: '{ $max: "${1:\\$field}" }', + link: getDocLink('$max', META_ACCUMULATOR), + }, + { + value: '$maxN', + meta: META_ACCUMULATOR, + description: 'Retrieves the top N values based on a specified filtering criteria', + snippet: '{ $maxN: { input: "${1:\\$field}", n: ${2:number} } }', + link: getDocLink('$maxN', META_ACCUMULATOR), + }, + { + value: '$median', + meta: META_ACCUMULATOR, + description: 'The $median operator calculates the median value of a numeric field in a group of documents.', + snippet: '{ $median: { input: "${1:\\$field}", method: "approximate" } }', + link: getDocLink('$median', META_ACCUMULATOR), + }, + { + value: '$mergeObjects', + meta: META_ACCUMULATOR, + description: 'The $mergeObjects operator 
merges multiple documents into a single document', + snippet: '{ $mergeObjects: "${1:\\$field}" }', + link: 'https://learn.microsoft.com/en-us/azure/documentdb/operators/object-expression/$mergeobjects', // inferred from another category + }, + { + value: '$min', + meta: META_ACCUMULATOR, + description: 'Retrieves the minimum value for a specified field', + snippet: '{ $min: "${1:\\$field}" }', + link: getDocLink('$min', META_ACCUMULATOR), + }, + { + value: '$percentile', + meta: META_ACCUMULATOR, + description: + 'The $percentile operator calculates the percentile of numerical values that match a filtering criteria', + snippet: '{ $percentile: { input: "${1:\\$field}", p: [${2:0.5}], method: "approximate" } }', + link: getDocLink('$percentile', META_ACCUMULATOR), + }, + { + value: '$push', + meta: META_ACCUMULATOR, + description: 'The $push operator adds a specified value to an array within a document.', + snippet: '{ $push: "${1:\\$field}" }', + link: 'https://learn.microsoft.com/en-us/azure/documentdb/operators/array-update/$push', // inferred from another category + }, + { + value: '$stdDevPop', + meta: META_ACCUMULATOR, + description: 'The $stddevpop operator calculates the standard deviation of the specified values', + snippet: '{ $stdDevPop: "${1:\\$field}" }', + link: getDocLink('$stdDevPop', META_ACCUMULATOR), + }, + { + value: '$stdDevSamp', + meta: META_ACCUMULATOR, + description: + 'The $stddevsamp operator calculates the standard deviation of a specified sample of values and not the entire population', + snippet: '{ $stdDevSamp: "${1:\\$field}" }', + link: getDocLink('$stdDevSamp', META_ACCUMULATOR), + }, + { + value: '$sum', + meta: META_ACCUMULATOR, + description: 'The $sum operator calculates the sum of the values of a field based on a filtering criteria', + snippet: '{ $sum: "${1:\\$field}" }', + link: getDocLink('$sum', META_ACCUMULATOR), + }, + { + value: '$top', + meta: META_ACCUMULATOR, + description: 'The $top operator returns the first 
document from the result set sorted by one or more fields', + snippet: '{ $top: { sortBy: { ${1:field}: ${2:1} }, output: "${3:\\$field}" } }', + link: getDocLink('$top', META_ACCUMULATOR), + }, + { + value: '$topN', + meta: META_ACCUMULATOR, + description: 'The $topN operator returns the first N documents from the result sorted by one or more fields', + snippet: '{ $topN: { n: ${1:number}, sortBy: { ${2:field}: ${3:1} }, output: "${4:\\$field}" } }', + link: getDocLink('$topN', META_ACCUMULATOR), + }, +]; + +// --------------------------------------------------------------------------- +// Registration +// --------------------------------------------------------------------------- + +export function loadAccumulators(): void { + registerOperators([...groupAccumulators]); +} diff --git a/packages/documentdb-constants/src/bsonConstructors.ts b/packages/documentdb-constants/src/bsonConstructors.ts new file mode 100644 index 000000000..5e08a22d7 --- /dev/null +++ b/packages/documentdb-constants/src/bsonConstructors.ts @@ -0,0 +1,83 @@ +/*--------------------------------------------------------------------------------------------- + * Copyright (c) Microsoft Corporation. All rights reserved. + * Licensed under the MIT License. See License.txt in the project root for license information. 
*--------------------------------------------------------------------------------------------*/

import { registerOperators } from './getFilteredCompletions';
import { META_BSON } from './metaTags';
import { type OperatorEntry } from './types';

// ---------------------------------------------------------------------------
// BSON Constructor Functions
// ---------------------------------------------------------------------------

/**
 * Compact source table for the BSON constructor completions.
 * Each row is [constructor name, description, snippet]; the snippet strings use
 * `${n:placeholder}` tab-stop markers and are plain (non-template) strings.
 */
const bsonConstructorRows: ReadonlyArray<readonly [string, string, string]> = [
    ['ObjectId', 'Creates a new ObjectId value, a 12-byte unique identifier.', 'ObjectId("${1:hex}")'],
    ['ISODate', 'Creates a date object from an ISO 8601 date string.', 'ISODate("${1:2025-01-01T00:00:00Z}")'],
    ['NumberLong', 'Creates a 64-bit integer (long) value.', 'NumberLong(${1:value})'],
    ['NumberInt', 'Creates a 32-bit integer value.', 'NumberInt(${1:value})'],
    [
        'NumberDecimal',
        'Creates a 128-bit decimal value for high-precision calculations.',
        'NumberDecimal("${1:value}")',
    ],
    ['BinData', 'Creates a binary data value with a specified subtype.', 'BinData(${1:subtype}, "${2:base64}")'],
    ['UUID', 'Creates a UUID (Universally Unique Identifier) value.', 'UUID("${1:uuid}")'],
    [
        'Timestamp',
        'Creates a BSON timestamp value for internal replication use.',
        'Timestamp(${1:seconds}, ${2:increment})',
    ],
    ['MinKey', 'Represents the lowest possible BSON value, comparing less than all other types.', 'MinKey()'],
    ['MaxKey', 'Represents the highest possible BSON value, comparing greater than all other types.', 'MaxKey()'],
];

/**
 * The BSON constructor entries, in table order, each tagged with META_BSON.
 * Property order (value, meta, description, snippet) matches the literal form.
 */
const bsonConstructors: readonly OperatorEntry[] = bsonConstructorRows.map(([value, description, snippet]) => ({
    value,
    meta: META_BSON,
    description,
    snippet,
}));

// ---------------------------------------------------------------------------
// Registration
// ---------------------------------------------------------------------------

/**
 * Hands the BSON constructor entries to `registerOperators`.
 */
export function loadBsonConstructors(): void {
    const entries = bsonConstructors;
    registerOperators(entries);
}
// diff --git a/packages/documentdb-constants/src/docLinks.test.ts b/packages/documentdb-constants/src/docLinks.test.ts new file mode 100644 index 000000000..c79a53da9 --- /dev/null +++ b/packages/documentdb-constants/src/docLinks.test.ts @@ -0,0 +1,82 @@
/*---------------------------------------------------------------------------------------------
 *  Copyright (c) Microsoft Corporation. All rights reserved.
 *  Licensed under the MIT License. See License.txt in the project root for license information.
 *--------------------------------------------------------------------------------------------*/

/**
 * Unit tests for docLinks.ts — URL generation for DocumentDB operator docs.
*/

import { getDocBase, getDocLink } from './index';

describe('docLinks', () => {
    test('getDocBase returns the expected base URL', () => {
        const base = getDocBase();
        expect(base).toBe('https://learn.microsoft.com/en-us/azure/documentdb/operators');
    });

    describe('getDocLink', () => {
        // One row per test: [test title, operator, meta tag, expected link].
        // An expected value of `undefined` means no link is produced for that meta tag.
        const cases: Array<[string, string, string, string | undefined]> = [
            [
                'generates correct URL for comparison query operator',
                '$eq',
                'query:comparison',
                'https://learn.microsoft.com/en-us/azure/documentdb/operators/comparison-query/$eq',
            ],
            [
                'generates correct URL for aggregation stage',
                '$match',
                'stage',
                'https://learn.microsoft.com/en-us/azure/documentdb/operators/aggregation/$match',
            ],
            [
                'generates correct URL for accumulator',
                '$sum',
                'accumulator',
                'https://learn.microsoft.com/en-us/azure/documentdb/operators/accumulators/$sum',
            ],
            [
                'generates correct URL for field update operator',
                '$set',
                'update:field',
                'https://learn.microsoft.com/en-us/azure/documentdb/operators/field-update/$set',
            ],
            [
                'generates correct URL for array expression operator',
                '$filter',
                'expr:array',
                'https://learn.microsoft.com/en-us/azure/documentdb/operators/array-expression/$filter',
            ],
            [
                'generates correct URL for type expression operator (nested dir)',
                '$convert',
                'expr:type',
                'https://learn.microsoft.com/en-us/azure/documentdb/operators/aggregation/type-expression/$convert',
            ],
            [
                'generates correct URL for window operator',
                '$rank',
                'window',
                'https://learn.microsoft.com/en-us/azure/documentdb/operators/window-operators/$rank',
            ],
            [
                'lowercases operator names in URLs',
                '$AddFields',
                'stage',
                'https://learn.microsoft.com/en-us/azure/documentdb/operators/aggregation/$addfields',
            ],
            ['returns undefined for unknown meta tag', '$eq', 'unknown:tag', undefined],
            ['returns undefined for BSON meta tag (no docs directory)', 'ObjectId', 'bson', undefined],
            ['returns undefined for variable meta tag (no docs directory)', '$$NOW', 'variable', undefined],
            [
                'generates correct URL for boolean expression operator',
                '$and',
                'expr:bool',
                'https://learn.microsoft.com/en-us/azure/documentdb/operators/boolean-expression/$and',
            ],
            [
                'generates correct URL for comparison expression operator',
                '$eq',
                'expr:comparison',
                'https://learn.microsoft.com/en-us/azure/documentdb/operators/comparison-expression/$eq',
            ],
        ];

        // '%s' consumes the first tuple element, so each case keeps its own title.
        test.each(cases)('%s', (_title, operator, meta, expected) => {
            expect(getDocLink(operator, meta)).toBe(expected);
        });
    });
});
// diff --git a/packages/documentdb-constants/src/docLinks.ts b/packages/documentdb-constants/src/docLinks.ts new file mode 100644 index 000000000..460112548 --- /dev/null +++ b/packages/documentdb-constants/src/docLinks.ts @@ -0,0 +1,77 @@
/*---------------------------------------------------------------------------------------------
 *  Copyright (c) Microsoft Corporation. All rights reserved.
 *  Licensed under the MIT License. See License.txt in the project root for license information.
 *--------------------------------------------------------------------------------------------*/

/**
 * URL generation helpers for DocumentDB documentation pages.
/**
 * Each operator has a documentation page at:
 *   https://learn.microsoft.com/en-us/azure/documentdb/operators/{category}/{operatorName}
 */

const DOC_BASE = 'https://learn.microsoft.com/en-us/azure/documentdb/operators';

/**
 * Maps a meta tag to the docs directory name used in the DocumentDB
 * documentation URL path. Lookup is by exact tag (not by prefix); a value may
 * itself contain a slash when the docs nest the category (see 'expr:type').
 */
const META_TO_DOC_DIR: Record<string, string> = {
    'query:comparison': 'comparison-query',
    'query:logical': 'logical-query',
    'query:element': 'element-query',
    'query:evaluation': 'evaluation-query',
    'query:array': 'array-query',
    'query:bitwise': 'bitwise-query',
    'query:geospatial': 'geospatial',
    'query:projection': 'projection',
    'query:misc': 'miscellaneous-query',
    'update:field': 'field-update',
    'update:array': 'array-update',
    'update:bitwise': 'bitwise-update',
    stage: 'aggregation',
    accumulator: 'accumulators',
    'expr:arith': 'arithmetic-expression',
    'expr:array': 'array-expression',
    'expr:bool': 'boolean-expression',
    'expr:comparison': 'comparison-expression',
    'expr:conditional': 'conditional-expression',
    'expr:date': 'date-expression',
    'expr:object': 'object-expression',
    'expr:set': 'set-expression',
    'expr:string': 'string-expression',
    'expr:trig': 'trigonometry-expression',
    'expr:type': 'aggregation/type-expression',
    'expr:datasize': 'data-size',
    'expr:timestamp': 'timestamp-expression',
    'expr:bitwise': 'bitwise',
    'expr:literal': 'literal-expression',
    'expr:misc': 'miscellaneous',
    'expr:variable': 'variable-expression',
    window: 'window-operators',
};

/**
 * Generates a documentation URL for a DocumentDB operator.
 *
 * @param operatorValue - the operator name, e.g. "$bucket", "$gt"
 * @param meta - the meta tag, e.g. "stage", "query:comparison"
 * @returns URL string or undefined if no mapping exists for the meta tag
 */
export function getDocLink(operatorValue: string, meta: string): string | undefined {
    // Only the table's own keys count. A bare index lookup would also resolve
    // inherited Object.prototype members ("constructor", "toString", ...),
    // which are truthy and would be interpolated into a bogus URL.
    if (!Object.prototype.hasOwnProperty.call(META_TO_DOC_DIR, meta)) {
        return undefined;
    }

    const dir = META_TO_DOC_DIR[meta];
    if (!dir) {
        return undefined;
    }

    // Operator names in URLs keep their $ prefix and are lowercased
    const name = operatorValue.toLowerCase();
    return `${DOC_BASE}/${dir}/${name}`;
}

/**
 * Returns the base URL for the DocumentDB operators documentation.
 */
export function getDocBase(): string {
    return DOC_BASE;
}
// diff --git a/packages/documentdb-constants/src/expressionOperators.ts b/packages/documentdb-constants/src/expressionOperators.ts new file mode 100644 index 000000000..a75905738 --- /dev/null +++ b/packages/documentdb-constants/src/expressionOperators.ts @@ -0,0 +1,1181 @@
/*---------------------------------------------------------------------------------------------
 *  Copyright (c) Microsoft Corporation. All rights reserved.
 *  Licensed under the MIT License. See License.txt in the project root for license information.
 *--------------------------------------------------------------------------------------------*/

// AUTO-GENERATED — DO NOT EDIT BY HAND
//
// Generated by: npm run generate (scripts/generate-from-reference.ts)
// Sources: resources/scraped/operator-reference.md
//          resources/overrides/operator-overrides.md
//          resources/overrides/operator-snippets.md
//
// To change operator data, edit the overrides/snippets files and re-run the generator.
+ +import { getDocLink } from './docLinks'; +import { registerOperators } from './getFilteredCompletions'; +import { + META_EXPR_ARITH, + META_EXPR_ARRAY, + META_EXPR_BITWISE, + META_EXPR_BOOL, + META_EXPR_COMPARISON, + META_EXPR_CONDITIONAL, + META_EXPR_DATASIZE, + META_EXPR_DATE, + META_EXPR_LITERAL, + META_EXPR_MISC, + META_EXPR_OBJECT, + META_EXPR_SET, + META_EXPR_STRING, + META_EXPR_TIMESTAMP, + META_EXPR_TRIG, + META_EXPR_TYPE, + META_EXPR_VARIABLE, +} from './metaTags'; +import { type OperatorEntry } from './types'; + +// --------------------------------------------------------------------------- +// Arithmetic Expression Operators +// --------------------------------------------------------------------------- + +const arithmeticExpressionOperators: readonly OperatorEntry[] = [ + { + value: '$abs', + meta: META_EXPR_ARITH, + description: 'The $abs operator returns the absolute value of a number.', + snippet: '{ $abs: "${1:\\$field}" }', + link: getDocLink('$abs', META_EXPR_ARITH), + }, + { + value: '$add', + meta: META_EXPR_ARITH, + description: 'The $add operator returns the sum of two numbers or the sum of a date and numbers.', + snippet: '{ $add: ["${1:\\$field1}", "${2:\\$field2}"] }', + link: getDocLink('$add', META_EXPR_ARITH), + }, + { + value: '$ceil', + meta: META_EXPR_ARITH, + description: 'The $ceil operator returns the smallest integer greater than or equal to the specified number.', + snippet: '{ $ceil: "${1:\\$field}" }', + link: getDocLink('$ceil', META_EXPR_ARITH), + }, + { + value: '$divide', + meta: META_EXPR_ARITH, + description: 'The $divide operator divides two numbers and returns the quotient.', + snippet: '{ $divide: ["${1:\\$field1}", "${2:\\$field2}"] }', + link: getDocLink('$divide', META_EXPR_ARITH), + }, + { + value: '$exp', + meta: META_EXPR_ARITH, + description: 'The $exp operator raises e to the specified exponent and returns the result', + snippet: '{ $exp: "${1:\\$field}" }', + link: getDocLink('$exp', META_EXPR_ARITH), + }, 
+ { + value: '$floor', + meta: META_EXPR_ARITH, + description: 'The $floor operator returns the largest integer less than or equal to the specified number', + snippet: '{ $floor: "${1:\\$field}" }', + link: getDocLink('$floor', META_EXPR_ARITH), + }, + { + value: '$ln', + meta: META_EXPR_ARITH, + description: 'The $ln operator calculates the natural logarithm of the input', + snippet: '{ $ln: "${1:\\$field}" }', + link: getDocLink('$ln', META_EXPR_ARITH), + }, + { + value: '$log', + meta: META_EXPR_ARITH, + description: 'The $log operator calculates the logarithm of a number in the specified base', + snippet: '{ $log: ["${1:\\$number}", ${2:base}] }', + link: getDocLink('$log', META_EXPR_ARITH), + }, + { + value: '$log10', + meta: META_EXPR_ARITH, + description: 'The $log10 operator calculates the log of a specified number in base 10', + snippet: '{ $log10: "${1:\\$field}" }', + link: getDocLink('$log10', META_EXPR_ARITH), + }, + { + value: '$mod', + meta: META_EXPR_ARITH, + description: + 'The $mod operator performs a modulo operation on the value of a field and selects documents with a specified result.', + snippet: '{ $mod: ["${1:\\$field1}", "${2:\\$field2}"] }', + link: 'https://learn.microsoft.com/en-us/azure/documentdb/operators/evaluation-query/$mod', // inferred from another category + }, + { + value: '$multiply', + meta: META_EXPR_ARITH, + description: 'The $multiply operator multiplies the input numerical values', + snippet: '{ $multiply: ["${1:\\$field1}", "${2:\\$field2}"] }', + link: getDocLink('$multiply', META_EXPR_ARITH), + }, + { + value: '$pow', + meta: META_EXPR_ARITH, + description: + 'The `$pow` operator calculates the value of a numerical value raised to the power of a specified exponent.', + snippet: '{ $pow: ["${1:\\$field1}", "${2:\\$field2}"] }', + link: getDocLink('$pow', META_EXPR_ARITH), + }, + { + value: '$round', + meta: META_EXPR_ARITH, + description: 'The $round operator rounds a number to a specified decimal place.', + snippet: '{ 
$round: ["${1:\\$field}", ${2:place}] }', + link: getDocLink('$round', META_EXPR_ARITH), + }, + { + value: '$sqrt', + meta: META_EXPR_ARITH, + description: 'The $sqrt operator calculates and returns the square root of an input number', + snippet: '{ $sqrt: "${1:\\$field}" }', + link: getDocLink('$sqrt', META_EXPR_ARITH), + }, + { + value: '$subtract', + meta: META_EXPR_ARITH, + description: 'The $subtract operator subtracts two numbers and returns the result.', + snippet: '{ $subtract: ["${1:\\$field1}", "${2:\\$field2}"] }', + link: getDocLink('$subtract', META_EXPR_ARITH), + }, + { + value: '$trunc', + meta: META_EXPR_ARITH, + description: 'The $trunc operator truncates a number to a specified decimal place.', + snippet: '{ $trunc: "${1:\\$field}" }', + link: getDocLink('$trunc', META_EXPR_ARITH), + }, +]; + +// --------------------------------------------------------------------------- +// Array Expression Operators +// --------------------------------------------------------------------------- + +const arrayExpressionOperators: readonly OperatorEntry[] = [ + { + value: '$arrayElemAt', + meta: META_EXPR_ARRAY, + description: 'The $arrayElemAt returns the element at the specified array index.', + snippet: '{ $arrayElemAt: ["${1:\\$array}", ${2:index}] }', + link: getDocLink('$arrayElemAt', META_EXPR_ARRAY), + }, + { + value: '$arrayToObject', + meta: META_EXPR_ARRAY, + description: 'The $arrayToObject allows converting an array into a single document.', + snippet: '{ $arrayToObject: "${1:\\$array}" }', + link: getDocLink('$arrayToObject', META_EXPR_ARRAY), + }, + { + value: '$concatArrays', + meta: META_EXPR_ARRAY, + description: 'The $concatArrays is used to combine multiple arrays into a single array.', + snippet: '{ $concatArrays: ["${1:\\$array1}", "${2:\\$array2}"] }', + link: getDocLink('$concatArrays', META_EXPR_ARRAY), + }, + { + value: '$filter', + meta: META_EXPR_ARRAY, + description: 'The $filter operator filters for elements from an array based on a 
specified condition.', + snippet: '{ $filter: { input: "${1:\\$array}", as: "${2:item}", cond: { ${3:expression} } } }', + link: getDocLink('$filter', META_EXPR_ARRAY), + }, + { + value: '$firstN', + meta: META_EXPR_ARRAY, + description: + 'The $firstN operator sorts documents on one or more fields specified by the query and returns the first N document matching the filtering criteria', + snippet: '{ $firstN: { input: "${1:\\$array}", n: ${2:number} } }', + link: 'https://learn.microsoft.com/en-us/azure/documentdb/operators/accumulators/$firstn', // inferred from another category + }, + { + value: '$in', + meta: META_EXPR_ARRAY, + description: 'The $in operator matches value of a field against an array of specified values', + snippet: '{ $in: ["${1:\\$field}", "${2:\\$array}"] }', + link: 'https://learn.microsoft.com/en-us/azure/documentdb/operators/comparison-query/$in', // inferred from another category + }, + { + value: '$indexOfArray', + meta: META_EXPR_ARRAY, + description: + 'The $indexOfArray operator is used to search for an element in an array and return the index of the first occurrence of the element.', + snippet: '{ $indexOfArray: ["${1:\\$array}", "${2:value}"] }', + link: getDocLink('$indexOfArray', META_EXPR_ARRAY), + }, + { + value: '$isArray', + meta: META_EXPR_ARRAY, + description: 'The $isArray operator is used to determine if a specified value is an array.', + snippet: '{ $isArray: "${1:\\$field}" }', + link: getDocLink('$isArray', META_EXPR_ARRAY), + }, + { + value: '$lastN', + meta: META_EXPR_ARRAY, + description: 'The $lastN accumulator operator returns the last N values in a group of documents.', + snippet: '{ $lastN: { input: "${1:\\$array}", n: ${2:number} } }', + link: 'https://learn.microsoft.com/en-us/azure/documentdb/operators/accumulators/$lastn', // inferred from another category + }, + { + value: '$map', + meta: META_EXPR_ARRAY, + description: 'The $map operator allows applying an expression to each element in an array.', + snippet: 
'{ $map: { input: "${1:\\$array}", as: "${2:item}", in: { ${3:expression} } } }', + link: getDocLink('$map', META_EXPR_ARRAY), + }, + { + value: '$maxN', + meta: META_EXPR_ARRAY, + description: 'Retrieves the top N values based on a specified filtering criteria', + snippet: '{ $maxN: { input: "${1:\\$array}", n: ${2:number} } }', + link: 'https://learn.microsoft.com/en-us/azure/documentdb/operators/accumulators/$maxn', // inferred from another category + }, + { + value: '$minN', + meta: META_EXPR_ARRAY, + description: 'Retrieves the bottom N values based on a specified filtering criteria', + snippet: '{ $minN: { input: "${1:\\$array}", n: ${2:number} } }', + link: 'https://learn.microsoft.com/en-us/azure/documentdb/operators/accumulators/$minn', + }, + { + value: '$objectToArray', + meta: META_EXPR_ARRAY, + description: 'Converts an object into an array of key-value pair documents.', + snippet: '{ $objectToArray: "${1:\\$object}" }', + link: 'https://learn.microsoft.com/en-us/azure/documentdb/operators/object-expression/$objecttoarray', // inferred from another category + }, + { + value: '$range', + meta: META_EXPR_ARRAY, + description: 'The $range operator allows generating an array of sequential integers.', + snippet: '{ $range: [${1:start}, ${2:end}, ${3:step}] }', + link: getDocLink('$range', META_EXPR_ARRAY), + }, + { + value: '$reduce', + meta: META_EXPR_ARRAY, + description: + 'The $reduce operator applies an expression to each element in an array & accumulate result as single value.', + snippet: '{ $reduce: { input: "${1:\\$array}", initialValue: ${2:0}, in: { ${3:expression} } } }', + link: getDocLink('$reduce', META_EXPR_ARRAY), + }, + { + value: '$reverseArray', + meta: META_EXPR_ARRAY, + description: 'The $reverseArray operator is used to reverse the order of elements in an array.', + snippet: '{ $reverseArray: "${1:\\$array}" }', + link: getDocLink('$reverseArray', META_EXPR_ARRAY), + }, + { + value: '$size', + meta: META_EXPR_ARRAY, + description: + 
'The $size operator is used to query documents where an array field has a specified number of elements.', + snippet: '{ $size: "${1:\\$array}" }', + link: 'https://learn.microsoft.com/en-us/azure/documentdb/operators/array-query/$size', // inferred from another category + }, + { + value: '$slice', + meta: META_EXPR_ARRAY, + description: 'The $slice operator returns a subset of an array from any element onwards in the array.', + snippet: '{ $slice: ["${1:\\$array}", ${2:n}] }', + link: getDocLink('$slice', META_EXPR_ARRAY), + }, + { + value: '$sortArray', + meta: META_EXPR_ARRAY, + description: 'The $sortArray operator helps in sorting the elements in an array.', + snippet: '{ $sortArray: { input: "${1:\\$array}", sortBy: { ${2:field}: ${3:1} } } }', + link: getDocLink('$sortArray', META_EXPR_ARRAY), + }, + { + value: '$zip', + meta: META_EXPR_ARRAY, + description: 'The $zip operator allows merging two or more arrays element-wise into a single array or arrays.', + snippet: '{ $zip: { inputs: ["${1:\\$array1}", "${2:\\$array2}"] } }', + link: getDocLink('$zip', META_EXPR_ARRAY), + }, +]; + +// --------------------------------------------------------------------------- +// Bitwise Operators +// --------------------------------------------------------------------------- + +const bitwiseOperators: readonly OperatorEntry[] = [ + { + value: '$bitAnd', + meta: META_EXPR_BITWISE, + description: + 'The $bitAnd operator performs a bitwise AND operation on integer values and returns the result as an integer.', + snippet: '{ $bitAnd: [${1:value1}, ${2:value2}] }', + link: getDocLink('$bitAnd', META_EXPR_BITWISE), + }, + { + value: '$bitNot', + meta: META_EXPR_BITWISE, + description: + 'The $bitNot operator performs a bitwise NOT operation on integer values and returns the result as an integer.', + snippet: '{ $bitNot: "${1:\\$field}" }', + link: getDocLink('$bitNot', META_EXPR_BITWISE), + }, + { + value: '$bitOr', + meta: META_EXPR_BITWISE, + description: + 'The $bitOr operator 
performs a bitwise OR operation on integer values and returns the result as an integer.', + snippet: '{ $bitOr: [${1:value1}, ${2:value2}] }', + link: getDocLink('$bitOr', META_EXPR_BITWISE), + }, + { + value: '$bitXor', + meta: META_EXPR_BITWISE, + description: 'The $bitXor operator performs a bitwise XOR operation on integer values.', + snippet: '{ $bitXor: [${1:value1}, ${2:value2}] }', + link: getDocLink('$bitXor', META_EXPR_BITWISE), + }, +]; + +// --------------------------------------------------------------------------- +// Boolean Expression Operators +// --------------------------------------------------------------------------- + +const booleanExpressionOperators: readonly OperatorEntry[] = [ + { + value: '$and', + meta: META_EXPR_BOOL, + description: + 'The $and operator joins multiple query clauses and returns documents that match all specified conditions.', + snippet: '{ $and: ["${1:expression1}", "${2:expression2}"] }', + link: 'https://learn.microsoft.com/en-us/azure/documentdb/operators/logical-query/$and', // inferred from another category + }, + { + value: '$not', + meta: META_EXPR_BOOL, + description: + "The $not operator performs a logical NOT operation on a specified expression, selecting documents that don't match the expression.", + snippet: '{ $not: ["${1:expression}"] }', + link: 'https://learn.microsoft.com/en-us/azure/documentdb/operators/logical-query/$not', // inferred from another category + }, + { + value: '$or', + meta: META_EXPR_BOOL, + description: + 'The $or operator joins query clauses with a logical OR and returns documents that match at least one of the specified conditions.', + snippet: '{ $or: ["${1:expression1}", "${2:expression2}"] }', + link: 'https://learn.microsoft.com/en-us/azure/documentdb/operators/logical-query/$or', // inferred from another category + }, +]; + +// --------------------------------------------------------------------------- +// Comparison Expression Operators +// 
--------------------------------------------------------------------------- + +const comparisonExpressionOperators: readonly OperatorEntry[] = [ + { + value: '$cmp', + meta: META_EXPR_COMPARISON, + description: 'The $cmp operator compares two values', + snippet: '{ $cmp: ["${1:\\$field1}", "${2:\\$field2}"] }', + link: 'https://learn.microsoft.com/en-us/azure/documentdb/operators/comparison-query/$cmp', + }, + { + value: '$eq', + meta: META_EXPR_COMPARISON, + description: 'The $eq query operator compares the value of a field to a specified value', + snippet: '{ $eq: ["${1:\\$field1}", "${2:\\$field2}"] }', + link: 'https://learn.microsoft.com/en-us/azure/documentdb/operators/comparison-query/$eq', // inferred from another category + }, + { + value: '$gt', + meta: META_EXPR_COMPARISON, + description: + 'The $gt query operator retrieves documents where the value of a field is greater than a specified value', + snippet: '{ $gt: ["${1:\\$field1}", "${2:\\$field2}"] }', + link: 'https://learn.microsoft.com/en-us/azure/documentdb/operators/comparison-query/$gt', // inferred from another category + }, + { + value: '$gte', + meta: META_EXPR_COMPARISON, + description: + 'The $gte operator retrieves documents where the value of a field is greater than or equal to a specified value', + snippet: '{ $gte: ["${1:\\$field1}", "${2:\\$field2}"] }', + link: 'https://learn.microsoft.com/en-us/azure/documentdb/operators/comparison-query/$gte', // inferred from another category + }, + { + value: '$lt', + meta: META_EXPR_COMPARISON, + description: 'The $lt operator retrieves documents where the value of field is less than a specified value', + snippet: '{ $lt: ["${1:\\$field1}", "${2:\\$field2}"] }', + link: 'https://learn.microsoft.com/en-us/azure/documentdb/operators/comparison-query/$lt', // inferred from another category + }, + { + value: '$lte', + meta: META_EXPR_COMPARISON, + description: + 'The $lte operator retrieves documents where the value of a field is less than or equal to 
a specified value', + snippet: '{ $lte: ["${1:\\$field1}", "${2:\\$field2}"] }', + link: 'https://learn.microsoft.com/en-us/azure/documentdb/operators/comparison-query/$lte', // inferred from another category + }, + { + value: '$ne', + meta: META_EXPR_COMPARISON, + description: "The $ne operator retrieves documents where the value of a field doesn't equal a specified value", + snippet: '{ $ne: ["${1:\\$field1}", "${2:\\$field2}"] }', + link: 'https://learn.microsoft.com/en-us/azure/documentdb/operators/comparison-query/$ne', // inferred from another category + }, +]; + +// --------------------------------------------------------------------------- +// Data Size Operators +// --------------------------------------------------------------------------- + +const dataSizeOperators: readonly OperatorEntry[] = [ + { + value: '$bsonSize', + meta: META_EXPR_DATASIZE, + description: 'The $bsonSize operator returns the size of a document in bytes when encoded as BSON.', + snippet: '{ $bsonSize: "${1:\\$field}" }', + link: getDocLink('$bsonSize', META_EXPR_DATASIZE), + }, + { + value: '$binarySize', + meta: META_EXPR_DATASIZE, + description: 'The $binarySize operator is used to return the size of a binary data field.', + snippet: '{ $binarySize: "${1:\\$field}" }', + link: getDocLink('$binarySize', META_EXPR_DATASIZE), + }, +]; + +// --------------------------------------------------------------------------- +// Date Expression Operators +// --------------------------------------------------------------------------- + +const dateExpressionOperators: readonly OperatorEntry[] = [ + { + value: '$dateAdd', + meta: META_EXPR_DATE, + description: 'The $dateAdd operator adds a specified number of time units (day, hour, month etc) to a date.', + snippet: '{ $dateAdd: { startDate: "${1:\\$dateField}", unit: "${2:day}", amount: ${3:1} } }', + link: getDocLink('$dateAdd', META_EXPR_DATE), + }, + { + value: '$dateDiff', + meta: META_EXPR_DATE, + description: + 'The $dateDiff operator 
calculates the difference between two dates in various units such as years, months, days, etc.', + snippet: '{ $dateDiff: { startDate: "${1:\\$startDate}", endDate: "${2:\\$endDate}", unit: "${3:day}" } }', + link: getDocLink('$dateDiff', META_EXPR_DATE), + }, + { + value: '$dateFromParts', + meta: META_EXPR_DATE, + description: 'The $dateFromParts operator constructs a date from individual components.', + snippet: '{ $dateFromParts: { year: ${1:2024}, month: ${2:1}, day: ${3:1} } }', + link: getDocLink('$dateFromParts', META_EXPR_DATE), + }, + { + value: '$dateFromString', + meta: META_EXPR_DATE, + description: 'The $dateFromString operator converts a date/time string to a date object.', + snippet: '{ $dateFromString: { dateString: "${1:dateString}" } }', + link: getDocLink('$dateFromString', META_EXPR_DATE), + }, + { + value: '$dateSubtract', + meta: META_EXPR_DATE, + description: 'The $dateSubtract operator subtracts a specified amount of time from a date.', + snippet: '{ $dateSubtract: { startDate: "${1:\\$dateField}", unit: "${2:day}", amount: ${3:1} } }', + link: getDocLink('$dateSubtract', META_EXPR_DATE), + }, + { + value: '$dateToParts', + meta: META_EXPR_DATE, + description: + 'The $dateToParts operator decomposes a date into its individual parts such as year, month, day, and more.', + snippet: '{ $dateToParts: { date: "${1:\\$dateField}" } }', + link: getDocLink('$dateToParts', META_EXPR_DATE), + }, + { + value: '$dateToString', + meta: META_EXPR_DATE, + description: 'The $dateToString operator converts a date object into a formatted string.', + snippet: '{ $dateToString: { format: "${1:%Y-%m-%d}", date: "${2:\\$dateField}" } }', + link: getDocLink('$dateToString', META_EXPR_DATE), + }, + { + value: '$dateTrunc', + meta: META_EXPR_DATE, + description: 'The $dateTrunc operator truncates a date to a specified unit.', + snippet: '{ $dateTrunc: { date: "${1:\\$dateField}", unit: "${2:day}" } }', + link: getDocLink('$dateTrunc', META_EXPR_DATE), + }, + { + value:
'$dayOfMonth', + meta: META_EXPR_DATE, + description: 'The $dayOfMonth operator extracts the day of the month from a date.', + snippet: '{ $dayOfMonth: "${1:\\$dateField}" }', + link: getDocLink('$dayOfMonth', META_EXPR_DATE), + }, + { + value: '$dayOfWeek', + meta: META_EXPR_DATE, + description: 'The $dayOfWeek operator extracts the day of the week from a date.', + snippet: '{ $dayOfWeek: "${1:\\$dateField}" }', + link: getDocLink('$dayOfWeek', META_EXPR_DATE), + }, + { + value: '$dayOfYear', + meta: META_EXPR_DATE, + description: 'The $dayOfYear operator extracts the day of the year from a date.', + snippet: '{ $dayOfYear: "${1:\\$dateField}" }', + link: getDocLink('$dayOfYear', META_EXPR_DATE), + }, + { + value: '$hour', + meta: META_EXPR_DATE, + description: 'The $hour operator returns the hour portion of a date as a number between 0 and 23.', + snippet: '{ $hour: "${1:\\$dateField}" }', + link: getDocLink('$hour', META_EXPR_DATE), + }, + { + value: '$isoDayOfWeek', + meta: META_EXPR_DATE, + description: + 'The $isoDayOfWeek operator returns the weekday number in ISO 8601 format, ranging from 1 (Monday) to 7 (Sunday).', + snippet: '{ $isoDayOfWeek: "${1:\\$dateField}" }', + link: getDocLink('$isoDayOfWeek', META_EXPR_DATE), + }, + { + value: '$isoWeek', + meta: META_EXPR_DATE, + description: + 'The $isoWeek operator returns the week number of the year in ISO 8601 format, ranging from 1 to 53.', + snippet: '{ $isoWeek: "${1:\\$dateField}" }', + link: getDocLink('$isoWeek', META_EXPR_DATE), + }, + { + value: '$isoWeekYear', + meta: META_EXPR_DATE, + description: + 'The $isoWeekYear operator returns the year number in ISO 8601 format, which can differ from the calendar year for dates at the beginning or end of the year.', + snippet: '{ $isoWeekYear: "${1:\\$dateField}" }', + link: getDocLink('$isoWeekYear', META_EXPR_DATE), + }, + { + value: '$millisecond', + meta: META_EXPR_DATE, + description: 'The $millisecond operator extracts the milliseconds portion from a 
date value.', + snippet: '{ $millisecond: "${1:\\$dateField}" }', + link: getDocLink('$millisecond', META_EXPR_DATE), + }, + { + value: '$minute', + meta: META_EXPR_DATE, + description: 'The $minute operator extracts the minute portion from a date value.', + snippet: '{ $minute: "${1:\\$dateField}" }', + link: getDocLink('$minute', META_EXPR_DATE), + }, + { + value: '$month', + meta: META_EXPR_DATE, + description: 'The $month operator extracts the month portion from a date value.', + snippet: '{ $month: "${1:\\$dateField}" }', + link: getDocLink('$month', META_EXPR_DATE), + }, + { + value: '$second', + meta: META_EXPR_DATE, + description: 'The $second operator extracts the seconds portion from a date value.', + snippet: '{ $second: "${1:\\$dateField}" }', + link: getDocLink('$second', META_EXPR_DATE), + }, + { + value: '$toDate', + meta: META_EXPR_DATE, + description: 'The $toDate operator converts supported types to a proper Date object.', + snippet: '{ $toDate: "${1:\\$field}" }', + link: 'https://learn.microsoft.com/en-us/azure/documentdb/operators/aggregation/type-expression/$todate', // inferred from another category + }, + { + value: '$week', + meta: META_EXPR_DATE, + description: 'The $week operator returns the week number for a date as a value between 0 and 53.', + snippet: '{ $week: "${1:\\$dateField}" }', + link: getDocLink('$week', META_EXPR_DATE), + }, + { + value: '$year', + meta: META_EXPR_DATE, + description: 'The $year operator returns the year for a date as a four-digit number.', + snippet: '{ $year: "${1:\\$dateField}" }', + link: getDocLink('$year', META_EXPR_DATE), + }, +]; + +// --------------------------------------------------------------------------- +// Literal Expression Operator +// --------------------------------------------------------------------------- + +const literalExpressionOperator: readonly OperatorEntry[] = [ + { + value: '$literal', + meta: META_EXPR_LITERAL, + description: + 'The $literal operator returns the specified value 
without parsing it as an expression, allowing literal values to be used in aggregation pipelines.', + snippet: '{ $literal: ${1:value} }', + link: getDocLink('$literal', META_EXPR_LITERAL), + }, +]; + +// --------------------------------------------------------------------------- +// Miscellaneous Operators +// --------------------------------------------------------------------------- + +const miscellaneousOperators: readonly OperatorEntry[] = [ + { + value: '$getField', + meta: META_EXPR_MISC, + description: 'The $getField operator allows retrieving the value of a specified field from a document.', + snippet: '{ $getField: { field: "${1:fieldName}", input: "${2:\\$object}" } }', + link: getDocLink('$getField', META_EXPR_MISC), + }, + { + value: '$rand', + meta: META_EXPR_MISC, + description: 'The $rand operator generates a random float value between 0 and 1.', + snippet: '{ $rand: {} }', + link: 'https://learn.microsoft.com/en-us/azure/documentdb/operators/miscellaneous-query/$rand', // inferred from another category + }, + { + value: '$sampleRate', + meta: META_EXPR_MISC, + description: + 'The $sampleRate operator randomly samples documents from a collection based on a specified probability rate, useful for statistical analysis and testing.', + snippet: '{ $sampleRate: ${1:0.5} }', + link: getDocLink('$sampleRate', META_EXPR_MISC), + }, +]; + +// --------------------------------------------------------------------------- +// Object Expression Operators +// --------------------------------------------------------------------------- + +const objectExpressionOperators: readonly OperatorEntry[] = [ + { + value: '$mergeObjects', + meta: META_EXPR_OBJECT, + description: 'The $mergeObjects operator merges multiple documents into a single document', + snippet: '{ $mergeObjects: ["${1:\\$object1}", "${2:\\$object2}"] }', + link: getDocLink('$mergeObjects', META_EXPR_OBJECT), + }, + { + value: '$objectToArray', + meta: META_EXPR_OBJECT, + description: + 'The $objectToArray
operator is used to transform a document (object) into an array of key-value pairs.', + snippet: '{ $objectToArray: "${1:\\$object}" }', + link: getDocLink('$objectToArray', META_EXPR_OBJECT), + }, + { + value: '$setField', + meta: META_EXPR_OBJECT, + description: 'The $setField operator is used to add, update, or remove fields in embedded documents.', + snippet: '{ $setField: { field: "${1:fieldName}", input: "${2:\\$object}", value: ${3:value} } }', + link: getDocLink('$setField', META_EXPR_OBJECT), + }, +]; + +// --------------------------------------------------------------------------- +// Set Expression Operators +// --------------------------------------------------------------------------- + +const setExpressionOperators: readonly OperatorEntry[] = [ + { + value: '$allElementsTrue', + meta: META_EXPR_SET, + description: 'The $allElementsTrue operator returns true if all elements in an array evaluate to true.', + snippet: '{ $allElementsTrue: ["${1:\\$array}"] }', + link: getDocLink('$allElementsTrue', META_EXPR_SET), + }, + { + value: '$anyElementTrue', + meta: META_EXPR_SET, + description: + 'The $anyElementTrue operator returns true if any element in an array evaluates to a value of true.', + snippet: '{ $anyElementTrue: ["${1:\\$array}"] }', + link: getDocLink('$anyElementTrue', META_EXPR_SET), + }, + { + value: '$setDifference', + meta: META_EXPR_SET, + description: + 'The $setDifference operator returns a set with elements that exist in one set but not in a second set.', + snippet: '{ $setDifference: ["${1:\\$set1}", "${2:\\$set2}"] }', + link: getDocLink('$setDifference', META_EXPR_SET), + }, + { + value: '$setEquals', + meta: META_EXPR_SET, + description: 'The $setEquals operator returns true if two sets have the same distinct elements.', + snippet: '{ $setEquals: ["${1:\\$set1}", "${2:\\$set2}"] }', + link: getDocLink('$setEquals', META_EXPR_SET), + }, + { + value: '$setIntersection', + meta: META_EXPR_SET, + description: 'The $setIntersection operator
returns the common elements that appear in all input arrays.', + snippet: '{ $setIntersection: ["${1:\\$set1}", "${2:\\$set2}"] }', + link: getDocLink('$setIntersection', META_EXPR_SET), + }, + { + value: '$setIsSubset', + meta: META_EXPR_SET, + description: 'The $setIsSubset operator determines if one array is a subset of a second array.', + snippet: '{ $setIsSubset: ["${1:\\$set1}", "${2:\\$set2}"] }', + link: getDocLink('$setIsSubset', META_EXPR_SET), + }, + { + value: '$setUnion', + meta: META_EXPR_SET, + description: + 'The $setUnion operator returns an array that contains all the unique elements from the input arrays.', + snippet: '{ $setUnion: ["${1:\\$set1}", "${2:\\$set2}"] }', + link: getDocLink('$setUnion', META_EXPR_SET), + }, +]; + +// --------------------------------------------------------------------------- +// String Expression Operators +// --------------------------------------------------------------------------- + +const stringExpressionOperators: readonly OperatorEntry[] = [ + { + value: '$concat', + meta: META_EXPR_STRING, + description: 'Concatenates two or more strings and returns the resulting string.', + snippet: '{ $concat: ["${1:\\$string1}", "${2:\\$string2}"] }', + }, + { + value: '$dateFromString', + meta: META_EXPR_STRING, + description: 'The $dateFromString operator converts a date/time string to a date object.', + snippet: '{ $dateFromString: { dateString: "${1:\\$string}" } }', + link: 'https://learn.microsoft.com/en-us/azure/documentdb/operators/date-expression/$datefromstring', // inferred from another category + }, + { + value: '$dateToString', + meta: META_EXPR_STRING, + description: 'The $dateToString operator converts a date object into a formatted string.', + snippet: '{ $dateToString: { format: "${1:%Y-%m-%d}", date: "${2:\\$dateField}" } }', + link: 'https://learn.microsoft.com/en-us/azure/documentdb/operators/date-expression/$datetostring', // inferred from another category + }, + { + value: '$indexOfBytes', + meta: META_EXPR_STRING, + description: 'Returns the byte index of the
first occurrence of a substring within a string.', + snippet: '{ $indexOfBytes: ["${1:\\$string}", "${2:substring}"] }', + }, + { + value: '$indexOfCP', + meta: META_EXPR_STRING, + description: 'Returns the code point index of the first occurrence of a substring within a string.', + snippet: '{ $indexOfCP: ["${1:\\$string}", "${2:substring}"] }', + }, + { + value: '$ltrim', + meta: META_EXPR_STRING, + description: 'Removes whitespace or specified characters from the beginning of a string.', + snippet: '{ $ltrim: { input: "${1:\\$string}" } }', + }, + { + value: '$regexFind', + meta: META_EXPR_STRING, + description: 'Applies a regular expression to a string and returns the first match.', + snippet: '{ $regexFind: { input: "${1:\\$string}", regex: "${2:pattern}" } }', + }, + { + value: '$regexFindAll', + meta: META_EXPR_STRING, + description: 'Applies a regular expression to a string and returns all matches as an array.', + snippet: '{ $regexFindAll: { input: "${1:\\$string}", regex: "${2:pattern}" } }', + }, + { + value: '$regexMatch', + meta: META_EXPR_STRING, + description: 'Applies a regular expression to a string and returns a boolean indicating if a match was found.', + snippet: '{ $regexMatch: { input: "${1:\\$string}", regex: "${2:pattern}" } }', + }, + { + value: '$replaceOne', + meta: META_EXPR_STRING, + description: 'Replaces the first occurrence of a search string with a replacement string.', + snippet: '{ $replaceOne: { input: "${1:\\$string}", find: "${2:find}", replacement: "${3:replacement}" } }', + }, + { + value: '$replaceAll', + meta: META_EXPR_STRING, + description: 'Replaces all occurrences of a search string with a replacement string.', + snippet: '{ $replaceAll: { input: "${1:\\$string}", find: "${2:find}", replacement: "${3:replacement}" } }', + }, + { + value: '$rtrim', + meta: META_EXPR_STRING, + description: 'Removes whitespace or specified characters from the end of a string.', + snippet: '{ $rtrim: { input: "${1:\\$string}" } }', + }, + { 
+ value: '$split', + meta: META_EXPR_STRING, + description: 'Splits a string by a delimiter and returns an array of substrings.', + snippet: '{ $split: ["${1:\\$string}", "${2:delimiter}"] }', + }, + { + value: '$strLenBytes', + meta: META_EXPR_STRING, + description: 'Returns the number of UTF-8 encoded bytes in the specified string.', + snippet: '{ $strLenBytes: "${1:\\$string}" }', + }, + { + value: '$strLenCP', + meta: META_EXPR_STRING, + description: 'Returns the number of UTF-8 code points in the specified string.', + snippet: '{ $strLenCP: "${1:\\$string}" }', + }, + { + value: '$strcasecmp', + meta: META_EXPR_STRING, + description: 'Performs a case-insensitive comparison of two strings and returns an integer.', + snippet: '{ $strcasecmp: ["${1:\\$string1}", "${2:\\$string2}"] }', + }, + { + value: '$substr', + meta: META_EXPR_STRING, + description: + 'Returns a substring of a string, starting at a specified index for a specified length. Deprecated — use $substrBytes or $substrCP.', + snippet: '{ $substr: ["${1:\\$string}", ${2:start}, ${3:length}] }', + }, + { + value: '$substrBytes', + meta: META_EXPR_STRING, + description: + 'Returns a substring of a string by byte index, starting at a specified index for a specified number of bytes.', + snippet: '{ $substrBytes: ["${1:\\$string}", ${2:start}, ${3:length}] }', + }, + { + value: '$substrCP', + meta: META_EXPR_STRING, + description: + 'Returns a substring of a string by code point index, starting at a specified index for a specified number of code points.', + snippet: '{ $substrCP: ["${1:\\$string}", ${2:start}, ${3:length}] }', + }, + { + value: '$toLower', + meta: META_EXPR_STRING, + description: 'Converts a string to lowercase and returns the result.', + snippet: '{ $toLower: "${1:\\$string}" }', + }, + { + value: '$toString', + meta: META_EXPR_STRING, + description: 'The $toString operator converts an expression into a String', + snippet: '{ $toString: "${1:\\$string}" }', + link:
'https://learn.microsoft.com/en-us/azure/documentdb/operators/aggregation/type-expression/$tostring', // inferred from another category + }, + { + value: '$trim', + meta: META_EXPR_STRING, + description: 'Removes whitespace or specified characters from both ends of a string.', + snippet: '{ $trim: { input: "${1:\\$string}" } }', + }, + { + value: '$toUpper', + meta: META_EXPR_STRING, + description: 'Converts a string to uppercase and returns the result.', + snippet: '{ $toUpper: "${1:\\$string}" }', + }, +]; + +// --------------------------------------------------------------------------- +// Timestamp Expression Operators +// --------------------------------------------------------------------------- + +const timestampExpressionOperators: readonly OperatorEntry[] = [ + { + value: '$tsIncrement', + meta: META_EXPR_TIMESTAMP, + description: 'The $tsIncrement operator extracts the increment portion from a timestamp value.', + snippet: '{ $tsIncrement: "${1:\\$timestampField}" }', + link: getDocLink('$tsIncrement', META_EXPR_TIMESTAMP), + }, + { + value: '$tsSecond', + meta: META_EXPR_TIMESTAMP, + description: 'The $tsSecond operator extracts the seconds portion from a timestamp value.', + snippet: '{ $tsSecond: "${1:\\$timestampField}" }', + link: getDocLink('$tsSecond', META_EXPR_TIMESTAMP), + }, +]; + +// --------------------------------------------------------------------------- +// Trigonometry Expression Operators +// --------------------------------------------------------------------------- + +const trigonometryExpressionOperators: readonly OperatorEntry[] = [ + { + value: '$sin', + meta: META_EXPR_TRIG, + description: 'Returns the sine of a value measured in radians.', + snippet: '{ $sin: "${1:\\$value}" }', + }, + { + value: '$cos', + meta: META_EXPR_TRIG, + description: 'Returns the cosine of a value measured in radians.', + snippet: '{ $cos: "${1:\\$value}" }', + }, + { + value: '$tan', + meta: META_EXPR_TRIG, + description: 'Returns the tangent of a value 
measured in radians.', + snippet: '{ $tan: "${1:\\$value}" }', + }, + { + value: '$asin', + meta: META_EXPR_TRIG, + description: 'Returns the arcsine (inverse sine) of a value in radians.', + snippet: '{ $asin: "${1:\\$value}" }', + }, + { + value: '$acos', + meta: META_EXPR_TRIG, + description: 'Returns the arccosine (inverse cosine) of a value in radians.', + snippet: '{ $acos: "${1:\\$value}" }', + }, + { + value: '$atan', + meta: META_EXPR_TRIG, + description: 'Returns the arctangent (inverse tangent) of a value in radians.', + snippet: '{ $atan: "${1:\\$value}" }', + }, + { + value: '$atan2', + meta: META_EXPR_TRIG, + description: 'Returns the arctangent of the quotient of two values, using the signs to determine the quadrant.', + snippet: '{ $atan2: ["${1:\\$y}", "${2:\\$x}"] }', + }, + { + value: '$asinh', + meta: META_EXPR_TRIG, + description: 'Returns the inverse hyperbolic sine of a value.', + snippet: '{ $asinh: "${1:\\$value}" }', + }, + { + value: '$acosh', + meta: META_EXPR_TRIG, + description: 'Returns the inverse hyperbolic cosine of a value.', + snippet: '{ $acosh: "${1:\\$value}" }', + }, + { + value: '$atanh', + meta: META_EXPR_TRIG, + description: 'Returns the inverse hyperbolic tangent of a value.', + snippet: '{ $atanh: "${1:\\$value}" }', + }, + { + value: '$sinh', + meta: META_EXPR_TRIG, + description: 'Returns the hyperbolic sine of a value.', + snippet: '{ $sinh: "${1:\\$value}" }', + }, + { + value: '$cosh', + meta: META_EXPR_TRIG, + description: 'Returns the hyperbolic cosine of a value.', + snippet: '{ $cosh: "${1:\\$value}" }', + }, + { + value: '$tanh', + meta: META_EXPR_TRIG, + description: 'Returns the hyperbolic tangent of a value.', + snippet: '{ $tanh: "${1:\\$value}" }', + }, + { + value: '$degreesToRadians', + meta: META_EXPR_TRIG, + description: 'Converts a value from degrees to radians.', + snippet: '{ $degreesToRadians: "${1:\\$angle}" }', + }, + { + value: '$radiansToDegrees', + meta: META_EXPR_TRIG, + description: 'Converts a value
from radians to degrees.', + snippet: '{ $radiansToDegrees: "${1:\\$angle}" }', + }, +]; + +// --------------------------------------------------------------------------- +// Type Expression Operators +// --------------------------------------------------------------------------- + +const typeExpressionOperators: readonly OperatorEntry[] = [ + { + value: '$convert', + meta: META_EXPR_TYPE, + description: 'The $convert operator converts an expression into the specified type', + snippet: '{ $convert: { input: "${1:\\$field}", to: "${2:type}" } }', + link: getDocLink('$convert', META_EXPR_TYPE), + }, + { + value: '$isNumber', + meta: META_EXPR_TYPE, + description: 'The $isNumber operator checks if a specified expression is a numerical type', + snippet: '{ $isNumber: "${1:\\$field}" }', + link: getDocLink('$isNumber', META_EXPR_TYPE), + }, + { + value: '$toBool', + meta: META_EXPR_TYPE, + description: 'The $toBool operator converts an expression into a Boolean type', + snippet: '{ $toBool: "${1:\\$field}" }', + link: getDocLink('$toBool', META_EXPR_TYPE), + }, + { + value: '$toDate', + meta: META_EXPR_TYPE, + description: 'The $toDate operator converts supported types to a proper Date object.', + snippet: '{ $toDate: "${1:\\$field}" }', + link: getDocLink('$toDate', META_EXPR_TYPE), + }, + { + value: '$toDecimal', + meta: META_EXPR_TYPE, + description: 'The $toDecimal operator converts an expression into a Decimal type', + snippet: '{ $toDecimal: "${1:\\$field}" }', + link: getDocLink('$toDecimal', META_EXPR_TYPE), + }, + { + value: '$toDouble', + meta: META_EXPR_TYPE, + description: 'The $toDouble operator converts an expression into a Double value', + snippet: '{ $toDouble: "${1:\\$field}" }', + link: getDocLink('$toDouble', META_EXPR_TYPE), + }, + { + value: '$toInt', + meta: META_EXPR_TYPE, + description: 'The $toInt operator converts an expression into an Integer', + snippet: '{ $toInt: "${1:\\$field}" }', + link: getDocLink('$toInt', META_EXPR_TYPE), + }, + { + 
value: '$toLong', + meta: META_EXPR_TYPE, + description: 'The $toLong operator converts an expression into a Long value', + snippet: '{ $toLong: "${1:\\$field}" }', + link: getDocLink('$toLong', META_EXPR_TYPE), + }, + { + value: '$toObjectId', + meta: META_EXPR_TYPE, + description: 'The $toObjectId operator converts an expression into an ObjectId', + snippet: '{ $toObjectId: "${1:\\$field}" }', + link: getDocLink('$toObjectId', META_EXPR_TYPE), + }, + { + value: '$toString', + meta: META_EXPR_TYPE, + description: 'The $toString operator converts an expression into a String', + snippet: '{ $toString: "${1:\\$field}" }', + link: getDocLink('$toString', META_EXPR_TYPE), + }, + { + value: '$type', + meta: META_EXPR_TYPE, + description: 'The $type operator retrieves documents if the chosen field is of the specified type.', + snippet: '{ $type: "${1:\\$field}" }', + link: 'https://learn.microsoft.com/en-us/azure/documentdb/operators/element-query/$type', // inferred from another category + }, +]; + +// --------------------------------------------------------------------------- +// Variable Expression Operators +// --------------------------------------------------------------------------- + +const variableExpressionOperators: readonly OperatorEntry[] = [ + { + value: '$let', + meta: META_EXPR_VARIABLE, + description: + 'The $let operator allows defining variables for use in a specified expression, enabling complex calculations and reducing code repetition.', + snippet: '{ $let: { vars: { ${1:var}: ${2:expression} }, in: ${3:expression} } }', + link: getDocLink('$let', META_EXPR_VARIABLE), + }, +]; + +// --------------------------------------------------------------------------- +// Conditional Expression Operators +// --------------------------------------------------------------------------- + +const conditionalExpressionOperators: readonly OperatorEntry[] = [ + { + value: '$cond', + meta: META_EXPR_CONDITIONAL, + description: + 'The $cond operator is used to evaluate 
a condition and return one of two expressions based on the result.', + snippet: '{ $cond: { if: { ${1:expression} }, then: ${2:trueValue}, else: ${3:falseValue} } }', + link: getDocLink('$cond', META_EXPR_CONDITIONAL), + }, + { + value: '$ifNull', + meta: META_EXPR_CONDITIONAL, + description: + 'The $ifNull operator is used to evaluate an expression and return a specified value if the expression resolves to null.', + snippet: '{ $ifNull: ["${1:\\$field}", ${2:replacement}] }', + link: getDocLink('$ifNull', META_EXPR_CONDITIONAL), + }, + { + value: '$switch', + meta: META_EXPR_CONDITIONAL, + description: + 'The $switch operator is used to evaluate a series of conditions and return a value based on the first condition that evaluates to true.', + snippet: + '{ $switch: { branches: [{ case: { ${1:expression} }, then: ${2:value} }], default: ${3:defaultValue} } }', + link: getDocLink('$switch', META_EXPR_CONDITIONAL), + }, +]; + +// --------------------------------------------------------------------------- +// Registration +// --------------------------------------------------------------------------- + +export function loadExpressionOperators(): void { + registerOperators([ + ...arithmeticExpressionOperators, + ...arrayExpressionOperators, + ...bitwiseOperators, + ...booleanExpressionOperators, + ...comparisonExpressionOperators, + ...dataSizeOperators, + ...dateExpressionOperators, + ...literalExpressionOperator, + ...miscellaneousOperators, + ...objectExpressionOperators, + ...setExpressionOperators, + ...stringExpressionOperators, + ...timestampExpressionOperators, + ...trigonometryExpressionOperators, + ...typeExpressionOperators, + ...variableExpressionOperators, + ...conditionalExpressionOperators, + ]); +} diff --git a/packages/documentdb-constants/src/getFilteredCompletions.test.ts b/packages/documentdb-constants/src/getFilteredCompletions.test.ts new file mode 100644 index 000000000..02e683e5d --- /dev/null +++ 
b/packages/documentdb-constants/src/getFilteredCompletions.test.ts @@ -0,0 +1,237 @@ +/*--------------------------------------------------------------------------------------------- + * Copyright (c) Microsoft Corporation. All rights reserved. + * Licensed under the MIT License. See License.txt in the project root for license information. + *--------------------------------------------------------------------------------------------*/ + +/** + * Unit tests for getFilteredCompletions and completion presets. + */ + +import { + EXPRESSION_COMPLETION_META, + FILTER_COMPLETION_META, + GROUP_EXPRESSION_COMPLETION_META, + PROJECTION_COMPLETION_META, + STAGE_COMPLETION_META, + UPDATE_COMPLETION_META, + WINDOW_COMPLETION_META, + getAllCompletions, + getFilteredCompletions, + loadOperators, +} from './index'; + +describe('getFilteredCompletions', () => { + test('returns all operators when filtering by all top-level meta prefixes', () => { + const all = getAllCompletions(); + expect(all.length).toBeGreaterThan(0); + }); + + test('filtering by "query" returns only query operators', () => { + const results = getFilteredCompletions({ meta: ['query'] }); + expect(results.length).toBeGreaterThan(0); + for (const r of results) { + expect(r.meta).toMatch(/^query/); + } + }); + + test('filtering by "query:comparison" returns only comparison operators', () => { + const results = getFilteredCompletions({ meta: ['query:comparison'] }); + expect(results.length).toBe(8); // $eq, $gt, $gte, $in, $lt, $lte, $ne, $nin + for (const r of results) { + expect(r.meta).toBe('query:comparison'); + } + }); + + test('filtering by "stage" returns aggregation pipeline stages', () => { + const results = getFilteredCompletions({ meta: ['stage'] }); + expect(results.length).toBe(35); + for (const r of results) { + expect(r.meta).toBe('stage'); + } + }); + + test('filtering by "update" returns all update operators', () => { + const results = getFilteredCompletions({ meta: ['update'] }); + 
expect(results.length).toBe(22); + for (const r of results) { + expect(r.meta).toMatch(/^update/); + } + }); + + test('filtering by "accumulator" returns accumulator operators', () => { + const results = getFilteredCompletions({ meta: ['accumulator'] }); + expect(results.length).toBe(21); + for (const r of results) { + expect(r.meta).toBe('accumulator'); + } + }); + + test('filtering by "expr" returns all expression operators', () => { + const results = getFilteredCompletions({ meta: ['expr'] }); + expect(results.length).toBeGreaterThan(100); + for (const r of results) { + expect(r.meta).toMatch(/^expr:/); + } + }); + + test('filtering by "window" returns window operators', () => { + const results = getFilteredCompletions({ meta: ['window'] }); + expect(results.length).toBe(27); + for (const r of results) { + expect(r.meta).toBe('window'); + } + }); + + test('filtering by "bson" returns BSON constructors', () => { + const results = getFilteredCompletions({ meta: ['bson'] }); + expect(results.length).toBe(10); + for (const r of results) { + expect(r.meta).toBe('bson'); + } + }); + + test('filtering by "variable" returns system variables', () => { + const results = getFilteredCompletions({ meta: ['variable'] }); + expect(results.length).toBe(7); + for (const r of results) { + expect(r.meta).toBe('variable'); + } + }); + + test('filtering by multiple meta tags combines results', () => { + const queryOnly = getFilteredCompletions({ meta: ['query'] }); + const stageOnly = getFilteredCompletions({ meta: ['stage'] }); + const combined = getFilteredCompletions({ meta: ['query', 'stage'] }); + expect(combined.length).toBe(queryOnly.length + stageOnly.length); + }); + + test('empty meta array returns no results', () => { + const results = getFilteredCompletions({ meta: [] }); + expect(results.length).toBe(0); + }); + + test('unknown meta tag returns no results', () => { + const results = getFilteredCompletions({ meta: ['nonexistent'] }); + expect(results.length).toBe(0); + 
}); + + describe('BSON type filtering', () => { + test('filtering by bsonTypes narrows type-specific operators', () => { + const allQuery = getFilteredCompletions({ meta: ['query'] }); + const stringOnly = getFilteredCompletions({ + meta: ['query'], + bsonTypes: ['string'], + }); + // String-only should have fewer or equal operators (universal + string-specific) + expect(stringOnly.length).toBeLessThanOrEqual(allQuery.length); + expect(stringOnly.length).toBeGreaterThan(0); + }); + + test('universal operators (no applicableBsonTypes) always pass BSON filter', () => { + const withBsonFilter = getFilteredCompletions({ + meta: ['query:comparison'], + bsonTypes: ['string'], + }); + // All comparison operators are universal + expect(withBsonFilter.length).toBe(8); + }); + + test('type-specific operators are excluded when BSON type does not match', () => { + const stringOps = getFilteredCompletions({ + meta: ['query'], + bsonTypes: ['number'], + }); + // $regex should NOT be included (it's string-only) + const hasRegex = stringOps.some((op) => op.value === '$regex'); + expect(hasRegex).toBe(false); + }); + + test('type-specific operators are included when BSON type matches', () => { + const stringOps = getFilteredCompletions({ + meta: ['query'], + bsonTypes: ['string'], + }); + // $regex should be included for string type + const hasRegex = stringOps.some((op) => op.value === '$regex'); + expect(hasRegex).toBe(true); + }); + }); +}); + +describe('completion context presets', () => { + test('FILTER_COMPLETION_META returns query + bson + variable', () => { + const results = getFilteredCompletions({ meta: FILTER_COMPLETION_META }); + const metas = new Set(results.map((r) => r.meta.split(':')[0])); + expect(metas).toContain('query'); + expect(metas).toContain('bson'); + expect(metas).toContain('variable'); + expect(metas).not.toContain('stage'); + expect(metas).not.toContain('update'); + }); + + test('STAGE_COMPLETION_META returns only stages', () => { + const results = 
getFilteredCompletions({ meta: STAGE_COMPLETION_META }); + expect(results.length).toBe(35); + for (const r of results) { + expect(r.meta).toBe('stage'); + } + }); + + test('UPDATE_COMPLETION_META returns only update operators', () => { + const results = getFilteredCompletions({ meta: UPDATE_COMPLETION_META }); + expect(results.length).toBe(22); + for (const r of results) { + expect(r.meta).toMatch(/^update/); + } + }); + + test('GROUP_EXPRESSION_COMPLETION_META returns expr + accumulator + bson + variable', () => { + const results = getFilteredCompletions({ meta: GROUP_EXPRESSION_COMPLETION_META }); + const metaPrefixes = new Set(results.map((r) => r.meta.split(':')[0])); + expect(metaPrefixes).toContain('expr'); + expect(metaPrefixes).toContain('accumulator'); + expect(metaPrefixes).toContain('bson'); + expect(metaPrefixes).toContain('variable'); + expect(metaPrefixes).not.toContain('query'); + expect(metaPrefixes).not.toContain('stage'); + }); + + test('EXPRESSION_COMPLETION_META returns expr + bson + variable (no accumulators)', () => { + const results = getFilteredCompletions({ meta: EXPRESSION_COMPLETION_META }); + const metaPrefixes = new Set(results.map((r) => r.meta.split(':')[0])); + expect(metaPrefixes).toContain('expr'); + expect(metaPrefixes).toContain('bson'); + expect(metaPrefixes).toContain('variable'); + expect(metaPrefixes).not.toContain('accumulator'); + }); + + test('WINDOW_COMPLETION_META returns window + accumulator + expr + bson + variable', () => { + const results = getFilteredCompletions({ meta: WINDOW_COMPLETION_META }); + const metaPrefixes = new Set(results.map((r) => r.meta.split(':')[0])); + expect(metaPrefixes).toContain('window'); + expect(metaPrefixes).toContain('accumulator'); + expect(metaPrefixes).toContain('expr'); + expect(metaPrefixes).toContain('bson'); + expect(metaPrefixes).toContain('variable'); + }); + + test('PROJECTION_COMPLETION_META returns projection operators + BSON constructors', () => { + const results = 
getFilteredCompletions({ meta: PROJECTION_COMPLETION_META }); + // field:identifier entries are injected at runtime, not statically registered + // But projection operators ($, $elemMatch, $slice) and BSON constructors are static + expect(results.length).toBeGreaterThan(0); + const metas = [...new Set(results.map((r) => r.meta))]; + expect(metas).toContain('query:projection'); + expect(metas).toContain('bson'); + }); +}); + +describe('registry idempotency', () => { + test('calling loadOperators() twice does not duplicate entries', () => { + const countBefore = getAllCompletions().length; + // loadOperators is re-exported from index + loadOperators(); + const countAfter = getAllCompletions().length; + expect(countAfter).toBe(countBefore); + }); +}); diff --git a/packages/documentdb-constants/src/getFilteredCompletions.ts b/packages/documentdb-constants/src/getFilteredCompletions.ts new file mode 100644 index 000000000..170353f78 --- /dev/null +++ b/packages/documentdb-constants/src/getFilteredCompletions.ts @@ -0,0 +1,99 @@ +/*--------------------------------------------------------------------------------------------- + * Copyright (c) Microsoft Corporation. All rights reserved. + * Licensed under the MIT License. See License.txt in the project root for license information. + *--------------------------------------------------------------------------------------------*/ + +/** + * Primary consumer API for the documentdb-constants package. + * + * Provides filtered access to the operator entries based on meta tags + * and optional BSON type constraints. + */ + +import { type CompletionFilter, type OperatorEntry } from './types'; + +/** + * Internal registry of all operator entries. Populated by the + * individual operator module files (queryOperators, stages, etc.) + * via {@link registerOperators}. + */ +const allOperatorsSet = new Set(); +const allOperators: OperatorEntry[] = []; + +/** + * Registers operator entries into the global registry. 
/**
 * Registers operator entries into the global registry.
 *
 * An entry is identified by its `value|meta` key. Entries whose key is already
 * present are silently skipped, so registering the same data repeatedly is
 * idempotent.
 *
 * Called by each operator module during module initialization.
 *
 * @param entries - array of OperatorEntry objects to register
 */
export function registerOperators(entries: readonly OperatorEntry[]): void {
    for (const entry of entries) {
        const key = `${entry.value}|${entry.meta}`;
        if (!allOperatorsSet.has(key)) {
            allOperatorsSet.add(key);
            allOperators.push(entry);
        }
    }
}

/**
 * Clears all registered operator entries, both the ordered entry list and the
 * de-duplication key set, so the same entries can be registered again.
 * Intended for internal/testing use only.
 */
export function clearOperators(): void {
    allOperators.length = 0;
    allOperatorsSet.clear();
}

/**
 * Returns operator entries matching the given filter.
 *
 * Meta tag matching uses **prefix matching** on `:` boundaries: a filter meta
 * of 'query' matches 'query', 'query:comparison', 'query:logical', etc., but
 * not an unrelated tag that merely starts with the same letters.
 * A filter meta of 'expr' matches all 'expr:*' entries.
 *
 * BSON type filtering is applied as an intersection: if `filter.bsonTypes`
 * is provided and non-empty, only operators whose `applicableBsonTypes`
 * includes at least one of the requested types are returned. Operators
 * without `applicableBsonTypes` (or with an empty list) are universal and are
 * always included.
 *
 * @param filter - the filtering criteria
 * @returns matching operator entries as a new array; `Array.prototype.filter`
 * always allocates a fresh array, so callers cannot mutate the internal
 * registry through this return value.
 */
export function getFilteredCompletions(filter: CompletionFilter): readonly OperatorEntry[] {
    // Resolve the optional BSON constraint once per call instead of once per
    // entry. Binding it to a narrowed const also removes the need for a
    // non-null assertion inside the nested callback below.
    const requestedTypes = filter.bsonTypes && filter.bsonTypes.length > 0 ? filter.bsonTypes : undefined;

    return allOperators.filter((entry) => {
        // Meta tag prefix matching (exact tag, or tag followed by ':').
        const metaMatch = filter.meta.some((prefix) => entry.meta === prefix || entry.meta.startsWith(prefix + ':'));
        if (!metaMatch) {
            return false;
        }

        // No BSON constraint requested: the meta match alone decides.
        if (requestedTypes === undefined) {
            return true;
        }

        // Universal operators (no applicableBsonTypes) always pass.
        const applicableTypes = entry.applicableBsonTypes;
        if (!applicableTypes || applicableTypes.length === 0) {
            return true;
        }

        return applicableTypes.some((t) => requestedTypes.includes(t));
    });
}

/**
 * Returns all operator entries (unfiltered).
 * Useful for validation, testing, and diagnostics.
 *
 * Returns a shallow copy so callers cannot mutate the internal registry.
 */
export function getAllCompletions(): readonly OperatorEntry[] {
    return [...allOperators];
}
+ */ + +// -- Core types -- +export type { CompletionFilter, MetaTag, OperatorEntry } from './types'; + +// -- Meta tag constants and presets -- +export { + ALL_META_TAGS, + EXPRESSION_COMPLETION_META, + // Completion context presets + FILTER_COMPLETION_META, + GROUP_EXPRESSION_COMPLETION_META, + META_ACCUMULATOR, + META_BSON, + META_EXPR_ARITH, + META_EXPR_ARRAY, + META_EXPR_BITWISE, + META_EXPR_BOOL, + META_EXPR_COMPARISON, + META_EXPR_CONDITIONAL, + META_EXPR_DATASIZE, + META_EXPR_DATE, + META_EXPR_LITERAL, + META_EXPR_MISC, + META_EXPR_OBJECT, + META_EXPR_SET, + META_EXPR_STRING, + META_EXPR_TIMESTAMP, + META_EXPR_TRIG, + META_EXPR_TYPE, + META_EXPR_VARIABLE, + META_FIELD_IDENTIFIER, + // Individual meta tags + META_QUERY, + META_QUERY_ARRAY, + META_QUERY_BITWISE, + META_QUERY_COMPARISON, + META_QUERY_ELEMENT, + META_QUERY_EVALUATION, + META_QUERY_GEOSPATIAL, + META_QUERY_LOGICAL, + META_QUERY_MISC, + META_QUERY_PROJECTION, + META_STAGE, + META_UPDATE, + META_UPDATE_ARRAY, + META_UPDATE_BITWISE, + META_UPDATE_FIELD, + META_VARIABLE, + META_WINDOW, + PROJECTION_COMPLETION_META, + STAGE_COMPLETION_META, + UPDATE_COMPLETION_META, + WINDOW_COMPLETION_META, +} from './metaTags'; + +// -- Consumer API -- +export { getAllCompletions, getFilteredCompletions } from './getFilteredCompletions'; + +// -- Documentation URL helpers -- +export { getDocBase, getDocLink } from './docLinks'; + +// -- Operator data modules -- +import { loadAccumulators } from './accumulators'; +import { loadBsonConstructors } from './bsonConstructors'; +import { loadExpressionOperators } from './expressionOperators'; +import { loadQueryOperators } from './queryOperators'; +import { loadStages } from './stages'; +import { loadSystemVariables } from './systemVariables'; +import { loadUpdateOperators } from './updateOperators'; +import { loadWindowOperators } from './windowOperators'; + +/** + * Loads all built-in operator data into the registry. 
+ * + * Called automatically at module import time so that consumers using + * `import { getFilteredCompletions } from '@vscode-documentdb/documentdb-constants'` + * get all operators without any additional setup. + * + * Can also be called explicitly (e.g. in workers or tests) — repeated calls are + * idempotent because entries that are already registered are skipped. + */ +export function loadOperators(): void { + loadAccumulators(); + loadBsonConstructors(); + loadExpressionOperators(); + loadQueryOperators(); + loadStages(); + loadSystemVariables(); + loadUpdateOperators(); + loadWindowOperators(); +} + +// Auto-load on module import so the public API works out of the box. +loadOperators(); diff --git a/packages/documentdb-constants/src/metaTags.ts b/packages/documentdb-constants/src/metaTags.ts new file mode 100644 index 000000000..7a4dd7add --- /dev/null +++ b/packages/documentdb-constants/src/metaTags.ts @@ -0,0 +1,130 @@ +/*--------------------------------------------------------------------------------------------- + * Copyright (c) Microsoft Corporation. All rights reserved. + * Licensed under the MIT License. See License.txt in the project root for license information. + *--------------------------------------------------------------------------------------------*/ + +/** + * Meta tag constants for categorizing operators in the DocumentDB constants package. + * + * Tags use a hierarchical scheme where prefix matching is supported: + * filtering by 'query' matches 'query', 'query:comparison', 'query:logical', etc.
+ */ + +// -- Query operators -- +export const META_QUERY = 'query' as const; +export const META_QUERY_COMPARISON = 'query:comparison' as const; +export const META_QUERY_LOGICAL = 'query:logical' as const; +export const META_QUERY_ELEMENT = 'query:element' as const; +export const META_QUERY_EVALUATION = 'query:evaluation' as const; +export const META_QUERY_ARRAY = 'query:array' as const; +export const META_QUERY_BITWISE = 'query:bitwise' as const; +export const META_QUERY_GEOSPATIAL = 'query:geospatial' as const; +export const META_QUERY_PROJECTION = 'query:projection' as const; +export const META_QUERY_MISC = 'query:misc' as const; + +// -- Update operators -- +export const META_UPDATE = 'update' as const; +export const META_UPDATE_FIELD = 'update:field' as const; +export const META_UPDATE_ARRAY = 'update:array' as const; +export const META_UPDATE_BITWISE = 'update:bitwise' as const; + +// -- Aggregation pipeline -- +export const META_STAGE = 'stage' as const; +export const META_ACCUMULATOR = 'accumulator' as const; + +// -- Expression operators -- +export const META_EXPR_ARITH = 'expr:arith' as const; +export const META_EXPR_ARRAY = 'expr:array' as const; +export const META_EXPR_BOOL = 'expr:bool' as const; +export const META_EXPR_COMPARISON = 'expr:comparison' as const; +export const META_EXPR_CONDITIONAL = 'expr:conditional' as const; +export const META_EXPR_DATE = 'expr:date' as const; +export const META_EXPR_OBJECT = 'expr:object' as const; +export const META_EXPR_SET = 'expr:set' as const; +export const META_EXPR_STRING = 'expr:string' as const; +export const META_EXPR_TRIG = 'expr:trig' as const; +export const META_EXPR_TYPE = 'expr:type' as const; +export const META_EXPR_DATASIZE = 'expr:datasize' as const; +export const META_EXPR_TIMESTAMP = 'expr:timestamp' as const; +export const META_EXPR_BITWISE = 'expr:bitwise' as const; +export const META_EXPR_LITERAL = 'expr:literal' as const; +export const META_EXPR_MISC = 'expr:misc' as const; +export const 
META_EXPR_VARIABLE = 'expr:variable' as const; + +// -- Window operators -- +export const META_WINDOW = 'window' as const; + +// -- BSON constructors -- +export const META_BSON = 'bson' as const; + +// -- System variables -- +export const META_VARIABLE = 'variable' as const; + +// -- Schema-injected field names (not static — provided at runtime) -- +export const META_FIELD_IDENTIFIER = 'field:identifier' as const; + +/** + * All known meta tag values for validation purposes. + */ +export const ALL_META_TAGS = [ + META_QUERY, + META_QUERY_COMPARISON, + META_QUERY_LOGICAL, + META_QUERY_ELEMENT, + META_QUERY_EVALUATION, + META_QUERY_ARRAY, + META_QUERY_BITWISE, + META_QUERY_GEOSPATIAL, + META_QUERY_PROJECTION, + META_QUERY_MISC, + META_UPDATE, + META_UPDATE_FIELD, + META_UPDATE_ARRAY, + META_UPDATE_BITWISE, + META_STAGE, + META_ACCUMULATOR, + META_EXPR_ARITH, + META_EXPR_ARRAY, + META_EXPR_BOOL, + META_EXPR_COMPARISON, + META_EXPR_CONDITIONAL, + META_EXPR_DATE, + META_EXPR_OBJECT, + META_EXPR_SET, + META_EXPR_STRING, + META_EXPR_TRIG, + META_EXPR_TYPE, + META_EXPR_DATASIZE, + META_EXPR_TIMESTAMP, + META_EXPR_BITWISE, + META_EXPR_LITERAL, + META_EXPR_MISC, + META_EXPR_VARIABLE, + META_WINDOW, + META_BSON, + META_VARIABLE, + META_FIELD_IDENTIFIER, +] as const; + +// -- Completion context presets -- + +/** Query filter contexts: find filter bar, $match stage body */ +export const FILTER_COMPLETION_META: readonly string[] = ['query', 'bson', 'variable']; + +/** Projection/sort contexts: field names + projection operators + BSON constructors */ +export const PROJECTION_COMPLETION_META: readonly string[] = ['field:identifier', 'query:projection', 'bson']; + +/** $group/$project/$addFields stage body: expressions + accumulators */ +export const GROUP_EXPRESSION_COMPLETION_META: readonly string[] = ['expr', 'accumulator', 'bson', 'variable']; + +/** Other stage bodies: expressions only (no accumulators) */ +export const EXPRESSION_COMPLETION_META: readonly string[] = ['expr', 'bson', 
'variable']; + +/** Update operations: update operators */ +export const UPDATE_COMPLETION_META: readonly string[] = ['update']; + +/** Top-level aggregation pipeline: stage names */ +export const STAGE_COMPLETION_META: readonly string[] = ['stage']; + +/** Window fields: window operators + accumulators + expressions */ +export const WINDOW_COMPLETION_META: readonly string[] = ['window', 'accumulator', 'expr', 'bson', 'variable']; diff --git a/packages/documentdb-constants/src/operatorReference.test.ts b/packages/documentdb-constants/src/operatorReference.test.ts new file mode 100644 index 000000000..4d4a8d853 --- /dev/null +++ b/packages/documentdb-constants/src/operatorReference.test.ts @@ -0,0 +1,359 @@ +/*--------------------------------------------------------------------------------------------- + * Copyright (c) Microsoft Corporation. All rights reserved. + * Licensed under the MIT License. See License.txt in the project root for license information. + *--------------------------------------------------------------------------------------------*/ + +/** + * Dump-vs-implementation verification test. + * + * Ensures the TypeScript operator implementation always matches the + * resource dump (scraped/operator-reference.md). This test is the + * enforcing contract between "what does DocumentDB support?" (the dump) + * and "what does our code provide?" (the implementation). + * + * See Β§2.3.3 of docs/plan/03-documentdb-constants.md for design rationale. 
+ */ + +import * as fs from 'fs'; +import * as path from 'path'; +import { getAllCompletions } from './index'; +import { parseOperatorReference, type ReferenceOperator } from './parseOperatorReference'; + +const dumpPath = path.join(__dirname, '..', 'resources', 'scraped', 'operator-reference.md'); +const dumpContent = fs.readFileSync(dumpPath, 'utf-8'); +const parsed = parseOperatorReference(dumpContent); +const referenceOperators = parsed.operators; +const notListedOperators = parsed.notListed; +const implementedOperators = getAllCompletions(); + +/** + * Category-to-meta mapping. Maps dump category names to the meta tags + * used in the implementation. Some dump categories map to the same meta + * tag (e.g., both accumulator categories map to 'accumulator'). + */ +const CATEGORY_TO_META: Record = { + 'Comparison Query Operators': 'query:comparison', + 'Logical Query Operators': 'query:logical', + 'Element Query Operators': 'query:element', + 'Evaluation Query Operators': 'query:evaluation', + 'Geospatial Operators': 'query:geospatial', + 'Array Query Operators': 'query:array', + 'Bitwise Query Operators': 'query:bitwise', + 'Projection Operators': 'query:projection', + 'Miscellaneous Query Operators': 'query:misc', + 'Field Update Operators': 'update:field', + 'Array Update Operators': 'update:array', + 'Bitwise Update Operators': 'update:bitwise', + 'Arithmetic Expression Operators': 'expr:arith', + 'Array Expression Operators': 'expr:array', + 'Bitwise Operators': 'expr:bitwise', + 'Boolean Expression Operators': 'expr:bool', + 'Comparison Expression Operators': 'expr:comparison', + 'Data Size Operators': 'expr:datasize', + 'Date Expression Operators': 'expr:date', + 'Literal Expression Operator': 'expr:literal', + 'Miscellaneous Operators': 'expr:misc', + 'Object Expression Operators': 'expr:object', + 'Set Expression Operators': 'expr:set', + 'String Expression Operators': 'expr:string', + 'Timestamp Expression Operators': 'expr:timestamp', + 'Trigonometry 
Expression Operators': 'expr:trig', + 'Type Expression Operators': 'expr:type', + 'Accumulators ($group, $bucket, $bucketAuto, $setWindowFields)': 'accumulator', + 'Accumulators (in Other Stages)': 'accumulator', + 'Variable Expression Operators': 'expr:variable', + 'Window Operators': 'window', + 'Conditional Expression Operators': 'expr:conditional', + 'Aggregation Pipeline Stages': 'stage', + 'Variables in Aggregation Expressions': 'variable', +}; + +describe('operator reference verification', () => { + test('dump file exists and is parseable', () => { + expect(dumpContent.length).toBeGreaterThan(1000); + expect(referenceOperators.length).toBeGreaterThan(250); + }); + + test('every listed operator in the dump has an implementation entry', () => { + const implementedValues = new Set(implementedOperators.map((op) => op.value)); + const missing: string[] = []; + + for (const ref of referenceOperators) { + // Some operators appear in multiple dump categories (e.g., $objectToArray + // in both "Array Expression" and "Object Expression"). The implementation + // only needs one entry per (value, meta) pair β€” check by value. 
+ if (!implementedValues.has(ref.operator)) { + missing.push(`${ref.operator} (${ref.category})`); + } + } + + expect(missing).toEqual([]); + }); + + test('no extra operators in implementation beyond the dump (excluding BSON/variables)', () => { + // Build a set of all operator values from the dump + const dumpValues = new Set(referenceOperators.map((r) => r.operator)); + + // Filter implementation entries: exclude BSON constructors and system variables + // (these are hand-authored, not from the compatibility page dump) + const extras = implementedOperators.filter( + (op) => !op.meta.startsWith('bson') && !op.meta.startsWith('variable') && !dumpValues.has(op.value), + ); + + expect(extras.map((e) => `${e.value} (${e.meta})`)).toEqual([]); + }); + + test('descriptions match the dump (detect drift)', () => { + const mismatches: string[] = []; + + for (const ref of referenceOperators) { + if (!ref.description) { + continue; // some operators have empty descriptions (missing upstream docs) + } + + // Find implementation entry matching this operator + category's meta + const expectedMeta = CATEGORY_TO_META[ref.category]; + if (!expectedMeta) { + continue; // unknown category + } + + const impl = implementedOperators.find((op) => op.value === ref.operator && op.meta === expectedMeta); + + if (impl && impl.description !== ref.description) { + mismatches.push( + `${ref.operator} (${ref.category}): expected "${ref.description}", got "${impl.description}"`, + ); + } + } + + expect(mismatches).toEqual([]); + }); + + test('not-listed operators are NOT in the implementation', () => { + const leaked: string[] = []; + + for (const nl of notListedOperators) { + // Check the exact meta category from the dump + const expectedMeta = CATEGORY_TO_META[nl.category]; + if (!expectedMeta) { + continue; + } + + const found = implementedOperators.find((op) => op.value === nl.operator && op.meta === expectedMeta); + + if (found) { + leaked.push(`${nl.operator} (${nl.category}) β€” 
${nl.reason}`); + } + } + + expect(leaked).toEqual([]); + }); + + test('all dump categories have a known meta mapping', () => { + const categories = new Set(referenceOperators.map((r) => r.category)); + const unmapped = [...categories].filter((c) => !CATEGORY_TO_META[c]); + expect(unmapped).toEqual([]); + }); + + test('reference parser found the expected number of not-listed operators', () => { + // The plan lists 16 not-listed operators (Β§2.1) + expect(notListedOperators.length).toBeGreaterThanOrEqual(14); + expect(notListedOperators.length).toBeLessThanOrEqual(20); + }); +}); + +// --------------------------------------------------------------------------- +// Merged dump + overrides verification +// +// The generator (scripts/generate-from-reference.ts) merges the scraped dump +// with manual overrides. These tests verify the implementation matches the +// MERGED result β€” catching scenarios where: +// - Someone hand-edits a generated .ts file instead of using overrides +// - Someone adds an override but forgets to run `npm run generate` +// - Someone runs `npm run scrape` but forgets `npm run generate` +// - The override file is accidentally truncated +// --------------------------------------------------------------------------- + +const overridesPath = path.join(__dirname, '..', 'resources', 'overrides', 'operator-overrides.md'); +const overridesContent = fs.readFileSync(overridesPath, 'utf-8'); +const parsedOverrides = parseOperatorReference(overridesContent); +const overrideOperators = parsedOverrides.operators; + +/** + * Merges dump and override operators. For each (operator, category) pair, + * the override description wins if non-empty; otherwise the dump description + * is used. This mirrors what the generator does. 
+ */ +function getMergedOperators(): readonly ReferenceOperator[] { + // Build a lookup: "operator|category" β†’ override entry + const overrideLookup = new Map(); + for (const ov of overrideOperators) { + overrideLookup.set(`${ov.operator}|${ov.category}`, ov); + } + + return referenceOperators.map((ref) => { + const override = overrideLookup.get(`${ref.operator}|${ref.category}`); + if (!override) { + return ref; + } + return { + operator: ref.operator, + category: ref.category, + description: override.description || ref.description, + docLink: override.docLink || ref.docLink, + }; + }); +} + +const mergedOperators = getMergedOperators(); + +describe('merged dump + overrides verification', () => { + test('overrides file exists and has entries', () => { + expect(overridesContent.length).toBeGreaterThan(100); + expect(overrideOperators.length).toBeGreaterThan(0); + }); + + test('override count is within expected range (detect truncation)', () => { + // Currently 56 overrides. Allow some flex for additions/removals, + // but catch catastrophic truncation (e.g., file emptied to <10). 
+ expect(overrideOperators.length).toBeGreaterThanOrEqual(40); + expect(overrideOperators.length).toBeLessThanOrEqual(80); + }); + + test('every override targets an operator that exists in the dump', () => { + const dumpKeys = new Set(referenceOperators.map((r) => `${r.operator}|${r.category}`)); + const orphans: string[] = []; + + for (const ov of overrideOperators) { + if (!dumpKeys.has(`${ov.operator}|${ov.category}`)) { + orphans.push(`${ov.operator} (${ov.category})`); + } + } + + expect(orphans).toEqual([]); + }); + + test('descriptions match the merged dump+overrides (detect hand-edits and stale generates)', () => { + const mismatches: string[] = []; + + for (const merged of mergedOperators) { + if (!merged.description) { + continue; // operator with no description in either dump or override + } + + const expectedMeta = CATEGORY_TO_META[merged.category]; + if (!expectedMeta) { + continue; + } + + const impl = implementedOperators.find((op) => op.value === merged.operator && op.meta === expectedMeta); + + if (impl && impl.description !== merged.description) { + mismatches.push( + `${merged.operator} (${merged.category}): ` + + `expected "${merged.description}", got "${impl.description}"`, + ); + } + } + + expect(mismatches).toEqual([]); + }); + + test('doc links from dump match implementation links for single-category operators', () => { + // Many operators appear in multiple dump categories (e.g., $eq in both + // "Comparison Query" and "Comparison Expression"). The scraper finds the + // doc page under whichever category directory it tries first, while the + // implementation generates URLs from each operator's meta tag. For + // cross-category operators, the dump link and impl link will point to + // different (but both valid) doc directories. + // + // This test only compares links for operators that appear in exactly + // one dump category — no cross-category ambiguity.
+ + // Known scraper mismatches: the scraper's global index fallback found + // these operators' doc pages under a different directory than their + // category implies. The implementation link is correct; the dump link is + // a scraper artifact. Update this set when refreshing the dump. + // + // NOTE: After fixing META_TO_DOC_DIR in docLinks.ts (expr:bool β†’ logical-query, + // expr:comparison β†’ comparison-query) and adding smart link emission in the + // generator (hardcoded URLs for cross-category fallbacks), this set should + // remain empty unless new scraper mismatches are discovered. + const KNOWN_SCRAPER_MISMATCHES = new Set([]); + + // Build a set of operators that appear in more than one dump category + const operatorCategories = new Map>(); + for (const ref of referenceOperators) { + const cats = operatorCategories.get(ref.operator) ?? new Set(); + cats.add(ref.category); + operatorCategories.set(ref.operator, cats); + } + + const mismatches: string[] = []; + + for (const ref of referenceOperators) { + if (!ref.docLink) { + continue; + } + + // Skip cross-category operators β€” their dump link may come from + // a different category than the implementation's meta tag + const cats = operatorCategories.get(ref.operator); + if (cats && cats.size > 1) { + continue; + } + + // Skip known scraper mismatches (documented above) + if (KNOWN_SCRAPER_MISMATCHES.has(ref.operator)) { + continue; + } + + const expectedMeta = CATEGORY_TO_META[ref.category]; + if (!expectedMeta) { + continue; + } + + const impl = implementedOperators.find((op) => op.value === ref.operator && op.meta === expectedMeta); + + if (!impl || !impl.link) { + continue; + } + + const dumpLink = ref.docLink.toLowerCase(); + const implLink = impl.link.toLowerCase(); + + if (dumpLink !== implLink) { + mismatches.push(`${ref.operator} (${ref.category}): ` + `dump="${ref.docLink}", impl="${impl.link}"`); + } + } + + expect(mismatches).toEqual([]); + }); + + test('every override with a description 
was applied (not silently ignored)', () => { + const unapplied: string[] = []; + + for (const ov of overrideOperators) { + if (!ov.description) { + continue; + } + + const expectedMeta = CATEGORY_TO_META[ov.category]; + if (!expectedMeta) { + continue; + } + + const impl = implementedOperators.find((op) => op.value === ov.operator && op.meta === expectedMeta); + + if (!impl) { + unapplied.push(`${ov.operator} (${ov.category}): no implementation entry found`); + } else if (impl.description !== ov.description) { + unapplied.push( + `${ov.operator} (${ov.category}): override="${ov.description}", ` + `impl="${impl.description}"`, + ); + } + } + + expect(unapplied).toEqual([]); + }); +}); diff --git a/packages/documentdb-constants/src/parseOperatorReference.test.ts b/packages/documentdb-constants/src/parseOperatorReference.test.ts new file mode 100644 index 000000000..4ebf5138a --- /dev/null +++ b/packages/documentdb-constants/src/parseOperatorReference.test.ts @@ -0,0 +1,156 @@ +/*--------------------------------------------------------------------------------------------- + * Copyright (c) Microsoft Corporation. All rights reserved. + * Licensed under the MIT License. See License.txt in the project root for license information. + *--------------------------------------------------------------------------------------------*/ + +/** + * Unit tests for the parseOperatorReference helper. 
+ */ + +import { parseOperatorReference } from './parseOperatorReference'; + +describe('parseOperatorReference', () => { + test('parses a minimal dump with one category and one operator', () => { + const content = `# DocumentDB Operator Reference + +## Summary + +| Category | Listed | Total | +| --- | --- | --- | +| Test Category | 1 | 1 | + +## Test Category + +### $testOp + +- **Description:** A test operator +- **Doc Link:** https://example.com/test + +## Not Listed + +- **$excluded** (Test Category) β€” Not supported +`; + const result = parseOperatorReference(content); + expect(result.operators).toHaveLength(1); + expect(result.operators[0]).toEqual({ + operator: '$testOp', + category: 'Test Category', + description: 'A test operator', + docLink: 'https://example.com/test', + }); + expect(result.notListed).toHaveLength(1); + expect(result.notListed[0]).toEqual({ + operator: '$excluded', + category: 'Test Category', + reason: 'Not supported', + }); + }); + + test('handles operators with empty description and doc link', () => { + const content = `## Variables + +### $$NOW + +### $$ROOT +`; + const result = parseOperatorReference(content); + expect(result.operators).toHaveLength(2); + expect(result.operators[0]).toEqual({ + operator: '$$NOW', + category: 'Variables', + description: '', + docLink: '', + }); + expect(result.operators[1]).toEqual({ + operator: '$$ROOT', + category: 'Variables', + description: '', + docLink: '', + }); + }); + + test('handles operators with syntax blocks (ignores syntax)', () => { + const content = `## Comparison Query Operators + +### $eq + +- **Description:** Matches values equal to a specified value +- **Syntax:** + +\`\`\`javascript +{ field: { $eq: value } } +\`\`\` + +- **Doc Link:** https://example.com/$eq + +### $gt + +- **Description:** Matches values greater than a specified value +- **Doc Link:** https://example.com/$gt +`; + const result = parseOperatorReference(content); + expect(result.operators).toHaveLength(2); + 
expect(result.operators[0].operator).toBe('$eq'); + expect(result.operators[0].description).toBe('Matches values equal to a specified value'); + expect(result.operators[1].operator).toBe('$gt'); + }); + + test('skips operators in the Summary section', () => { + const content = `## Summary + +| Category | Listed | Total | +| --- | --- | --- | +| Test | 2 | 3 | + +## Test Category + +### $realOp + +- **Description:** I am real +`; + const result = parseOperatorReference(content); + expect(result.operators).toHaveLength(1); + expect(result.operators[0].operator).toBe('$realOp'); + }); + + test('multiple not-listed entries are parsed correctly', () => { + const content = `## Not Listed + +Operators below are not in scope. + +- **$where** (Evaluation Query) β€” Deprecated in Mongo version 8.0 +- **$meta** (Projection) β€” Not in scope +- **$accumulator** (Custom Aggregation) β€” Deprecated in Mongo version 8.0 +`; + const result = parseOperatorReference(content); + expect(result.notListed).toHaveLength(3); + expect(result.notListed[0].operator).toBe('$where'); + expect(result.notListed[0].reason).toBe('Deprecated in Mongo version 8.0'); + expect(result.notListed[1].operator).toBe('$meta'); + expect(result.notListed[2].operator).toBe('$accumulator'); + }); + + test('handles multiple categories', () => { + const content = `## Cat A + +### $a1 + +- **Description:** Operator a1 + +### $a2 + +- **Description:** Operator a2 + +## Cat B + +### $b1 + +- **Description:** Operator b1 +`; + const result = parseOperatorReference(content); + expect(result.operators).toHaveLength(3); + expect(result.operators[0].category).toBe('Cat A'); + expect(result.operators[1].category).toBe('Cat A'); + expect(result.operators[2].category).toBe('Cat B'); + }); +}); diff --git a/packages/documentdb-constants/src/parseOperatorReference.ts b/packages/documentdb-constants/src/parseOperatorReference.ts new file mode 100644 index 000000000..e1179c336 --- /dev/null +++ 
// b/packages/documentdb-constants/src/parseOperatorReference.ts @@ -0,0 +1,160 @@
/*---------------------------------------------------------------------------------------------
 *  Copyright (c) Microsoft Corporation. All rights reserved.
 *  Licensed under the MIT License. See License.txt in the project root for license information.
 *--------------------------------------------------------------------------------------------*/

/**
 * Parses the scraped/operator-reference.md dump file into structured data
 * for use in the operatorReference verification test.
 *
 * The dump format uses structured headings:
 *   ## Category Name                 - category section
 *   ### $operatorName                - operator heading
 *   - **Description:** text          - operator description
 *   - **Doc Link:** url              - documentation URL ("none" means no link)
 *
 *   ## Summary                       - overview table, skipped entirely
 *
 *   ## Not Listed                    - excluded operators section
 *   - **$operator** (Category) <em dash, U+2014> Reason
 */

/**
 * Represents a single operator entry parsed from the reference dump.
 */
export interface ReferenceOperator {
    /** Operator name, e.g. "$eq", "$$NOW" */
    readonly operator: string;
    /** Category from the dump, e.g. "Comparison Query Operators" */
    readonly category: string;
    /** Description from the dump (may be empty) */
    readonly description: string;
    /** Documentation URL from the dump (may be empty) */
    readonly docLink: string;
}

/**
 * Represents an operator excluded from the package scope.
 */
export interface NotListedOperator {
    /** Operator name, e.g. "$where", "$meta" */
    readonly operator: string;
    /** Category from the dump */
    readonly category: string;
    /** Reason for exclusion */
    readonly reason: string;
}

/**
 * Complete parsed result from the reference dump.
 */
export interface ParsedReference {
    /** All listed (in-scope) operators */
    readonly operators: readonly ReferenceOperator[];
    /** All not-listed (excluded) operators */
    readonly notListed: readonly NotListedOperator[];
}

// Line patterns are compiled once at module load instead of once per input line.
// All of them are applied to an already-trimmed line and carry no `g`/`y` flag,
// so `exec` keeps no state between calls.

/** `## Heading` - group 1 is the heading text. */
const CATEGORY_HEADING = /^## (.+)$/;
/** `### $operator` - group 1 is the operator name. */
const OPERATOR_HEADING = /^### (.+)$/;
/** `- **Description:** text` - group 1 is the description. */
const DESCRIPTION_LINE = /^- \*\*Description:\*\* (.+)$/;
/** `- **Doc Link:** url` - group 1 is the URL, or the literal `none`. */
const DOC_LINK_LINE = /^- \*\*Doc Link:\*\* (.+)$/;
/**
 * `- **$operator** (Category) <dash> Reason` - groups: operator, category, reason.
 *
 * The separator is an em dash. It is written as the escape \u2014 so the pattern
 * does not depend on how this source file is encoded; the second alternative is
 * the three-character sequence the same dash turns into when its UTF-8 bytes are
 * mis-decoded, which is the form present in some copies of the fixtures.
 */
const NOT_LISTED_LINE = /^- \*\*(.+?)\*\* \((.+?)\) (?:\u2014|β€”) (.+)$/;

/** Which kind of `##` section the parser is currently inside. */
type Section = 'operators' | 'summary' | 'notListed';

/** Operator being accumulated until the next heading (or end of input). */
interface PendingOperator {
    operator: string;
    description: string;
    docLink: string;
}

/**
 * Parses the scraped/operator-reference.md content into structured data.
 *
 * Rules, applied line by line after trimming (so CRLF input is fine):
 * - `## Summary` starts a section whose content is ignored.
 * - `## Not Listed` starts a section where only `- **$op** (Category) <em dash> Reason`
 *   lines are collected (into `notListed`); anything else there is ignored.
 * - Any other `## X` starts category `X`; each `### name` below it becomes an
 *   operator whose description / doc link default to `''`. A doc link of
 *   `none` is normalized to `''`.
 * - `### name` headings that appear before the first category are dropped.
 *
 * @param content - the full Markdown content of the dump file
 * @returns parsed reference data
 */
export function parseOperatorReference(content: string): ParsedReference {
    const operators: ReferenceOperator[] = [];
    const notListed: NotListedOperator[] = [];

    let section: Section = 'operators';
    let category = '';
    let pending: PendingOperator | undefined;

    // Emits the operator collected so far (if it belongs to a real category) and resets.
    const flushPending = (): void => {
        if (pending !== undefined && category !== '' && section === 'operators') {
            operators.push({
                operator: pending.operator,
                category,
                description: pending.description,
                docLink: pending.docLink,
            });
        }
        pending = undefined;
    };

    for (const rawLine of content.split('\n')) {
        const line = rawLine.trim();

        // A level-2 heading always closes the operator in progress and switches section.
        const categoryHeading = CATEGORY_HEADING.exec(line);
        if (categoryHeading) {
            // Flush first: it must see the section/category of the block being closed.
            flushPending();
            const [, rawTitle = ''] = categoryHeading;
            const title = rawTitle.trim();
            if (title === 'Summary') {
                section = 'summary';
                category = '';
            } else if (title === 'Not Listed') {
                section = 'notListed';
                category = '';
            } else {
                section = 'operators';
                category = title;
            }
            continue;
        }

        if (section === 'summary') {
            continue;
        }

        if (section === 'notListed') {
            const entry = NOT_LISTED_LINE.exec(line);
            if (entry) {
                const [, name = '', group = '', reason = ''] = entry;
                notListed.push({ operator: name, category: group, reason });
            }
            continue;
        }

        const operatorHeading = OPERATOR_HEADING.exec(line);
        if (operatorHeading) {
            flushPending();
            const [, rawName = ''] = operatorHeading;
            pending = { operator: rawName.trim(), description: '', docLink: '' };
            continue;
        }

        // Attribute lines only mean something while an operator is open.
        if (pending === undefined) {
            continue;
        }

        const description = DESCRIPTION_LINE.exec(line);
        if (description) {
            const [, text = ''] = description;
            pending.description = text.trim();
            continue;
        }

        const docLink = DOC_LINK_LINE.exec(line);
        if (docLink) {
            const [, rawUrl = ''] = docLink;
            const url = rawUrl.trim();
            // 'none' means there is no page at the expected location.
            pending.docLink = url === 'none' ? '' : url;
        }
    }

    // The last operator has no following heading to trigger a flush.
    flushPending();

    return { operators, notListed };
}

// diff --git a/packages/documentdb-constants/src/queryOperators.ts b/packages/documentdb-constants/src/queryOperators.ts
// new file mode 100644
// index 000000000..8390356a6
// --- /dev/null
// +++ b/packages/documentdb-constants/src/queryOperators.ts
// @@ -0,0 +1,458 @@
/*---------------------------------------------------------------------------------------------
 *  Copyright (c) Microsoft Corporation. All rights reserved.
 *  Licensed under the MIT License. See License.txt in the project root for license information.
 *--------------------------------------------------------------------------------------------*/

// AUTO-GENERATED — DO NOT EDIT BY HAND
//
// Generated by: npm run generate (scripts/generate-from-reference.ts)
// Sources: resources/scraped/operator-reference.md
//          resources/overrides/operator-overrides.md
//          resources/overrides/operator-snippets.md
//
// To change operator data, edit the overrides/snippets files and re-run the generator.
+ +import { getDocLink } from './docLinks'; +import { registerOperators } from './getFilteredCompletions'; +import { + META_QUERY_ARRAY, + META_QUERY_BITWISE, + META_QUERY_COMPARISON, + META_QUERY_ELEMENT, + META_QUERY_EVALUATION, + META_QUERY_GEOSPATIAL, + META_QUERY_LOGICAL, + META_QUERY_MISC, + META_QUERY_PROJECTION, +} from './metaTags'; +import { type OperatorEntry } from './types'; + +// --------------------------------------------------------------------------- +// Comparison Query Operators +// --------------------------------------------------------------------------- + +const comparisonQueryOperators: readonly OperatorEntry[] = [ + { + value: '$eq', + meta: META_QUERY_COMPARISON, + description: 'The $eq query operator compares the value of a field to a specified value', + snippet: '{ $eq: ${1:value} }', + link: getDocLink('$eq', META_QUERY_COMPARISON), + }, + { + value: '$gt', + meta: META_QUERY_COMPARISON, + description: + 'The $gt query operator retrieves documents where the value of a field is greater than a specified value', + snippet: '{ $gt: ${1:value} }', + link: getDocLink('$gt', META_QUERY_COMPARISON), + }, + { + value: '$gte', + meta: META_QUERY_COMPARISON, + description: + 'The $gte operator retrieves documents where the value of a field is greater than or equal to a specified value', + snippet: '{ $gte: ${1:value} }', + link: getDocLink('$gte', META_QUERY_COMPARISON), + }, + { + value: '$in', + meta: META_QUERY_COMPARISON, + description: 'The $in operator matches value of a field against an array of specified values', + snippet: '{ $in: [${1:value}] }', + link: getDocLink('$in', META_QUERY_COMPARISON), + }, + { + value: '$lt', + meta: META_QUERY_COMPARISON, + description: 'The $lt operator retrieves documents where the value of field is less than a specified value', + snippet: '{ $lt: ${1:value} }', + link: getDocLink('$lt', META_QUERY_COMPARISON), + }, + { + value: '$lte', + meta: META_QUERY_COMPARISON, + description: + 'The $lte operator 
retrieves documents where the value of a field is less than or equal to a specified value', + snippet: '{ $lte: ${1:value} }', + link: getDocLink('$lte', META_QUERY_COMPARISON), + }, + { + value: '$ne', + meta: META_QUERY_COMPARISON, + description: "The $ne operator retrieves documents where the value of a field doesn't equal a specified value", + snippet: '{ $ne: ${1:value} }', + link: getDocLink('$ne', META_QUERY_COMPARISON), + }, + { + value: '$nin', + meta: META_QUERY_COMPARISON, + description: "The $nin operator retrieves documents where the value of a field doesn't match a list of values", + snippet: '{ $nin: [${1:value}] }', + link: getDocLink('$nin', META_QUERY_COMPARISON), + }, +]; + +// --------------------------------------------------------------------------- +// Logical Query Operators +// --------------------------------------------------------------------------- + +const logicalQueryOperators: readonly OperatorEntry[] = [ + { + value: '$and', + meta: META_QUERY_LOGICAL, + description: + 'The $and operator joins multiple query clauses and returns documents that match all specified conditions.', + snippet: '{ $and: [{ ${1:expression} }] }', + link: getDocLink('$and', META_QUERY_LOGICAL), + }, + { + value: '$not', + meta: META_QUERY_LOGICAL, + description: + "The $not operator performs a logical NOT operation on a specified expression, selecting documents that don't match the expression.", + snippet: '{ $not: { ${1:expression} } }', + link: getDocLink('$not', META_QUERY_LOGICAL), + }, + { + value: '$nor', + meta: META_QUERY_LOGICAL, + description: + 'The $nor operator performs a logical NOR on an array of expressions and retrieves documents that fail all the conditions.', + snippet: '{ $nor: [{ ${1:expression} }] }', + link: getDocLink('$nor', META_QUERY_LOGICAL), + }, + { + value: '$or', + meta: META_QUERY_LOGICAL, + description: + 'The $or operator joins query clauses with a logical OR and returns documents that match at least one of the specified 
conditions.', + snippet: '{ $or: [{ ${1:expression} }] }', + link: getDocLink('$or', META_QUERY_LOGICAL), + }, +]; + +// --------------------------------------------------------------------------- +// Element Query Operators +// --------------------------------------------------------------------------- + +const elementQueryOperators: readonly OperatorEntry[] = [ + { + value: '$exists', + meta: META_QUERY_ELEMENT, + description: + 'The $exists operator retrieves documents that contain the specified field in their document structure.', + snippet: '{ $exists: ${1:true} }', + link: getDocLink('$exists', META_QUERY_ELEMENT), + }, + { + value: '$type', + meta: META_QUERY_ELEMENT, + description: 'The $type operator retrieves documents if the chosen field is of the specified type.', + snippet: '{ $type: "${1:type}" }', + link: getDocLink('$type', META_QUERY_ELEMENT), + }, +]; + +// --------------------------------------------------------------------------- +// Evaluation Query Operators +// --------------------------------------------------------------------------- + +const evaluationQueryOperators: readonly OperatorEntry[] = [ + { + value: '$expr', + meta: META_QUERY_EVALUATION, + description: + 'The $expr operator allows the use of aggregation expressions within the query language, enabling complex field comparisons and calculations.', + snippet: '{ $expr: { ${1:expression} } }', + link: getDocLink('$expr', META_QUERY_EVALUATION), + }, + { + value: '$jsonSchema', + meta: META_QUERY_EVALUATION, + description: + 'The $jsonSchema operator validates documents against a JSON Schema definition for data validation and structure enforcement. 
Discover supported features and limitations.', + snippet: '{ $jsonSchema: { bsonType: "${1:object}" } }', + link: getDocLink('$jsonSchema', META_QUERY_EVALUATION), + }, + { + value: '$mod', + meta: META_QUERY_EVALUATION, + description: + 'The $mod operator performs a modulo operation on the value of a field and selects documents with a specified result.', + snippet: '{ $mod: [${1:divisor}, ${2:remainder}] }', + link: getDocLink('$mod', META_QUERY_EVALUATION), + }, + { + value: '$regex', + meta: META_QUERY_EVALUATION, + description: + 'The $regex operator provides regular expression capabilities for pattern matching in queries, allowing flexible string matching and searching.', + snippet: '{ $regex: /${1:pattern}/ }', + link: getDocLink('$regex', META_QUERY_EVALUATION), + applicableBsonTypes: ['string'], + }, + { + value: '$text', + meta: META_QUERY_EVALUATION, + description: + 'The $text operator performs text search on the content of indexed string fields, enabling full-text search capabilities.', + snippet: '{ $text: { \\$search: "${1:text}" } }', + link: getDocLink('$text', META_QUERY_EVALUATION), + applicableBsonTypes: ['string'], + }, +]; + +// --------------------------------------------------------------------------- +// Geospatial Operators +// --------------------------------------------------------------------------- + +const geospatialOperators: readonly OperatorEntry[] = [ + { + value: '$geoIntersects', + meta: META_QUERY_GEOSPATIAL, + description: + 'The $geoIntersects operator selects documents whose location field intersects with a specified GeoJSON object.', + snippet: '{ $geoIntersects: { \\$geometry: { type: "${1:GeoJSON type}", coordinates: ${2:coordinates} } } }', + link: getDocLink('$geoIntersects', META_QUERY_GEOSPATIAL), + }, + { + value: '$geoWithin', + meta: META_QUERY_GEOSPATIAL, + description: + 'The $geoWithin operator selects documents whose location field is completely within a specified geometry.', + snippet: '{ $geoWithin: { 
\\$geometry: { type: "${1:GeoJSON type}", coordinates: ${2:coordinates} } } }', + link: getDocLink('$geoWithin', META_QUERY_GEOSPATIAL), + }, + { + value: '$box', + meta: META_QUERY_GEOSPATIAL, + description: 'The $box operator defines a rectangular area for geospatial queries using coordinate pairs.', + snippet: '[[${1:bottomLeftX}, ${2:bottomLeftY}], [${3:upperRightX}, ${4:upperRightY}]]', + link: getDocLink('$box', META_QUERY_GEOSPATIAL), + standalone: false, + }, + { + value: '$center', + meta: META_QUERY_GEOSPATIAL, + description: 'The $center operator specifies a circle using legacy coordinate pairs for $geoWithin queries.', + snippet: '[[${1:x}, ${2:y}], ${3:radius}]', + link: getDocLink('$center', META_QUERY_GEOSPATIAL), + standalone: false, + }, + { + value: '$centerSphere', + meta: META_QUERY_GEOSPATIAL, + description: 'The $centerSphere operator specifies a circle using spherical geometry for $geoWithin queries.', + snippet: '[[${1:x}, ${2:y}], ${3:radiusInRadians}]', + link: getDocLink('$centerSphere', META_QUERY_GEOSPATIAL), + standalone: false, + }, + { + value: '$geometry', + meta: META_QUERY_GEOSPATIAL, + description: 'The $geometry operator specifies a GeoJSON geometry for geospatial queries.', + snippet: '{ type: "${1:Point}", coordinates: [${2:coordinates}] }', + link: getDocLink('$geometry', META_QUERY_GEOSPATIAL), + standalone: false, + }, + { + value: '$maxDistance', + meta: META_QUERY_GEOSPATIAL, + description: + 'The $maxDistance operator specifies the maximum distance that can exist between two points in a geospatial query.', + snippet: '${1:distance}', + link: getDocLink('$maxDistance', META_QUERY_GEOSPATIAL), + standalone: false, + }, + { + value: '$minDistance', + meta: META_QUERY_GEOSPATIAL, + description: + 'The $minDistance operator specifies the minimum distance that must exist between two points in a geospatial query.', + snippet: '${1:distance}', + link: getDocLink('$minDistance', META_QUERY_GEOSPATIAL), + standalone: false, + }, + 
{ + value: '$polygon', + meta: META_QUERY_GEOSPATIAL, + description: + 'The $polygon operator defines a polygon for geospatial queries, allowing you to find locations within an irregular shape.', + snippet: '[[${1:x1}, ${2:y1}], [${3:x2}, ${4:y2}], [${5:x3}, ${6:y3}]]', + link: getDocLink('$polygon', META_QUERY_GEOSPATIAL), + standalone: false, + }, + { + value: '$near', + meta: META_QUERY_GEOSPATIAL, + description: + 'The $near operator returns documents with location fields that are near a specified point, sorted by distance.', + snippet: + '{ $near: { \\$geometry: { type: "Point", coordinates: [${1:lng}, ${2:lat}] }, \\$maxDistance: ${3:distance} } }', + link: getDocLink('$near', META_QUERY_GEOSPATIAL), + }, + { + value: '$nearSphere', + meta: META_QUERY_GEOSPATIAL, + description: + 'The $nearSphere operator returns documents whose location fields are near a specified point on a sphere, sorted by distance on a spherical surface.', + snippet: + '{ $nearSphere: { \\$geometry: { type: "Point", coordinates: [${1:lng}, ${2:lat}] }, \\$maxDistance: ${3:distance} } }', + link: getDocLink('$nearSphere', META_QUERY_GEOSPATIAL), + }, +]; + +// --------------------------------------------------------------------------- +// Array Query Operators +// --------------------------------------------------------------------------- + +const arrayQueryOperators: readonly OperatorEntry[] = [ + { + value: '$all', + meta: META_QUERY_ARRAY, + description: 'The $all operator helps finding array documents matching all the elements.', + snippet: '{ $all: [${1:value}] }', + link: getDocLink('$all', META_QUERY_ARRAY), + applicableBsonTypes: ['array'], + }, + { + value: '$elemMatch', + meta: META_QUERY_ARRAY, + description: + 'The $elemmatch operator returns complete array, qualifying criteria with at least one matching array element.', + snippet: '{ $elemMatch: { ${1:query} } }', + link: getDocLink('$elemMatch', META_QUERY_ARRAY), + applicableBsonTypes: ['array'], + }, + { + value: '$size', 
+ meta: META_QUERY_ARRAY, + description: + 'The $size operator is used to query documents where an array field has a specified number of elements.', + snippet: '{ $size: ${1:number} }', + link: getDocLink('$size', META_QUERY_ARRAY), + applicableBsonTypes: ['array'], + }, +]; + +// --------------------------------------------------------------------------- +// Bitwise Query Operators +// --------------------------------------------------------------------------- + +const bitwiseQueryOperators: readonly OperatorEntry[] = [ + { + value: '$bitsAllClear', + meta: META_QUERY_BITWISE, + description: + 'The $bitsAllClear operator is used to match documents where all the bit positions specified in a bitmask are clear.', + snippet: '{ $bitsAllClear: ${1:bitmask} }', + link: getDocLink('$bitsAllClear', META_QUERY_BITWISE), + applicableBsonTypes: ['int32', 'long'], + }, + { + value: '$bitsAllSet', + meta: META_QUERY_BITWISE, + description: 'The bitsAllSet command is used to match documents where all the specified bit positions are set.', + snippet: '{ $bitsAllSet: ${1:bitmask} }', + link: getDocLink('$bitsAllSet', META_QUERY_BITWISE), + applicableBsonTypes: ['int32', 'long'], + }, + { + value: '$bitsAnyClear', + meta: META_QUERY_BITWISE, + description: + 'The $bitsAnyClear operator matches documents where any of the specified bit positions in a bitmask are clear.', + snippet: '{ $bitsAnyClear: ${1:bitmask} }', + link: getDocLink('$bitsAnyClear', META_QUERY_BITWISE), + applicableBsonTypes: ['int32', 'long'], + }, + { + value: '$bitsAnySet', + meta: META_QUERY_BITWISE, + description: + 'The $bitsAnySet operator returns documents where any of the specified bit positions are set to 1.', + snippet: '{ $bitsAnySet: ${1:bitmask} }', + link: getDocLink('$bitsAnySet', META_QUERY_BITWISE), + applicableBsonTypes: ['int32', 'long'], + }, +]; + +// --------------------------------------------------------------------------- +// Projection Operators +// 
--------------------------------------------------------------------------- + +const projectionOperators: readonly OperatorEntry[] = [ + { + value: '$', + meta: META_QUERY_PROJECTION, + description: + 'The $ positional operator identifies an element in an array to update without explicitly specifying the position of the element in the array.', + link: 'https://learn.microsoft.com/en-us/azure/documentdb/operators/array-update/$', // inferred from another category + standalone: false, + }, + { + value: '$elemMatch', + meta: META_QUERY_PROJECTION, + description: + 'The $elemmatch operator returns complete array, qualifying criteria with at least one matching array element.', + snippet: '{ $elemMatch: { ${1:query} } }', + link: 'https://learn.microsoft.com/en-us/azure/documentdb/operators/array-query/$elemmatch', // inferred from another category + }, + { + value: '$slice', + meta: META_QUERY_PROJECTION, + description: 'The $slice operator returns a subset of an array from any element onwards in the array.', + snippet: '{ $slice: ${1:number} }', + link: 'https://learn.microsoft.com/en-us/azure/documentdb/operators/array-expression/$slice', // inferred from another category + }, +]; + +// --------------------------------------------------------------------------- +// Miscellaneous Query Operators +// --------------------------------------------------------------------------- + +const miscellaneousQueryOperators: readonly OperatorEntry[] = [ + { + value: '$comment', + meta: META_QUERY_MISC, + description: + 'The $comment operator adds a comment to a query to help identify the query in logs and profiler output.', + snippet: '{ $comment: "${1:comment}" }', + link: getDocLink('$comment', META_QUERY_MISC), + }, + { + value: '$rand', + meta: META_QUERY_MISC, + description: 'The $rand operator generates a random float value between 0 and 1.', + snippet: '{ $rand: {} }', + link: getDocLink('$rand', META_QUERY_MISC), + }, + { + value: '$natural', + meta: META_QUERY_MISC, + 
description: + 'The $natural operator forces the query to use the natural order of documents in a collection, providing control over document ordering and retrieval.', + snippet: '{ $natural: ${1:1} }', + link: getDocLink('$natural', META_QUERY_MISC), + standalone: false, + }, +]; + +// --------------------------------------------------------------------------- +// Registration +// --------------------------------------------------------------------------- + +/** + * Registers every query-operator table declared in this file. + * + * The nine tables are concatenated, in the order listed below, into one new array that is handed to registerOperators in a single call. Takes no arguments and returns nothing; that call is the only thing this function does. + */ +export function loadQueryOperators(): void { + registerOperators([ + ...comparisonQueryOperators, + ...logicalQueryOperators, + ...elementQueryOperators, + ...evaluationQueryOperators, + ...geospatialOperators, + ...arrayQueryOperators, + ...bitwiseQueryOperators, + ...projectionOperators, + ...miscellaneousQueryOperators, + ]); +} diff --git a/packages/documentdb-constants/src/stages.ts b/packages/documentdb-constants/src/stages.ts new file mode 100644 index 000000000..0752d7734 --- /dev/null +++ b/packages/documentdb-constants/src/stages.ts @@ -0,0 +1,291 @@ +/*--------------------------------------------------------------------------------------------- + * Copyright (c) Microsoft Corporation. All rights reserved. + * Licensed under the MIT License. See License.txt in the project root for license information. + *--------------------------------------------------------------------------------------------*/ + +// AUTO-GENERATED — DO NOT EDIT BY HAND +// +// Generated by: npm run generate (scripts/generate-from-reference.ts) +// Sources: resources/scraped/operator-reference.md +// resources/overrides/operator-overrides.md +// resources/overrides/operator-snippets.md +// +// To change operator data, edit the overrides/snippets files and re-run the generator.
+ +import { getDocLink } from './docLinks'; +import { registerOperators } from './getFilteredCompletions'; +import { META_STAGE } from './metaTags'; +import { type OperatorEntry } from './types'; + +// --------------------------------------------------------------------------- +// Aggregation Pipeline Stages +// --------------------------------------------------------------------------- + +const aggregationPipelineStages: readonly OperatorEntry[] = [ + { + value: '$addFields', + meta: META_STAGE, + description: 'The $addFields stage in the aggregation pipeline is used to add new fields to documents.', + snippet: '{ $addFields: { ${1:newField}: ${2:expression} } }', + link: getDocLink('$addFields', META_STAGE), + }, + { + value: '$bucket', + meta: META_STAGE, + description: 'Groups input documents into buckets based on specified boundaries.', + snippet: '{ $bucket: { groupBy: "${1:\\$field}", boundaries: [${2:values}], default: "${3:Other}" } }', + link: getDocLink('$bucket', META_STAGE), + }, + { + value: '$bucketAuto', + meta: META_STAGE, + description: + 'Categorizes documents into a specified number of groups based on a given expression, automatically determining bucket boundaries.', + snippet: '{ $bucketAuto: { groupBy: "${1:\\$field}", buckets: ${2:number} } }', + }, + { + value: '$changeStream', + meta: META_STAGE, + description: 'The $changeStream stage opens a change stream cursor to track data changes in real-time.', + snippet: '{ $changeStream: {} }', + link: getDocLink('$changeStream', META_STAGE), + }, + { + value: '$collStats', + meta: META_STAGE, + description: + 'The $collStats stage in the aggregation pipeline is used to return statistics about a collection.', + snippet: '{ $collStats: { storageStats: {} } }', + link: getDocLink('$collStats', META_STAGE), + }, + { + value: '$count', + meta: META_STAGE, + description: + 'The `$count` operator is used to count the number of documents that match a query filtering criteria.', + snippet: '{ $count: 
"${1:countField}" }', + link: 'https://learn.microsoft.com/en-us/azure/documentdb/operators/accumulators/$count', // inferred from another category + }, + { + value: '$densify', + meta: META_STAGE, + description: 'Adds missing data points in a sequence of values within an array or collection.', + snippet: '{ $densify: { field: "${1:field}", range: { step: ${2:1}, bounds: "full" } } }', + link: getDocLink('$densify', META_STAGE), + }, + { + value: '$documents', + meta: META_STAGE, + description: 'The $documents stage creates a pipeline from a set of provided documents.', + snippet: '{ $documents: [${1:documents}] }', + link: getDocLink('$documents', META_STAGE), + }, + { + value: '$facet', + meta: META_STAGE, + description: + 'The $facet allows for multiple parallel aggregations to be executed within a single pipeline stage.', + snippet: '{ $facet: { ${1:outputField}: [{ ${2:stage} }] } }', + link: getDocLink('$facet', META_STAGE), + }, + { + value: '$fill', + meta: META_STAGE, + description: + 'The $fill stage allows filling missing values in documents based on specified methods and criteria.', + snippet: '{ $fill: { output: { ${1:field}: { method: "${2:linear}" } } } }', + link: getDocLink('$fill', META_STAGE), + }, + { + value: '$geoNear', + meta: META_STAGE, + description: + 'The $geoNear operator finds and sorts documents by their proximity to a geospatial point, returning distance information for each document.', + snippet: + '{ $geoNear: { near: { type: "Point", coordinates: [${1:lng}, ${2:lat}] }, distanceField: "${3:distance}" } }', + link: getDocLink('$geoNear', META_STAGE), + }, + { + value: '$graphLookup', + meta: META_STAGE, + description: + 'Performs a recursive search on a collection to return documents connected by a specified field relationship.', + snippet: + '{ $graphLookup: { from: "${1:collection}", startWith: "${2:\\$field}", connectFromField: "${3:field}", connectToField: "${4:field}", as: "${5:result}" } }', + }, + { + value: '$group', + 
meta: META_STAGE, + description: + 'The $group stage groups documents by specified identifier expressions and applies accumulator expressions.', + snippet: '{ $group: { _id: "${1:\\$field}", ${2:accumulator}: { ${3:\\$sum}: 1 } } }', + link: getDocLink('$group', META_STAGE), + }, + { + value: '$indexStats', + meta: META_STAGE, + description: 'The $indexStats stage returns usage statistics for each index in the collection.', + snippet: '{ $indexStats: {} }', + link: getDocLink('$indexStats', META_STAGE), + }, + { + value: '$limit', + meta: META_STAGE, + description: 'Restricts the number of documents passed to the next stage in the pipeline.', + snippet: '{ $limit: ${1:number} }', + }, + { + value: '$lookup', + meta: META_STAGE, + description: + 'The $lookup stage in the Aggregation Framework is used to perform left outer joins with other collections.', + snippet: + '{ $lookup: { from: "${1:collection}", localField: "${2:field}", foreignField: "${3:field}", as: "${4:result}" } }', + link: getDocLink('$lookup', META_STAGE), + }, + { + value: '$match', + meta: META_STAGE, + description: + 'The $match stage in the aggregation pipeline is used to filter documents that match a specified condition.', + snippet: '{ $match: { ${1:query} } }', + link: getDocLink('$match', META_STAGE), + }, + { + value: '$merge', + meta: META_STAGE, + description: + 'The $merge stage in an aggregation pipeline writes the results of the aggregation to a specified collection.', + snippet: '{ $merge: { into: "${1:collection}" } }', + link: getDocLink('$merge', META_STAGE), + }, + { + value: '$out', + meta: META_STAGE, + description: + 'The `$out` stage in an aggregation pipeline writes the resulting documents to a specified collection.', + snippet: '{ $out: "${1:collection}" }', + link: getDocLink('$out', META_STAGE), + }, + { + value: '$project', + meta: META_STAGE, + description: 'Reshapes documents by including, excluding, or computing new fields.', + snippet: '{ $project: { ${1:field}: 1 } 
}', + }, + { + value: '$redact', + meta: META_STAGE, + description: 'Filters the content of the documents based on access rights.', + snippet: + '{ $redact: { \\$cond: { if: { ${1:expression} }, then: "${2:\\$\\$DESCEND}", else: "${3:\\$\\$PRUNE}" } } }', + link: getDocLink('$redact', META_STAGE), + }, + { + value: '$replaceRoot', + meta: META_STAGE, + description: 'Replaces the input document with a specified embedded document, promoting it to the top level.', + snippet: '{ $replaceRoot: { newRoot: "${1:\\$field}" } }', + }, + { + value: '$replaceWith', + meta: META_STAGE, + description: + 'The $replaceWith operator in Azure DocumentDB returns a document after replacing a document with the specified document', + snippet: '{ $replaceWith: "${1:\\$field}" }', + link: getDocLink('$replaceWith', META_STAGE), + }, + { + value: '$sample', + meta: META_STAGE, + description: 'The $sample operator in Azure DocumentDB returns a randomly selected number of documents', + snippet: '{ $sample: { size: ${1:number} } }', + link: getDocLink('$sample', META_STAGE), + }, + { + value: '$search', + meta: META_STAGE, + description: 'Performs full-text search on string fields using Atlas Search or compatible search indexes.', + snippet: '{ $search: { ${1} } }', + }, + { + value: '$searchMeta', + meta: META_STAGE, + description: 'Returns metadata about an Atlas Search query without returning the matching documents.', + snippet: '{ $searchMeta: { ${1} } }', + }, + { + value: '$set', + meta: META_STAGE, + description: 'The $set operator in Azure DocumentDB updates or creates a new field with a specified value', + snippet: '{ $set: { ${1:field}: ${2:expression} } }', + link: getDocLink('$set', META_STAGE), + }, + { + value: '$setWindowFields', + meta: META_STAGE, + description: + 'Adds computed fields to documents using window functions over a specified partition and sort order.', + snippet: + '{ $setWindowFields: { partitionBy: "${1:\\$field}", sortBy: { ${2:field}: ${3:1} }, output: { 
${4:newField}: { ${5:windowFunc} } } } }', + }, + { + value: '$skip', + meta: META_STAGE, + description: + 'The $skip stage in the aggregation pipeline is used to skip a specified number of documents from the input and pass the remaining documents to the next stage in the pipeline.', + snippet: '{ $skip: ${1:number} }', + link: getDocLink('$skip', META_STAGE), + }, + { + value: '$sort', + meta: META_STAGE, + description: + 'The $sort stage in the aggregation pipeline is used to order the documents in the pipeline by a specified field or fields.', + snippet: '{ $sort: { ${1:field}: ${2:1} } }', + link: getDocLink('$sort', META_STAGE), + }, + { + value: '$sortByCount', + meta: META_STAGE, + description: + 'The $sortByCount stage in the aggregation pipeline is used to group documents by a specified expression and then sort the count of documents in each group in descending order.', + snippet: '{ $sortByCount: "${1:\\$field}" }', + link: getDocLink('$sortByCount', META_STAGE), + }, + { + value: '$unionWith', + meta: META_STAGE, + description: 'Combines the results of two collections into a single result set, similar to SQL UNION ALL.', + snippet: '{ $unionWith: { coll: "${1:collection}", pipeline: [${2}] } }', + }, + { + value: '$unset', + meta: META_STAGE, + description: 'The $unset stage in the aggregation pipeline is used to remove specified fields from documents.', + snippet: '{ $unset: "${1:field}" }', + link: getDocLink('$unset', META_STAGE), + }, + { + value: '$unwind', + meta: META_STAGE, + description: + 'The $unwind stage in the aggregation framework is used to deconstruct an array field from the input documents to output a document for each element.', + snippet: '{ $unwind: "${1:\\$arrayField}" }', + link: getDocLink('$unwind', META_STAGE), + }, + { + value: '$currentOp', + meta: META_STAGE, + description: 'Returns information on active and queued operations for the database instance.', + snippet: '{ $currentOp: { allUsers: true } }', + }, +]; + +// 
--------------------------------------------------------------------------- +// Registration +// --------------------------------------------------------------------------- + +/** + * Registers the aggregation pipeline stage table declared in this file. + * + * A shallow copy of aggregationPipelineStages (made with spread) is handed to registerOperators, so the callee receives its own array rather than the module-level one. Takes no arguments and returns nothing. + */ +export function loadStages(): void { + registerOperators([...aggregationPipelineStages]); +} diff --git a/packages/documentdb-constants/src/structuralInvariants.test.ts b/packages/documentdb-constants/src/structuralInvariants.test.ts new file mode 100644 index 000000000..953fc7831 --- /dev/null +++ b/packages/documentdb-constants/src/structuralInvariants.test.ts @@ -0,0 +1,242 @@ +/*--------------------------------------------------------------------------------------------- + * Copyright (c) Microsoft Corporation. All rights reserved. + * Licensed under the MIT License. See License.txt in the project root for license information. + *--------------------------------------------------------------------------------------------*/ + +/** + * Structural invariant tests for all operator entries. + * + * Validates that every entry in getAllCompletions() has the correct shape, + * consistent meta tags, and reasonable values.
+ */ + +import { ALL_META_TAGS, getAllCompletions, type OperatorEntry } from './index'; + +const allOperators = getAllCompletions(); + +describe('structural invariants', () => { + test('total operator count is in the expected range', () => { + // 308 total (298 from dump + 10 BSON constructors) + expect(allOperators.length).toBeGreaterThanOrEqual(290); + expect(allOperators.length).toBeLessThanOrEqual(320); + }); + + test('every entry has required fields', () => { + const invalid: string[] = []; + for (const op of allOperators) { + if (!op.value) { + invalid.push('entry missing value'); + } + if (!op.meta) { + invalid.push(`${op.value} missing meta`); + } + if (!op.description) { + invalid.push(`${op.value} missing description`); + } + } + expect(invalid).toEqual([]); + }); + + test('operator values start with $ or $$ (except BSON constructors)', () => { + const invalid: string[] = []; + for (const op of allOperators) { + if (op.meta === 'bson') { + // BSON constructors: ObjectId, ISODate, etc. 
β€” no $ prefix + expect(op.value).toMatch(/^[A-Z]/); + } else if (op.meta === 'variable') { + // System variables start with $$ + if (!op.value.startsWith('$$')) { + invalid.push(`${op.value} (variable) should start with $$`); + } + } else { + // All other operators start with $ + if (!op.value.startsWith('$')) { + invalid.push(`${op.value} (${op.meta}) should start with $`); + } + } + } + expect(invalid).toEqual([]); + }); + + test('every entry has a valid meta tag', () => { + const validMetas = new Set(ALL_META_TAGS); + const invalid: string[] = []; + for (const op of allOperators) { + if (!validMetas.has(op.meta)) { + invalid.push(`${op.value} has unknown meta: ${op.meta}`); + } + } + expect(invalid).toEqual([]); + }); + + test('descriptions are non-empty strings', () => { + const empty: string[] = []; + for (const op of allOperators) { + if (typeof op.description !== 'string' || op.description.trim().length === 0) { + empty.push(`${op.value} (${op.meta}) has empty description`); + } + } + expect(empty).toEqual([]); + }); + + test('snippets are strings when present', () => { + const invalid: string[] = []; + for (const op of allOperators) { + if (op.snippet !== undefined && typeof op.snippet !== 'string') { + invalid.push(`${op.value} (${op.meta}) has non-string snippet`); + } + } + expect(invalid).toEqual([]); + }); + + test('links are valid URLs when present', () => { + const invalid: string[] = []; + for (const op of allOperators) { + if (op.link !== undefined) { + if (typeof op.link !== 'string' || !op.link.startsWith('https://')) { + invalid.push(`${op.value} (${op.meta}) has invalid link: ${op.link}`); + } + } + } + expect(invalid).toEqual([]); + }); + + test('applicableBsonTypes is a string array when present', () => { + const invalid: string[] = []; + for (const op of allOperators) { + if (op.applicableBsonTypes !== undefined) { + if (!Array.isArray(op.applicableBsonTypes)) { + invalid.push(`${op.value} (${op.meta}) applicableBsonTypes is not an 
array`); + } else { + for (const t of op.applicableBsonTypes) { + if (typeof t !== 'string' || t.trim().length === 0) { + invalid.push(`${op.value} (${op.meta}) has empty BSON type`); + } + } + } + } + } + expect(invalid).toEqual([]); + }); + + test('no duplicate (value, meta) pairs', () => { + const seen = new Set(); + const duplicates: string[] = []; + for (const op of allOperators) { + const key = `${op.value}|${op.meta}`; + if (seen.has(key)) { + duplicates.push(key); + } + seen.add(key); + } + expect(duplicates).toEqual([]); + }); + + test('BSON constructors have expected entries', () => { + const bsonOps = allOperators.filter((op) => op.meta === 'bson'); + const bsonValues = bsonOps.map((op) => op.value).sort(); + expect(bsonValues).toEqual( + expect.arrayContaining([ + 'BinData', + 'ISODate', + 'MaxKey', + 'MinKey', + 'NumberDecimal', + 'NumberInt', + 'NumberLong', + 'ObjectId', + 'Timestamp', + 'UUID', + ]), + ); + }); + + test('system variables have expected entries', () => { + const varOps = allOperators.filter((op) => op.meta === 'variable'); + const varValues = varOps.map((op) => op.value).sort(); + expect(varValues).toEqual( + expect.arrayContaining(['$$CURRENT', '$$DESCEND', '$$KEEP', '$$NOW', '$$PRUNE', '$$REMOVE', '$$ROOT']), + ); + }); + + test('key operators are present', () => { + const values = new Set(allOperators.map((op) => op.value)); + + // Query operators + expect(values.has('$eq')).toBe(true); + expect(values.has('$gt')).toBe(true); + expect(values.has('$and')).toBe(true); + expect(values.has('$regex')).toBe(true); + expect(values.has('$exists')).toBe(true); + + // Stages + expect(values.has('$match')).toBe(true); + expect(values.has('$group')).toBe(true); + expect(values.has('$lookup')).toBe(true); + expect(values.has('$project')).toBe(true); + expect(values.has('$sort')).toBe(true); + + // Update operators + expect(values.has('$set')).toBe(true); + expect(values.has('$unset')).toBe(true); + expect(values.has('$inc')).toBe(true); + + // 
Accumulators + expect(values.has('$sum')).toBe(true); + expect(values.has('$avg')).toBe(true); + + // Expressions + expect(values.has('$add')).toBe(true); + expect(values.has('$concat')).toBe(true); + expect(values.has('$cond')).toBe(true); + }); + + test('excluded operators are NOT present with unsupported meta tags', () => { + // These should not be present (deprecated or not supported) + const opsByValueMeta = new Map(); + for (const op of allOperators) { + opsByValueMeta.set(`${op.value}|${op.meta}`, op); + } + + // $where is deprecated and should not be present as evaluation query + expect(opsByValueMeta.has('$where|query:evaluation')).toBe(false); + }); +}); + +describe('meta tag coverage', () => { + test('every meta tag in ALL_META_TAGS has at least one operator (except parent-only and runtime tags)', () => { + const metasWithOps = new Set(allOperators.map((op) => op.meta)); + // Parent-only tags: operators use subcategories (query:comparison, update:field), + // not the bare 'query' or 'update' tags. 'field:identifier' is runtime-injected. + const parentOnlyTags = new Set(['query', 'update', 'field:identifier']); + const missing: string[] = []; + for (const tag of ALL_META_TAGS) { + if (parentOnlyTags.has(tag)) { + continue; + } + if (!metasWithOps.has(tag)) { + missing.push(tag); + } + } + expect(missing).toEqual([]); + }); + + test('top-level meta categories have expected operator counts', () => { + const countByPrefix: Record = {}; + for (const op of allOperators) { + const prefix = op.meta.includes(':') ? 
op.meta.split(':')[0] : op.meta; + countByPrefix[prefix] = (countByPrefix[prefix] || 0) + 1; + } + + expect(countByPrefix['query']).toBe(43); + expect(countByPrefix['update']).toBe(22); + expect(countByPrefix['stage']).toBe(35); + expect(countByPrefix['accumulator']).toBe(21); + expect(countByPrefix['window']).toBe(27); + expect(countByPrefix['bson']).toBe(10); + expect(countByPrefix['variable']).toBe(7); + // Expression operators: ~143-144 + expect(countByPrefix['expr']).toBeGreaterThanOrEqual(140); + expect(countByPrefix['expr']).toBeLessThanOrEqual(150); + }); +}); diff --git a/packages/documentdb-constants/src/systemVariables.ts b/packages/documentdb-constants/src/systemVariables.ts new file mode 100644 index 000000000..219d04eb0 --- /dev/null +++ b/packages/documentdb-constants/src/systemVariables.ts @@ -0,0 +1,74 @@ +/*--------------------------------------------------------------------------------------------- + * Copyright (c) Microsoft Corporation. All rights reserved. + * Licensed under the MIT License. See License.txt in the project root for license information. + *--------------------------------------------------------------------------------------------*/ + +// AUTO-GENERATED β€” DO NOT EDIT BY HAND +// +// Generated by: npm run generate (scripts/generate-from-reference.ts) +// Sources: resources/scraped/operator-reference.md +// resources/overrides/operator-overrides.md +// resources/overrides/operator-snippets.md +// +// To change operator data, edit the overrides/snippets files and re-run the generator. 
+ +import { registerOperators } from './getFilteredCompletions'; +import { META_VARIABLE } from './metaTags'; +import { type OperatorEntry } from './types'; + +// --------------------------------------------------------------------------- +// Variables in Aggregation Expressions +// --------------------------------------------------------------------------- + +const systemVariables: readonly OperatorEntry[] = [ + { + value: '$$NOW', + meta: META_VARIABLE, + description: + 'Returns the current datetime as a Date object. Constant throughout a single aggregation pipeline.', + }, + { + value: '$$ROOT', + meta: META_VARIABLE, + description: + 'References the root document β€” the top-level document currently being processed in the pipeline stage.', + }, + { + value: '$$REMOVE', + meta: META_VARIABLE, + description: + 'Removes a field from the output document. Used with $project or $addFields to conditionally exclude fields.', + }, + { + value: '$$CURRENT', + meta: META_VARIABLE, + description: + 'References the current document in the pipeline stage. Equivalent to $$ROOT at the start of the pipeline.', + }, + { + value: '$$DESCEND', + meta: META_VARIABLE, + description: + 'Used with $redact. Returns the document fields at the current level and continues descending into subdocuments.', + }, + { + value: '$$PRUNE', + meta: META_VARIABLE, + description: + 'Used with $redact. Excludes all fields at the current document level and stops descending into subdocuments.', + }, + { + value: '$$KEEP', + meta: META_VARIABLE, + description: + 'Used with $redact. 
Keeps all fields at the current document level without further descending into subdocuments.', + }, +]; + +// --------------------------------------------------------------------------- +// Registration +// --------------------------------------------------------------------------- + +export function loadSystemVariables(): void { + registerOperators([...systemVariables]); +} diff --git a/packages/documentdb-constants/src/types.ts b/packages/documentdb-constants/src/types.ts new file mode 100644 index 000000000..d08cac711 --- /dev/null +++ b/packages/documentdb-constants/src/types.ts @@ -0,0 +1,154 @@ +/*--------------------------------------------------------------------------------------------- + * Copyright (c) Microsoft Corporation. All rights reserved. + * Licensed under the MIT License. See License.txt in the project root for license information. + *--------------------------------------------------------------------------------------------*/ + +import { type ALL_META_TAGS } from './metaTags'; + +/** + * Represents a single operator, stage, accumulator, or BSON constructor + * for use in autocomplete, hover docs, and diagnostics. + */ +export interface OperatorEntry { + /** The operator string, e.g. "$gt", "$match", "ObjectId" */ + readonly value: string; + + /** + * Category tag for filtering. Determines which contexts this entry + * appears in. See {@link MetaTag} for the full set. + * + * Examples: "query", "query:comparison", "stage", "accumulator", + * "expr:arith", "expr:date", "bson", "field:identifier" + */ + readonly meta: MetaTag; + + /** Human-readable one-line description. */ + readonly description: string; + + /** + * Monaco snippet with tab stops for insertion. + * Example: '{ \\$match: { ${1:field}: ${2:value} } }' + * If absent, `value` is inserted as-is. + */ + readonly snippet?: string; + + /** + * URL to the DocumentDB documentation page for this operator. + * Generated from `docLinks.ts` helpers. 
+ */ + readonly link?: string; + + /** + * Applicable BSON types for type-aware filtering. + * If set, this operator only appears when the field's bsonType + * matches one of these values. If absent, the operator is universal. + * + * Example: $regex β†’ ['string'], $size β†’ ['array'] + */ + readonly applicableBsonTypes?: readonly string[]; + + /** + * Whether this operator is valid as a standalone completion at top-level + * positions (key, value, operator). Defaults to `true` when absent. + * + * Set to `false` for operators that are only valid inside another operator's + * value object β€” e.g., geospatial shape specifiers (`$box`, `$geometry`) + * which are only valid inside `$geoWithin`/`$near`, or sort-only modifiers + * like `$natural`. + * + * Completion providers should filter out `standalone === false` entries + * from standard completion lists. These entries remain in the registry + * for hover documentation and future context-aware nested completions. + */ + readonly standalone?: boolean; + + /** + * @experimental Not yet populated by the generator; reserved for a future + * contextual-snippet feature. + * + * When populated, this field carries a hint about the type of value an operator + * produces or expects, enabling the CompletionItemProvider to tailor snippets + * and insert sensible placeholder values based on context. + * + * Planned values and their meanings: + * - `"number"` β€” operator always produces a number + * (e.g. `$size` on an array field β†’ insert a numeric comparand) + * - `"boolean"` β€” operator produces true/false + * (e.g. `$and`, `$or` in expression context) + * - `"string"` β€” operator produces a string + * (e.g. `$concat`, `$toLower`) + * - `"array"` β€” operator produces an array + * (e.g. `$push` accumulator, `$concatArrays`) + * - `"date"` β€” operator produces a date + * (e.g. `$dateAdd`, `$toDate`) + * - `"same"` β€” operator produces the same type as its input + * (e.g. 
`$min`, `$max`, comparison operators like `$gt`) + * - `"object"` β€” operator produces a document/object + * (e.g. `$mergeObjects`) + * - `"any"` β€” return type is undetermined or context-dependent + * + * This field is intentionally absent from all current entries. The generator + * (`scripts/generate-from-reference.ts`) does not yet emit it. It will be + * populated in a follow-up pass once the `CompletionItemProvider` is ready + * to consume it. + */ + readonly returnType?: string; +} + +/** + * Filter configuration for {@link getFilteredCompletions}. + */ +export interface CompletionFilter { + /** + * Meta tag prefixes to include. Supports prefix matching: + * 'query' matches 'query', 'query:comparison', 'query:logical', etc. + * 'expr' matches all 'expr:*' entries. + */ + readonly meta: readonly string[]; + + /** Optional: only return operators applicable to these BSON types. */ + readonly bsonTypes?: readonly string[]; +} + +/** + * Meta tag constants. Tags use a hierarchical scheme: + * + * - 'query' β€” top-level query operators (in find filter, $match) + * - 'query:comparison' β€” comparison subset ($eq, $gt, etc.) + * - 'query:logical' β€” logical ($and, $or, $not, $nor) + * - 'query:element' β€” element ($exists, $type) + * - 'query:evaluation' β€” evaluation ($expr, $regex, $mod, $text) + * - 'query:array' β€” array ($all, $elemMatch, $size) + * - 'query:bitwise' β€” bitwise ($bitsAllSet, etc.) + * - 'query:geospatial' β€” geospatial ($geoWithin, $near, etc.) + * - 'query:projection' β€” projection ($, $elemMatch, $slice) + * - 'query:misc' β€” miscellaneous ($comment, $rand, $natural) + * - 'update' β€” update operators ($set, $unset, $inc, etc.) + * - 'update:field' β€” field update subset + * - 'update:array' β€” array update subset ($push, $pull, etc.) + * - 'update:bitwise' β€” bitwise update ($bit) + * - 'stage' β€” aggregation pipeline stages ($match, $group, etc.) + * - 'accumulator' β€” accumulators ($sum, $avg, $first, etc.) 
+ * - 'expr:arith' β€” arithmetic expressions ($add, $subtract, etc.) + * - 'expr:array' β€” array expressions ($arrayElemAt, $filter, etc.) + * - 'expr:bool' β€” boolean expressions ($and, $or, $not) + * - 'expr:comparison' β€” comparison expressions ($cmp, $eq, etc.) + * - 'expr:conditional' β€” conditional ($cond, $ifNull, $switch) + * - 'expr:date' β€” date expressions ($dateAdd, $year, etc.) + * - 'expr:object' β€” object expressions ($mergeObjects, etc.) + * - 'expr:set' β€” set expressions ($setUnion, etc.) + * - 'expr:string' β€” string expressions ($concat, $substr, etc.) + * - 'expr:trig' β€” trigonometry ($sin, $cos, etc.) + * - 'expr:type' β€” type conversion ($convert, $toInt, etc.) + * - 'expr:datasize' β€” data size ($bsonSize, $binarySize) + * - 'expr:timestamp' β€” timestamp ($tsIncrement, $tsSecond) + * - 'expr:bitwise' β€” bitwise expressions ($bitAnd, $bitOr, etc.) + * - 'expr:literal' β€” $literal + * - 'expr:misc' β€” miscellaneous expressions ($getField, $rand, etc.) + * - 'expr:variable' β€” variable expressions ($let) + * - 'window' β€” window operators ($rank, $denseRank, etc.) + * - 'bson' β€” BSON constructor functions (ObjectId, ISODate, etc.) + * - 'variable' β€” system variables ($$NOW, $$ROOT, etc.) + * - 'field:identifier' β€” injected field names from schema (not static) + */ +export type MetaTag = (typeof ALL_META_TAGS)[number] | (string & {}); diff --git a/packages/documentdb-constants/src/updateOperators.ts b/packages/documentdb-constants/src/updateOperators.ts new file mode 100644 index 000000000..a90f62fcd --- /dev/null +++ b/packages/documentdb-constants/src/updateOperators.ts @@ -0,0 +1,203 @@ +/*--------------------------------------------------------------------------------------------- + * Copyright (c) Microsoft Corporation. All rights reserved. + * Licensed under the MIT License. See License.txt in the project root for license information. 
+ *--------------------------------------------------------------------------------------------*/ + +// AUTO-GENERATED β€” DO NOT EDIT BY HAND +// +// Generated by: npm run generate (scripts/generate-from-reference.ts) +// Sources: resources/scraped/operator-reference.md +// resources/overrides/operator-overrides.md +// resources/overrides/operator-snippets.md +// +// To change operator data, edit the overrides/snippets files and re-run the generator. + +import { getDocLink } from './docLinks'; +import { registerOperators } from './getFilteredCompletions'; +import { META_UPDATE_ARRAY, META_UPDATE_BITWISE, META_UPDATE_FIELD } from './metaTags'; +import { type OperatorEntry } from './types'; + +// --------------------------------------------------------------------------- +// Field Update Operators +// --------------------------------------------------------------------------- + +const fieldUpdateOperators: readonly OperatorEntry[] = [ + { + value: '$currentDate', + meta: META_UPDATE_FIELD, + description: + 'The $currentDate operator sets the value of a field to the current date, either as a Date or a timestamp.', + snippet: '{ $currentDate: { "${1:field}": true } }', + link: getDocLink('$currentDate', META_UPDATE_FIELD), + }, + { + value: '$inc', + meta: META_UPDATE_FIELD, + description: 'The $inc operator increments the value of a field by a specified amount.', + snippet: '{ $inc: { "${1:field}": ${2:value} } }', + link: getDocLink('$inc', META_UPDATE_FIELD), + }, + { + value: '$min', + meta: META_UPDATE_FIELD, + description: 'Retrieves the minimum value for a specified field', + snippet: '{ $min: { "${1:field}": ${2:value} } }', + link: 'https://learn.microsoft.com/en-us/azure/documentdb/operators/accumulators/$min', // inferred from another category + }, + { + value: '$max', + meta: META_UPDATE_FIELD, + description: 'The $max operator returns the maximum value from a set of input values.', + snippet: '{ $max: { "${1:field}": ${2:value} } }', + link: 
'https://learn.microsoft.com/en-us/azure/documentdb/operators/accumulators/$max', // inferred from another category + }, + { + value: '$mul', + meta: META_UPDATE_FIELD, + description: 'The $mul operator multiplies the value of a field by a specified number.', + snippet: '{ $mul: { "${1:field}": ${2:value} } }', + link: getDocLink('$mul', META_UPDATE_FIELD), + }, + { + value: '$rename', + meta: META_UPDATE_FIELD, + description: 'The $rename operator allows renaming fields in documents during update operations.', + snippet: '{ $rename: { "${1:oldField}": "${2:newField}" } }', + link: getDocLink('$rename', META_UPDATE_FIELD), + }, + { + value: '$set', + meta: META_UPDATE_FIELD, + description: 'The $set operator in Azure DocumentDB updates or creates a new field with a specified value', + snippet: '{ $set: { "${1:field}": ${2:value} } }', + link: 'https://learn.microsoft.com/en-us/azure/documentdb/operators/aggregation/$set', // inferred from another category + }, + { + value: '$setOnInsert', + meta: META_UPDATE_FIELD, + description: + 'The $setOnInsert operator sets field values only when an upsert operation results in an insert of a new document.', + snippet: '{ $setOnInsert: { "${1:field}": ${2:value} } }', + link: getDocLink('$setOnInsert', META_UPDATE_FIELD), + }, + { + value: '$unset', + meta: META_UPDATE_FIELD, + description: 'The $unset stage in the aggregation pipeline is used to remove specified fields from documents.', + snippet: '{ $unset: { "${1:field}": ${2:value} } }', + link: 'https://learn.microsoft.com/en-us/azure/documentdb/operators/aggregation/$unset', // inferred from another category + }, +]; + +// --------------------------------------------------------------------------- +// Array Update Operators +// --------------------------------------------------------------------------- + +const arrayUpdateOperators: readonly OperatorEntry[] = [ + { + value: '$', + meta: META_UPDATE_ARRAY, + description: + 'The $ positional operator identifies an element 
in an array to update without explicitly specifying the position of the element in the array.', + link: getDocLink('$', META_UPDATE_ARRAY), + }, + { + value: '$[]', + meta: META_UPDATE_ARRAY, + description: 'Positional all operator. Acts as a placeholder to update all elements in an array field.', + }, + { + value: '$[identifier]', + meta: META_UPDATE_ARRAY, + description: + 'Filtered positional operator. Acts as a placeholder to update elements that match an arrayFilters condition.', + }, + { + value: '$addToSet', + meta: META_UPDATE_ARRAY, + description: + "The addToSet operator adds elements to an array if they don't already exist, while ensuring uniqueness of elements within the set.", + snippet: '{ $addToSet: { "${1:field}": ${2:value} } }', + link: getDocLink('$addToSet', META_UPDATE_ARRAY), + }, + { + value: '$pop', + meta: META_UPDATE_ARRAY, + description: 'Removes the first or last element of an array.', + snippet: '{ $pop: { "${1:field}": ${2:1} } }', + link: getDocLink('$pop', META_UPDATE_ARRAY), + }, + { + value: '$pull', + meta: META_UPDATE_ARRAY, + description: 'Removes all instances of a value from an array.', + snippet: '{ $pull: { "${1:field}": ${2:condition} } }', + link: getDocLink('$pull', META_UPDATE_ARRAY), + }, + { + value: '$push', + meta: META_UPDATE_ARRAY, + description: 'The $push operator adds a specified value to an array within a document.', + snippet: '{ $push: { "${1:field}": ${2:value} } }', + link: getDocLink('$push', META_UPDATE_ARRAY), + }, + { + value: '$pullAll', + meta: META_UPDATE_ARRAY, + description: 'The $pullAll operator is used to remove all instances of the specified values from an array.', + snippet: '{ $pullAll: { "${1:field}": [${2:values}] } }', + link: getDocLink('$pullAll', META_UPDATE_ARRAY), + }, + { + value: '$each', + meta: META_UPDATE_ARRAY, + description: + 'The $each operator is used within an `$addToSet`or`$push` operation to add multiple elements to an array field in a single update operation.', + 
snippet: '{ $each: [${1:values}] }', + link: getDocLink('$each', META_UPDATE_ARRAY), + }, + { + value: '$position', + meta: META_UPDATE_ARRAY, + description: + 'Specifies the position in the array at which the $push operator inserts elements. Used with $each.', + snippet: '{ $position: ${1:index} }', + }, + { + value: '$slice', + meta: META_UPDATE_ARRAY, + description: 'The $slice operator returns a subset of an array from any element onwards in the array.', + snippet: '{ $slice: ${1:number} }', + link: 'https://learn.microsoft.com/en-us/azure/documentdb/operators/array-expression/$slice', // inferred from another category + }, + { + value: '$sort', + meta: META_UPDATE_ARRAY, + description: + 'The $sort stage in the aggregation pipeline is used to order the documents in the pipeline by a specified field or fields.', + snippet: '{ $sort: { "${1:field}": ${2:1} } }', + link: 'https://learn.microsoft.com/en-us/azure/documentdb/operators/aggregation/$sort', // inferred from another category + }, +]; + +// --------------------------------------------------------------------------- +// Bitwise Update Operators +// --------------------------------------------------------------------------- + +const bitwiseUpdateOperators: readonly OperatorEntry[] = [ + { + value: '$bit', + meta: META_UPDATE_BITWISE, + description: 'The `$bit` operator is used to perform bitwise operations on integer values.', + snippet: '{ $bit: { "${1:field}": { "${2:and|or|xor}": ${3:value} } } }', + link: getDocLink('$bit', META_UPDATE_BITWISE), + }, +]; + +// --------------------------------------------------------------------------- +// Registration +// --------------------------------------------------------------------------- + +export function loadUpdateOperators(): void { + registerOperators([...fieldUpdateOperators, ...arrayUpdateOperators, ...bitwiseUpdateOperators]); +} diff --git a/packages/documentdb-constants/src/windowOperators.ts b/packages/documentdb-constants/src/windowOperators.ts new 
file mode 100644 index 000000000..f15b412e1 --- /dev/null +++ b/packages/documentdb-constants/src/windowOperators.ts @@ -0,0 +1,233 @@ +/*--------------------------------------------------------------------------------------------- + * Copyright (c) Microsoft Corporation. All rights reserved. + * Licensed under the MIT License. See License.txt in the project root for license information. + *--------------------------------------------------------------------------------------------*/ + +// AUTO-GENERATED β€” DO NOT EDIT BY HAND +// +// Generated by: npm run generate (scripts/generate-from-reference.ts) +// Sources: resources/scraped/operator-reference.md +// resources/overrides/operator-overrides.md +// resources/overrides/operator-snippets.md +// +// To change operator data, edit the overrides/snippets files and re-run the generator. + +import { getDocLink } from './docLinks'; +import { registerOperators } from './getFilteredCompletions'; +import { META_WINDOW } from './metaTags'; +import { type OperatorEntry } from './types'; + +// --------------------------------------------------------------------------- +// Window Operators +// --------------------------------------------------------------------------- + +const windowOperators: readonly OperatorEntry[] = [ + { + value: '$sum', + meta: META_WINDOW, + description: 'The $sum operator calculates the sum of the values of a field based on a filtering criteria', + snippet: '{ $sum: "${1:\\$field}" }', + link: 'https://learn.microsoft.com/en-us/azure/documentdb/operators/accumulators/$sum', // inferred from another category + }, + { + value: '$push', + meta: META_WINDOW, + description: 'The $push operator adds a specified value to an array within a document.', + snippet: '{ $push: "${1:\\$field}" }', + link: 'https://learn.microsoft.com/en-us/azure/documentdb/operators/array-update/$push', // inferred from another category + }, + { + value: '$addToSet', + meta: META_WINDOW, + description: + "The addToSet operator adds 
elements to an array if they don't already exist, while ensuring uniqueness of elements within the set.", + snippet: '{ $addToSet: "${1:\\$field}" }', + link: 'https://learn.microsoft.com/en-us/azure/documentdb/operators/array-update/$addtoset', // inferred from another category + }, + { + value: '$count', + meta: META_WINDOW, + description: + 'The `$count` operator is used to count the number of documents that match a query filtering criteria.', + snippet: '{ $count: "${1:\\$field}" }', + link: 'https://learn.microsoft.com/en-us/azure/documentdb/operators/accumulators/$count', // inferred from another category + }, + { + value: '$max', + meta: META_WINDOW, + description: 'The $max operator returns the maximum value from a set of input values.', + snippet: '{ $max: "${1:\\$field}" }', + link: 'https://learn.microsoft.com/en-us/azure/documentdb/operators/accumulators/$max', // inferred from another category + }, + { + value: '$min', + meta: META_WINDOW, + description: 'Retrieves the minimum value for a specified field', + snippet: '{ $min: "${1:\\$field}" }', + link: 'https://learn.microsoft.com/en-us/azure/documentdb/operators/accumulators/$min', // inferred from another category + }, + { + value: '$avg', + meta: META_WINDOW, + description: 'Computes the average of numeric values for documents in a group, bucket, or window.', + snippet: '{ $avg: "${1:\\$field}" }', + link: 'https://learn.microsoft.com/en-us/azure/documentdb/operators/accumulators/$avg', // inferred from another category + }, + { + value: '$stdDevPop', + meta: META_WINDOW, + description: 'The $stddevpop operator calculates the standard deviation of the specified values', + snippet: '{ $stdDevPop: "${1:\\$field}" }', + link: 'https://learn.microsoft.com/en-us/azure/documentdb/operators/accumulators/$stddevpop', // inferred from another category + }, + { + value: '$bottom', + meta: META_WINDOW, + description: + "The $bottom operator returns the last document from the query's result set sorted by one 
or more fields", + snippet: '{ $bottom: "${1:\\$field}" }', + link: 'https://learn.microsoft.com/en-us/azure/documentdb/operators/accumulators/$bottom', // inferred from another category + }, + { + value: '$bottomN', + meta: META_WINDOW, + description: 'The $bottomN operator returns the last N documents from the result sorted by one or more fields', + snippet: '{ $bottomN: "${1:\\$field}" }', + link: 'https://learn.microsoft.com/en-us/azure/documentdb/operators/accumulators/$bottomn', // inferred from another category + }, + { + value: '$covariancePop', + meta: META_WINDOW, + description: 'The $covariancePop operator returns the covariance of two numerical expressions', + snippet: '{ $covariancePop: "${1:\\$field}" }', + link: getDocLink('$covariancePop', META_WINDOW), + }, + { + value: '$covarianceSamp', + meta: META_WINDOW, + description: 'The $covarianceSamp operator returns the covariance of a sample of two numerical expressions', + snippet: '{ $covarianceSamp: "${1:\\$field}" }', + link: getDocLink('$covarianceSamp', META_WINDOW), + }, + { + value: '$denseRank', + meta: META_WINDOW, + description: + 'The $denseRank operator assigns and returns a positional ranking for each document within a partition based on a specified sort order', + snippet: '{ $denseRank: {} }', + link: getDocLink('$denseRank', META_WINDOW), + }, + { + value: '$derivative', + meta: META_WINDOW, + description: + 'The $derivative operator calculates the average rate of change of the value of a field within a specified window.', + snippet: '{ $derivative: { input: "${1:\\$field}", unit: "${2:hour}" } }', + link: getDocLink('$derivative', META_WINDOW), + }, + { + value: '$documentNumber', + meta: META_WINDOW, + description: + 'The $documentNumber operator assigns and returns a position for each document within a partition based on a specified sort order', + snippet: '{ $documentNumber: {} }', + link: getDocLink('$documentNumber', META_WINDOW), + }, + { + value: '$expMovingAvg', + meta: 
META_WINDOW, + description: + 'The $expMovingAvg operator calculates the moving average of a field based on the specified number of documents to hold the highest weight', + snippet: '{ $expMovingAvg: { input: "${1:\\$field}", N: ${2:number} } }', + link: getDocLink('$expMovingAvg', META_WINDOW), + }, + { + value: '$first', + meta: META_WINDOW, + description: "The $first operator returns the first value in a group according to the group's sorting order.", + snippet: '{ $first: "${1:\\$field}" }', + link: 'https://learn.microsoft.com/en-us/azure/documentdb/operators/accumulators/$first', // inferred from another category + }, + { + value: '$integral', + meta: META_WINDOW, + description: + 'The $integral operator calculates the area under a curve with the specified range of documents forming the adjacent documents for the calculation.', + snippet: '{ $integral: { input: "${1:\\$field}", unit: "${2:hour}" } }', + link: getDocLink('$integral', META_WINDOW), + }, + { + value: '$last', + meta: META_WINDOW, + description: 'The $last operator returns the last document from the result sorted by one or more fields', + snippet: '{ $last: "${1:\\$field}" }', + link: 'https://learn.microsoft.com/en-us/azure/documentdb/operators/accumulators/$last', // inferred from another category + }, + { + value: '$linearFill', + meta: META_WINDOW, + description: + 'The $linearFill operator interpolates missing values in a sequence of documents using linear interpolation.', + snippet: '{ $linearFill: "${1:\\$field}" }', + link: getDocLink('$linearFill', META_WINDOW), + }, + { + value: '$locf', + meta: META_WINDOW, + description: + 'The $locf operator propagates the last observed non-null value forward within a partition in a windowed query.', + snippet: '{ $locf: "${1:\\$field}" }', + link: getDocLink('$locf', META_WINDOW), + }, + { + value: '$minN', + meta: META_WINDOW, + description: 'Retrieves the bottom N values based on a specified filtering criteria', + snippet: '{ $minN: 
/**
 * Registers all window operator entries declared in this module.
 *
 * `registerOperators` receives a shallow copy, so the module-level
 * `windowOperators` array itself is never handed out.
 */
export function loadWindowOperators(): void {
    const entries = Array.from(windowOperators);
    registerOperators(entries);
}
b/packages/documentdb-constants/tsconfig.json new file mode 100644 index 000000000..8688f97ff --- /dev/null +++ b/packages/documentdb-constants/tsconfig.json @@ -0,0 +1,20 @@ +{ + "compilerOptions": { + "composite": true, + "declaration": true, + "declarationMap": true, + "module": "commonjs", + "target": "ES2023", + "lib": ["ES2023"], + "rootDir": "./src", + "outDir": "./dist", + "strict": true, + "esModuleInterop": true, + "allowSyntheticDefaultImports": true, + "skipLibCheck": true, + "resolveJsonModule": true, + "sourceMap": true + }, + "include": ["src/**/*"], + "exclude": ["node_modules", "dist"] +} diff --git a/packages/documentdb-constants/tsconfig.scripts.json b/packages/documentdb-constants/tsconfig.scripts.json new file mode 100644 index 000000000..841c83b0a --- /dev/null +++ b/packages/documentdb-constants/tsconfig.scripts.json @@ -0,0 +1,13 @@ +{ + "extends": "./tsconfig.json", + "compilerOptions": { + "composite": false, + "declaration": false, + "declarationMap": false, + "noEmit": true, + "rootDir": ".", + "types": ["node"] + }, + "include": ["scripts/**/*", "src/**/*"], + "exclude": ["node_modules", "dist", "src/**/*.test.ts", "src/**/*.spec.ts"] +} diff --git a/packages/schema-analyzer/README.md b/packages/schema-analyzer/README.md new file mode 100644 index 000000000..1efa58f49 --- /dev/null +++ b/packages/schema-analyzer/README.md @@ -0,0 +1,52 @@ +# @vscode-documentdb/schema-analyzer + +Incremental JSON Schema analyzer for DocumentDB API and MongoDB API documents. Processes documents one at a time (or in batches) and produces an extended JSON Schema with statistical metadata β€” field occurrence counts, BSON type distributions, min/max values, and array length stats. + +> **Monorepo package** β€” this package is part of the `vscode-documentdb` workspace. +> Dev dependencies (Jest, ts-jest, Prettier, TypeScript, etc.) are provided by the +> root `package.json`. 
Always install from the repository root: +> +> ```bash +> cd <repo-root> +> npm install +> ``` +> +> **Note:** This package is not yet published to npm. We plan to publish it once the API stabilizes. For now, it is consumed internally via npm workspaces within the [vscode-documentdb](https://github.com/microsoft/vscode-documentdb) repository. + +## Overview + +The `SchemaAnalyzer` incrementally builds a JSON Schema by inspecting DocumentDB API / MongoDB API documents. It is designed for scenarios where documents arrive over time (streaming, pagination) and the schema needs to evolve as new documents are observed. + +Key capabilities: + +- **Incremental analysis** — add documents one at a time or in batches; the schema updates in place. +- **BSON type awareness** — recognizes BSON types defined by the MongoDB API (`ObjectId`, `Decimal128`, `Binary`, `UUID`, etc.) and annotates them with `x-bsonType`. +- **Statistical extensions** — tracks field occurrence (`x-occurrence`), type frequency (`x-typeOccurrence`), min/max values, string lengths, array sizes, and document counts (`x-documentsInspected`). +- **Known fields extraction** — derives a flat list of known field paths with their types and occurrence probabilities, useful for autocomplete and UI rendering. +- **Version tracking & caching** — a monotonic version counter enables efficient cache invalidation for derived data like `getKnownFields()`. 
+ +## Usage + +```typescript +import { SchemaAnalyzer } from '@vscode-documentdb/schema-analyzer'; + +// Create an analyzer and feed it documents +const analyzer = new SchemaAnalyzer(); +analyzer.addDocument(doc1); +analyzer.addDocuments([doc2, doc3, doc4]); + +// Get the JSON Schema with statistical extensions +const schema = analyzer.getSchema(); + +// Get a flat list of known fields (cached, version-aware) +const fields = analyzer.getKnownFields(); +``` + +## Requirements + +- **Node.js** ≥ 18 +- **mongodb** driver ≥ 6.0.0 (peer dependency) + +## License + +[MIT](../../LICENSE.md) diff --git a/packages/schema-analyzer/jest.config.js b/packages/schema-analyzer/jest.config.js new file mode 100644 index 000000000..6aecf39aa --- /dev/null +++ b/packages/schema-analyzer/jest.config.js @@ -0,0 +1,11 @@ +/** @type {import('ts-jest').JestConfigWithTsJest} **/ +module.exports = { + // Limit workers to avoid OOM kills on machines with many cores. + // Each ts-jest worker loads the TypeScript compiler and consumes ~500MB+. 
+ maxWorkers: '50%', + testEnvironment: 'node', + testMatch: ['/test/**/*.test.ts'], + transform: { + '^.+\\.tsx?$': ['ts-jest', {}], + }, +}; diff --git a/packages/schema-analyzer/package.json b/packages/schema-analyzer/package.json new file mode 100644 index 000000000..3751cdba2 --- /dev/null +++ b/packages/schema-analyzer/package.json @@ -0,0 +1,27 @@ +{ + "name": "@vscode-documentdb/schema-analyzer", + "version": "1.0.0", + "description": "Incremental JSON Schema analyzer for DocumentDB API / MongoDB API documents with statistical extensions", + "main": "dist/index.js", + "types": "dist/index.d.ts", + "files": [ + "dist" + ], + "scripts": { + "build": "tsc -p .", + "clean": "rimraf dist tsconfig.tsbuildinfo", + "test": "jest --config jest.config.js" + }, + "repository": { + "type": "git", + "url": "https://github.com/microsoft/vscode-documentdb", + "directory": "packages/schema-analyzer" + }, + "license": "MIT", + "peerDependencies": { + "mongodb": ">=6.0.0" + }, + "dependencies": { + "denque": "~2.1.0" + } +} diff --git a/packages/schema-analyzer/src/BSONTypes.ts b/packages/schema-analyzer/src/BSONTypes.ts new file mode 100644 index 000000000..b8fb92f16 --- /dev/null +++ b/packages/schema-analyzer/src/BSONTypes.ts @@ -0,0 +1,199 @@ +/*--------------------------------------------------------------------------------------------- + * Copyright (c) Microsoft Corporation. All rights reserved. + * Licensed under the MIT License. See License.txt in the project root for license information. + *--------------------------------------------------------------------------------------------*/ + +import { + Binary, + BSONSymbol, + Code, + DBRef, + Decimal128, + Double, + Int32, + Long, + MaxKey, + MinKey, + ObjectId, + Timestamp, + UUID, +} from 'mongodb'; + +/** + * Represents the different data types that can be stored in a DocumentDB API / MongoDB API document. + * The string representation is case-sensitive and should match the MongoDB API documentation. 
/**
 * Represents the different data types that can be stored in a DocumentDB API / MongoDB API document.
 * The string representation is case-sensitive and should match the MongoDB API documentation.
 * https://www.mongodb.com/docs/manual/reference/bson-types/
 */
export enum BSONTypes {
    String = 'string',
    Number = 'number',
    Int32 = 'int32',
    Double = 'double',
    Decimal128 = 'decimal128',
    Long = 'long',
    Boolean = 'boolean',
    Object = 'object',
    Array = 'array',
    Null = 'null',
    Undefined = 'undefined',
    Date = 'date',
    RegExp = 'regexp',
    Binary = 'binary',
    ObjectId = 'objectid',
    Symbol = 'symbol',
    Timestamp = 'timestamp',
    UUID = 'uuid',
    UUID_LEGACY = 'uuid-legacy', // old UUID subtype, used in some legacy data
    MinKey = 'minkey',
    MaxKey = 'maxkey',
    DBRef = 'dbref',
    Code = 'code',
    CodeWithScope = 'codewithscope',
    Map = 'map',
    // Add any deprecated types if necessary
    _UNKNOWN_ = '_unknown_', // Catch-all for unknown types
}

/**
 * Helpers merged onto the `BSONTypes` enum: display names, JSON Schema type
 * mapping, and runtime type inference for document values.
 */
export namespace BSONTypes {
    // Human-readable label for every enum member; typed as a full Record so a
    // newly added member without a label is a compile error.
    const displayStringMap: Record<BSONTypes, string> = {
        [BSONTypes.String]: 'String',
        [BSONTypes.Number]: 'Number',
        [BSONTypes.Int32]: 'Int32',
        [BSONTypes.Double]: 'Double',
        [BSONTypes.Decimal128]: 'Decimal128',
        [BSONTypes.Long]: 'Long',
        [BSONTypes.Boolean]: 'Boolean',
        [BSONTypes.Object]: 'Object',
        [BSONTypes.Array]: 'Array',
        [BSONTypes.Null]: 'Null',
        [BSONTypes.Undefined]: 'Undefined',
        [BSONTypes.Date]: 'Date',
        [BSONTypes.RegExp]: 'RegExp',
        [BSONTypes.Binary]: 'Binary',
        [BSONTypes.ObjectId]: 'ObjectId',
        [BSONTypes.Symbol]: 'Symbol',
        [BSONTypes.Timestamp]: 'Timestamp',
        [BSONTypes.MinKey]: 'MinKey',
        [BSONTypes.MaxKey]: 'MaxKey',
        [BSONTypes.DBRef]: 'DBRef',
        [BSONTypes.Code]: 'Code',
        [BSONTypes.CodeWithScope]: 'CodeWithScope',
        [BSONTypes.Map]: 'Map',
        [BSONTypes._UNKNOWN_]: 'Unknown',
        [BSONTypes.UUID]: 'UUID',
        [BSONTypes.UUID_LEGACY]: 'UUID (Legacy)',
    };

    /**
     * Returns the human-readable label for a BSON type.
     * @param type The BSON type to label
     * @returns The display label, or `'Unknown'` when the value has no entry in the map
     */
    export function toDisplayString(type: BSONTypes): string {
        return displayStringMap[type] || 'Unknown';
    }

    /**
     * Returns the enum's underlying string value unchanged.
     * @param type The BSON type
     * @returns The case-sensitive string value of the enum member
     */
    export function toString(type: BSONTypes): string {
        return type;
    }

    /**
     * Converts a MongoDB API data type to a case-sensitive JSON data type
     * @param type The MongoDB API data type
     * @returns A corresponding JSON data type (please note: it's case sensitive)
     */
    export function toJSONType(type: BSONTypes): string {
        switch (type) {
            case BSONTypes.String:
            case BSONTypes.Symbol:
            case BSONTypes.Date:
            case BSONTypes.Timestamp:
            case BSONTypes.ObjectId:
            case BSONTypes.RegExp:
            case BSONTypes.Binary:
            case BSONTypes.Code:
            case BSONTypes.UUID:
            case BSONTypes.UUID_LEGACY:
                return 'string';

            case BSONTypes.Boolean:
                return 'boolean';

            // `Number` is the generic numeric member; it must map to 'number'
            // like the specific numeric types rather than fall to the default.
            case BSONTypes.Number:
            case BSONTypes.Int32:
            case BSONTypes.Long:
            case BSONTypes.Double:
            case BSONTypes.Decimal128:
                return 'number';

            case BSONTypes.Object:
            case BSONTypes.Map:
            case BSONTypes.DBRef:
            case BSONTypes.CodeWithScope:
                return 'object';

            case BSONTypes.Array:
                return 'array';

            case BSONTypes.Null:
            case BSONTypes.Undefined:
            case BSONTypes.MinKey:
            case BSONTypes.MaxKey:
                return 'null';

            default:
                return 'string'; // Default to string for unknown types
        }
    }

    /**
     * Accepts a value from a MongoDB API `Document` object and returns the inferred type.
     * @param value The value of a field in a MongoDB API `Document` object
     * @returns The inferred BSON type. Plain JavaScript numbers are reported as `Double`;
     *          objects that match none of the known BSON classes are reported as `Object`;
     *          values whose `typeof` is not handled (e.g. `bigint`, `function`) yield `_UNKNOWN_`.
     */
    export function inferType(value: unknown): BSONTypes {
        if (value === null) return BSONTypes.Null;
        if (value === undefined) return BSONTypes.Undefined;

        switch (typeof value) {
            case 'string':
                return BSONTypes.String;
            case 'number':
                return BSONTypes.Double; // JavaScript numbers are doubles
            case 'boolean':
                return BSONTypes.Boolean;
            case 'object':
                if (Array.isArray(value)) {
                    return BSONTypes.Array;
                }

                // Check for common BSON types first
                if (value instanceof ObjectId) return BSONTypes.ObjectId;
                if (value instanceof Int32) return BSONTypes.Int32;
                if (value instanceof Double) return BSONTypes.Double;
                if (value instanceof Date) return BSONTypes.Date;
                if (value instanceof Timestamp) return BSONTypes.Timestamp;

                // Less common types
                if (value instanceof Decimal128) return BSONTypes.Decimal128;
                if (value instanceof Long) return BSONTypes.Long;
                if (value instanceof MinKey) return BSONTypes.MinKey;
                if (value instanceof MaxKey) return BSONTypes.MaxKey;
                if (value instanceof BSONSymbol) return BSONTypes.Symbol;
                if (value instanceof DBRef) return BSONTypes.DBRef;
                if (value instanceof Map) return BSONTypes.Map;
                if (value instanceof UUID && value.sub_type === Binary.SUBTYPE_UUID) return BSONTypes.UUID;
                // Legacy UUIDs use binary subtype 3. The driver's `UUID` class is
                // constructed with subtype 4, so the legacy subtype has to be
                // detected on `Binary` rather than on `UUID`.
                if (value instanceof Binary && value.sub_type === Binary.SUBTYPE_UUID_OLD) {
                    return BSONTypes.UUID_LEGACY;
                }
                if (value instanceof Buffer || value instanceof Binary) return BSONTypes.Binary;
                if (value instanceof RegExp) return BSONTypes.RegExp;
                if (value instanceof Code) {
                    return value.scope ? BSONTypes.CodeWithScope : BSONTypes.Code;
                }

                // Default to Object if none of the above match
                return BSONTypes.Object;
            default:
                // This should never happen, but if it does, we'll catch it here
                // TODO: add telemetry somewhere to know when it happens (not here, this could get hit too often)
                return BSONTypes._UNKNOWN_;
        }
    }
}
number; } export interface JSONSchemaMap { [name: string]: JSONSchemaRef; diff --git a/src/utils/json/mongo/SchemaAnalyzer.ts b/packages/schema-analyzer/src/SchemaAnalyzer.ts similarity index 56% rename from src/utils/json/mongo/SchemaAnalyzer.ts rename to packages/schema-analyzer/src/SchemaAnalyzer.ts index 278f51fc4..8f24d532a 100644 --- a/src/utils/json/mongo/SchemaAnalyzer.ts +++ b/packages/schema-analyzer/src/SchemaAnalyzer.ts @@ -3,66 +3,125 @@ * Licensed under the MIT License. See License.txt in the project root for license information. *--------------------------------------------------------------------------------------------*/ +import Denque from 'denque'; +import { type Document, type WithId } from 'mongodb'; +import assert from 'node:assert/strict'; +import { BSONTypes } from './BSONTypes'; +import { type JSONSchema, type JSONSchemaRef } from './JSONSchema'; +import { type FieldEntry, getKnownFields as getKnownFieldsFromSchema } from './getKnownFields'; + /** - * This is an example of a JSON Schema document that will be generated from MongoDB documents. - * It's optimized for the use-case of generating a schema for a table view, the monaco editor, and schema statistics. - * - * This is a 'work in progress' and will be updated as we progress with the project. - * - * Curent focus is: - * - discovery of the document structure - * - basic pre for future statistics work + * Incremental schema analyzer for documents from the MongoDB API / DocumentDB API. * - * Future tasks: - * - statistics aggregation - * - meaningful 'description' and 'markdownDescription' - * - add more properties to the schema, incl. properties like '$id', '$schema', and enable schema sharing/download + * Analyzes documents one at a time (or in batches) and builds a cumulative + * JSON Schema with statistical extensions (x-occurrence, x-bsonType, etc.). * + * The output schema follows JSON Schema draft-07 with custom x- extensions. 
/**
 * Incremental schema analyzer for documents from the MongoDB API / DocumentDB API.
 *
 * Documents are fed in one by one (or in batches) and folded into a single
 * cumulative JSON Schema carrying statistical `x-` extensions
 * (x-occurrence, x-bsonType, etc.).
 *
 * The output schema follows JSON Schema draft-07 with custom x- extensions.
 */
export class SchemaAnalyzer {
    private _schema: JSONSchema = {};
    private _version = 0;
    private _knownFieldsCache: FieldEntry[] | null = null;
    private _knownFieldsCacheVersion = -1;

    /**
     * Monotonically increasing mutation counter. It is bumped by addDocument,
     * addDocuments and reset, so callers can keep it next to derived data and
     * recompute only when the number changes.
     */
    get version(): number {
        return this._version;
    }

    /**
     * Folds one document into the accumulated schema and bumps the version.
     */
    addDocument(document: WithId<Document>): void {
        updateSchemaWithDocumentInternal(this._schema, document);
        this._version += 1;
    }

    /**
     * Folds a batch of documents into the accumulated schema.
     * The version is bumped once for the whole batch, not once per document.
     */
    addDocuments(documents: ReadonlyArray<WithId<Document>>): void {
        for (let index = 0; index < documents.length; index++) {
            updateSchemaWithDocumentInternal(this._schema, documents[index]);
        }
        this._version += 1;
    }

    /**
     * Returns the accumulated JSON Schema.
     * This is the live internal object, not a copy — callers must not mutate it.
     */
    getSchema(): JSONSchema {
        return this._schema;
    }

    /**
     * Returns how many documents have been analyzed, read from the root
     * `x-documentsInspected` counter (0 when it is not set yet).
     */
    getDocumentCount(): number {
        const inspected = this._schema['x-documentsInspected'] as number;
        return inspected ?? 0;
    }

    /**
     * Drops all accumulated schema data and bumps the version.
     */
    reset(): void {
        this._version += 1;
        this._schema = {};
    }

    /**
     * Creates an independent analyzer holding a deep copy of the accumulated schema.
     * The copy's version starts at 0 regardless of this instance's version.
     */
    clone(): SchemaAnalyzer {
        const duplicate = new SchemaAnalyzer();
        duplicate._schema = structuredClone(this._schema);
        return duplicate;
    }

    /**
     * Returns the list of known fields derived from the schema.
     * The list is cached and rebuilt only when the version differs from the
     * one the cache was built for.
     */
    getKnownFields(): FieldEntry[] {
        const cached = this._knownFieldsCache;
        if (cached !== null && this._knownFieldsCacheVersion === this._version) {
            return cached;
        }

        const rebuilt = getKnownFieldsFromSchema(this._schema);
        this._knownFieldsCache = rebuilt;
        this._knownFieldsCacheVersion = this._version;
        return rebuilt;
    }

    /**
     * Builds an analyzer that has already consumed the given document.
     */
    static fromDocument(document: WithId<Document>): SchemaAnalyzer {
        const instance = new SchemaAnalyzer();
        instance.addDocument(document);
        return instance;
    }

    /**
     * Builds an analyzer that has already consumed the given batch of documents.
     */
    static fromDocuments(documents: ReadonlyArray<WithId<Document>>): SchemaAnalyzer {
        const instance = new SchemaAnalyzer();
        instance.addDocuments(documents);
        return instance;
    }
}
/**
 * Widens a min/max pair stored on a schema node so that it covers `value`.
 *
 * @param schema The schema node that carries the two statistics
 * @param minKey Property name of the minimum (e.g. 'x-minProperties')
 * @param maxKey Property name of the maximum (e.g. 'x-maxProperties')
 * @param value The newly observed value
 */
function updateMinMaxStats(schema: JSONSchema, minKey: string, maxKey: string, value: number): void {
    // The stat keys are dynamic strings, so access the node as a plain record.
    const stats = schema as Record<string, unknown>;

    const currentMin = stats[minKey];
    const lowersMin = currentMin === undefined || value < (currentMin as number);
    if (lowersMin) {
        stats[minKey] = value;
    }

    // Read the maximum only after the minimum has been written, as the original does.
    const currentMax = stats[maxKey];
    const raisesMax = currentMax === undefined || value > (currentMax as number);
    if (raisesMax) {
        stats[maxKey] = value;
    }
}
propertyTypeEntry: JSONSchema; // points to the entry within the 'anyOf' property of the schema - fieldValue: unknown; - pathSoFar: string; // used for debugging - }; - - // having some import/require issues with Denque atm - // prototype with an array - //const fifoQueue = new Denque(); - const fifoQueue: WorkItem[] = []; - - /** - * Push all elements from the root of the document into the queue - */ - for (const [name, value] of Object.entries(document)) { - const mongoDatatype = MongoBSONTypes.inferType(value); - - const typeEntry = { - type: MongoBSONTypes.toJSONType(mongoDatatype), - 'x-bsonType': mongoDatatype, - 'x-typeOccurrence': 1, - }; - - // please note (1/2): we're adding the type entry to the schema here - schema.properties[name] = { anyOf: [typeEntry], 'x-occurrence': 1 }; - - fifoQueue.push({ - fieldName: name, - fieldMongoType: mongoDatatype, - propertyTypeEntry: typeEntry, // please note (2/2): and we're keeping a reference to it here for further updates - fieldValue: value, - pathSoFar: name, - }); - } - - /** - * Work through the queue, adding elements to the schema as we go. 
- * This is a breadth-first search of the document, do note special - * handling on objects/arrays - */ - while (fifoQueue.length > 0) { - const item = fifoQueue.shift(); // todo, replace with a proper queue - if (item === undefined) { - // unexpected, but let's try to continue - continue; - } - - switch (item.fieldMongoType) { - case MongoBSONTypes.Object: { - const objKeys = Object.keys(item.fieldValue as object).length; - item.propertyTypeEntry['x-maxLength'] = objKeys; - item.propertyTypeEntry['x-minLength'] = objKeys; - - // prepare an entry for the object properties - item.propertyTypeEntry.properties = {}; - - for (const [name, value] of Object.entries(item.fieldValue as object)) { - const mongoDatatype = MongoBSONTypes.inferType(value); - - const typeEntry = { - type: MongoBSONTypes.toJSONType(mongoDatatype), - 'x-bsonType': mongoDatatype, - 'x-typeOccurrence': 1, - }; - - // please note (1/2): we're adding the entry to the main schema here - item.propertyTypeEntry.properties[name] = { anyOf: [typeEntry], 'x-occurrence': 1 }; - - fifoQueue.push({ - fieldName: name, - fieldMongoType: mongoDatatype, - propertyTypeEntry: typeEntry, // please note (2/2): and we're keeping a reference to it here for further updates to the schema - fieldValue: value, - pathSoFar: `${item.pathSoFar}.${item.fieldName}`, - }); - } - break; - } - case MongoBSONTypes.Array: { - const arrayLength = (item.fieldValue as unknown[]).length; - item.propertyTypeEntry['x-maxLength'] = arrayLength; - item.propertyTypeEntry['x-minLength'] = arrayLength; - - // preapare the array items entry (in two lines for ts not to compalin about the missing type later on) - item.propertyTypeEntry.items = {}; - item.propertyTypeEntry.items.anyOf = []; - - const encounteredMongoTypes: Map = new Map(); - - // iterate over the array and infer the type of each element - for (const element of item.fieldValue as unknown[]) { - const elementMongoType = MongoBSONTypes.inferType(element); - - let itemEntry: 
JSONSchema; - - if (!encounteredMongoTypes.has(elementMongoType)) { - itemEntry = { - type: MongoBSONTypes.toJSONType(elementMongoType), - 'x-bsonType': elementMongoType, - 'x-typeOccurrence': 1, // Initialize type occurrence counter - }; - item.propertyTypeEntry.items.anyOf.push(itemEntry); - encounteredMongoTypes.set(elementMongoType, itemEntry); - - initializeStatsForValue(element, elementMongoType, itemEntry); - } else { - // if we've already encountered this type, we'll just add the type to the existing entry - itemEntry = encounteredMongoTypes.get(elementMongoType) as JSONSchema; - - if (itemEntry === undefined) continue; // unexpected, but let's try to continue - - if (itemEntry['x-typeOccurrence'] !== undefined) { - itemEntry['x-typeOccurrence'] += 1; - } - - // Aggregate stats with the new value - aggregateStatsForValue(element, elementMongoType, itemEntry); - } - - // an imporant exception for arrays as we have to start adding them already now to the schema - // (if we want to avoid more iterations over the data) - if (elementMongoType === MongoBSONTypes.Object || elementMongoType === MongoBSONTypes.Array) { - fifoQueue.push({ - fieldName: '[]', // Array items don't have a field name - fieldMongoType: elementMongoType, - propertyTypeEntry: itemEntry, - fieldValue: element, - pathSoFar: `${item.pathSoFar}.${item.fieldName}.items`, - }); - } - } - - break; - } - - default: { - // For all other types, update stats for the value - initializeStatsForValue(item.fieldValue, item.fieldMongoType, item.propertyTypeEntry); - break; - } - } - } - - return schema; -} - /** * Helper function to compute stats for a value based on its MongoDB data type * Updates the provided propertyTypeEntry with the computed stats */ -function initializeStatsForValue(value: unknown, mongoType: MongoBSONTypes, propertyTypeEntry: JSONSchema): void { +function initializeStatsForValue(value: unknown, mongoType: BSONTypes, propertyTypeEntry: JSONSchema): void { switch (mongoType) { - case 
MongoBSONTypes.String: { + case BSONTypes.String: { const currentLength = (value as string).length; propertyTypeEntry['x-maxLength'] = currentLength; propertyTypeEntry['x-minLength'] = currentLength; break; } - case MongoBSONTypes.Number: - case MongoBSONTypes.Int32: - case MongoBSONTypes.Long: - case MongoBSONTypes.Double: - case MongoBSONTypes.Decimal128: { + case BSONTypes.Number: + case BSONTypes.Int32: + case BSONTypes.Long: + case BSONTypes.Double: + case BSONTypes.Decimal128: { const numericValue = Number(value); propertyTypeEntry['x-maxValue'] = numericValue; propertyTypeEntry['x-minValue'] = numericValue; break; } - case MongoBSONTypes.Boolean: { + case BSONTypes.Boolean: { const boolValue = value as boolean; propertyTypeEntry['x-trueCount'] = boolValue ? 1 : 0; propertyTypeEntry['x-falseCount'] = boolValue ? 0 : 1; break; } - case MongoBSONTypes.Date: { + case BSONTypes.Date: { const dateValue = (value as Date).getTime(); propertyTypeEntry['x-maxDate'] = dateValue; propertyTypeEntry['x-minDate'] = dateValue; break; } - case MongoBSONTypes.Binary: { + case BSONTypes.Binary: { const binaryLength = (value as Buffer).length; propertyTypeEntry['x-maxLength'] = binaryLength; propertyTypeEntry['x-minLength'] = binaryLength; break; } - case MongoBSONTypes.Null: - case MongoBSONTypes.RegExp: - case MongoBSONTypes.ObjectId: - case MongoBSONTypes.MinKey: - case MongoBSONTypes.MaxKey: - case MongoBSONTypes.Symbol: - case MongoBSONTypes.Timestamp: - case MongoBSONTypes.DBRef: - case MongoBSONTypes.Map: + case BSONTypes.Null: + case BSONTypes.RegExp: + case BSONTypes.ObjectId: + case BSONTypes.MinKey: + case BSONTypes.MaxKey: + case BSONTypes.Symbol: + case BSONTypes.Timestamp: + case BSONTypes.DBRef: + case BSONTypes.Map: // No stats computation for other types break; @@ -527,9 +444,9 @@ function initializeStatsForValue(value: unknown, mongoType: MongoBSONTypes, prop * Helper function to aggregate stats for a value based on its MongoDB data type * Used when processing 
multiple values (e.g., elements in arrays) */ -function aggregateStatsForValue(value: unknown, mongoType: MongoBSONTypes, propertyTypeEntry: JSONSchema): void { +function aggregateStatsForValue(value: unknown, mongoType: BSONTypes, propertyTypeEntry: JSONSchema): void { switch (mongoType) { - case MongoBSONTypes.String: { + case BSONTypes.String: { const currentLength = (value as string).length; // Update minLength @@ -544,11 +461,11 @@ function aggregateStatsForValue(value: unknown, mongoType: MongoBSONTypes, prope break; } - case MongoBSONTypes.Number: - case MongoBSONTypes.Int32: - case MongoBSONTypes.Long: - case MongoBSONTypes.Double: - case MongoBSONTypes.Decimal128: { + case BSONTypes.Number: + case BSONTypes.Int32: + case BSONTypes.Long: + case BSONTypes.Double: + case BSONTypes.Decimal128: { const numericValue = Number(value); // Update minValue @@ -563,7 +480,7 @@ function aggregateStatsForValue(value: unknown, mongoType: MongoBSONTypes, prope break; } - case MongoBSONTypes.Boolean: { + case BSONTypes.Boolean: { const boolValue = value as boolean; // Update trueCount and falseCount @@ -581,7 +498,7 @@ function aggregateStatsForValue(value: unknown, mongoType: MongoBSONTypes, prope break; } - case MongoBSONTypes.Date: { + case BSONTypes.Date: { const dateValue = (value as Date).getTime(); // Update minDate @@ -596,7 +513,7 @@ function aggregateStatsForValue(value: unknown, mongoType: MongoBSONTypes, prope break; } - case MongoBSONTypes.Binary: { + case BSONTypes.Binary: { const binaryLength = (value as Buffer).length; // Update minLength @@ -617,17 +534,12 @@ function aggregateStatsForValue(value: unknown, mongoType: MongoBSONTypes, prope } } -function getSchemaAtPath(schema: JSONSchema, path: string[]): JSONSchema { - let currentNode = schema; +function getSchemaAtPath(schema: JSONSchema, path: string[]): JSONSchema | undefined { + let currentNode: JSONSchema | undefined = schema; for (let i = 0; i < path.length; i++) { const key = path[i]; - // If the 
current node is an array, we should move to its `items` - // if (currentNode.type === 'array' && currentNode.items) { - // currentNode = currentNode.items; - // } - // Move to the next property in the schema if (currentNode && currentNode.properties && currentNode.properties[key]) { const nextNode: JSONSchema = currentNode.properties[key] as JSONSchema; @@ -636,13 +548,15 @@ function getSchemaAtPath(schema: JSONSchema, path: string[]): JSONSchema { * We're looking at the "Object"-one, because these have the properties we're interested in. */ if (nextNode.anyOf && nextNode.anyOf.length > 0) { - currentNode = nextNode.anyOf.find((entry: JSONSchema) => entry.type === 'object') as JSONSchema; + currentNode = nextNode.anyOf.find( + (entry: JSONSchemaRef): entry is JSONSchema => typeof entry === 'object' && entry.type === 'object', + ); } else { // we can't continue, as we're missing the next node, we abort at the last node we managed to extract return currentNode; } } else { - throw new Error(l10n.t('No properties found in the schema at path "{0}"', path.slice(0, i + 1).join('/'))); + throw new Error(`No properties found in the schema at path "${path.slice(0, i + 1).join('/')}"`); } } @@ -653,7 +567,7 @@ export function getPropertyNamesAtLevel(jsonSchema: JSONSchema, path: string[]): const headers = new Set(); // Explore the schema and apply the callback to collect headers at the specified path - const selectedSchema: JSONSchema = getSchemaAtPath(jsonSchema, path); + const selectedSchema = getSchemaAtPath(jsonSchema, path); if (selectedSchema && selectedSchema.properties) { Object.keys(selectedSchema.properties).forEach((key) => { diff --git a/src/utils/json/mongo/MongoValueFormatters.ts b/packages/schema-analyzer/src/ValueFormatters.ts similarity index 56% rename from src/utils/json/mongo/MongoValueFormatters.ts rename to packages/schema-analyzer/src/ValueFormatters.ts index 243ce2631..7f9e8e5fa 100644 --- a/src/utils/json/mongo/MongoValueFormatters.ts +++ 
b/packages/schema-analyzer/src/ValueFormatters.ts @@ -4,16 +4,16 @@ *--------------------------------------------------------------------------------------------*/ import { type Binary, type BSONRegExp, type ObjectId } from 'mongodb'; -import { MongoBSONTypes } from './MongoBSONTypes'; +import { BSONTypes } from './BSONTypes'; /** - * Converts a MongoDB value to its display string representation based on its type. + * Converts a MongoDB API value to its display string representation based on its type. * * @param value - The value to be converted to a display string. - * @param type - The MongoDB data type of the value. + * @param type - The MongoDB API data type of the value. * @returns The string representation of the value. * - * The function handles various MongoDB data types including: + * The function handles various MongoDB API data types including: * - String * - Number, Int32, Double, Decimal128, Long * - Boolean @@ -24,60 +24,60 @@ import { MongoBSONTypes } from './MongoBSONTypes'; * * For unsupported or unknown types, the function defaults to JSON stringification. 
*/ -export function valueToDisplayString(value: unknown, type: MongoBSONTypes): string { +export function valueToDisplayString(value: unknown, type: BSONTypes): string { switch (type) { - case MongoBSONTypes.String: { + case BSONTypes.String: { return value as string; } - case MongoBSONTypes.Number: - case MongoBSONTypes.Int32: - case MongoBSONTypes.Double: - case MongoBSONTypes.Decimal128: - case MongoBSONTypes.Long: { + case BSONTypes.Number: + case BSONTypes.Int32: + case BSONTypes.Double: + case BSONTypes.Decimal128: + case BSONTypes.Long: { return (value as number).toString(); } - case MongoBSONTypes.Boolean: { + case BSONTypes.Boolean: { return (value as boolean).toString(); } - case MongoBSONTypes.Date: { + case BSONTypes.Date: { return (value as Date).toISOString(); } - case MongoBSONTypes.ObjectId: { + case BSONTypes.ObjectId: { return (value as ObjectId).toHexString(); } - case MongoBSONTypes.Null: { + case BSONTypes.Null: { return 'null'; } - case MongoBSONTypes.RegExp: { + case BSONTypes.RegExp: { const v = value as BSONRegExp; return `${v.pattern} ${v.options}`; } - case MongoBSONTypes.Binary: { + case BSONTypes.Binary: { return `Binary[${(value as Binary).length()}]`; } - case MongoBSONTypes.Symbol: { + case BSONTypes.Symbol: { return (value as symbol).toString(); } - case MongoBSONTypes.Timestamp: { + case BSONTypes.Timestamp: { return (value as { toString: () => string }).toString(); } - case MongoBSONTypes.MinKey: { + case BSONTypes.MinKey: { return 'MinKey'; } - case MongoBSONTypes.MaxKey: { + case BSONTypes.MaxKey: { return 'MaxKey'; } - case MongoBSONTypes.Code: - case MongoBSONTypes.CodeWithScope: { + case BSONTypes.Code: + case BSONTypes.CodeWithScope: { return JSON.stringify(value); } - case MongoBSONTypes.Array: - case MongoBSONTypes.Object: - case MongoBSONTypes.Map: - case MongoBSONTypes.DBRef: - case MongoBSONTypes.Undefined: - case MongoBSONTypes._UNKNOWN_: + case BSONTypes.Array: + case BSONTypes.Object: + case BSONTypes.Map: + case 
BSONTypes.DBRef: + case BSONTypes.Undefined: + case BSONTypes._UNKNOWN_: default: { return JSON.stringify(value); } diff --git a/packages/schema-analyzer/src/getKnownFields.ts b/packages/schema-analyzer/src/getKnownFields.ts new file mode 100644 index 000000000..f5da314b6 --- /dev/null +++ b/packages/schema-analyzer/src/getKnownFields.ts @@ -0,0 +1,219 @@ +/*--------------------------------------------------------------------------------------------- + * Copyright (c) Microsoft Corporation. All rights reserved. + * Licensed under the MIT License. See License.txt in the project root for license information. + *--------------------------------------------------------------------------------------------*/ + +import Denque from 'denque'; +import { type JSONSchema } from './JSONSchema'; + +export interface FieldEntry { + /** Dot-notated path (e.g., "user.profile.name") */ + path: string; + /** JSON type of the dominant type entry ("string", "number", "object", "array", etc.) */ + type: string; + /** Dominant BSON type from x-bsonType on the most common type entry ("date", "objectid", "int32", etc.) */ + bsonType: string; + /** All observed BSON types for this field (for polymorphic fields) */ + bsonTypes?: string[]; + /** + * True if this field was not present in every inspected document + * (x-occurrence < parent x-documentsInspected). + * + * This is a statistical observation, not a schema constraint β€” in the MongoDB API / DocumentDB API, + * all fields are implicitly optional. + */ + isSparse?: boolean; + /** If the field is an array, the dominant element BSON type */ + arrayItemBsonType?: string; +} + +/** + * This function traverses our JSON Schema object and collects all leaf property paths + * along with their most common data types. + * + * This information is needed for auto-completion support + * + * The approach is as follows: + * - Initialize a queue with the root properties of the schema to perform a breadth-first traversal. 
+ * - While the queue is not empty: + * - Dequeue the next item, which includes the current schema node and its path. + * - Determine the most common type for the current node by looking at the 'x-typeOccurrence' field. + * - If the most common type is an object with properties: + * - Enqueue its child properties with their updated paths into the queue for further traversal. + * - Else if the most common type is a leaf type (e.g., string, number, boolean): + * - Add the current path and type to the result array as it represents a leaf property. + * - Continue this process until all nodes have been processed. + * - Return the result array containing objects with 'path' and 'type' for each leaf property. + */ +export function getKnownFields(schema: JSONSchema): FieldEntry[] { + const result: FieldEntry[] = []; + + type QueueItem = { + path: string; + schemaNode: JSONSchema; + parentDocumentsInspected: number; + }; + + const rootDocumentsInspected = (schema['x-documentsInspected'] as number) ?? 0; + const queue: Denque = new Denque(); + + // Initialize the queue with root properties + // + // Note: JSON Schema allows boolean values as schema references (true = accept all, + // false = reject all), but our SchemaAnalyzer never produces boolean refs β€” it always + // emits full schema objects. The cast to JSONSchema below is therefore safe for our + // use case. If this function were ever reused with externally-sourced schemas, a + // `typeof propSchema === 'boolean'` guard should be added here and in the nested + // property loop below. 
+ if (schema.properties) { + for (const propName of Object.keys(schema.properties)) { + const propSchema = schema.properties[propName] as JSONSchema; + queue.push({ + path: propName, + schemaNode: propSchema, + parentDocumentsInspected: rootDocumentsInspected, + }); + } + } + + while (queue.length > 0) { + const item = queue.shift(); + if (!item) continue; + + const { path, schemaNode, parentDocumentsInspected } = item; + const mostCommonTypeEntry = getMostCommonTypeEntry(schemaNode); + + if (mostCommonTypeEntry) { + if (mostCommonTypeEntry.type === 'object' && mostCommonTypeEntry.properties) { + // Not a leaf node, enqueue its properties + const objectDocumentsInspected = (mostCommonTypeEntry['x-documentsInspected'] as number) ?? 0; + for (const childName of Object.keys(mostCommonTypeEntry.properties)) { + const childSchema = mostCommonTypeEntry.properties[childName] as JSONSchema; + // TODO: Dot-delimited path concatenation is ambiguous when a field name + // itself contains a literal dot. For example, a root-level field named + // "a.b" produces path "a.b", indistinguishable from a nested field + // { a: { b: ... } }. Fields with literal dots in their names were + // prohibited before MongoDB API 3.6 and remain rare in practice. + // + // Future improvement: change `path` from `string` to `string[]` + // (segment array) to preserve the distinction between nesting and + // literal dots, pushing escaping/formatting decisions to consumers + // (TS definitions, completion items, aggregation references, etc.). + queue.push({ + path: `${path}.${childName}`, + schemaNode: childSchema, + parentDocumentsInspected: objectDocumentsInspected, + }); + } + } else { + // Leaf node, build the FieldEntry + const bsonType = (mostCommonTypeEntry['x-bsonType'] as string) ?? 
(mostCommonTypeEntry.type as string); + + const entry: FieldEntry = { + path, + type: mostCommonTypeEntry.type as string, + bsonType, + }; + + // bsonTypes: collect all distinct x-bsonType values from anyOf entries + const allBsonTypes = collectBsonTypes(schemaNode); + if (allBsonTypes.length >= 2) { + entry.bsonTypes = allBsonTypes; + } + + // isSparse: field was not observed in every document + const occurrence = (schemaNode['x-occurrence'] as number) ?? 0; + if (parentDocumentsInspected > 0 && occurrence < parentDocumentsInspected) { + entry.isSparse = true; + } + + // arrayItemBsonType: for array fields, find the dominant element type + if (mostCommonTypeEntry.type === 'array') { + const itemBsonType = getDominantArrayItemBsonType(mostCommonTypeEntry); + if (itemBsonType) { + entry.arrayItemBsonType = itemBsonType; + } + } + + result.push(entry); + } + } + } + + // Sort: _id first, then alphabetical by path + result.sort((a, b) => { + if (a.path === '_id') return -1; + if (b.path === '_id') return 1; + return a.path.localeCompare(b.path); + }); + + return result; +} + +/** + * Helper function to get the most common type entry from a schema node. + * It looks for the 'anyOf' array and selects the type with the highest 'x-typeOccurrence'. + */ +function getMostCommonTypeEntry(schemaNode: JSONSchema): JSONSchema | null { + if (schemaNode.anyOf && schemaNode.anyOf.length > 0) { + let maxOccurrence = -1; + let mostCommonTypeEntry: JSONSchema | null = null; + + for (const typeEntry of schemaNode.anyOf as JSONSchema[]) { + const occurrence = typeEntry['x-typeOccurrence'] || 0; + if (occurrence > maxOccurrence) { + maxOccurrence = occurrence; + mostCommonTypeEntry = typeEntry; + } + } + return mostCommonTypeEntry; + } else if (schemaNode.type) { + // If 'anyOf' is not present, use the 'type' field directly + return schemaNode; + } + return null; +} + +/** + * Collects all distinct x-bsonType values from a schema node's anyOf entries. 
+ * Returns them sorted alphabetically for determinism. + */ +function collectBsonTypes(schemaNode: JSONSchema): string[] { + if (!schemaNode.anyOf || schemaNode.anyOf.length === 0) { + return []; + } + + const bsonTypes = new Set(); + for (const entry of schemaNode.anyOf as JSONSchema[]) { + const bsonType = entry['x-bsonType'] as string | undefined; + if (bsonType) { + bsonTypes.add(bsonType); + } + } + + return Array.from(bsonTypes).sort(); +} + +/** + * For an array type entry, finds the dominant element BSON type by looking at + * items.anyOf and selecting the entry with the highest x-typeOccurrence. + */ +function getDominantArrayItemBsonType(arrayTypeEntry: JSONSchema): string | undefined { + const itemsSchema = arrayTypeEntry.items as JSONSchema | undefined; + if (!itemsSchema?.anyOf || itemsSchema.anyOf.length === 0) { + return undefined; + } + + let maxOccurrence = -1; + let dominantBsonType: string | undefined; + + for (const entry of itemsSchema.anyOf as JSONSchema[]) { + const occurrence = (entry['x-typeOccurrence'] as number) ?? 0; + if (occurrence > maxOccurrence) { + maxOccurrence = occurrence; + dominantBsonType = entry['x-bsonType'] as string | undefined; + } + } + + return dominantBsonType; +} diff --git a/packages/schema-analyzer/src/index.ts b/packages/schema-analyzer/src/index.ts new file mode 100644 index 000000000..871fd61f8 --- /dev/null +++ b/packages/schema-analyzer/src/index.ts @@ -0,0 +1,10 @@ +/*--------------------------------------------------------------------------------------------- + * Copyright (c) Microsoft Corporation. All rights reserved. + * Licensed under the MIT License. See License.txt in the project root for license information. 
+ *--------------------------------------------------------------------------------------------*/ + +export { BSONTypes } from './BSONTypes'; +export { getKnownFields, type FieldEntry } from './getKnownFields'; +export { type JSONSchema, type JSONSchemaMap, type JSONSchemaRef } from './JSONSchema'; +export { SchemaAnalyzer, buildFullPaths, getPropertyNamesAtLevel } from './SchemaAnalyzer'; +export { valueToDisplayString } from './ValueFormatters'; diff --git a/packages/schema-analyzer/test/SchemaAnalyzer.arrayStats.test.ts b/packages/schema-analyzer/test/SchemaAnalyzer.arrayStats.test.ts new file mode 100644 index 000000000..2669d5214 --- /dev/null +++ b/packages/schema-analyzer/test/SchemaAnalyzer.arrayStats.test.ts @@ -0,0 +1,464 @@ +/*--------------------------------------------------------------------------------------------- + * Copyright (c) Microsoft Corporation. All rights reserved. + * Licensed under the MIT License. See License.txt in the project root for license information. + *--------------------------------------------------------------------------------------------*/ + +import { ObjectId, type Document, type WithId } from 'mongodb'; +import { type JSONSchema } from '../src/JSONSchema'; +import { SchemaAnalyzer } from '../src/SchemaAnalyzer'; + +/** + * This test file investigates the array element occurrence/stats problem. + * + * The core issue: When an array contains mixed types (e.g., strings AND objects), + * `x-typeOccurrence` on the items' type entries counts individual elements across + * ALL documents, not occurrences-per-document. This makes "field presence probability" + * for nested object properties inside arrays hard to interpret. 
+ * + * Example scenario: + * doc1.data = ["a", "b", "c", {"value": 23}] → 3 strings, 1 object + * doc2.data = ["x", "y", {"value": 42, "flag": true}] → 2 strings, 1 object + * doc3.data = ["z"] → 1 string, 0 objects + * + * After processing 3 docs: + * - items.anyOf[string].x-typeOccurrence = 6 (total string elements across all docs) + * - items.anyOf[object].x-typeOccurrence = 2 (total object elements across all docs) + * - items.anyOf[object].properties.value.x-occurrence = 2 (from 2 object elements) + * - items.anyOf[object].properties.flag.x-occurrence = 1 (from 1 object element) + * + * The problem: what is items.anyOf[object].properties.value's "probability"? + * - 2/2? (present in every object element → makes sense) + * - 2/3? (present in 2 of 3 documents → misleading, doc3 has no objects at all) + * - 2/6? (present in 2 of 6 total elements → nonsensical, mixes types) + * + * There's no x-documentsInspected equivalent at the array level to anchor + * the occurrence count.
+ */ +describe('Array element occurrence analysis', () => { + it('counts element types across multiple documents', () => { + const analyzer = new SchemaAnalyzer(); + + const doc1: WithId = { + _id: new ObjectId(), + data: ['a', 'b', 'c', { value: 23 }], + }; + const doc2: WithId = { + _id: new ObjectId(), + data: ['x', 'y', { value: 42, flag: true }], + }; + const doc3: WithId = { + _id: new ObjectId(), + data: ['z'], + }; + + analyzer.addDocument(doc1); + analyzer.addDocument(doc2); + analyzer.addDocument(doc3); + const schema = analyzer.getSchema(); + + // data field: array seen in 3 docs + const dataField = schema.properties?.['data'] as JSONSchema; + expect(dataField['x-occurrence']).toBe(3); + + // The array type entry + const arrayTypeEntry = dataField.anyOf?.find((e) => (e as JSONSchema)['x-bsonType'] === 'array') as JSONSchema; + expect(arrayTypeEntry).toBeDefined(); + expect(arrayTypeEntry['x-typeOccurrence']).toBe(3); + + // Array items + const itemsSchema = arrayTypeEntry.items as JSONSchema; + const stringEntry = itemsSchema.anyOf?.find((e) => (e as JSONSchema)['x-bsonType'] === 'string') as JSONSchema; + const objectEntry = itemsSchema.anyOf?.find((e) => (e as JSONSchema)['x-bsonType'] === 'object') as JSONSchema; + + // String elements: "a","b","c","x","y","z" = 6 total + expect(stringEntry['x-typeOccurrence']).toBe(6); + + // Object elements: {value:23}, {value:42,flag:true} = 2 total + expect(objectEntry['x-typeOccurrence']).toBe(2); + + // Properties inside the object elements + const valueField = objectEntry.properties?.['value'] as JSONSchema; + const flagField = objectEntry.properties?.['flag'] as JSONSchema; + + // "value" appeared in both objects β†’ x-occurrence = 2 + expect(valueField['x-occurrence']).toBe(2); + + // "flag" appeared in 1 object β†’ x-occurrence = 1 + expect(flagField['x-occurrence']).toBe(1); + + // THE CORE QUESTION: What is the denominator for probability? 
+ // + // We know objectEntry['x-typeOccurrence'] = 2 (2 objects total across all arrays). + // So valueField probability = 2/2 = 100% (correct: every object had "value") + // And flagField probability = 1/2 = 50% (correct: half of objects had "flag") + // + // BUT: there is NO x-documentsInspected on objectEntry to formally define + // the denominator. The consumer has to know to use x-typeOccurrence as the + // denominator for nested properties inside array elements. + // + // This actually WORKS β€” the semantics are: + // "of the N objects observed inside this array, M had this property" + // + // It just isn't obvious from the schema structure. + }); + + it('tracks min/max array lengths across documents', () => { + const analyzer = new SchemaAnalyzer(); + + const doc1: WithId = { + _id: new ObjectId(), + tags: ['a', 'b', 'c'], + }; + const doc2: WithId = { + _id: new ObjectId(), + tags: ['x'], + }; + const doc3: WithId = { + _id: new ObjectId(), + tags: ['p', 'q', 'r', 's', 't'], + }; + + analyzer.addDocument(doc1); + analyzer.addDocument(doc2); + analyzer.addDocument(doc3); + const schema = analyzer.getSchema(); + + const tagsField = schema.properties?.['tags'] as JSONSchema; + const arrayEntry = tagsField.anyOf?.find((e) => (e as JSONSchema)['x-bsonType'] === 'array') as JSONSchema; + + expect(arrayEntry['x-minItems']).toBe(1); + expect(arrayEntry['x-maxItems']).toBe(5); + }); + + it('accumulates nested object properties from objects inside arrays across documents', () => { + const analyzer = new SchemaAnalyzer(); + + // doc1 has two objects with different properties in the items array + const doc1: WithId = { + _id: new ObjectId(), + items: [ + { name: 'Laptop', price: 999 }, + { name: 'Mouse', price: 29, discount: true }, + ], + }; + + // doc2 has one object with yet another property + const doc2: WithId = { + _id: new ObjectId(), + items: [{ name: 'Desk', weight: 50 }], + }; + + analyzer.addDocument(doc1); + analyzer.addDocument(doc2); + const schema = 
analyzer.getSchema(); + + const itemsField = schema.properties?.['items'] as JSONSchema; + const arrayEntry = itemsField.anyOf?.find((e) => (e as JSONSchema)['x-bsonType'] === 'array') as JSONSchema; + const objEntry = (arrayEntry.items as JSONSchema).anyOf?.find( + (e) => (e as JSONSchema)['x-bsonType'] === 'object', + ) as JSONSchema; + + const props = objEntry.properties as Record; + + // "name" appeared in all 3 object elements + expect(props['name']['x-occurrence']).toBe(3); + + // "price" appeared in 2 of 3 object elements + expect(props['price']['x-occurrence']).toBe(2); + + // "discount" appeared in 1 of 3 object elements + expect(props['discount']['x-occurrence']).toBe(1); + + // "weight" appeared in 1 of 3 object elements + expect(props['weight']['x-occurrence']).toBe(1); + + // Total object elements = 3 (2 from doc1 + 1 from doc2) + expect(objEntry['x-typeOccurrence']).toBe(3); + + // So probability interpretations: + // name: 3/3 = 100% + // price: 2/3 = 67% + // discount: 1/3 = 33% + // weight: 1/3 = 33% + // + // This is correct! x-typeOccurrence serves as the denominator. 
+ }); + + it('handles arrays that ONLY contain primitives (no occurrence complexity)', () => { + const analyzer = new SchemaAnalyzer(); + + const doc1: WithId = { + _id: new ObjectId(), + scores: [90, 85, 78], + }; + const doc2: WithId = { + _id: new ObjectId(), + scores: [100, 55], + }; + + analyzer.addDocument(doc1); + analyzer.addDocument(doc2); + const schema = analyzer.getSchema(); + + const scoresField = schema.properties?.['scores'] as JSONSchema; + const arrayEntry = scoresField.anyOf?.find((e) => (e as JSONSchema)['x-bsonType'] === 'array') as JSONSchema; + + const numEntry = (arrayEntry.items as JSONSchema).anyOf?.find( + (e) => (e as JSONSchema)['x-bsonType'] === 'double', + ) as JSONSchema; + + // 5 total numeric elements + expect(numEntry['x-typeOccurrence']).toBe(5); + + // Stats across all elements + expect(numEntry['x-minValue']).toBe(55); + expect(numEntry['x-maxValue']).toBe(100); + + // Array length stats + expect(arrayEntry['x-minItems']).toBe(2); + expect(arrayEntry['x-maxItems']).toBe(3); + }); + + it('verifies that encounteredMongoTypes map is per-document', () => { + // The encounteredMongoTypes map is created inside the Array case handler. + // It controls whether initializeStatsForValue or aggregateStatsForValue is called. + // If it's per-array-occurrence (per document), stats should initialize fresh for each doc. + // + // BUT WAIT: The map is local to the switch case, which processes ONE array per queue item. + // Multiple documents contribute different queue items, and the map is re-created for each. + // However, the stats update goes to the SAME itemEntry across documents (because + // findTypeEntry finds the existing entry). 
So: + // + // doc1.scores = [10, 20] β†’ first array processing, encounteredMongoTypes fresh + // - element 10: initializeStatsForValue (sets x-minValue=10, x-maxValue=10) + // - element 20: aggregateStatsForValue (updates x-maxValue=20) + // + // doc2.scores = [5, 30] β†’ second array processing, encounteredMongoTypes fresh + // - element 5: initializeStatsForValue ← BUT x-minValue is already 10 from doc1! + // initializeStatsForValue OVERWRITES x-minValue to 5 (correct by accident here) + // Actually let's check... initializeStatsForValue sets x-maxValue = 5 + // and x-minValue = 5. So the 20 from doc1 would be lost! + // + // This is a REAL BUG: initializeStatsForValue is called for the first occurrence + // per array, but the typeEntry already has stats from previous arrays. + + const analyzer = new SchemaAnalyzer(); + + const doc1: WithId = { + _id: new ObjectId(), + scores: [10, 20, 30], + }; + const doc2: WithId = { + _id: new ObjectId(), + scores: [5, 15], + }; + + analyzer.addDocument(doc1); + analyzer.addDocument(doc2); + const schema = analyzer.getSchema(); + + const scoresField = schema.properties?.['scores'] as JSONSchema; + const arrayEntry = scoresField.anyOf?.find((e) => (e as JSONSchema)['x-bsonType'] === 'array') as JSONSchema; + + const numEntry = (arrayEntry.items as JSONSchema).anyOf?.find( + (e) => (e as JSONSchema)['x-bsonType'] === 'double', + ) as JSONSchema; + + // Expected correct values: + // All 5 elements: 10, 20, 30, 5, 15 + // Global min = 5, global max = 30 + + // If there's a bug, doc2 processing re-initializes: + // after doc1: min=10, max=30 + // doc2 first element (5): initializeStatsForValue β†’ sets min=5, max=5 + // doc2 second element (15): aggregateStatsForValue β†’ max becomes 15 + // final: min=5, max=15 ← WRONG (lost 30 from doc1) + + // This test documents the actual behavior (might be buggy): + expect(numEntry['x-minValue']).toBe(5); + // If the bug exists, this will be 15 instead of 30: + 
expect(numEntry['x-maxValue']).toBe(30); // should be 30 if correct + }); +}); + +describe('Array probability denominator problem', () => { + it('reproduces the >100% probability bug: empty array + large array', () => { + // User scenario: + // doc1: a = [] β†’ 0 objects + // doc2: a = [{b:1}, {b:2}, ..., {b:100}] β†’ 100 objects + // + // Naively computing probability as: + // occurrence_of_b / root.x-documentsInspected = 100 / 2 = 5000% + // + // The correct probability should be: + // occurrence_of_b / objectEntry.x-typeOccurrence = 100 / 100 = 100% + // + // FIX: Set x-documentsInspected on the object type entry so the uniform + // formula `x-occurrence / parent.x-documentsInspected` works at every + // nesting level. + + const analyzer = new SchemaAnalyzer(); + + const doc1: WithId = { + _id: new ObjectId(), + a: [], // empty array + }; + + // doc2: 100 objects, each with property "b" + const objectElements: Record[] = []; + for (let i = 1; i <= 100; i++) { + objectElements.push({ b: i }); + } + const doc2: WithId = { + _id: new ObjectId(), + a: objectElements, + }; + + analyzer.addDocument(doc1); + analyzer.addDocument(doc2); + const schema = analyzer.getSchema(); + + // Root level + expect(schema['x-documentsInspected']).toBe(2); + + // Navigate to the object type entry inside the array + const aField = schema.properties?.['a'] as JSONSchema; + expect(aField['x-occurrence']).toBe(2); // both docs have 'a' + + const arrayEntry = aField.anyOf?.find((e) => (e as JSONSchema)['x-bsonType'] === 'array') as JSONSchema; + const objectEntry = (arrayEntry.items as JSONSchema).anyOf?.find( + (e) => (e as JSONSchema)['x-bsonType'] === 'object', + ) as JSONSchema; + + // 100 object elements total + expect(objectEntry['x-typeOccurrence']).toBe(100); + + // Property "b" appears in all 100 objects + const bField = objectEntry.properties?.['b'] as JSONSchema; + expect(bField['x-occurrence']).toBe(100); + + // THE FIX: objectEntry should have x-documentsInspected = 100 + // 
so that the uniform formula works: + // probability = b.x-occurrence / objectEntry.x-documentsInspected + // = 100 / 100 = 100% + expect(objectEntry['x-documentsInspected']).toBe(100); + }); + + it('correctly computes probability for sparse properties in array objects', () => { + // doc1: items = [{name:"A", price:10}, {name:"B"}] β†’ 2 objects, name in both, price in 1 + // doc2: items = [{name:"C", discount:true}] β†’ 1 object + // + // Total objects = 3 + // name: 3/3 = 100% + // price: 1/3 = 33% + // discount: 1/3 = 33% + + const analyzer = new SchemaAnalyzer(); + + const doc1: WithId = { + _id: new ObjectId(), + items: [{ name: 'A', price: 10 }, { name: 'B' }], + }; + const doc2: WithId = { + _id: new ObjectId(), + items: [{ name: 'C', discount: true }], + }; + + analyzer.addDocument(doc1); + analyzer.addDocument(doc2); + const schema = analyzer.getSchema(); + + const itemsField = schema.properties?.['items'] as JSONSchema; + const arrayEntry = itemsField.anyOf?.find((e) => (e as JSONSchema)['x-bsonType'] === 'array') as JSONSchema; + const objectEntry = (arrayEntry.items as JSONSchema).anyOf?.find( + (e) => (e as JSONSchema)['x-bsonType'] === 'object', + ) as JSONSchema; + + // The object type entry should have x-documentsInspected = 3 + expect(objectEntry['x-documentsInspected']).toBe(3); + + const props = objectEntry.properties as Record; + + // Probability = x-occurrence / x-documentsInspected (uniform formula) + expect(props['name']['x-occurrence']).toBe(3); // 3/3 = 100% + expect(props['price']['x-occurrence']).toBe(1); // 1/3 = 33% + expect(props['discount']['x-occurrence']).toBe(1); // 1/3 = 33% + }); + + it('sets x-documentsInspected on nested objects at all levels', () => { + // items: [{address: {city: "NY", zip: "10001"}}, {address: {city: "LA"}}] + // + // At items.anyOf[object] level: x-documentsInspected = 2 + // At address.anyOf[object] level: x-documentsInspected = 2 + // city: 2/2 = 100%, zip: 1/2 = 50% + + const analyzer = new 
SchemaAnalyzer(); + + const doc: WithId = { + _id: new ObjectId(), + items: [{ address: { city: 'NY', zip: '10001' } }, { address: { city: 'LA' } }], + }; + + analyzer.addDocument(doc); + const schema = analyzer.getSchema(); + + const itemsField = schema.properties?.['items'] as JSONSchema; + const arrayEntry = itemsField.anyOf?.find((e) => (e as JSONSchema)['x-bsonType'] === 'array') as JSONSchema; + const objectEntry = (arrayEntry.items as JSONSchema).anyOf?.find( + (e) => (e as JSONSchema)['x-bsonType'] === 'object', + ) as JSONSchema; + + // 2 objects in the array + expect(objectEntry['x-documentsInspected']).toBe(2); + + // address.anyOf[object] β€” the nested object type + const addressProp = objectEntry.properties?.['address'] as JSONSchema; + const addressObjEntry = addressProp.anyOf?.find( + (e) => (e as JSONSchema)['x-bsonType'] === 'object', + ) as JSONSchema; + + // Both objects had address, and both addresses were objects + expect(addressObjEntry['x-documentsInspected']).toBe(2); + + const addrProps = addressObjEntry.properties as Record; + expect(addrProps['city']['x-occurrence']).toBe(2); // 2/2 = 100% + expect(addrProps['zip']['x-occurrence']).toBe(1); // 1/2 = 50% + }); + + it('does NOT change x-documentsInspected at root level (root keeps document count)', () => { + const analyzer = new SchemaAnalyzer(); + + const doc1: WithId = { + _id: new ObjectId(), + name: 'Alice', + address: { city: 'NY' }, + }; + const doc2: WithId = { + _id: new ObjectId(), + name: 'Bob', + address: { city: 'LA', zip: '90001' }, + }; + + analyzer.addDocument(doc1); + analyzer.addDocument(doc2); + const schema = analyzer.getSchema(); + + // Root x-documentsInspected is document count, not affected by the fix + expect(schema['x-documentsInspected']).toBe(2); + + // Root-level probability still works: name.occurrence(2) / documentsInspected(2) = 100% + const nameField = schema.properties?.['name'] as JSONSchema; + expect(nameField['x-occurrence']).toBe(2); + + // Nested 
object: address.anyOf[object] should have x-documentsInspected = 2 + const addressField = schema.properties?.['address'] as JSONSchema; + const addressObjEntry = addressField.anyOf?.find( + (e) => (e as JSONSchema)['x-bsonType'] === 'object', + ) as JSONSchema; + expect(addressObjEntry['x-documentsInspected']).toBe(2); + + const addrProps = addressObjEntry.properties as Record; + expect(addrProps['city']['x-occurrence']).toBe(2); // 2/2 = 100% + expect(addrProps['zip']['x-occurrence']).toBe(1); // 1/2 = 50% + }); +}); diff --git a/packages/schema-analyzer/test/SchemaAnalyzer.test.ts b/packages/schema-analyzer/test/SchemaAnalyzer.test.ts new file mode 100644 index 000000000..f23a97bdf --- /dev/null +++ b/packages/schema-analyzer/test/SchemaAnalyzer.test.ts @@ -0,0 +1,349 @@ +/*--------------------------------------------------------------------------------------------- + * Copyright (c) Microsoft Corporation. All rights reserved. + * Licensed under the MIT License. See License.txt in the project root for license information. 
+ *--------------------------------------------------------------------------------------------*/ + +import { type JSONSchema, type JSONSchemaMap, type JSONSchemaRef } from '../src/JSONSchema'; +import { getPropertyNamesAtLevel, SchemaAnalyzer } from '../src/SchemaAnalyzer'; +import { + arraysWithDifferentDataTypes, + complexDocument, + complexDocumentsArray, + complexDocumentWithOddTypes, + embeddedDocumentOnly, + flatDocument, + sparseDocumentsArray, +} from './mongoTestDocuments'; + +describe('DocumentDB Schema Analyzer', () => { + it('prints out schema for testing', () => { + const analyzer = SchemaAnalyzer.fromDocument(embeddedDocumentOnly); + const schema = analyzer.getSchema(); + expect(schema).toBeDefined(); + }); + + it('supports many documents', () => { + const analyzer = SchemaAnalyzer.fromDocuments(sparseDocumentsArray); + const schema = analyzer.getSchema(); + expect(schema).toBeDefined(); + + // Check that 'x-documentsInspected' is correct + expect(schema['x-documentsInspected']).toBe(sparseDocumentsArray.length); + + // Check that the schema has the correct root properties + const expectedRootProperties = new Set(['_id', 'name', 'age', 'email', 'isActive', 'score', 'description']); + + expect(Object.keys(schema.properties || {})).toEqual( + expect.arrayContaining(Array.from(expectedRootProperties)), + ); + + // Check that the 'name' field is detected correctly + const nameField = schema.properties?.['name'] as JSONSchema; + expect(nameField).toBeDefined(); + expect(nameField?.['x-occurrence']).toBeGreaterThan(0); + + // Access 'anyOf' to get the type entries + const nameFieldTypes = nameField.anyOf?.map((typeEntry) => (typeEntry as JSONSchema)['type']); + expect(nameFieldTypes).toContain('string'); + + // Check that the 'age' field has the correct type + const ageField = schema.properties?.['age'] as JSONSchema; + expect(ageField).toBeDefined(); + const ageFieldTypes = ageField.anyOf?.map((typeEntry) => (typeEntry as JSONSchema)['type']); + 
expect(ageFieldTypes).toContain('number'); + + // Check that the 'isActive' field is a boolean + const isActiveField = schema.properties?.['isActive'] as JSONSchema; + expect(isActiveField).toBeDefined(); + const isActiveTypes = isActiveField.anyOf?.map((typeEntry) => (typeEntry as JSONSchema)['type']); + expect(isActiveTypes).toContain('boolean'); + + // Check that the 'description' field is optional (occurs in some documents) + const descriptionField = schema.properties?.['description'] as JSONSchema | undefined; + expect(descriptionField).toBeDefined(); + expect(descriptionField?.['x-occurrence']).toBeLessThan(sparseDocumentsArray.length); + }); + + it('detects all BSON types from flatDocument', () => { + const analyzer = SchemaAnalyzer.fromDocument(flatDocument); + const schema = analyzer.getSchema(); + + // Check that all fields are detected + const expectedFields = Object.keys(flatDocument); + expect(Object.keys(schema.properties || {})).toEqual(expect.arrayContaining(expectedFields)); + + // Helper function to get the 'x-bsonType' from a field + function getBsonType(fieldName: string): string | undefined { + const field = schema.properties?.[fieldName] as JSONSchema | undefined; + const anyOf = field?.anyOf; + return anyOf && (anyOf[0] as JSONSchema | undefined)?.['x-bsonType']; + } + + // Check that specific BSON types are correctly identified + expect(getBsonType('int32Field')).toBe('int32'); + expect(getBsonType('doubleField')).toBe('double'); + expect(getBsonType('decimalField')).toBe('decimal128'); + expect(getBsonType('dateField')).toBe('date'); + expect(getBsonType('objectIdField')).toBe('objectid'); + expect(getBsonType('codeField')).toBe('code'); + expect(getBsonType('uuidField')).toBe('uuid'); + expect(getBsonType('uuidLegacyField')).toBe('uuid-legacy'); + }); + + it('detects embedded objects correctly', () => { + const analyzer = SchemaAnalyzer.fromDocument(embeddedDocumentOnly); + const schema = analyzer.getSchema(); + + // Check that the root 
properties are detected + expect(schema.properties).toHaveProperty('personalInfo'); + expect(schema.properties).toHaveProperty('jobInfo'); + + // Access 'personalInfo' properties + const personalInfoAnyOf = + schema.properties && (schema.properties['personalInfo'] as JSONSchema | undefined)?.anyOf; + const personalInfoProperties = (personalInfoAnyOf?.[0] as JSONSchema | undefined)?.properties; + expect(personalInfoProperties).toBeDefined(); + expect(personalInfoProperties).toHaveProperty('name'); + expect(personalInfoProperties).toHaveProperty('age'); + expect(personalInfoProperties).toHaveProperty('married'); + expect(personalInfoProperties).toHaveProperty('address'); + + // Access 'address' properties within 'personalInfo' + const addressAnyOf = ((personalInfoProperties as JSONSchemaMap)['address'] as JSONSchema).anyOf; + const addressProperties = (addressAnyOf?.[0] as JSONSchema | undefined)?.properties; + expect(addressProperties).toBeDefined(); + expect(addressProperties).toHaveProperty('street'); + expect(addressProperties).toHaveProperty('city'); + expect(addressProperties).toHaveProperty('zip'); + }); + + it('detects arrays and their element types correctly', () => { + const analyzer = SchemaAnalyzer.fromDocument(arraysWithDifferentDataTypes); + const schema = analyzer.getSchema(); + + // Check that arrays are detected + expect(schema.properties).toHaveProperty('integersArray'); + expect(schema.properties).toHaveProperty('stringsArray'); + expect(schema.properties).toHaveProperty('booleansArray'); + expect(schema.properties).toHaveProperty('mixedArray'); + expect(schema.properties).toHaveProperty('datesArray'); + + // Helper function to get item types from an array field + function getArrayItemTypes(fieldName: string): string[] | undefined { + const field = schema.properties?.[fieldName] as JSONSchema | undefined; + const anyOf = field?.anyOf; + const itemsAnyOf: JSONSchemaRef[] | undefined = ( + (anyOf?.[0] as JSONSchema | undefined)?.items as JSONSchema | 
undefined + )?.anyOf; + return itemsAnyOf?.map((typeEntry) => (typeEntry as JSONSchema)['type'] as string); + } + + // Check that 'integersArray' has elements of type 'number' + const integerItemTypes = getArrayItemTypes('integersArray'); + expect(integerItemTypes).toContain('number'); + + // Check that 'stringsArray' has elements of type 'string' + const stringItemTypes = getArrayItemTypes('stringsArray'); + expect(stringItemTypes).toContain('string'); + + // Check that 'mixedArray' contains multiple types + const mixedItemTypes = getArrayItemTypes('mixedArray'); + expect(mixedItemTypes).toEqual(expect.arrayContaining(['number', 'string', 'boolean', 'object', 'null'])); + }); + + it('handles arrays within objects and objects within arrays', () => { + const analyzer = SchemaAnalyzer.fromDocument(complexDocument); + const schema = analyzer.getSchema(); + + // Access 'user.profile.hobbies' + const user = schema.properties?.['user'] as JSONSchema | undefined; + const userProfile = (user?.anyOf?.[0] as JSONSchema | undefined)?.properties?.['profile'] as + | JSONSchema + | undefined; + const hobbies = (userProfile?.anyOf?.[0] as JSONSchema | undefined)?.properties?.['hobbies'] as + | JSONSchema + | undefined; + const hobbiesItems = (hobbies?.anyOf?.[0] as JSONSchema | undefined)?.items as JSONSchema | undefined; + const hobbiesItemTypes = hobbiesItems?.anyOf?.map((typeEntry) => (typeEntry as JSONSchema).type); + expect(hobbiesItemTypes).toContain('string'); + + // Access 'user.profile.addresses' + const addresses = (userProfile?.anyOf?.[0] as JSONSchema | undefined)?.properties?.['addresses'] as + | JSONSchema + | undefined; + const addressesItems = (addresses?.anyOf?.[0] as JSONSchema | undefined)?.items as JSONSchema | undefined; + const addressItemTypes = addressesItems?.anyOf?.map((typeEntry) => (typeEntry as JSONSchema).type); + expect(addressItemTypes).toContain('object'); + + // Check that 'orders' is an array + const orders = schema.properties?.['orders'] as 
JSONSchema | undefined; + expect(orders).toBeDefined(); + const ordersType = (orders?.anyOf?.[0] as JSONSchema | undefined)?.type; + expect(ordersType).toBe('array'); + + // Access 'items' within 'orders' + const orderItemsParent = (orders?.anyOf?.[0] as JSONSchema | undefined)?.items as JSONSchema | undefined; + const orderItems = (orderItemsParent?.anyOf?.[0] as JSONSchema | undefined)?.properties?.['items'] as + | JSONSchema + | undefined; + const orderItemsType = (orderItems?.anyOf?.[0] as JSONSchema | undefined)?.type; + expect(orderItemsType).toBe('array'); + }); + + it('updates schema correctly when processing multiple documents', () => { + const analyzer = SchemaAnalyzer.fromDocuments(complexDocumentsArray); + const schema = analyzer.getSchema(); + + // Check that 'x-documentsInspected' is correct + expect(schema['x-documentsInspected']).toBe(complexDocumentsArray.length); + + // Check that some fields are present from different documents + expect(schema.properties).toHaveProperty('stringField'); + expect(schema.properties).toHaveProperty('personalInfo'); + expect(schema.properties).toHaveProperty('integersArray'); + expect(schema.properties).toHaveProperty('user'); + + // Check that 'integersArray' has correct min and max values + const integersArray = schema.properties?.['integersArray'] as JSONSchema | undefined; + const integerItemType = ((integersArray?.anyOf?.[0] as JSONSchema | undefined)?.items as JSONSchema | undefined) + ?.anyOf?.[0] as JSONSchema | undefined; + expect(integerItemType?.['x-minValue']).toBe(1); + expect(integerItemType?.['x-maxValue']).toBe(5); + + // Check that 'orders.items.price' is detected as Decimal128 + const orders2 = schema.properties?.['orders'] as JSONSchema | undefined; + const orderItemsParent2 = (orders2?.anyOf?.[0] as JSONSchema | undefined)?.items as JSONSchema | undefined; + const orderItems = (orderItemsParent2?.anyOf?.[0] as JSONSchema | undefined)?.properties?.['items'] as + | JSONSchema + | undefined; + const 
priceFieldParent = ((orderItems?.anyOf?.[0] as JSONSchema | undefined)?.items as JSONSchema | undefined) + ?.anyOf?.[0] as JSONSchema | undefined; + const priceField = priceFieldParent?.properties?.['price'] as JSONSchema | undefined; + const priceFieldType = priceField?.anyOf?.[0] as JSONSchema | undefined; + expect(priceFieldType?.['x-bsonType']).toBe('decimal128'); + }); + + describe('traverses schema', () => { + it('with valid paths', () => { + const analyzer = SchemaAnalyzer.fromDocument(complexDocument); + const schema = analyzer.getSchema(); + + let propertiesAtRoot = getPropertyNamesAtLevel(schema, []); + expect(propertiesAtRoot).toHaveLength(4); + + propertiesAtRoot = getPropertyNamesAtLevel(schema, ['user']); + expect(propertiesAtRoot).toHaveLength(3); + + propertiesAtRoot = getPropertyNamesAtLevel(schema, ['user', 'profile']); + expect(propertiesAtRoot).toHaveLength(4); + }); + + it('with broken paths', () => { + const analyzer = SchemaAnalyzer.fromDocument(complexDocument); + const schema = analyzer.getSchema(); + + const propertiesAtRoot = getPropertyNamesAtLevel(schema, []); + expect(propertiesAtRoot).toHaveLength(4); + + expect(() => getPropertyNamesAtLevel(schema, ['no-entry'])).toThrow(); + + expect(() => getPropertyNamesAtLevel(schema, ['user', 'no-entry'])).toThrow(); + }); + + it('with sparse docs and mixed types', () => { + const analyzer = new SchemaAnalyzer(); + analyzer.addDocument(complexDocument); + analyzer.addDocument(complexDocumentWithOddTypes); + const schema = analyzer.getSchema(); + + let propertiesAtRoot = getPropertyNamesAtLevel(schema, []); + expect(propertiesAtRoot).toHaveLength(4); + + propertiesAtRoot = getPropertyNamesAtLevel(schema, ['user']); + expect(propertiesAtRoot).toHaveLength(3); + expect(propertiesAtRoot).toEqual(['email', 'profile', 'username']); + + propertiesAtRoot = getPropertyNamesAtLevel(schema, ['user', 'profile']); + expect(propertiesAtRoot).toHaveLength(4); + expect(propertiesAtRoot).toEqual(['addresses', 
'firstName', 'hobbies', 'lastName']); + + propertiesAtRoot = getPropertyNamesAtLevel(schema, ['history']); + expect(propertiesAtRoot).toHaveLength(6); + }); + }); + + describe('SchemaAnalyzer class methods', () => { + it('clone() creates an independent deep copy', () => { + // Use embeddedDocumentOnly (plain JS types) to avoid structuredClone issues with BSON types + const original = SchemaAnalyzer.fromDocument(embeddedDocumentOnly); + const cloned = original.clone(); + + // Clone has the same document count + expect(cloned.getDocumentCount()).toBe(1); + + // Clone has the same properties + const originalProps = Object.keys(original.getSchema().properties || {}); + const clonedProps = Object.keys(cloned.getSchema().properties || {}); + expect(clonedProps).toEqual(originalProps); + + // Add another document to the original only + original.addDocument(arraysWithDifferentDataTypes); + expect(original.getDocumentCount()).toBe(2); + expect(cloned.getDocumentCount()).toBe(1); + + // Clone's schema was NOT affected by the mutation + const originalPropsAfter = Object.keys(original.getSchema().properties || {}); + const clonedPropsAfter = Object.keys(cloned.getSchema().properties || {}); + expect(originalPropsAfter).toContain('integersArray'); + expect(originalPropsAfter).toContain('stringsArray'); + expect(clonedPropsAfter).not.toContain('integersArray'); + expect(clonedPropsAfter).not.toContain('stringsArray'); + }); + + it('reset() clears all accumulated state', () => { + const analyzer = SchemaAnalyzer.fromDocument(flatDocument); + expect(analyzer.getDocumentCount()).toBeGreaterThan(0); + expect(Object.keys(analyzer.getSchema().properties || {})).not.toHaveLength(0); + + analyzer.reset(); + + expect(analyzer.getDocumentCount()).toBe(0); + const schema = analyzer.getSchema(); + expect(schema.properties).toBeUndefined(); + expect(schema['x-documentsInspected']).toBeUndefined(); + }); + + it('fromDocument() creates analyzer with single document', () => { + const analyzer = 
SchemaAnalyzer.fromDocument(flatDocument); + expect(analyzer.getDocumentCount()).toBe(1); + + const schema = analyzer.getSchema(); + const expectedFields = Object.keys(flatDocument); + expect(Object.keys(schema.properties || {})).toEqual(expect.arrayContaining(expectedFields)); + }); + + it('fromDocuments() creates analyzer with multiple documents', () => { + const analyzer = SchemaAnalyzer.fromDocuments(sparseDocumentsArray); + expect(analyzer.getDocumentCount()).toBe(sparseDocumentsArray.length); + + // Compare with manually-built analyzer + const manual = new SchemaAnalyzer(); + manual.addDocuments(sparseDocumentsArray); + + expect(JSON.stringify(analyzer.getSchema())).toBe(JSON.stringify(manual.getSchema())); + }); + + it('addDocuments() is equivalent to multiple addDocument() calls', () => { + const batch = new SchemaAnalyzer(); + batch.addDocuments(complexDocumentsArray); + + const sequential = new SchemaAnalyzer(); + for (const doc of complexDocumentsArray) { + sequential.addDocument(doc); + } + + expect(batch.getDocumentCount()).toBe(sequential.getDocumentCount()); + expect(JSON.stringify(batch.getSchema())).toBe(JSON.stringify(sequential.getSchema())); + }); + }); +}); diff --git a/packages/schema-analyzer/test/SchemaAnalyzer.versioning.test.ts b/packages/schema-analyzer/test/SchemaAnalyzer.versioning.test.ts new file mode 100644 index 000000000..38ef144a6 --- /dev/null +++ b/packages/schema-analyzer/test/SchemaAnalyzer.versioning.test.ts @@ -0,0 +1,663 @@ +/*--------------------------------------------------------------------------------------------- + * Copyright (c) Microsoft Corporation. All rights reserved. + * Licensed under the MIT License. See License.txt in the project root for license information. 
+ *--------------------------------------------------------------------------------------------*/ + +import { ObjectId, type Document, type WithId } from 'mongodb'; +import { type JSONSchema } from '../src/JSONSchema'; +import { SchemaAnalyzer } from '../src/SchemaAnalyzer'; + +// ------------------------------------------------------------------ +// Test fixtures +// ------------------------------------------------------------------ + +function makeDoc(fields: Record = {}): WithId { + return { _id: new ObjectId(), ...fields }; +} + +// ------------------------------------------------------------------ +// Version counter +// ------------------------------------------------------------------ +describe('SchemaAnalyzer version counter', () => { + it('starts at 0 for a new analyzer', () => { + const analyzer = new SchemaAnalyzer(); + expect(analyzer.version).toBe(0); + }); + + it('increments on addDocument()', () => { + const analyzer = new SchemaAnalyzer(); + analyzer.addDocument(makeDoc({ a: 1 })); + expect(analyzer.version).toBe(1); + + analyzer.addDocument(makeDoc({ b: 2 })); + expect(analyzer.version).toBe(2); + }); + + it('increments only once for addDocuments() (batch)', () => { + const analyzer = new SchemaAnalyzer(); + const docs = [makeDoc({ a: 1 }), makeDoc({ b: 2 }), makeDoc({ c: 3 })]; + + analyzer.addDocuments(docs); + expect(analyzer.version).toBe(1); + }); + + it('increments on reset()', () => { + const analyzer = SchemaAnalyzer.fromDocument(makeDoc({ x: 1 })); + expect(analyzer.version).toBe(1); + + analyzer.reset(); + expect(analyzer.version).toBe(2); + }); + + it('cloned analyzer starts with version 0 (independent from original)', () => { + const original = new SchemaAnalyzer(); + original.addDocument(makeDoc({ a: 1 })); + original.addDocument(makeDoc({ b: 2 })); + expect(original.version).toBe(2); + + const cloned = original.clone(); + expect(cloned.version).toBe(0); + + // Mutating the clone does not affect the original's version + 
cloned.addDocument(makeDoc({ c: 3 })); + expect(cloned.version).toBe(1); + expect(original.version).toBe(2); + }); + + it('accumulates across mixed operations', () => { + const analyzer = new SchemaAnalyzer(); + // addDocument +1 + analyzer.addDocument(makeDoc()); + expect(analyzer.version).toBe(1); + + // addDocuments +1 (batch) + analyzer.addDocuments([makeDoc(), makeDoc()]); + expect(analyzer.version).toBe(2); + + // reset +1 + analyzer.reset(); + expect(analyzer.version).toBe(3); + + // addDocument after reset +1 + analyzer.addDocument(makeDoc()); + expect(analyzer.version).toBe(4); + }); + + it('fromDocument() factory yields version 1', () => { + const analyzer = SchemaAnalyzer.fromDocument(makeDoc({ a: 1 })); + expect(analyzer.version).toBe(1); + }); + + it('fromDocuments() factory yields version 1', () => { + const analyzer = SchemaAnalyzer.fromDocuments([makeDoc(), makeDoc(), makeDoc()]); + expect(analyzer.version).toBe(1); + }); +}); + +// ------------------------------------------------------------------ +// Version-based caching (getKnownFields cache) +// ------------------------------------------------------------------ +describe('SchemaAnalyzer getKnownFields cache', () => { + it('is populated on first call to getKnownFields()', () => { + const analyzer = SchemaAnalyzer.fromDocument(makeDoc({ name: 'Alice', age: 30 })); + const fields = analyzer.getKnownFields(); + + expect(fields.length).toBeGreaterThan(0); + // Should contain _id, age, name + const paths = fields.map((f) => f.path); + expect(paths).toContain('_id'); + expect(paths).toContain('name'); + expect(paths).toContain('age'); + }); + + it('is reused when version has not changed (same reference)', () => { + const analyzer = SchemaAnalyzer.fromDocument(makeDoc({ name: 'Alice' })); + const first = analyzer.getKnownFields(); + const second = analyzer.getKnownFields(); + + // Same array reference — cache was reused, not recomputed + expect(second).toBe(first); + }); + + it('is invalidated when 
addDocument() is called', () => { + const analyzer = SchemaAnalyzer.fromDocument(makeDoc({ name: 'Alice' })); + const before = analyzer.getKnownFields(); + + analyzer.addDocument(makeDoc({ name: 'Bob', email: 'bob@test.com' })); + const after = analyzer.getKnownFields(); + + // Different reference — cache was recomputed + expect(after).not.toBe(before); + // New field should be present + expect(after.map((f) => f.path)).toContain('email'); + }); + + it('is invalidated when addDocuments() is called', () => { + const analyzer = SchemaAnalyzer.fromDocument(makeDoc({ name: 'Alice' })); + const before = analyzer.getKnownFields(); + + analyzer.addDocuments([makeDoc({ score: 42 }), makeDoc({ level: 7 })]); + const after = analyzer.getKnownFields(); + + expect(after).not.toBe(before); + const paths = after.map((f) => f.path); + expect(paths).toContain('score'); + expect(paths).toContain('level'); + }); + + it('is invalidated when reset() is called', () => { + const analyzer = SchemaAnalyzer.fromDocument(makeDoc({ name: 'Alice' })); + const before = analyzer.getKnownFields(); + expect(before.length).toBeGreaterThan(0); + + analyzer.reset(); + const after = analyzer.getKnownFields(); + + expect(after).not.toBe(before); + // After reset the schema is empty so no fields + expect(after).toHaveLength(0); + }); + + it('returns updated results after cache invalidation', () => { + const analyzer = new SchemaAnalyzer(); + // Empty analyzer → no known fields + expect(analyzer.getKnownFields()).toHaveLength(0); + + // Add first doc + analyzer.addDocument(makeDoc({ x: 1 })); + const fields1 = analyzer.getKnownFields(); + expect(fields1.map((f) => f.path)).toEqual(expect.arrayContaining(['_id', 'x'])); + + // Add second doc with new field + analyzer.addDocument(makeDoc({ x: 2, y: 'hello' })); + const fields2 = analyzer.getKnownFields(); + expect(fields2).not.toBe(fields1); + expect(fields2.map((f) => f.path)).toContain('y'); + }); + + it('clone gets its own independent cache', () => 
{ + const original = SchemaAnalyzer.fromDocument(makeDoc({ name: 'Alice' })); + const originalFields = original.getKnownFields(); + + const cloned = original.clone(); + const clonedFields = cloned.getKnownFields(); + + // Both should have the same content but be independent objects + expect(clonedFields).not.toBe(originalFields); + expect(clonedFields.map((f) => f.path)).toEqual(originalFields.map((f) => f.path)); + + // Mutating the clone should not affect the original cache + cloned.addDocument(makeDoc({ extra: true })); + const clonedFieldsAfter = cloned.getKnownFields(); + expect(clonedFieldsAfter.map((f) => f.path)).toContain('extra'); + expect(original.getKnownFields().map((f) => f.path)).not.toContain('extra'); + }); +}); + +// ------------------------------------------------------------------ +// Instances and types counting +// ------------------------------------------------------------------ +describe('SchemaAnalyzer instances and types counting', () => { + describe('x-occurrence (field instance counting)', () => { + it('counts 1 for a field present in a single document', () => { + const analyzer = SchemaAnalyzer.fromDocument(makeDoc({ name: 'Alice' })); + const schema = analyzer.getSchema(); + const nameField = schema.properties?.['name'] as JSONSchema; + expect(nameField['x-occurrence']).toBe(1); + }); + + it('counts correctly across multiple documents', () => { + const analyzer = new SchemaAnalyzer(); + analyzer.addDocument(makeDoc({ name: 'Alice', age: 30 })); + analyzer.addDocument(makeDoc({ name: 'Bob', age: 25 })); + analyzer.addDocument(makeDoc({ name: 'Carol' })); // no age + + const schema = analyzer.getSchema(); + expect((schema.properties?.['name'] as JSONSchema)['x-occurrence']).toBe(3); + expect((schema.properties?.['age'] as JSONSchema)['x-occurrence']).toBe(2); + }); + + it('counts sparse fields correctly (field missing in some documents)', () => { + const analyzer = new SchemaAnalyzer(); + analyzer.addDocument(makeDoc({ a: 1, b: 2, c: 3 
})); + analyzer.addDocument(makeDoc({ a: 10 })); // only 'a' + analyzer.addDocument(makeDoc({ a: 100, c: 300 })); // 'a' and 'c' + + const schema = analyzer.getSchema(); + expect((schema.properties?.['a'] as JSONSchema)['x-occurrence']).toBe(3); + expect((schema.properties?.['b'] as JSONSchema)['x-occurrence']).toBe(1); + expect((schema.properties?.['c'] as JSONSchema)['x-occurrence']).toBe(2); + }); + + it('counts occurrences for nested object properties', () => { + const analyzer = new SchemaAnalyzer(); + analyzer.addDocument(makeDoc({ user: { name: 'Alice', age: 30 } })); + analyzer.addDocument(makeDoc({ user: { name: 'Bob' } })); // no age + + const schema = analyzer.getSchema(); + const userField = schema.properties?.['user'] as JSONSchema; + const objectEntry = userField.anyOf?.find((e) => (e as JSONSchema).type === 'object') as JSONSchema; + + expect((objectEntry.properties?.['name'] as JSONSchema)['x-occurrence']).toBe(2); + expect((objectEntry.properties?.['age'] as JSONSchema)['x-occurrence']).toBe(1); + }); + }); + + describe('x-typeOccurrence (type counting)', () => { + it('counts type occurrences for a single-type field', () => { + const analyzer = new SchemaAnalyzer(); + analyzer.addDocument(makeDoc({ name: 'Alice' })); + analyzer.addDocument(makeDoc({ name: 'Bob' })); + analyzer.addDocument(makeDoc({ name: 'Carol' })); + + const schema = analyzer.getSchema(); + const nameField = schema.properties?.['name'] as JSONSchema; + const stringEntry = nameField.anyOf?.find( + (e) => (e as JSONSchema)['x-bsonType'] === 'string', + ) as JSONSchema; + + expect(stringEntry['x-typeOccurrence']).toBe(3); + }); + + it('counts type occurrences for polymorphic fields', () => { + const analyzer = new SchemaAnalyzer(); + analyzer.addDocument(makeDoc({ value: 'hello' })); + analyzer.addDocument(makeDoc({ value: 42 })); + analyzer.addDocument(makeDoc({ value: 'world' })); + analyzer.addDocument(makeDoc({ value: true })); + + const schema = analyzer.getSchema(); + const 
valueField = schema.properties?.['value'] as JSONSchema; + + const stringEntry = valueField.anyOf?.find( + (e) => (e as JSONSchema)['x-bsonType'] === 'string', + ) as JSONSchema; + const booleanEntry = valueField.anyOf?.find( + (e) => (e as JSONSchema)['x-bsonType'] === 'boolean', + ) as JSONSchema; + + // 2 strings, 1 number, 1 boolean + expect(stringEntry['x-typeOccurrence']).toBe(2); + expect(booleanEntry['x-typeOccurrence']).toBe(1); + + // total x-occurrence should equal sum of x-typeOccurrence values + const totalTypeOccurrence = (valueField.anyOf as JSONSchema[]).reduce( + (sum, entry) => sum + ((entry['x-typeOccurrence'] as number) ?? 0), + 0, + ); + expect(valueField['x-occurrence']).toBe(totalTypeOccurrence); + }); + + it('counts array element types across documents', () => { + const analyzer = new SchemaAnalyzer(); + analyzer.addDocument(makeDoc({ tags: ['a', 'b'] })); // 2 strings + analyzer.addDocument(makeDoc({ tags: ['c', 42] })); // 1 string + 1 number + analyzer.addDocument(makeDoc({ tags: [true] })); // 1 boolean + + const schema = analyzer.getSchema(); + const tagsField = schema.properties?.['tags'] as JSONSchema; + const arrayEntry = tagsField.anyOf?.find((e) => (e as JSONSchema)['x-bsonType'] === 'array') as JSONSchema; + const itemsSchema = arrayEntry.items as JSONSchema; + + const stringEntry = itemsSchema.anyOf?.find( + (e) => (e as JSONSchema)['x-bsonType'] === 'string', + ) as JSONSchema; + const booleanEntry = itemsSchema.anyOf?.find( + (e) => (e as JSONSchema)['x-bsonType'] === 'boolean', + ) as JSONSchema; + + // 3 string elements total: "a", "b", "c" + expect(stringEntry['x-typeOccurrence']).toBe(3); + + // 1 boolean element + expect(booleanEntry['x-typeOccurrence']).toBe(1); + }); + + it('type occurrence count equals field occurrence for a single-type field', () => { + const analyzer = new SchemaAnalyzer(); + for (let i = 0; i < 5; i++) { + analyzer.addDocument(makeDoc({ score: i * 10 })); + } + + const schema = analyzer.getSchema(); 
+ const scoreField = schema.properties?.['score'] as JSONSchema; + const typeEntries = scoreField.anyOf as JSONSchema[]; + + // Only one type, so its typeOccurrence should equal the field occurrence + expect(typeEntries).toHaveLength(1); + expect(typeEntries[0]['x-typeOccurrence']).toBe(scoreField['x-occurrence']); + }); + }); + + describe('x-documentsInspected counting', () => { + it('tracks document count at root level', () => { + const analyzer = new SchemaAnalyzer(); + analyzer.addDocument(makeDoc({ a: 1 })); + analyzer.addDocument(makeDoc({ b: 2 })); + analyzer.addDocument(makeDoc({ c: 3 })); + + expect(analyzer.getSchema()['x-documentsInspected']).toBe(3); + expect(analyzer.getDocumentCount()).toBe(3); + }); + + it('tracks object instances for nested objects', () => { + const analyzer = new SchemaAnalyzer(); + analyzer.addDocument(makeDoc({ info: { x: 1 } })); + analyzer.addDocument(makeDoc({ info: { x: 2, y: 3 } })); + + const schema = analyzer.getSchema(); + const infoField = schema.properties?.['info'] as JSONSchema; + const objectEntry = infoField.anyOf?.find( + (e) => (e as JSONSchema)['x-bsonType'] === 'object', + ) as JSONSchema; + + expect(objectEntry['x-documentsInspected']).toBe(2); + }); + + it('tracks object instances inside arrays accurately', () => { + const analyzer = new SchemaAnalyzer(); + // doc1: array with 2 objects + analyzer.addDocument(makeDoc({ items: [{ a: 1 }, { a: 2 }] })); + // doc2: array with 1 object + analyzer.addDocument(makeDoc({ items: [{ a: 3, b: 4 }] })); + + const schema = analyzer.getSchema(); + const itemsField = schema.properties?.['items'] as JSONSchema; + const arrayEntry = itemsField.anyOf?.find((e) => (e as JSONSchema)['x-bsonType'] === 'array') as JSONSchema; + const objectEntry = (arrayEntry.items as JSONSchema).anyOf?.find( + (e) => (e as JSONSchema)['x-bsonType'] === 'object', + ) as JSONSchema; + + // 3 objects total (2 from doc1, 1 from doc2) + expect(objectEntry['x-documentsInspected']).toBe(3); + // "a" 
appears in all 3 objects + expect((objectEntry.properties?.['a'] as JSONSchema)['x-occurrence']).toBe(3); + // "b" appears in 1 of 3 objects + expect((objectEntry.properties?.['b'] as JSONSchema)['x-occurrence']).toBe(1); + }); + + it('resets to 0 after reset()', () => { + const analyzer = SchemaAnalyzer.fromDocuments([makeDoc({ a: 1 }), makeDoc({ b: 2 })]); + expect(analyzer.getDocumentCount()).toBe(2); + + analyzer.reset(); + expect(analyzer.getDocumentCount()).toBe(0); + }); + }); + + describe('probability correctness (occurrence / documentsInspected)', () => { + it('yields 100% for fields present in every document', () => { + const analyzer = new SchemaAnalyzer(); + for (let i = 0; i < 10; i++) { + analyzer.addDocument(makeDoc({ name: `user-${i}` })); + } + + const schema = analyzer.getSchema(); + const occurrence = (schema.properties?.['name'] as JSONSchema)['x-occurrence'] as number; + const total = schema['x-documentsInspected'] as number; + expect(occurrence / total).toBe(1); + }); + + it('yields correct fraction for sparse fields', () => { + const analyzer = new SchemaAnalyzer(); + // 3 docs with 'a', 1 doc with 'b' + analyzer.addDocument(makeDoc({ a: 1, b: 10 })); + analyzer.addDocument(makeDoc({ a: 2 })); + analyzer.addDocument(makeDoc({ a: 3 })); + + const schema = analyzer.getSchema(); + const total = schema['x-documentsInspected'] as number; + const aOccurrence = (schema.properties?.['a'] as JSONSchema)['x-occurrence'] as number; + const bOccurrence = (schema.properties?.['b'] as JSONSchema)['x-occurrence'] as number; + + expect(aOccurrence / total).toBe(1); // 3/3 + expect(bOccurrence / total).toBeCloseTo(1 / 3); // 1/3 + }); + + it('yields correct fraction for nested objects inside arrays', () => { + const analyzer = new SchemaAnalyzer(); + analyzer.addDocument( + makeDoc({ + items: [ + { name: 'A', price: 10 }, + { name: 'B' }, // no price + ], + }), + ); + analyzer.addDocument(makeDoc({ items: [{ name: 'C', price: 20 }] })); + + const schema = 
analyzer.getSchema(); + const itemsField = schema.properties?.['items'] as JSONSchema; + const arrayEntry = itemsField.anyOf?.find((e) => (e as JSONSchema)['x-bsonType'] === 'array') as JSONSchema; + const objectEntry = (arrayEntry.items as JSONSchema).anyOf?.find( + (e) => (e as JSONSchema)['x-bsonType'] === 'object', + ) as JSONSchema; + + const denominator = objectEntry['x-documentsInspected'] as number; + const nameOccurrence = (objectEntry.properties?.['name'] as JSONSchema)['x-occurrence'] as number; + const priceOccurrence = (objectEntry.properties?.['price'] as JSONSchema)['x-occurrence'] as number; + + expect(denominator).toBe(3); // 3 objects total + expect(nameOccurrence / denominator).toBe(1); // 3/3 + expect(priceOccurrence / denominator).toBeCloseTo(2 / 3); // 2/3 + }); + }); + + describe('array and nested array counting', () => { + it('counts x-typeOccurrence for the array type entry across documents', () => { + const analyzer = new SchemaAnalyzer(); + analyzer.addDocument(makeDoc({ tags: ['a'] })); + analyzer.addDocument(makeDoc({ tags: ['b', 'c'] })); + analyzer.addDocument(makeDoc({ tags: 42 })); // not an array + + const schema = analyzer.getSchema(); + const tagsField = schema.properties?.['tags'] as JSONSchema; + + // Field seen 3 times total + expect(tagsField['x-occurrence']).toBe(3); + + const arrayEntry = tagsField.anyOf?.find((e) => (e as JSONSchema)['x-bsonType'] === 'array') as JSONSchema; + + // Array type seen 2 out of 3 times + expect(arrayEntry['x-typeOccurrence']).toBe(2); + + // x-minItems / x-maxItems tracked across array instances + expect(arrayEntry['x-minItems']).toBe(1); + expect(arrayEntry['x-maxItems']).toBe(2); + }); + + it('counts x-minItems / x-maxItems for arrays across documents', () => { + const analyzer = new SchemaAnalyzer(); + analyzer.addDocument(makeDoc({ nums: [1, 2, 3] })); // length 3 + analyzer.addDocument(makeDoc({ nums: [10] })); // length 1 + analyzer.addDocument(makeDoc({ nums: [4, 5, 6, 7, 8] })); // 
length 5 + + const schema = analyzer.getSchema(); + const numsField = schema.properties?.['nums'] as JSONSchema; + const arrayEntry = numsField.anyOf?.find((e) => (e as JSONSchema)['x-bsonType'] === 'array') as JSONSchema; + + expect(arrayEntry['x-minItems']).toBe(1); + expect(arrayEntry['x-maxItems']).toBe(5); + expect(arrayEntry['x-typeOccurrence']).toBe(3); + }); + + it('counts nested arrays (arrays within arrays)', () => { + const analyzer = new SchemaAnalyzer(); + // matrix is an array of arrays of numbers + analyzer.addDocument( + makeDoc({ + matrix: [ + [1, 2], + [3, 4, 5], + ], + }), + ); + analyzer.addDocument(makeDoc({ matrix: [[10]] })); + + const schema = analyzer.getSchema(); + const matrixField = schema.properties?.['matrix'] as JSONSchema; + const outerArrayEntry = matrixField.anyOf?.find( + (e) => (e as JSONSchema)['x-bsonType'] === 'array', + ) as JSONSchema; + + // Outer array seen in 2 documents + expect(outerArrayEntry['x-typeOccurrence']).toBe(2); + // doc1 has 2 inner arrays, doc2 has 1 + expect(outerArrayEntry['x-minItems']).toBe(1); + expect(outerArrayEntry['x-maxItems']).toBe(2); + + // Inner arrays: items type should be 'array' + const innerArrayEntry = (outerArrayEntry.items as JSONSchema).anyOf?.find( + (e) => (e as JSONSchema)['x-bsonType'] === 'array', + ) as JSONSchema; + expect(innerArrayEntry).toBeDefined(); + // 3 inner arrays total: [1,2], [3,4,5], [10] + expect(innerArrayEntry['x-typeOccurrence']).toBe(3); + // inner array lengths: 2, 3, 1 + expect(innerArrayEntry['x-minItems']).toBe(1); + expect(innerArrayEntry['x-maxItems']).toBe(3); + + // Elements inside inner arrays are numbers + const numberEntry = (innerArrayEntry.items as JSONSchema).anyOf?.find( + (e) => (e as JSONSchema).type === 'number', + ) as JSONSchema; + expect(numberEntry).toBeDefined(); + // 6 numbers total: 1,2,3,4,5,10 + expect(numberEntry['x-typeOccurrence']).toBe(6); + }); + + it('counts objects within arrays within objects (deep nesting)', () => { + const 
analyzer = new SchemaAnalyzer(); + analyzer.addDocument( + makeDoc({ + company: { + departments: [ + { name: 'Eng', employees: [{ role: 'Dev' }, { role: 'QA', level: 3 }] }, + { name: 'Sales' }, + ], + }, + }), + ); + analyzer.addDocument( + makeDoc({ + company: { + departments: [{ name: 'HR', employees: [{ role: 'Recruiter' }] }], + }, + }), + ); + + const schema = analyzer.getSchema(); + + // company is an object + const companyField = schema.properties?.['company'] as JSONSchema; + const companyObj = companyField.anyOf?.find( + (e) => (e as JSONSchema)['x-bsonType'] === 'object', + ) as JSONSchema; + expect(companyObj['x-documentsInspected']).toBe(2); + + // departments is an array inside company + const deptField = companyObj.properties?.['departments'] as JSONSchema; + const deptArrayEntry = deptField.anyOf?.find( + (e) => (e as JSONSchema)['x-bsonType'] === 'array', + ) as JSONSchema; + expect(deptArrayEntry['x-typeOccurrence']).toBe(2); + + // department objects: 2 from doc1 + 1 from doc2 = 3 + const deptObjEntry = (deptArrayEntry.items as JSONSchema).anyOf?.find( + (e) => (e as JSONSchema)['x-bsonType'] === 'object', + ) as JSONSchema; + expect(deptObjEntry['x-documentsInspected']).toBe(3); + expect(deptObjEntry['x-typeOccurrence']).toBe(3); + + // "name" in all 3 department objects, "employees" in 2 of 3 + expect((deptObjEntry.properties?.['name'] as JSONSchema)['x-occurrence']).toBe(3); + expect((deptObjEntry.properties?.['employees'] as JSONSchema)['x-occurrence']).toBe(2); + + // employees is an array inside department objects + const empField = deptObjEntry.properties?.['employees'] as JSONSchema; + const empArrayEntry = empField.anyOf?.find( + (e) => (e as JSONSchema)['x-bsonType'] === 'array', + ) as JSONSchema; + expect(empArrayEntry['x-typeOccurrence']).toBe(2); + + // employee objects: 2 from first dept + 1 from HR = 3 + const empObjEntry = (empArrayEntry.items as JSONSchema).anyOf?.find( + (e) => (e as JSONSchema)['x-bsonType'] === 'object', + ) 
as JSONSchema; + expect(empObjEntry['x-documentsInspected']).toBe(3); + + // "role" in all 3 employee objects, "level" in 1 + expect((empObjEntry.properties?.['role'] as JSONSchema)['x-occurrence']).toBe(3); + expect((empObjEntry.properties?.['level'] as JSONSchema)['x-occurrence']).toBe(1); + }); + + it('tracks mixed types inside arrays (objects + primitives)', () => { + const analyzer = new SchemaAnalyzer(); + analyzer.addDocument(makeDoc({ data: ['hello', { key: 'val' }, 42] })); + analyzer.addDocument(makeDoc({ data: [{ key: 'v2', extra: true }] })); + + const schema = analyzer.getSchema(); + const dataField = schema.properties?.['data'] as JSONSchema; + const arrayEntry = dataField.anyOf?.find((e) => (e as JSONSchema)['x-bsonType'] === 'array') as JSONSchema; + const itemsSchema = arrayEntry.items as JSONSchema; + + // string: 1, object: 2, number: 1 + const stringEntry = itemsSchema.anyOf?.find( + (e) => (e as JSONSchema)['x-bsonType'] === 'string', + ) as JSONSchema; + const objectEntry = itemsSchema.anyOf?.find( + (e) => (e as JSONSchema)['x-bsonType'] === 'object', + ) as JSONSchema; + + expect(stringEntry['x-typeOccurrence']).toBe(1); + expect(objectEntry['x-typeOccurrence']).toBe(2); + expect(objectEntry['x-documentsInspected']).toBe(2); + + // "key" in both objects, "extra" in 1 + expect((objectEntry.properties?.['key'] as JSONSchema)['x-occurrence']).toBe(2); + expect((objectEntry.properties?.['extra'] as JSONSchema)['x-occurrence']).toBe(1); + }); + }); + + describe('addDocuments vs sequential addDocument equivalence', () => { + it('produces identical occurrence counts', () => { + const docs = [makeDoc({ a: 1, b: 'x' }), makeDoc({ a: 2 }), makeDoc({ a: 3, c: true })]; + + const batch = new SchemaAnalyzer(); + batch.addDocuments(docs); + + const sequential = new SchemaAnalyzer(); + for (const doc of docs) { + sequential.addDocument(doc); + } + + const batchSchema = batch.getSchema(); + const seqSchema = sequential.getSchema(); + + // Root counts match 
+ expect(batchSchema['x-documentsInspected']).toBe(seqSchema['x-documentsInspected']); + + // Field-level occurrence counts match + for (const key of Object.keys(batchSchema.properties ?? {})) { + const batchField = batchSchema.properties?.[key] as JSONSchema; + const seqField = seqSchema.properties?.[key] as JSONSchema; + expect(batchField['x-occurrence']).toBe(seqField['x-occurrence']); + } + }); + + it('produces identical type occurrence counts', () => { + const docs = [makeDoc({ value: 'hello' }), makeDoc({ value: 42 }), makeDoc({ value: 'world' })]; + + const batch = new SchemaAnalyzer(); + batch.addDocuments(docs); + + const sequential = new SchemaAnalyzer(); + for (const doc of docs) { + sequential.addDocument(doc); + } + + // Stringify the schemas to compare their full type entry structures + expect(JSON.stringify(batch.getSchema())).toBe(JSON.stringify(sequential.getSchema())); + }); + }); +}); diff --git a/src/utils/json/mongo/mongoTestDocuments.ts b/packages/schema-analyzer/test/mongoTestDocuments.ts similarity index 100% rename from src/utils/json/mongo/mongoTestDocuments.ts rename to packages/schema-analyzer/test/mongoTestDocuments.ts diff --git a/packages/schema-analyzer/tsconfig.json b/packages/schema-analyzer/tsconfig.json new file mode 100644 index 000000000..8688f97ff --- /dev/null +++ b/packages/schema-analyzer/tsconfig.json @@ -0,0 +1,20 @@ +{ + "compilerOptions": { + "composite": true, + "declaration": true, + "declarationMap": true, + "module": "commonjs", + "target": "ES2023", + "lib": ["ES2023"], + "rootDir": "./src", + "outDir": "./dist", + "strict": true, + "esModuleInterop": true, + "allowSyntheticDefaultImports": true, + "skipLibCheck": true, + "resolveJsonModule": true, + "sourceMap": true + }, + "include": ["src/**/*"], + "exclude": ["node_modules", "dist"] +} diff --git a/src/documentdb/ClusterSession.ts b/src/documentdb/ClusterSession.ts index da81218fe..da3b5107c 100644 --- a/src/documentdb/ClusterSession.ts +++ 
b/src/documentdb/ClusterSession.ts @@ -3,11 +3,17 @@ * Licensed under the MIT License. See License.txt in the project root for license information. *--------------------------------------------------------------------------------------------*/ +import { ParseMode, parse as parseShellBSON } from '@mongodb-js/shell-bson-parser'; +import { + SchemaAnalyzer, + getPropertyNamesAtLevel, + type FieldEntry, + type JSONSchema, +} from '@vscode-documentdb/schema-analyzer'; import * as l10n from '@vscode/l10n'; import { EJSON } from 'bson'; import { ObjectId, type Document, type Filter, type WithId } from 'mongodb'; -import { type JSONSchema } from '../utils/json/JSONSchema'; -import { getPropertyNamesAtLevel, updateSchemaWithDocument } from '../utils/json/mongo/SchemaAnalyzer'; +import { ext } from '../extensionVariables'; import { getDataAtPath } from '../utils/slickgrid/mongo/toSlickGridTable'; import { toSlickGridTree, type TreeData } from '../utils/slickgrid/mongo/toSlickGridTree'; import { ClustersClient, type FindQueryParams } from './ClustersClient'; @@ -78,7 +84,7 @@ export class ClusterSession { * Updates progressively as users navigate through different pages. * Reset when the query or page size changes. */ - private _accumulatedJsonSchema: JSONSchema = {}; + private _schemaAnalyzer: SchemaAnalyzer = new SchemaAnalyzer(); /** * Tracks the highest page number that has been accumulated into the schema. @@ -161,8 +167,17 @@ export class ClusterSession { } } - // The user's query has changed, invalidate all caches - this._accumulatedJsonSchema = {}; + // The user's query has changed, invalidate all caches. + // + // NOTE: We intentionally do NOT reset the SchemaAnalyzer here. + // When a new query returns 0 results, preserving field knowledge from + // previous queries is more valuable for autocompletion than having an + // empty field list. 
The SchemaAnalyzer accumulates field data + // monotonically — new fields are added, existing field type statistics + // are enriched with each query. This means type statistics represent + // aggregated observations across queries, not a single query snapshot. + // Consumers should treat type frequency data as approximate/relative + // (e.g., "mostly String") rather than absolute percentages. this._highestPageAccumulated = 0; this._currentPageSize = null; this._currentRawDocuments = []; @@ -185,7 +200,8 @@ export class ClusterSession { private resetAccumulationIfPageSizeChanged(newPageSize: number): void { if (this._currentPageSize !== null && this._currentPageSize !== newPageSize) { // Page size changed, reset accumulation tracking - this._accumulatedJsonSchema = {}; + this._schemaAnalyzer.reset(); + ext.outputChannel.trace('[SchemaAnalyzer] Reset — page size changed'); this._highestPageAccumulated = 0; } this._currentPageSize = newPageSize; @@ -298,8 +314,12 @@ export class ClusterSession { // Since navigation is sequential and starts at page 1, we only need to track // the highest page number accumulated if (pageNumber > this._highestPageAccumulated) { - this._currentRawDocuments.map((doc) => updateSchemaWithDocument(this._accumulatedJsonSchema, doc)); + this._schemaAnalyzer.addDocuments(this._currentRawDocuments); this._highestPageAccumulated = pageNumber; + + ext.outputChannel.trace( + `[SchemaAnalyzer] Analyzed ${String(this._schemaAnalyzer.getDocumentCount())} documents, ${String(this._schemaAnalyzer.getKnownFields().length)} known fields`, + ); } return documents.length; @@ -355,7 +375,7 @@ export class ClusterSession { public getCurrentPageAsTable(path: string[]): TableData { const responsePack: TableData = { path: path, - headers: getPropertyNamesAtLevel(this._accumulatedJsonSchema, path), + headers: getPropertyNamesAtLevel(this._schemaAnalyzer.getSchema(), path), data: getDataAtPath(this._currentRawDocuments, path), }; @@ -363,7 +383,15 @@ export 
class ClusterSession { } public getCurrentSchema(): JSONSchema { - return this._accumulatedJsonSchema; + return this._schemaAnalyzer.getSchema(); + } + + /** + * Returns the cached list of known fields from the accumulated schema. + * Uses SchemaAnalyzer's version-based caching — only recomputed when the schema changes. + */ + public getKnownFields(): FieldEntry[] { + return this._schemaAnalyzer.getKnownFields(); } // ============================================================================ @@ -521,7 +549,7 @@ export class ClusterSession { * @remarks * This method uses the same BSON parsing logic as ClustersClient.runFindQuery(): * - filter is parsed with toFilterQueryObj() which handles UUID(), Date(), MinKey(), MaxKey() constructors - * - projection and sort are parsed with EJSON.parse() + * - projection and sort are parsed with parseShellBSON() in Loose mode * * Use this method when you need the actual MongoDB Document objects for query execution. * Use getCurrentFindQueryParams() when you only need the string representations. @@ -536,7 +564,9 @@ export class ClusterSession { let projectionObj: Document | undefined; if (stringParams.project && stringParams.project.trim() !== '{}') { try { - projectionObj = EJSON.parse(stringParams.project) as Document; + projectionObj = parseShellBSON(stringParams.project, { + mode: ParseMode.Loose, + }) as Document; } catch (error) { throw new Error( l10n.t('Invalid projection syntax: {0}', error instanceof Error ? error.message : String(error)), @@ -548,7 +578,9 @@ export class ClusterSession { let sortObj: Document | undefined; if (stringParams.sort && stringParams.sort.trim() !== '{}') { try { - sortObj = EJSON.parse(stringParams.sort) as Document; + sortObj = parseShellBSON(stringParams.sort, { + mode: ParseMode.Loose, + }) as Document; } catch (error) { throw new Error( l10n.t('Invalid sort syntax: {0}', error instanceof Error ? 
error.message : String(error)), diff --git a/src/documentdb/ClustersClient.ts b/src/documentdb/ClustersClient.ts index bc28cff61..2adb398f6 100644 --- a/src/documentdb/ClustersClient.ts +++ b/src/documentdb/ClustersClient.ts @@ -10,6 +10,7 @@ */ import { appendExtensionUserAgent, callWithTelemetryAndErrorHandling, parseError } from '@microsoft/vscode-azext-utils'; +import { ParseMode, parse as parseShellBSON } from '@mongodb-js/shell-bson-parser'; import * as l10n from '@vscode/l10n'; import { EJSON } from 'bson'; import { @@ -513,13 +514,15 @@ export class ClustersClient { // Parse and add projection if provided if (queryParams.project && queryParams.project.trim() !== '{}') { try { - options.projection = EJSON.parse(queryParams.project) as Document; + options.projection = parseShellBSON(queryParams.project, { + mode: ParseMode.Loose, + }) as Document; } catch (error) { const cause = error instanceof Error ? error : new Error(String(error)); throw new QueryError( 'INVALID_PROJECTION', l10n.t( - 'Invalid projection syntax: {0}. Please use valid JSON, for example: { "fieldName": 1 }', + 'Invalid projection syntax: {0}. Please use valid JSON or a DocumentDB API expression, for example: { fieldName: 1 }', cause.message, ), cause, @@ -530,13 +533,15 @@ export class ClustersClient { // Parse and add sort if provided if (queryParams.sort && queryParams.sort.trim() !== '{}') { try { - options.sort = EJSON.parse(queryParams.sort) as Document; + options.sort = parseShellBSON(queryParams.sort, { + mode: ParseMode.Loose, + }) as Document; } catch (error) { const cause = error instanceof Error ? error : new Error(String(error)); throw new QueryError( 'INVALID_SORT', l10n.t( - 'Invalid sort syntax: {0}. Please use valid JSON, for example: { "fieldName": 1 }', + 'Invalid sort syntax: {0}. 
Please use valid JSON or a DocumentDB API expression, for example: { fieldName: 1 }', cause.message, ), cause, @@ -662,13 +667,15 @@ export class ClustersClient { // Parse and add projection if provided if (queryParams.project && queryParams.project.trim() !== '{}') { try { - options.projection = EJSON.parse(queryParams.project) as Document; + options.projection = parseShellBSON(queryParams.project, { + mode: ParseMode.Loose, + }) as Document; } catch (error) { const cause = error instanceof Error ? error : new Error(String(error)); throw new QueryError( 'INVALID_PROJECTION', l10n.t( - 'Invalid projection syntax: {0}. Please use valid JSON, for example: { "fieldName": 1 }', + 'Invalid projection syntax: {0}. Please use valid JSON or a DocumentDB API expression, for example: { fieldName: 1 }', cause.message, ), cause, @@ -679,13 +686,15 @@ export class ClustersClient { // Parse and add sort if provided if (queryParams.sort && queryParams.sort.trim() !== '{}') { try { - options.sort = EJSON.parse(queryParams.sort) as Document; + options.sort = parseShellBSON(queryParams.sort, { + mode: ParseMode.Loose, + }) as Document; } catch (error) { const cause = error instanceof Error ? error : new Error(String(error)); throw new QueryError( 'INVALID_SORT', l10n.t( - 'Invalid sort syntax: {0}. Please use valid JSON, for example: { "fieldName": 1 }', + 'Invalid sort syntax: {0}. Please use valid JSON or a DocumentDB API expression, for example: { fieldName: 1 }', cause.message, ), cause, diff --git a/src/documentdb/utils/toFilterQuery.test.ts b/src/documentdb/utils/toFilterQuery.test.ts index ca8ff0352..a19caa7ef 100644 --- a/src/documentdb/utils/toFilterQuery.test.ts +++ b/src/documentdb/utils/toFilterQuery.test.ts @@ -3,7 +3,8 @@ * Licensed under the MIT License. See License.txt in the project root for license information. 
*--------------------------------------------------------------------------------------------*/ -import { MaxKey, MinKey, UUID } from 'mongodb'; +import { Binary, Decimal128, Int32, Long, ObjectId, Timestamp } from 'bson'; +import { MaxKey, MinKey } from 'mongodb'; import { QueryError } from '../errors/QueryError'; import { toFilterQueryObj } from './toFilterQuery'; @@ -29,175 +30,164 @@ jest.mock('../../extensionVariables', () => ({ }, })); -// Basic query examples -const basicQueries = [ - { input: '{ }', expected: {} }, - { input: '{ "name": "John" }', expected: { name: 'John' } }, - { input: '{ "name": "John", "age": { "$gt": 30 } }', expected: { name: 'John', age: { $gt: 30 } } }, -]; - -// BSON function examples with different variations -const bsonFunctionTestCases = [ - // UUID cases - { - type: 'UUID', - input: '{ "id": UUID("123e4567-e89b-12d3-a456-426614174000") }', - property: 'id', - expectedClass: UUID, - expectedValue: '123e4567-e89b-12d3-a456-426614174000', - }, - { - type: 'UUID with new', - input: '{ "userId": new UUID("550e8400-e29b-41d4-a716-446655440000") }', - property: 'userId', - expectedClass: UUID, - expectedValue: '550e8400-e29b-41d4-a716-446655440000', - }, - { - type: 'UUID with single quotes', - input: '{ "id": UUID(\'123e4567-e89b-12d3-a456-426614174000\') }', - property: 'id', - expectedClass: UUID, - expectedValue: '123e4567-e89b-12d3-a456-426614174000', - }, - // MinKey cases - { - type: 'MinKey', - input: '{ "start": MinKey() }', - property: 'start', - expectedClass: MinKey, - }, - { - type: 'MinKey with new', - input: '{ "min": new MinKey() }', - property: 'min', - expectedClass: MinKey, - }, - // MaxKey cases - { - type: 'MaxKey', - input: '{ "end": MaxKey() }', - property: 'end', - expectedClass: MaxKey, - }, - { - type: 'MaxKey with new', - input: '{ "max": new MaxKey() }', - property: 'max', - expectedClass: MaxKey, - }, - // Date cases - { - type: 'Date', - input: '{ "created": new Date("2023-01-01") }', - property: 
'created', - expectedClass: Date, - expectedValue: '2023-01-01T00:00:00.000Z', - }, - { - type: 'Date without new', - input: '{ "updated": Date("2023-12-31T23:59:59.999Z") }', - property: 'updated', - expectedClass: Date, - expectedValue: '2023-12-31T23:59:59.999Z', - }, -]; +describe('toFilterQuery', () => { + describe('basic queries', () => { + test('empty string returns empty object', () => { + expect(toFilterQueryObj('')).toEqual({}); + }); -// Examples of mixed BSON types -const mixedQuery = - '{ "id": UUID("123e4567-e89b-12d3-a456-426614174000"), "start": MinKey(), "end": MaxKey(), "created": new Date("2023-01-01") }'; + test('whitespace-only returns empty object', () => { + expect(toFilterQueryObj(' ')).toEqual({}); + }); -// Complex nested query -const complexQuery = - '{ "range": { "start": MinKey(), "end": MaxKey() }, "timestamp": new Date("2023-01-01"), "ids": [UUID("123e4567-e89b-12d3-a456-426614174000")] }'; + test('empty object returns empty object', () => { + expect(toFilterQueryObj('{ }')).toEqual({}); + }); -// String that contains BSON function syntax but should be treated as plain text -const textWithFunctionSyntax = '{ "userName": "A user with UUID()name and Date() format", "status": "active" }'; + test('simple string filter', () => { + expect(toFilterQueryObj('{ "name": "John" }')).toEqual({ name: 'John' }); + }); -// Error test cases -const errorTestCases = [ - { description: 'invalid JSON', input: '{ invalid json }' }, - { description: 'invalid UUID', input: '{ "id": UUID("invalid-uuid") }' }, - { description: 'invalid Date', input: '{ "date": new Date("invalid-date") }' }, - { description: 'missing parameter', input: '{ "key": UUID() }' }, -]; + test('filter with query operator', () => { + expect(toFilterQueryObj('{ "age": { "$gt": 30 } }')).toEqual({ age: { $gt: 30 } }); + }); -describe('toFilterQuery', () => { - it('converts basic query strings to objects', () => { - basicQueries.forEach((testCase) => { - 
expect(toFilterQueryObj(testCase.input)).toEqual(testCase.expected); + test('combined filter', () => { + expect(toFilterQueryObj('{ "name": "John", "age": { "$gt": 30 } }')).toEqual({ + name: 'John', + age: { $gt: 30 }, + }); }); }); - describe('BSON function support', () => { - test.each(bsonFunctionTestCases)('converts $type', ({ input, property, expectedClass, expectedValue }) => { - const result = toFilterQueryObj(input); - - expect(result).toHaveProperty(property); - expect(result[property]).toBeInstanceOf(expectedClass); - - if (expectedValue) { - if (result[property] instanceof UUID) { - // eslint-disable-next-line jest/no-conditional-expect - expect(result[property].toString()).toBe(expectedValue); - } else if (result[property] instanceof Date) { - // eslint-disable-next-line jest/no-conditional-expect - expect(result[property].toISOString()).toBe(expectedValue); - } - } + describe('relaxed syntax (new with shell-bson-parser)', () => { + test('unquoted keys', () => { + expect(toFilterQueryObj('{ count: 42 }')).toEqual({ count: 42 }); }); - }); - it('handles mixed BSON types in the same query', () => { - const result = toFilterQueryObj(mixedQuery); + test('single-quoted strings', () => { + expect(toFilterQueryObj("{ name: 'Alice' }")).toEqual({ name: 'Alice' }); + }); + + test('Math.min expression', () => { + const result = toFilterQueryObj('{ rating: Math.min(1.7, 2) }'); + expect(result).toEqual({ rating: 1.7 }); + }); - expect(result.id).toBeInstanceOf(UUID); - expect(result.start).toBeInstanceOf(MinKey); - expect(result.end).toBeInstanceOf(MaxKey); - expect(result.created).toBeInstanceOf(Date); + test('unquoted keys with nested operators', () => { + expect(toFilterQueryObj('{ age: { $gt: 25 } }')).toEqual({ age: { $gt: 25 } }); + }); - expect((result.id as UUID).toString()).toBe('123e4567-e89b-12d3-a456-426614174000'); - expect((result.created as Date).toISOString()).toBe('2023-01-01T00:00:00.000Z'); + test('mixed quoted and unquoted keys', () => { + 
expect(toFilterQueryObj('{ name: "Alice", "age": 30 }')).toEqual({ name: 'Alice', age: 30 }); + }); }); - it('handles complex nested queries with multiple BSON types', () => { - const result = toFilterQueryObj(complexQuery); + describe('BSON constructor support', () => { + test('UUID constructor', () => { + const result = toFilterQueryObj('{ id: UUID("123e4567-e89b-12d3-a456-426614174000") }'); + expect(result).toHaveProperty('id'); + // shell-bson-parser returns Binary subtype 4 for UUID + expect(result.id).toBeInstanceOf(Binary); + expect((result.id as Binary).sub_type).toBe(Binary.SUBTYPE_UUID); + }); - expect(result.range.start).toBeInstanceOf(MinKey); - expect(result.range.end).toBeInstanceOf(MaxKey); - expect(result.timestamp).toBeInstanceOf(Date); - expect(result.ids[0]).toBeInstanceOf(UUID); - }); + test('UUID with new keyword', () => { + const result = toFilterQueryObj('{ userId: new UUID("550e8400-e29b-41d4-a716-446655440000") }'); + expect(result).toHaveProperty('userId'); + expect(result.userId).toBeInstanceOf(Binary); + expect((result.userId as Binary).sub_type).toBe(Binary.SUBTYPE_UUID); + }); + + test('MinKey constructor', () => { + const result = toFilterQueryObj('{ start: MinKey() }'); + expect(result).toHaveProperty('start'); + expect(result.start).toBeInstanceOf(MinKey); + }); - it('does not process BSON function calls within string values', () => { - const result = toFilterQueryObj(textWithFunctionSyntax); - expect(result).toEqual({ - userName: 'A user with UUID()name and Date() format', - status: 'active', + test('MaxKey constructor', () => { + const result = toFilterQueryObj('{ end: MaxKey() }'); + expect(result).toHaveProperty('end'); + expect(result.end).toBeInstanceOf(MaxKey); + }); + + test('Date constructor', () => { + const result = toFilterQueryObj('{ created: new Date("2023-01-01") }'); + expect(result).toHaveProperty('created'); + expect(result.created).toBeInstanceOf(Date); + expect((result.created as 
Date).toISOString()).toBe('2023-01-01T00:00:00.000Z'); + }); + + test('ObjectId constructor', () => { + const result = toFilterQueryObj('{ _id: ObjectId("507f1f77bcf86cd799439011") }'); + expect(result).toHaveProperty('_id'); + expect(result._id).toBeInstanceOf(ObjectId); + }); + + test('ISODate constructor', () => { + const result = toFilterQueryObj('{ ts: ISODate("2024-01-01") }'); + expect(result).toHaveProperty('ts'); + expect(result.ts).toBeInstanceOf(Date); + }); + + test('Decimal128 constructor', () => { + const result = toFilterQueryObj('{ val: Decimal128("1.23") }'); + expect(result).toHaveProperty('val'); + expect(result.val).toBeInstanceOf(Decimal128); + }); + + test('NumberInt constructor', () => { + const result = toFilterQueryObj('{ n: NumberInt(42) }'); + expect(result).toHaveProperty('n'); + expect(result.n).toBeInstanceOf(Int32); + }); + + test('NumberLong constructor', () => { + const result = toFilterQueryObj('{ n: NumberLong(42) }'); + expect(result).toHaveProperty('n'); + expect(result.n).toBeInstanceOf(Long); + }); + + test('Timestamp constructor', () => { + const result = toFilterQueryObj('{ ts: Timestamp(1, 1) }'); + expect(result).toHaveProperty('ts'); + expect(result.ts).toBeInstanceOf(Timestamp); }); }); - describe('error handling', () => { - test.each(errorTestCases)('throws QueryError for $description', ({ input }) => { - expect(() => toFilterQueryObj(input)).toThrow(QueryError); + describe('mixed BSON types', () => { + test('multiple BSON constructors in one query', () => { + const result = toFilterQueryObj( + '{ id: UUID("123e4567-e89b-12d3-a456-426614174000"), start: MinKey(), end: MaxKey(), created: new Date("2023-01-01") }', + ); + + expect(result.id).toBeInstanceOf(Binary); + expect((result.id as Binary).sub_type).toBe(Binary.SUBTYPE_UUID); + expect(result.start).toBeInstanceOf(MinKey); + expect(result.end).toBeInstanceOf(MaxKey); + expect(result.created).toBeInstanceOf(Date); }); - it('throws QueryError with INVALID_FILTER code 
for invalid JSON', () => { - let thrownError: QueryError | undefined; - try { - toFilterQueryObj('{ invalid json }'); - } catch (error) { - thrownError = error as QueryError; - } - expect(thrownError).toBeDefined(); - expect(thrownError?.name).toBe('QueryError'); - expect(thrownError?.code).toBe('INVALID_FILTER'); + test('nested BSON constructors', () => { + const result = toFilterQueryObj( + '{ range: { start: MinKey(), end: MaxKey() }, timestamp: new Date("2023-01-01") }', + ); + + expect(result.range.start).toBeInstanceOf(MinKey); + expect(result.range.end).toBeInstanceOf(MaxKey); + expect(result.timestamp).toBeInstanceOf(Date); + }); + }); + + describe('error handling', () => { + test('throws QueryError for invalid syntax', () => { + expect(() => toFilterQueryObj('{ invalid json }')).toThrow(QueryError); }); - it('throws QueryError with INVALID_FILTER code for invalid UUID', () => { + test('throws QueryError with INVALID_FILTER code', () => { let thrownError: QueryError | undefined; try { - toFilterQueryObj('{ "id": UUID("invalid-uuid") }'); + toFilterQueryObj('not valid at all'); } catch (error) { thrownError = error as QueryError; } @@ -206,10 +196,10 @@ describe('toFilterQuery', () => { expect(thrownError?.code).toBe('INVALID_FILTER'); }); - it('includes original error message in QueryError message', () => { + test('error message contains "Invalid filter syntax"', () => { let thrownError: QueryError | undefined; try { - toFilterQueryObj('{ invalid json }'); + toFilterQueryObj('not valid'); } catch (error) { thrownError = error as QueryError; } @@ -217,16 +207,15 @@ describe('toFilterQuery', () => { expect(thrownError?.message).toContain('Invalid filter syntax'); }); - it('includes helpful JSON example in error message', () => { + test('error message contains helpful example', () => { let thrownError: QueryError | undefined; try { - toFilterQueryObj('{ invalid json }'); + toFilterQueryObj('not valid'); } catch (error) { thrownError = error as QueryError; } 
expect(thrownError).toBeDefined(); - expect(thrownError?.message).toContain('Please use valid JSON'); - expect(thrownError?.message).toContain('"name": "value"'); + expect(thrownError?.message).toContain('name: "value"'); }); }); }); diff --git a/src/documentdb/utils/toFilterQuery.ts b/src/documentdb/utils/toFilterQuery.ts index 807f18858..1cbb67a15 100644 --- a/src/documentdb/utils/toFilterQuery.ts +++ b/src/documentdb/utils/toFilterQuery.ts @@ -3,227 +3,38 @@ * Licensed under the MIT License. See License.txt in the project root for license information. *--------------------------------------------------------------------------------------------*/ -import { EJSON } from 'bson'; -import { UUID, type Document, type Filter } from 'mongodb'; +import { ParseMode, parse as parseShellBSON } from '@mongodb-js/shell-bson-parser'; +import { type Document, type Filter } from 'mongodb'; import * as vscode from 'vscode'; import { QueryError } from '../errors/QueryError'; +/** + * Parses a user-provided filter query string into a DocumentDB filter object. + * + * Uses `@mongodb-js/shell-bson-parser` in Loose mode, which supports: + * - Unquoted keys: `{ name: 1 }` + * - Single-quoted strings: `{ name: 'Alice' }` + * - BSON constructors: `ObjectId("...")`, `UUID("...")`, `ISODate("...")`, etc. + * - JS expressions: `Math.min(1.7, 2)`, `Date.now()`, arithmetic + * - MongoDB Extended JSON: `{ "$oid": "..." }` + * + * Replaces the previous hand-rolled regex-based converter + EJSON.parse pipeline. + */ export function toFilterQueryObj(queryString: string): Filter { try { - // Convert pseudo-JavaScript style BSON constructor calls into Extended JSON that EJSON can parse. - // Example: { "id": UUID("...") } -> { "id": {"$uuid":"..."} } - const extendedJsonQuery = convertToExtendedJson(queryString); - // EJSON.parse will turn Extended JSON into native BSON/JS types (UUID, Date, etc.). 
- return EJSON.parse(extendedJsonQuery) as Filter; - } catch (error) { if (queryString.trim().length === 0) { return {} as Filter; } - + return parseShellBSON(queryString, { mode: ParseMode.Loose }) as Filter; + } catch (error) { const cause = error instanceof Error ? error : new Error(String(error)); throw new QueryError( 'INVALID_FILTER', vscode.l10n.t( - 'Invalid filter syntax: {0}. Please use valid JSON, for example: { "name": "value" }', + 'Invalid filter syntax: {0}. Please use valid JSON or a DocumentDB API expression, for example: { name: "value" }', cause.message, ), cause, ); } } - -/** - * Walks the raw query text and rewrites BSON-like constructor calls (UUID, MinKey, MaxKey, Date) - * into MongoDB Extended JSON fragments while deliberately skipping anything that appears inside - * string literals (so user text containing e.g. "UUID(" is not transformed). - * - * This is intentionally lightweight and avoids a full JS / JSON parser to keep latency low inside - * the query input UX. Future improvements may replace this with a tokenizer / parser for richer - * validation and diagnostics. - */ -function convertToExtendedJson(query: string): string { - // Phase 1: Precompute which character positions are inside a (single or double quoted) string. - // This lets the replacement pass stay simple and branchless for non‑string regions. - const isInString = markStringLiterals(query); - - // Phase 2: Scan + rewrite BSON-like calls only when not inside a string literal. - let result = ''; - let i = 0; - while (i < query.length) { - if (isInString[i]) { - // Inside a user string literal – copy verbatim. - result += query[i]; - i += 1; - continue; - } - - const remaining = query.slice(i); - - // UUID(...) - const uuidMatch = matchUUID(remaining); - if (uuidMatch) { - const { raw, uuidString } = uuidMatch; - try { - // Validate early so we fail fast instead of producing malformed Extended JSON. - // (Instantiation is enough to validate format.) 
- new UUID(uuidString); - } catch { - throw new Error(`Invalid UUID: ${uuidString}`); - } - result += `{"$uuid":"${uuidString}"}`; - i += raw.length; - continue; - } - - // MinKey() - const minKeyMatch = matchMinKey(remaining); - if (minKeyMatch) { - result += '{"$minKey":1}'; - i += minKeyMatch.raw.length; - continue; - } - - // MaxKey() - const maxKeyMatch = matchMaxKey(remaining); - if (maxKeyMatch) { - result += '{"$maxKey":1}'; - i += maxKeyMatch.raw.length; - continue; - } - - // Date("...") - const dateMatch = matchDate(remaining); - if (dateMatch) { - const { raw, dateString } = dateMatch; - const date = new Date(dateString); - if (Number.isNaN(date.getTime())) { - throw new Error(`Invalid date: ${dateString}`); - } - result += `{"$date":"${dateString}"}`; - i += raw.length; - continue; - } - - // Fallback: copy one character. - result += query[i]; - i += 1; - } - - return result; -} - -/** - * markStringLiterals - * - * Lightweight pass to flag which character indices are inside a quoted string. - * - * Supported: - * - Single quotes '...' - * - Double quotes "..." - * - Escapes inside those strings via backslash (\" or \') - * - * Not a full JSON validator: - * - Does not detect malformed / unclosed strings (those will just mark to end) - * - Does not handle template literals (not valid JSON anyway) - * - * Rationale: - * This is intentionally simple and fast. It exists to prevent accidental rewriting of text - * inside user-provided string values (e.g. "note: call UUID('x') later") while we still accept - * a relaxed JSON-ish syntax for convenience. If the query authoring experience is expanded - * (linting, richer autocomplete, tolerant recovery) we can replace this with a proper tokenizer. 
- */ -function markStringLiterals(input: string): boolean[] { - const isInString: boolean[] = new Array(input.length).fill(false) as boolean[]; - let inString = false; - let currentQuote: '"' | "'" | null = null; - let escapeNext = false; - - for (let i = 0; i < input.length; i++) { - const ch = input[i]; - - if (escapeNext) { - // Current char is escaped; treat it as plain content inside the string. - isInString[i] = inString; - escapeNext = false; - continue; - } - - if (inString) { - // Inside a string: mark and handle escapes / termination. - isInString[i] = true; - if (ch === '\\') { - escapeNext = true; - } else if (ch === currentQuote) { - inString = false; - currentQuote = null; - } - continue; - } - - // Not currently in a string – only a quote can start one. - if (ch === '"' || ch === "'") { - inString = true; - currentQuote = ch as '"' | "'"; - isInString[i] = true; - continue; - } - - // Outside of strings. - isInString[i] = false; - } - - return isInString; -} - -// --- Regex constants for BSON-like constructor calls --- - -/** - * Matches UUID constructor calls, e.g. UUID("...") or new UUID('...'), case-insensitive. - * Captures the quoted UUID string. - * Pattern details: - * - Optional "new" prefix with whitespace: (?:new\s+)? - * - "uuid" keyword, case-insensitive - * - Optional whitespace before and inside parentheses - * - Quoted string (single or double quotes) as argument, captured in group 1 - */ -const UUID_REGEX = /^(?:new\s+)?uuid\s*\(\s*["']([^"']+)["']\s*\)/i; - -/** - * Matches MinKey constructor calls, e.g. MinKey() or new MinKey(), case-insensitive. - * No arguments. - */ -const MIN_KEY_REGEX = /^(?:new\s+)?minkey\s*\(\s*\)/i; - -/** - * Matches MaxKey constructor calls, e.g. MaxKey() or new MaxKey(), case-insensitive. - * No arguments. - */ -const MAX_KEY_REGEX = /^(?:new\s+)?maxkey\s*\(\s*\)/i; - -/** - * Matches Date constructor calls, e.g. Date("...") or new Date('...'), case-insensitive. - * Captures the quoted date string. 
- * Pattern details: - * - Optional "new" prefix with whitespace: (?:new\s+)? - * - "date" keyword, case-insensitive - * - Optional whitespace before and inside parentheses - * - Quoted string (single or double quotes) as argument, captured in group 1 - */ -const DATE_REGEX = /^(?:new\s+)?date\s*\(\s*["']([^"']+)["']\s*\)/i; - -function matchUUID(src: string): { raw: string; uuidString: string } | undefined { - const m = UUID_REGEX.exec(src); - return m ? { raw: m[0], uuidString: m[1] } : undefined; -} -function matchMinKey(src: string): { raw: string } | undefined { - const m = MIN_KEY_REGEX.exec(src); - return m ? { raw: m[0] } : undefined; -} -function matchMaxKey(src: string): { raw: string } | undefined { - const m = MAX_KEY_REGEX.exec(src); - return m ? { raw: m[0] } : undefined; -} -function matchDate(src: string): { raw: string; dateString: string } | undefined { - const m = DATE_REGEX.exec(src); - return m ? { raw: m[0], dateString: m[1] } : undefined; -} diff --git a/src/utils/json/data-api/autocomplete/future-work.md b/src/utils/json/data-api/autocomplete/future-work.md new file mode 100644 index 000000000..660113c7d --- /dev/null +++ b/src/utils/json/data-api/autocomplete/future-work.md @@ -0,0 +1,161 @@ +# Autocomplete β€” Future Work + +Outstanding TODOs flagged in code during the schema transformer implementation (PR #506). +These must be resolved before the completion providers ship to users. + +--- + +## ~~1. `SPECIAL_CHARS_PATTERN` is incomplete + `insertText` quoting doesn't escape~~ βœ… RESOLVED + +**Resolved in:** PR #506 (commit addressing copilot review comment) + +Replaced `SPECIAL_CHARS_PATTERN` with `JS_IDENTIFIER_PATTERN` β€” a proper identifier validity check. +Added `\` β†’ `\\` and `"` β†’ `\"` escaping when quoting `insertText`. +Tests cover dashes, brackets, digits, embedded quotes, and backslashes. + +--- + +## 2. 
`referenceText` is invalid MQL for special field names + +**Severity:** Medium — will generate broken aggregation expressions +**File:** `toFieldCompletionItems.ts` — `referenceText` construction +**When to fix:** Before the aggregation completion provider is wired up + +### Problem + +`referenceText` is always `$${entry.path}` (e.g., `$address.city`). In MQL, the `$field.path` syntax only works when every segment is a valid identifier without dots, spaces, or `$`. For field names like `order-items`, `a.b`, or `my field`, the `$` prefix syntax produces invalid references. + +### Examples + +| Field name | Current `referenceText` | Valid? | Correct MQL | +| ------------------- | ----------------------- | -------------- | ------------------------------------ | +| `age` | `$age` | ✅ | `$age` | +| `address.city` | `$address.city` | ✅ (nested) | `$address.city` | +| `order-items` | `$order-items` | ❌ | `{ $getField: "order-items" }` | +| `a.b` (literal dot) | `$a.b` | ❌ (ambiguous) | `{ $getField: { $literal: "a.b" } }` | +| `my field` | `$my field` | ❌ | `{ $getField: "my field" }` | + +### Proposed approaches + +**Option A — Make `referenceText` optional:** Return `undefined` for fields that can't use `$`-prefix syntax. The completion provider would omit the reference suggestion for those fields. + +**Option B — Use `$getField` for special names:** + +```typescript +referenceText: needsQuoting + ? `{ $getField: "${escaped}" }` + : `$${entry.path}`, +``` + +**Option C — Provide both forms:** Add a `referenceTextRaw` (always `$path`) and `referenceTextSafe` (uses `$getField` when needed). Let the completion provider choose based on context. + +**Recommendation:** Option B is pragmatic. Option C is more flexible if we later need to support both forms in different contexts (e.g., `$match` vs `$project`). + +--- + +## 3. 
`FieldEntry.path` dot-concatenation is ambiguous for literal dots + +**Severity:** Low (rare in practice) — fields with literal dots were prohibited before MongoDB API 3.6 +**File:** `getKnownFields.ts` — path concatenation at ``path: `${path}.${childName}` `` +**When to fix:** When we encounter real-world schemas with literal dots, or during the next `FieldEntry` interface revision + +### Problem + +Paths are built by concatenating segments with `.` as separator. A root-level field named `"a.b"` produces `path: "a.b"`, which is indistinguishable from a nested field `{ a: { b: ... } }`. + +This ambiguity flows downstream to all consumers: `toTypeScriptDefinition`, `toFieldCompletionItems`, `generateDescriptions`, and any future completion provider. + +### Examples + +| Document shape | Resulting `path` | Ambiguous? | +| --------------------- | ---------------- | ----------------------------- | +| `{ a: { b: 1 } }` | `"a.b"` | — | +| `{ "a.b": 1 }` | `"a.b"` | ✅ Same as above | +| `{ x: { "y.z": 1 } }` | `"x.y.z"` | ✅ Looks like 3-level nesting | + +### Proposed fix + +Change `FieldEntry.path` from `string` to `string[]` (segment array): + +```typescript +// Before +interface FieldEntry { + path: string; // "address.city" + ... +} + +// After +interface FieldEntry { + path: string[]; // ["address", "city"] + ... +} +``` + +Each consumer then formats the path for its own context: + +- **TypeScript definitions:** Already use schema `properties` keys directly (no change needed there) +- **Completion items:** `entry.path.join('.')` for display, bracket notation for special segments +- **Aggregation references:** `$` + segments joined with `.`, or `$getField` chains for special segments + +### Impact + +This is a **breaking change** to the `FieldEntry` interface. 
Affected consumers: + +- `toFieldCompletionItems.ts` +- `toTypeScriptDefinition.ts` (indirect — uses schema, not FieldEntry paths) +- `generateDescriptions.ts` (uses schema, not FieldEntry paths) +- `collectionViewRouter.ts` (imports `FieldEntry` type) +- `ClusterSession.ts` (imports `FieldEntry` type) +- `generateMongoFindJsonSchema.ts` (imports `FieldEntry` type) +- `SchemaAnalyzer.ts` (returns `FieldEntry[]` via `getKnownFields`) + +**Recommendation:** Defer until the completion provider is built. The ambiguity only matters for fields with literal dots, which are uncommon. When fixing, do it as a single atomic change across all consumers. + +--- + +## 4. TypeScript definition output references undeclared BSON type names + +**Severity:** Low — the TS definition is for display/hover only, not compiled or type-checked +**File:** `toTypeScriptDefinition.ts` — `bsonToTypeScriptMap` +**When to fix:** Before the TS definition is used in a context where type correctness matters (e.g., Monaco intellisense with an actual TS language service) + +### Problem + +The BSON-to-TypeScript type mapping emits non-built-in type names such as `ObjectId`, `Binary`, `Timestamp`, `MinKey`, `MaxKey`, `Code`, `DBRef`, and `UUID`. These are MongoDB API BSON driver types, but the generated definition string doesn't include `import` statements or `declare` stubs for them. + +If the output is ever fed to a TypeScript compiler or language service (e.g., Monaco with full TS checking), it will report "Cannot find name 'ObjectId'" etc. + +### Current state + +The generated output is used for documentation/hover display only — it's rendered as syntax-highlighted text, not compiled. So this is purely cosmetic today. + +### Proposed fix (when needed) + +**Option A — Emit `import type`:** + +```typescript +import type { ObjectId, Binary, Timestamp, MinKey, MaxKey, Code, DBRef, UUID } from 'mongodb'; +``` + +Only include types that actually appear in the schema. 
+ +**Option B β€” Emit `declare type` stubs:** + +```typescript +declare type ObjectId = { toString(): string }; +declare type Binary = { length(): number }; +// ... etc. +``` + +Lightweight, no dependency on the `mongodb` package. + +**Option C β€” Map everything to primitive types:** + +```typescript +ObjectId β†’ string // (its string representation) +Binary β†’ Uint8Array +Timestamp β†’ { t: number; i: number } +``` + +Loses semantic precision but avoids the undeclared-type problem entirely. + +**Recommendation:** Option A is the most correct approach. Collect the set of non-built-in types actually used in the schema, then prepend a single `import type` line. Defer until the output is consumed by a real TS language service. diff --git a/src/utils/json/data-api/autocomplete/generateDescriptions.test.ts b/src/utils/json/data-api/autocomplete/generateDescriptions.test.ts new file mode 100644 index 000000000..32a103431 --- /dev/null +++ b/src/utils/json/data-api/autocomplete/generateDescriptions.test.ts @@ -0,0 +1,210 @@ +/*--------------------------------------------------------------------------------------------- + * Copyright (c) Microsoft Corporation. All rights reserved. + * Licensed under the MIT License. See License.txt in the project root for license information. 
+ *--------------------------------------------------------------------------------------------*/ + +import { type JSONSchema } from '@vscode-documentdb/schema-analyzer'; +import { generateDescriptions } from './generateDescriptions'; + +describe('generateDescriptions', () => { + it('adds descriptions with type and percentage for simple document', () => { + const schema: JSONSchema = { + 'x-documentsInspected': 100, + properties: { + name: { + 'x-occurrence': 100, + anyOf: [ + { + type: 'string', + 'x-bsonType': 'string', + 'x-typeOccurrence': 100, + }, + ], + }, + }, + }; + + generateDescriptions(schema); + + const nameSchema = schema.properties?.name as JSONSchema; + expect(nameSchema.description).toBe('String Β· 100%'); + }); + + it('includes min/max stats for numeric fields', () => { + const schema: JSONSchema = { + 'x-documentsInspected': 100, + properties: { + age: { + 'x-occurrence': 95, + anyOf: [ + { + type: 'number', + 'x-bsonType': 'int32', + 'x-typeOccurrence': 95, + 'x-minValue': 18, + 'x-maxValue': 95, + }, + ], + }, + }, + }; + + generateDescriptions(schema); + + const ageSchema = schema.properties?.age as JSONSchema; + expect(ageSchema.description).toBe('Int32 Β· 95% Β· range: 18–95'); + }); + + it('includes length stats for string fields', () => { + const schema: JSONSchema = { + 'x-documentsInspected': 100, + properties: { + name: { + 'x-occurrence': 100, + anyOf: [ + { + type: 'string', + 'x-bsonType': 'string', + 'x-typeOccurrence': 100, + 'x-minLength': 3, + 'x-maxLength': 50, + }, + ], + }, + }, + }; + + generateDescriptions(schema); + + const nameSchema = schema.properties?.name as JSONSchema; + expect(nameSchema.description).toBe('String Β· 100% Β· length: 3–50'); + }); + + it('includes date range stats for date fields', () => { + const minDate = new Date('2020-01-01T00:00:00.000Z').getTime(); + const maxDate = new Date('2024-12-31T00:00:00.000Z').getTime(); + + const schema: JSONSchema = { + 'x-documentsInspected': 100, + properties: { + 
createdAt: { + 'x-occurrence': 80, + anyOf: [ + { + type: 'string', + 'x-bsonType': 'date', + 'x-typeOccurrence': 80, + 'x-minDate': minDate, + 'x-maxDate': maxDate, + }, + ], + }, + }, + }; + + generateDescriptions(schema); + + const createdAtSchema = schema.properties?.createdAt as JSONSchema; + expect(createdAtSchema.description).toBe('Date Β· 80% Β· range: 2020-01-01 – 2024-12-31'); + }); + + it('includes true/false counts for boolean fields', () => { + const schema: JSONSchema = { + 'x-documentsInspected': 100, + properties: { + active: { + 'x-occurrence': 100, + anyOf: [ + { + type: 'boolean', + 'x-bsonType': 'boolean', + 'x-typeOccurrence': 100, + 'x-trueCount': 80, + 'x-falseCount': 20, + }, + ], + }, + }, + }; + + generateDescriptions(schema); + + const activeSchema = schema.properties?.active as JSONSchema; + expect(activeSchema.description).toBe('Boolean Β· 100% Β· true: 80, false: 20'); + }); + + it('handles nested object fields (descriptions at nested level)', () => { + const schema: JSONSchema = { + 'x-documentsInspected': 100, + properties: { + address: { + 'x-occurrence': 100, + anyOf: [ + { + type: 'object', + 'x-bsonType': 'object', + 'x-typeOccurrence': 100, + 'x-documentsInspected': 100, + properties: { + city: { + 'x-occurrence': 100, + anyOf: [ + { + type: 'string', + 'x-bsonType': 'string', + 'x-typeOccurrence': 100, + 'x-minLength': 2, + 'x-maxLength': 30, + }, + ], + }, + }, + }, + ], + }, + }, + }; + + generateDescriptions(schema); + + // The parent (address) should also get a description + const addressSchema = schema.properties?.address as JSONSchema; + expect(addressSchema.description).toBe('Object Β· 100%'); + + // The nested city should get its own description + const addressTypeEntry = (addressSchema.anyOf as JSONSchema[])[0]; + const citySchema = addressTypeEntry.properties?.city as JSONSchema; + expect(citySchema.description).toBe('String Β· 100% Β· length: 2–30'); + }); + + it('handles polymorphic fields (shows multiple types)', 
() => { + const schema: JSONSchema = { + 'x-documentsInspected': 100, + properties: { + value: { + 'x-occurrence': 95, + anyOf: [ + { + type: 'number', + 'x-bsonType': 'int32', + 'x-typeOccurrence': 60, + 'x-minValue': 1, + 'x-maxValue': 100, + }, + { + type: 'string', + 'x-bsonType': 'string', + 'x-typeOccurrence': 35, + }, + ], + }, + }, + }; + + generateDescriptions(schema); + + const valueSchema = schema.properties?.value as JSONSchema; + // Dominant type first, then secondary + expect(valueSchema.description).toBe('Int32 | String Β· 95% Β· range: 1–100'); + }); +}); diff --git a/src/utils/json/data-api/autocomplete/generateDescriptions.ts b/src/utils/json/data-api/autocomplete/generateDescriptions.ts new file mode 100644 index 000000000..2f4f28867 --- /dev/null +++ b/src/utils/json/data-api/autocomplete/generateDescriptions.ts @@ -0,0 +1,218 @@ +/*--------------------------------------------------------------------------------------------- + * Copyright (c) Microsoft Corporation. All rights reserved. + * Licensed under the MIT License. See License.txt in the project root for license information. + *--------------------------------------------------------------------------------------------*/ + +import { BSONTypes, type JSONSchema } from '@vscode-documentdb/schema-analyzer'; +import Denque from 'denque'; + +/** + * Work item for BFS traversal of the schema tree. + */ +interface WorkItem { + schemaNode: JSONSchema; + parentDocumentsInspected: number; +} + +/** + * Post-processor that mutates the schema in-place, adding human-readable + * `description` strings to each property node. Descriptions include: + * - Dominant type name(s) + * - Occurrence percentage (based on `x-occurrence / parentDocumentsInspected`) + * - Type-specific stats (length, range, true/false counts, etc.) + * + * Uses BFS to traverse all property levels. 
/**
 * Post-processor that mutates `schema` in place, attaching a human-readable `description`
 * string to each property node it visits.
 *
 * A description is built from up to three parts, joined with the separator literal used in
 * the final `join` call:
 *  1. the display names of every type listed in the node's `anyOf`, most frequent first,
 *     joined with `' | '`;
 *  2. the occurrence percentage `x-occurrence / parentDocumentsInspected`, rounded to a whole
 *     number (omitted when the parent inspected no documents);
 *  3. statistics of the dominant type entry, see `getStatString`.
 *
 * Traversal is breadth-first. A node's children are visited only through its dominant type
 * entry, and only when that entry has `type === 'object'` and carries `properties`.
 *
 * @param schema Root schema node; its `properties` are annotated in place.
 */
export function generateDescriptions(schema: JSONSchema): void {
    const rootDocumentsInspected = (schema['x-documentsInspected'] as number) ?? 0;

    const queue = new Denque<WorkItem>();

    // Seed the queue with root-level properties.
    if (schema.properties) {
        for (const propName of Object.keys(schema.properties)) {
            const propSchema = schema.properties[propName] as JSONSchema;
            if (typeof propSchema === 'boolean') continue;

            queue.push({
                schemaNode: propSchema,
                parentDocumentsInspected: rootDocumentsInspected,
            });
        }
    }

    while (queue.length > 0) {
        const item = queue.shift();
        if (!item) continue;

        const { schemaNode, parentDocumentsInspected } = item;
        const parts: string[] = [];

        // Part 1: every observed type, dominant one first.
        const typeNames = collectTypeDisplayNames(schemaNode);
        if (typeNames.length > 0) {
            parts.push(typeNames.join(' | '));
        }

        // Part 2: occurrence percentage; skipped for a zero parent count to avoid dividing by 0.
        if (parentDocumentsInspected > 0) {
            const occurrence = (schemaNode['x-occurrence'] as number) ?? 0;
            const percentage = ((occurrence / parentDocumentsInspected) * 100).toFixed(0);
            parts.push(`${percentage}%`);
        }

        // Part 3: stats taken from the dominant type entry only.
        const dominantEntry = getDominantTypeEntry(schemaNode);
        if (dominantEntry) {
            const statString = getStatString(dominantEntry);
            if (statString) {
                parts.push(statString);
            }

            // Nested object: its children are measured against the object's own document count.
            if (dominantEntry.type === 'object' && dominantEntry.properties) {
                const objectDocumentsInspected = (dominantEntry['x-documentsInspected'] as number) ?? 0;
                for (const childName of Object.keys(dominantEntry.properties)) {
                    const childSchema = dominantEntry.properties[childName] as JSONSchema;
                    if (typeof childSchema === 'boolean') continue;

                    queue.push({
                        schemaNode: childSchema,
                        parentDocumentsInspected: objectDocumentsInspected,
                    });
                }
            }
        }

        // Nodes that produced no parts keep whatever description they already had.
        if (parts.length > 0) {
            schemaNode.description = parts.join(' Β· ');
        }
    }
}

/**
 * Collects display names for all types in a schema node's `anyOf` entries.
 *
 * Boolean entries are skipped. An entry with an `x-bsonType` is named via
 * `BSONTypes.toDisplayString`; otherwise its `type` is used, falling back to `'Unknown'`.
 *
 * @returns Names ordered by descending `x-typeOccurrence`; an empty array when there is no `anyOf`.
 */
function collectTypeDisplayNames(schemaNode: JSONSchema): string[] {
    if (!schemaNode.anyOf || schemaNode.anyOf.length === 0) {
        return [];
    }

    const entries: Array<{ name: string; occurrence: number }> = [];
    for (const entry of schemaNode.anyOf) {
        if (typeof entry === 'boolean') continue;
        const bsonType = (entry['x-bsonType'] as string) ?? '';
        const occurrence = (entry['x-typeOccurrence'] as number) ?? 0;
        const name = bsonType
            ? BSONTypes.toDisplayString(bsonType as BSONTypes)
            : ((entry.type as string) ?? 'Unknown');
        entries.push({ name, occurrence });
    }

    // Sort by occurrence descending so the dominant type comes first.
    entries.sort((a, b) => b.occurrence - a.occurrence);
    return entries.map((e) => e.name);
}

/**
 * Returns the `anyOf` entry with the highest `x-typeOccurrence` (a missing value counts as 0).
 *
 * Boolean entries are ignored. On a tie the earliest entry wins because the comparison is strict.
 *
 * @returns The dominant entry, or `null` when `anyOf` is absent, empty, or holds only booleans.
 */
function getDominantTypeEntry(schemaNode: JSONSchema): JSONSchema | null {
    if (!schemaNode.anyOf || schemaNode.anyOf.length === 0) {
        return null;
    }

    let maxOccurrence = -1;
    let dominant: JSONSchema | null = null;

    for (const entry of schemaNode.anyOf) {
        if (typeof entry === 'boolean') continue;
        const occurrence = (entry['x-typeOccurrence'] as number) ?? 0;
        if (occurrence > maxOccurrence) {
            maxOccurrence = occurrence;
            dominant = entry;
        }
    }

    return dominant;
}

/**
 * Formats an epoch-milliseconds value as the date part (text before `T`) of its ISO string.
 *
 * @returns `undefined` when the value is `undefined` or does not denote a valid `Date`
 *          (NaN or outside the representable range), where `toISOString()` would throw.
 */
function toIsoDay(epochMs: number | undefined): string | undefined {
    if (epochMs === undefined) {
        return undefined;
    }
    const date = new Date(epochMs);
    if (Number.isNaN(date.getTime())) {
        return undefined;
    }
    return date.toISOString().split('T')[0];
}

/**
 * Returns a type-specific stats string for the given type entry, selected by its `x-bsonType`.
 *
 * Each branch needs both of its bounds/counters to be present; otherwise no stats are reported.
 * For dates, both bounds must additionally be valid timestamps.
 *
 * @returns The stats text, or `undefined` when the type has no stats or they are incomplete.
 */
function getStatString(typeEntry: JSONSchema): string | undefined {
    const bsonType = (typeEntry['x-bsonType'] as string) ?? '';

    switch (bsonType) {
        case 'string':
        case 'binary': {
            const minLen = typeEntry['x-minLength'] as number | undefined;
            const maxLen = typeEntry['x-maxLength'] as number | undefined;
            if (minLen !== undefined && maxLen !== undefined) {
                return `length: ${String(minLen)}–${String(maxLen)}`;
            }
            return undefined;
        }

        case 'int32':
        case 'double':
        case 'long':
        case 'decimal128':
        case 'number': {
            const minVal = typeEntry['x-minValue'] as number | undefined;
            const maxVal = typeEntry['x-maxValue'] as number | undefined;
            if (minVal !== undefined && maxVal !== undefined) {
                return `range: ${String(minVal)}–${String(maxVal)}`;
            }
            return undefined;
        }

        case 'date': {
            // An invalid timestamp must not abort description generation for the whole schema.
            const minISO = toIsoDay(typeEntry['x-minDate'] as number | undefined);
            const maxISO = toIsoDay(typeEntry['x-maxDate'] as number | undefined);
            if (minISO !== undefined && maxISO !== undefined) {
                return `range: ${minISO} – ${maxISO}`;
            }
            return undefined;
        }

        case 'boolean': {
            const trueCount = typeEntry['x-trueCount'] as number | undefined;
            const falseCount = typeEntry['x-falseCount'] as number | undefined;
            if (trueCount !== undefined && falseCount !== undefined) {
                return `true: ${String(trueCount)}, false: ${String(falseCount)}`;
            }
            return undefined;
        }

        case 'array': {
            const minItems = typeEntry['x-minItems'] as number | undefined;
            const maxItems = typeEntry['x-maxItems'] as number | undefined;
            if (minItems !== undefined && maxItems !== undefined) {
                return `items: ${String(minItems)}–${String(maxItems)}`;
            }
            return undefined;
        }

        case 'object': {
            const minProps = typeEntry['x-minProperties'] as number | undefined;
            const maxProps = typeEntry['x-maxProperties'] as number | undefined;
            if (minProps !== undefined && maxProps !== undefined) {
                return `properties: ${String(minProps)}–${String(maxProps)}`;
            }
            return undefined;
        }

        default:
            return undefined;
    }
}
expect(activeField?.type).toBe('boolean'); + expect(activeField?.bsonType).toBe('boolean'); + }); + + it('returns _id first and sorts alphabetically', () => { + const analyzer = SchemaAnalyzer.fromDocument({ + _id: new ObjectId(), + zebra: 1, + apple: 2, + mango: 3, + }); + const fields = getKnownFields(analyzer.getSchema()); + const paths = fields.map((f: FieldEntry) => f.path); + + expect(paths[0]).toBe('_id'); + // Remaining should be alphabetical + expect(paths.slice(1)).toEqual(['apple', 'mango', 'zebra']); + }); + + it('detects optional fields', () => { + const analyzer = new SchemaAnalyzer(); + analyzer.addDocument({ _id: new ObjectId(), name: 'Alice', age: 30 }); + analyzer.addDocument({ _id: new ObjectId(), name: 'Bob' }); // no 'age' + + const fields = getKnownFields(analyzer.getSchema()); + + const nameField = fields.find((f: FieldEntry) => f.path === 'name'); + expect(nameField?.isSparse).toBeUndefined(); // present in all docs + + const ageField = fields.find((f: FieldEntry) => f.path === 'age'); + expect(ageField?.isSparse).toBe(true); // missing in doc2 + }); + + it('returns bsonTypes for polymorphic fields', () => { + const analyzer = new SchemaAnalyzer(); + analyzer.addDocument({ _id: new ObjectId(), value: 'hello' }); + analyzer.addDocument({ _id: new ObjectId(), value: 42 }); + + const fields = getKnownFields(analyzer.getSchema()); + const valueField = fields.find((f: FieldEntry) => f.path === 'value'); + + expect(valueField?.bsonTypes).toBeDefined(); + expect(valueField?.bsonTypes).toHaveLength(2); + expect(valueField?.bsonTypes).toContain('string'); + // Could be 'double' or 'int32' + expect(valueField?.bsonTypes?.some((t: string) => ['double', 'int32'].includes(t))).toBe(true); + }); + + it('returns arrayItemBsonType for array fields', () => { + const analyzer = SchemaAnalyzer.fromDocument({ + _id: new ObjectId(), + tags: ['a', 'b', 'c'], + scores: [10, 20, 30], + }); + const fields = getKnownFields(analyzer.getSchema()); + + const tagsField = 
fields.find((f: FieldEntry) => f.path === 'tags'); + expect(tagsField?.type).toBe('array'); + expect(tagsField?.bsonType).toBe('array'); + expect(tagsField?.arrayItemBsonType).toBe('string'); + + const scoresField = fields.find((f: FieldEntry) => f.path === 'scores'); + expect(scoresField?.type).toBe('array'); + expect(scoresField?.arrayItemBsonType).toBeDefined(); + }); + + it('handles nested object fields', () => { + const analyzer = SchemaAnalyzer.fromDocument({ + _id: new ObjectId(), + user: { + name: 'Alice', + profile: { + bio: 'hello', + }, + }, + }); + const fields = getKnownFields(analyzer.getSchema()); + const paths = fields.map((f: FieldEntry) => f.path); + + // Objects are expanded, not leaf nodes + expect(paths).not.toContain('user'); + expect(paths).toContain('user.name'); + expect(paths).toContain('user.profile.bio'); + }); + + it('detects optional nested fields', () => { + const analyzer = new SchemaAnalyzer(); + analyzer.addDocument({ _id: new ObjectId(), user: { name: 'Alice', age: 30 } }); + analyzer.addDocument({ _id: new ObjectId(), user: { name: 'Bob' } }); // no age in nested obj + + const fields = getKnownFields(analyzer.getSchema()); + + const nameField = fields.find((f: FieldEntry) => f.path === 'user.name'); + expect(nameField?.isSparse).toBeUndefined(); // present in both objects + + const ageField = fields.find((f: FieldEntry) => f.path === 'user.age'); + expect(ageField?.isSparse).toBe(true); // missing in doc2's user object + }); +}); diff --git a/src/utils/json/data-api/autocomplete/toFieldCompletionItems.test.ts b/src/utils/json/data-api/autocomplete/toFieldCompletionItems.test.ts new file mode 100644 index 000000000..37a7ecc4e --- /dev/null +++ b/src/utils/json/data-api/autocomplete/toFieldCompletionItems.test.ts @@ -0,0 +1,129 @@ +/*--------------------------------------------------------------------------------------------- + * Copyright (c) Microsoft Corporation. All rights reserved. + * Licensed under the MIT License. 
See License.txt in the project root for license information. + *--------------------------------------------------------------------------------------------*/ + +import { type FieldEntry } from '@vscode-documentdb/schema-analyzer'; +import { toFieldCompletionItems } from './toFieldCompletionItems'; + +describe('toFieldCompletionItems', () => { + it('converts simple fields', () => { + const fields: FieldEntry[] = [ + { path: 'name', type: 'string', bsonType: 'string' }, + { path: 'age', type: 'number', bsonType: 'int32' }, + ]; + + const result = toFieldCompletionItems(fields); + + expect(result).toHaveLength(2); + expect(result[0].fieldName).toBe('name'); + expect(result[0].displayType).toBe('String'); + expect(result[0].bsonType).toBe('string'); + expect(result[0].insertText).toBe('name'); + + expect(result[1].fieldName).toBe('age'); + expect(result[1].displayType).toBe('Int32'); + expect(result[1].bsonType).toBe('int32'); + expect(result[1].insertText).toBe('age'); + }); + + it('escapes dotted paths in insertText', () => { + const fields: FieldEntry[] = [ + { path: 'address.city', type: 'string', bsonType: 'string' }, + { path: 'user.profile.bio', type: 'string', bsonType: 'string' }, + ]; + + const result = toFieldCompletionItems(fields); + + expect(result[0].insertText).toBe('"address.city"'); + expect(result[1].insertText).toBe('"user.profile.bio"'); + }); + + it('quotes field names with dashes', () => { + const fields: FieldEntry[] = [{ path: 'order-items', type: 'string', bsonType: 'string' }]; + const result = toFieldCompletionItems(fields); + expect(result[0].insertText).toBe('"order-items"'); + expect(result[0].fieldName).toBe('order-items'); // display stays unescaped + }); + + it('quotes field names with brackets', () => { + const fields: FieldEntry[] = [{ path: 'items[0]', type: 'string', bsonType: 'string' }]; + const result = toFieldCompletionItems(fields); + expect(result[0].insertText).toBe('"items[0]"'); + }); + + it('quotes field names starting 
with a digit', () => { + const fields: FieldEntry[] = [{ path: '123abc', type: 'string', bsonType: 'string' }]; + const result = toFieldCompletionItems(fields); + expect(result[0].insertText).toBe('"123abc"'); + }); + + it('escapes embedded double quotes in insertText', () => { + const fields: FieldEntry[] = [{ path: 'say"hi"', type: 'string', bsonType: 'string' }]; + const result = toFieldCompletionItems(fields); + expect(result[0].insertText).toBe('"say\\"hi\\""'); + expect(result[0].fieldName).toBe('say"hi"'); // display stays unescaped + }); + + it('escapes backslashes in insertText', () => { + const fields: FieldEntry[] = [{ path: 'back\\slash', type: 'string', bsonType: 'string' }]; + const result = toFieldCompletionItems(fields); + expect(result[0].insertText).toBe('"back\\\\slash"'); + }); + + it('does not quote valid identifiers', () => { + const fields: FieldEntry[] = [ + { path: 'name', type: 'string', bsonType: 'string' }, + { path: '_id', type: 'string', bsonType: 'objectid' }, + { path: '$type', type: 'string', bsonType: 'string' }, + ]; + const result = toFieldCompletionItems(fields); + expect(result[0].insertText).toBe('name'); + expect(result[1].insertText).toBe('_id'); + expect(result[2].insertText).toBe('$type'); + }); + + it('adds $ prefix to referenceText', () => { + const fields: FieldEntry[] = [ + { path: 'age', type: 'number', bsonType: 'int32' }, + { path: 'address.city', type: 'string', bsonType: 'string' }, + ]; + + const result = toFieldCompletionItems(fields); + + expect(result[0].referenceText).toBe('$age'); + expect(result[1].referenceText).toBe('$address.city'); + }); + + it('preserves isSparse', () => { + const fields: FieldEntry[] = [ + { path: 'name', type: 'string', bsonType: 'string', isSparse: false }, + { path: 'nickname', type: 'string', bsonType: 'string', isSparse: true }, + { path: 'email', type: 'string', bsonType: 'string' }, // undefined β†’ false + ]; + + const result = toFieldCompletionItems(fields); + + 
expect(result[0].isSparse).toBe(false); + expect(result[1].isSparse).toBe(true); + expect(result[2].isSparse).toBe(false); + }); + + it('uses correct displayType', () => { + const fields: FieldEntry[] = [ + { path: '_id', type: 'string', bsonType: 'objectid' }, + { path: 'createdAt', type: 'string', bsonType: 'date' }, + { path: 'active', type: 'boolean', bsonType: 'boolean' }, + { path: 'score', type: 'number', bsonType: 'double' }, + { path: 'tags', type: 'array', bsonType: 'array' }, + ]; + + const result = toFieldCompletionItems(fields); + + expect(result[0].displayType).toBe('ObjectId'); + expect(result[1].displayType).toBe('Date'); + expect(result[2].displayType).toBe('Boolean'); + expect(result[3].displayType).toBe('Double'); + expect(result[4].displayType).toBe('Array'); + }); +}); diff --git a/src/utils/json/data-api/autocomplete/toFieldCompletionItems.ts b/src/utils/json/data-api/autocomplete/toFieldCompletionItems.ts new file mode 100644 index 000000000..60e299590 --- /dev/null +++ b/src/utils/json/data-api/autocomplete/toFieldCompletionItems.ts @@ -0,0 +1,86 @@ +/*--------------------------------------------------------------------------------------------- + * Copyright (c) Microsoft Corporation. All rights reserved. + * Licensed under the MIT License. See License.txt in the project root for license information. + *--------------------------------------------------------------------------------------------*/ + +import { BSONTypes, type FieldEntry } from '@vscode-documentdb/schema-analyzer'; + +/** + * Completion-ready data for a single field entry. + * + * Design intent: + * - `fieldName` is the human-readable, unescaped field path shown in the completion list. + * Users see clean names like "address.city" or "order-items" without quotes or escaping. + * - `insertText` is the escaped/quoted form that gets inserted when the user selects a + * completion item. 
For simple identifiers it matches `fieldName`; for names containing + * special characters (dots, dashes, spaces, brackets, etc.) it is wrapped in double quotes. + * - `referenceText` is the `$`-prefixed aggregation field reference (e.g., "$age"). + */ +export interface FieldCompletionData { + /** The full dot-notated field name, e.g., "address.city" — kept unescaped for display */ + fieldName: string; + /** Human-readable type display, e.g., "String", "Date", "ObjectId" */ + displayType: string; + /** Raw BSON type from FieldEntry */ + bsonType: string; + /** All observed BSON types for polymorphic fields (e.g., ["string", "int32"]) */ + bsonTypes?: string[]; + /** Human-readable display strings for all observed types (e.g., ["String", "Int32"]) */ + displayTypes?: string[]; + /** Whether the field was not present in every inspected document (statistical observation, not a constraint) */ + isSparse: boolean; + /** Text to insert when the user selects this completion — quoted/escaped if the field name contains special chars */ + insertText: string; + /** + * Field reference for aggregation expressions, e.g., "$age", "$address.city". + * + * TODO: The simple `$field.path` syntax is invalid MQL for field names containing dots, + * spaces, or `$` characters. For such fields, the correct MQL syntax is + * `{ $getField: "fieldName" }`. This should be addressed when the aggregation + * completion provider is wired up — either by using `$getField` for special names + * or by making `referenceText` optional for fields that cannot use the `$` prefix syntax. + */ + referenceText: string; +} + +/** + * Matches valid JavaScript/TypeScript identifiers. + * A valid identifier starts with a letter, underscore, or dollar sign, + * followed by zero or more letters, digits, underscores, or dollar signs. + * + * Field names that do NOT match this pattern must be quoted and escaped + * in `insertText` to produce valid query expressions.
+ */ +const JS_IDENTIFIER_PATTERN = /^[a-zA-Z_$][a-zA-Z0-9_$]*$/; + +/** + * Converts an array of FieldEntry objects into completion-ready FieldCompletionData items. + * + * @param fields - Array of FieldEntry objects from getKnownFields + * @returns Array of FieldCompletionData ready for use in editor completions + */ +export function toFieldCompletionItems(fields: FieldEntry[]): FieldCompletionData[] { + return fields.map((entry) => { + const displayType = BSONTypes.toDisplayString(entry.bsonType as BSONTypes); + const needsQuoting = !JS_IDENTIFIER_PATTERN.test(entry.path); + + let insertText: string; + if (needsQuoting) { + const escaped = entry.path.replace(/\\/g, '\\\\').replace(/"/g, '\\"'); + insertText = `"${escaped}"`; + } else { + insertText = entry.path; + } + + return { + fieldName: entry.path, + displayType, + bsonType: entry.bsonType, + bsonTypes: entry.bsonTypes, + displayTypes: entry.bsonTypes?.map((t) => BSONTypes.toDisplayString(t as BSONTypes)), + isSparse: entry.isSparse ?? false, + insertText, + referenceText: `$${entry.path}`, + }; + }); +} diff --git a/src/utils/json/data-api/autocomplete/toTypeScriptDefinition.test.ts b/src/utils/json/data-api/autocomplete/toTypeScriptDefinition.test.ts new file mode 100644 index 000000000..d003b9ded --- /dev/null +++ b/src/utils/json/data-api/autocomplete/toTypeScriptDefinition.test.ts @@ -0,0 +1,318 @@ +/*--------------------------------------------------------------------------------------------- + * Copyright (c) Microsoft Corporation. All rights reserved. + * Licensed under the MIT License. See License.txt in the project root for license information. 
+ *--------------------------------------------------------------------------------------------*/ + +import { type JSONSchema } from '@vscode-documentdb/schema-analyzer'; +import { toTypeScriptDefinition } from './toTypeScriptDefinition'; + +describe('toTypeScriptDefinition', () => { + it('generates basic interface with primitive types', () => { + const schema: JSONSchema = { + 'x-documentsInspected': 100, + properties: { + _id: { + 'x-occurrence': 100, + anyOf: [ + { + type: 'string', + 'x-bsonType': 'objectid', + 'x-typeOccurrence': 100, + }, + ], + }, + name: { + 'x-occurrence': 100, + anyOf: [ + { + type: 'string', + 'x-bsonType': 'string', + 'x-typeOccurrence': 100, + }, + ], + }, + age: { + 'x-occurrence': 100, + anyOf: [ + { + type: 'number', + 'x-bsonType': 'int32', + 'x-typeOccurrence': 100, + }, + ], + }, + }, + }; + + const result = toTypeScriptDefinition(schema, 'users'); + + expect(result).toContain('interface UsersDocument {'); + expect(result).toContain(' _id: ObjectId;'); + expect(result).toContain(' name: string;'); + expect(result).toContain(' age: number;'); + expect(result).toContain('}'); + }); + + it('marks optional fields with ?', () => { + const schema: JSONSchema = { + 'x-documentsInspected': 100, + properties: { + name: { + 'x-occurrence': 100, + anyOf: [ + { + type: 'string', + 'x-bsonType': 'string', + 'x-typeOccurrence': 100, + }, + ], + }, + nickname: { + 'x-occurrence': 50, + anyOf: [ + { + type: 'string', + 'x-bsonType': 'string', + 'x-typeOccurrence': 50, + }, + ], + }, + }, + }; + + const result = toTypeScriptDefinition(schema, 'users'); + + expect(result).toContain(' name: string;'); + expect(result).toContain(' nickname?: string;'); + }); + + it('handles nested objects as inline blocks', () => { + const schema: JSONSchema = { + 'x-documentsInspected': 100, + properties: { + address: { + 'x-occurrence': 100, + anyOf: [ + { + type: 'object', + 'x-bsonType': 'object', + 'x-typeOccurrence': 100, + 'x-documentsInspected': 100, + 
properties: { + city: { + 'x-occurrence': 100, + anyOf: [ + { + type: 'string', + 'x-bsonType': 'string', + 'x-typeOccurrence': 100, + }, + ], + }, + zip: { + 'x-occurrence': 100, + anyOf: [ + { + type: 'string', + 'x-bsonType': 'string', + 'x-typeOccurrence': 100, + }, + ], + }, + }, + }, + ], + }, + }, + }; + + const result = toTypeScriptDefinition(schema, 'users'); + + expect(result).toContain(' address: {'); + expect(result).toContain(' city: string;'); + expect(result).toContain(' zip: string;'); + expect(result).toContain(' };'); + }); + + it('handles arrays with element types', () => { + const schema: JSONSchema = { + 'x-documentsInspected': 100, + properties: { + tags: { + 'x-occurrence': 100, + anyOf: [ + { + type: 'array', + 'x-bsonType': 'array', + 'x-typeOccurrence': 100, + items: { + anyOf: [ + { + type: 'string', + 'x-bsonType': 'string', + 'x-typeOccurrence': 100, + }, + ], + }, + }, + ], + }, + }, + }; + + const result = toTypeScriptDefinition(schema, 'posts'); + + expect(result).toContain(' tags: string[];'); + }); + + it('handles polymorphic fields as unions', () => { + const schema: JSONSchema = { + 'x-documentsInspected': 100, + properties: { + metadata: { + 'x-occurrence': 80, + anyOf: [ + { + type: 'string', + 'x-bsonType': 'string', + 'x-typeOccurrence': 50, + }, + { + type: 'number', + 'x-bsonType': 'int32', + 'x-typeOccurrence': 20, + }, + { + type: 'null', + 'x-bsonType': 'null', + 'x-typeOccurrence': 10, + }, + ], + }, + }, + }; + + const result = toTypeScriptDefinition(schema, 'items'); + + expect(result).toContain(' metadata?: string | number | null;'); + }); + + it('PascalCase conversion for collection name', () => { + expect(toTypeScriptDefinition({ 'x-documentsInspected': 0 }, 'users')).toContain('interface UsersDocument'); + expect(toTypeScriptDefinition({ 'x-documentsInspected': 0 }, 'order_items')).toContain( + 'interface OrderItemsDocument', + ); + expect(toTypeScriptDefinition({ 'x-documentsInspected': 0 }, 
'my-awesome-collection')).toContain( + 'interface MyAwesomeCollectionDocument', + ); + }); + + it('prefixes with _ when collection name starts with a digit', () => { + expect(toTypeScriptDefinition({ 'x-documentsInspected': 0 }, '123abc')).toContain('interface _123abcDocument'); + expect(toTypeScriptDefinition({ 'x-documentsInspected': 0 }, '99_bottles')).toContain( + 'interface _99BottlesDocument', + ); + }); + + it('falls back to CollectionDocument when name is only separators', () => { + expect(toTypeScriptDefinition({ 'x-documentsInspected': 0 }, '---')).toContain('interface CollectionDocument'); + expect(toTypeScriptDefinition({ 'x-documentsInspected': 0 }, '_ _ _')).toContain( + 'interface CollectionDocument', + ); + }); + + it('falls back to CollectionDocument for empty string', () => { + expect(toTypeScriptDefinition({ 'x-documentsInspected': 0 }, '')).toContain('interface CollectionDocument'); + }); + + describe('special character field names', () => { + function makeSchemaWithField(fieldName: string): JSONSchema { + return { + 'x-documentsInspected': 100, + properties: { + [fieldName]: { + 'x-occurrence': 100, + anyOf: [ + { + type: 'string', + 'x-bsonType': 'string', + 'x-typeOccurrence': 100, + }, + ], + }, + }, + }; + } + + it('leaves valid identifiers unquoted', () => { + const result = toTypeScriptDefinition(makeSchemaWithField('age'), 'test'); + expect(result).toContain(' age: string;'); + }); + + it('leaves underscore-prefixed identifiers unquoted', () => { + const result = toTypeScriptDefinition(makeSchemaWithField('_id'), 'test'); + expect(result).toContain(' _id: string;'); + }); + + it('leaves dollar-prefixed identifiers unquoted', () => { + const result = toTypeScriptDefinition(makeSchemaWithField('$type'), 'test'); + expect(result).toContain(' $type: string;'); + }); + + it('quotes field names with dashes', () => { + const result = toTypeScriptDefinition(makeSchemaWithField('order-items'), 'test'); + expect(result).toContain(' "order-items": 
string;'); + }); + + it('quotes field names with dots', () => { + const result = toTypeScriptDefinition(makeSchemaWithField('a.b'), 'test'); + expect(result).toContain(' "a.b": string;'); + }); + + it('quotes field names with spaces', () => { + const result = toTypeScriptDefinition(makeSchemaWithField('my field'), 'test'); + expect(result).toContain(' "my field": string;'); + }); + + it('quotes field names with brackets', () => { + const result = toTypeScriptDefinition(makeSchemaWithField('items[0]'), 'test'); + expect(result).toContain(' "items[0]": string;'); + }); + + it('escapes embedded double quotes in field names', () => { + const result = toTypeScriptDefinition(makeSchemaWithField('say"hi"'), 'test'); + expect(result).toContain(' "say\\"hi\\"": string;'); + }); + + it('escapes backslashes in field names', () => { + const result = toTypeScriptDefinition(makeSchemaWithField('back\\slash'), 'test'); + expect(result).toContain(' "back\\\\slash": string;'); + }); + + it('quotes field names that start with a digit', () => { + const result = toTypeScriptDefinition(makeSchemaWithField('123abc'), 'test'); + expect(result).toContain(' "123abc": string;'); + }); + + it('preserves optionality with quoted field names', () => { + const schema: JSONSchema = { + 'x-documentsInspected': 100, + properties: { + 'order-items': { + 'x-occurrence': 50, + anyOf: [ + { + type: 'string', + 'x-bsonType': 'string', + 'x-typeOccurrence': 50, + }, + ], + }, + }, + }; + + const result = toTypeScriptDefinition(schema, 'test'); + expect(result).toContain(' "order-items"?: string;'); + }); + }); +}); diff --git a/src/utils/json/data-api/autocomplete/toTypeScriptDefinition.ts b/src/utils/json/data-api/autocomplete/toTypeScriptDefinition.ts new file mode 100644 index 000000000..17328dfeb --- /dev/null +++ b/src/utils/json/data-api/autocomplete/toTypeScriptDefinition.ts @@ -0,0 +1,272 @@ +/*--------------------------------------------------------------------------------------------- + * 
Copyright (c) Microsoft Corporation. All rights reserved. + * Licensed under the MIT License. See License.txt in the project root for license information. + *--------------------------------------------------------------------------------------------*/ + +import { BSONTypes, type JSONSchema } from '@vscode-documentdb/schema-analyzer'; + +/** + * Maps a BSON type string to the corresponding TypeScript type representation. + */ +const bsonToTypeScriptMap: Record<string, string> = { + [BSONTypes.String]: 'string', + [BSONTypes.Int32]: 'number', + [BSONTypes.Double]: 'number', + [BSONTypes.Long]: 'number', + [BSONTypes.Decimal128]: 'number', + [BSONTypes.Number]: 'number', + [BSONTypes.Boolean]: 'boolean', + [BSONTypes.Date]: 'Date', + [BSONTypes.ObjectId]: 'ObjectId', + [BSONTypes.Null]: 'null', + [BSONTypes.Undefined]: 'undefined', + [BSONTypes.Binary]: 'Binary', + [BSONTypes.RegExp]: 'RegExp', + [BSONTypes.UUID]: 'UUID', + [BSONTypes.UUID_LEGACY]: 'UUID', + [BSONTypes.Timestamp]: 'Timestamp', + [BSONTypes.MinKey]: 'MinKey', + [BSONTypes.MaxKey]: 'MaxKey', + [BSONTypes.Code]: 'Code', + [BSONTypes.CodeWithScope]: 'Code', + [BSONTypes.DBRef]: 'DBRef', + [BSONTypes.Map]: 'Map', + [BSONTypes.Symbol]: 'symbol', +}; + +/** + * Converts a BSON type string to a TypeScript type string. + */ +function bsonTypeToTS(bsonType: string): string { + return bsonToTypeScriptMap[bsonType] ?? 'unknown'; +} + +/** + * Matches valid JavaScript/TypeScript identifiers. + * A valid identifier starts with a letter, underscore, or dollar sign, + * followed by zero or more letters, digits, underscores, or dollar signs. + */ +const JS_IDENTIFIER_PATTERN = /^[a-zA-Z_$][a-zA-Z0-9_$]*$/; + +/** + * Returns a safe TypeScript property name for use in interface definitions. + * If the name is a valid JS identifier, it is returned as-is. + * Otherwise, it is wrapped in double quotes with internal quotes and backslashes escaped.
+ * + * Examples: + * - "age" β†’ "age" (valid identifier, unchanged) + * - "order-items" β†’ '"order-items"' (dash) + * - "a.b" β†’ '"a.b"' (dot) + * - "my field" β†’ '"my field"' (space) + * - 'say"hi"' β†’ '"say\\"hi\\""' (embedded quotes escaped) + */ +function safePropertyName(name: string): string { + if (JS_IDENTIFIER_PATTERN.test(name)) { + return name; + } + const escaped = name.replace(/\\/g, '\\\\').replace(/"/g, '\\"'); + return `"${escaped}"`; +} + +/** + * Converts a collection name to PascalCase and appends "Document". + * If the result would start with a digit, a leading `_` is prepended. + * If the collection name contains only separators or is empty, falls back to "CollectionDocument". + * + * Examples: + * - "users" β†’ "UsersDocument" + * - "order_items" β†’ "OrderItemsDocument" + * - "123abc" β†’ "_123abcDocument" + * - "---" β†’ "CollectionDocument" + */ +function toInterfaceName(collectionName: string): string { + const pascal = collectionName + .split(/[_\-\s]+/) + .filter((s) => s.length > 0) + .map((segment) => segment.charAt(0).toUpperCase() + segment.slice(1)) + .join(''); + + if (pascal.length === 0) { + return 'CollectionDocument'; + } + + // Prefix with _ if the first character is a digit (invalid TS identifier start) + const prefix = /^[0-9]/.test(pascal) ? '_' : ''; + return `${prefix}${pascal}Document`; +} + +/** + * Generates a TypeScript interface definition string from a JSONSchema + * produced by the SchemaAnalyzer. + * + * @param schema - The JSON Schema with x- extensions from SchemaAnalyzer + * @param collectionName - The MongoDB API collection name, used to derive the interface name + * @returns A formatted TypeScript interface definition string + */ +export function toTypeScriptDefinition(schema: JSONSchema, collectionName: string): string { + const interfaceName = toInterfaceName(collectionName); + const rootDocumentsInspected = (schema['x-documentsInspected'] as number) ?? 
0; + + const lines: string[] = []; + lines.push(`interface ${interfaceName} {`); + + if (schema.properties) { + renderProperties(schema.properties, rootDocumentsInspected, 1, lines); + } + + lines.push('}'); + return lines.join('\n'); +} + +/** + * Renders property lines for a set of JSON Schema properties at a given indent level. + */ +function renderProperties( + properties: Record<string, JSONSchema | boolean>, + parentDocumentsInspected: number, + indentLevel: number, + lines: string[], +): void { + const indent = ' '.repeat(indentLevel); + + for (const [propName, propSchema] of Object.entries(properties)) { + if (typeof propSchema === 'boolean') continue; + + const isOptional = isFieldOptional(propSchema, parentDocumentsInspected); + const optionalMarker = isOptional ? '?' : ''; + const tsType = resolveTypeString(propSchema, indentLevel); + const safeName = safePropertyName(propName); + + lines.push(`${indent}${safeName}${optionalMarker}: ${tsType};`); + } +} + +/** + * Returns true if the field's occurrence is less than the parent's document count. + */ +function isFieldOptional(schemaNode: JSONSchema, parentDocumentsInspected: number): boolean { + const occurrence = (schemaNode['x-occurrence'] as number) ?? 0; + return parentDocumentsInspected > 0 && occurrence < parentDocumentsInspected; +} + +/** + * Resolves a full TypeScript type string for a schema node by examining its + * `anyOf` entries. Handles primitives, objects (inline blocks), and arrays.
+ */ +function resolveTypeString(schemaNode: JSONSchema, indentLevel: number): string { + if (!schemaNode.anyOf || schemaNode.anyOf.length === 0) { + return 'unknown'; + } + + const typeStrings: string[] = []; + + for (const entry of schemaNode.anyOf) { + if (typeof entry === 'boolean') continue; + const ts = singleEntryToTS(entry, indentLevel); + if (ts && !typeStrings.includes(ts)) { + typeStrings.push(ts); + } + } + + if (typeStrings.length === 0) { + return 'unknown'; + } + + return typeStrings.join(' | '); +} + +/** + * Converts a single `anyOf` type entry to a TypeScript type string. + */ +function singleEntryToTS(entry: JSONSchema, indentLevel: number): string { + const bsonType = (entry['x-bsonType'] as string) ?? ''; + + // Object with nested properties β†’ inline block + if (entry.type === 'object' && entry.properties) { + return renderInlineObject(entry, indentLevel); + } + + // Array β†’ determine element types + if (entry.type === 'array' || bsonType === (BSONTypes.Array as string)) { + return renderArrayType(entry, indentLevel); + } + + // Primitive or mapped type + if (bsonType) { + return bsonTypeToTS(bsonType); + } + + // Fallback to JSON type + const jsonType = entry.type as string | undefined; + if (jsonType) { + return jsonType; + } + + return 'unknown'; +} + +/** + * Renders an inline object type `{ field: type; ... }`. + */ +function renderInlineObject(entry: JSONSchema, indentLevel: number): string { + const lines: string[] = []; + const objectDocumentsInspected = (entry['x-documentsInspected'] as number) ?? 0; + + lines.push('{'); + + if (entry.properties) { + renderProperties(entry.properties, objectDocumentsInspected, indentLevel + 1, lines); + } + + const closingIndent = ' '.repeat(indentLevel); + lines.push(`${closingIndent}}`); + + return lines.join('\n'); +} + +/** + * Renders an array type, e.g., `string[]` or `(string | number)[]`. 
+ */ +function renderArrayType(entry: JSONSchema, indentLevel: number): string { + const itemsSchema = entry.items; + + if (!itemsSchema || typeof itemsSchema === 'boolean') { + return 'unknown[]'; + } + + // Items specified as a single schema (not an array of schemas) + if (!Array.isArray(itemsSchema)) { + const itemSchema = itemsSchema as JSONSchema; + + if (itemSchema.anyOf && itemSchema.anyOf.length > 0) { + const elementTypes: string[] = []; + for (const itemEntry of itemSchema.anyOf) { + if (typeof itemEntry === 'boolean') continue; + const ts = singleEntryToTS(itemEntry, indentLevel); + if (ts && !elementTypes.includes(ts)) { + elementTypes.push(ts); + } + } + + if (elementTypes.length === 0) { + return 'unknown[]'; + } + + if (elementTypes.length === 1) { + return `${elementTypes[0]}[]`; + } + + return `(${elementTypes.join(' | ')})[]`; + } + + // Single item type without anyOf + const bsonType = (itemSchema['x-bsonType'] as string) ?? ''; + if (bsonType) { + return `${bsonTypeToTS(bsonType)}[]`; + } + + return 'unknown[]'; + } + + return 'unknown[]'; +} diff --git a/src/utils/json/mongo/MongoBSONTypes.ts b/src/utils/json/mongo/MongoBSONTypes.ts deleted file mode 100644 index fa97add9c..000000000 --- a/src/utils/json/mongo/MongoBSONTypes.ts +++ /dev/null @@ -1,200 +0,0 @@ -/*--------------------------------------------------------------------------------------------- - * Copyright (c) Microsoft Corporation. All rights reserved. - * Licensed under the MIT License. See License.txt in the project root for license information. - *--------------------------------------------------------------------------------------------*/ - -import { - Binary, - BSONSymbol, - Code, - DBRef, - Decimal128, - Double, - Int32, - Long, - MaxKey, - MinKey, - ObjectId, - Timestamp, - UUID, -} from 'mongodb'; - -/** - * Represents the different data types that can be stored in a MongoDB document. 
- * The string representation is casesensitive and should match the MongoDB documentation. - * https://www.mongodb.com/docs/manual/reference/bson-types/ - */ -export enum MongoBSONTypes { - String = 'string', - Number = 'number', - Int32 = 'int32', - Double = 'double', - Decimal128 = 'decimal128', - Long = 'long', - Boolean = 'boolean', - Object = 'object', - Array = 'array', - Null = 'null', - Undefined = 'undefined', - Date = 'date', - RegExp = 'regexp', - Binary = 'binary', - ObjectId = 'objectid', - Symbol = 'symbol', - Timestamp = 'timestamp', - UUID = 'uuid', - UUID_LEGACY = 'uuid-legacy', // old UUID subtype, used in some legacy data - MinKey = 'minkey', - MaxKey = 'maxkey', - DBRef = 'dbref', - Code = 'code', - CodeWithScope = 'codewithscope', - Map = 'map', - // Add any deprecated types if necessary - _UNKNOWN_ = '_unknown_', // Catch-all for unknown types -} - -export namespace MongoBSONTypes { - const displayStringMap: Record = { - [MongoBSONTypes.String]: 'String', - [MongoBSONTypes.Number]: 'Number', - [MongoBSONTypes.Int32]: 'Int32', - [MongoBSONTypes.Double]: 'Double', - [MongoBSONTypes.Decimal128]: 'Decimal128', - [MongoBSONTypes.Long]: 'Long', - [MongoBSONTypes.Boolean]: 'Boolean', - [MongoBSONTypes.Object]: 'Object', - [MongoBSONTypes.Array]: 'Array', - [MongoBSONTypes.Null]: 'Null', - [MongoBSONTypes.Undefined]: 'Undefined', - [MongoBSONTypes.Date]: 'Date', - [MongoBSONTypes.RegExp]: 'RegExp', - [MongoBSONTypes.Binary]: 'Binary', - [MongoBSONTypes.ObjectId]: 'ObjectId', - [MongoBSONTypes.Symbol]: 'Symbol', - [MongoBSONTypes.Timestamp]: 'Timestamp', - [MongoBSONTypes.MinKey]: 'MinKey', - [MongoBSONTypes.MaxKey]: 'MaxKey', - [MongoBSONTypes.DBRef]: 'DBRef', - [MongoBSONTypes.Code]: 'Code', - [MongoBSONTypes.CodeWithScope]: 'CodeWithScope', - [MongoBSONTypes.Map]: 'Map', - [MongoBSONTypes._UNKNOWN_]: 'Unknown', - [MongoBSONTypes.UUID]: 'UUID', - [MongoBSONTypes.UUID_LEGACY]: 'UUID (Legacy)', - }; - - export function toDisplayString(type: 
MongoBSONTypes): string { - return displayStringMap[type] || 'Unknown'; - } - - export function toString(type: MongoBSONTypes): string { - return type; - } - - /** - * Converts a MongoDB data type to a case sensitive JSON data type - * @param type The MongoDB data type - * @returns A corresponding JSON data type (please note: it's case sensitive) - */ - export function toJSONType(type: MongoBSONTypes): string { - switch (type) { - case MongoBSONTypes.String: - case MongoBSONTypes.Symbol: - case MongoBSONTypes.Date: - case MongoBSONTypes.Timestamp: - case MongoBSONTypes.ObjectId: - case MongoBSONTypes.RegExp: - case MongoBSONTypes.Binary: - case MongoBSONTypes.Code: - case MongoBSONTypes.UUID: - case MongoBSONTypes.UUID_LEGACY: - return 'string'; - - case MongoBSONTypes.Boolean: - return 'boolean'; - - case MongoBSONTypes.Int32: - case MongoBSONTypes.Long: - case MongoBSONTypes.Double: - case MongoBSONTypes.Decimal128: - return 'number'; - - case MongoBSONTypes.Object: - case MongoBSONTypes.Map: - case MongoBSONTypes.DBRef: - case MongoBSONTypes.CodeWithScope: - return 'object'; - - case MongoBSONTypes.Array: - return 'array'; - - case MongoBSONTypes.Null: - case MongoBSONTypes.Undefined: - case MongoBSONTypes.MinKey: - case MongoBSONTypes.MaxKey: - return 'null'; - - default: - return 'string'; // Default to string for unknown types - } - } - - /** - * Accepts a value from a MongoDB 'Document' object and returns the inferred type. 
- * @param value The value of a field in a MongoDB 'Document' object - * @returns - */ - export function inferType(value: unknown): MongoBSONTypes { - if (value === null) return MongoBSONTypes.Null; - if (value === undefined) return MongoBSONTypes.Undefined; - - switch (typeof value) { - case 'string': - return MongoBSONTypes.String; - case 'number': - return MongoBSONTypes.Double; // JavaScript numbers are doubles - case 'boolean': - return MongoBSONTypes.Boolean; - case 'object': - if (Array.isArray(value)) { - return MongoBSONTypes.Array; - } - - // Check for common BSON types first - if (value instanceof ObjectId) return MongoBSONTypes.ObjectId; - if (value instanceof Int32) return MongoBSONTypes.Int32; - if (value instanceof Double) return MongoBSONTypes.Double; - if (value instanceof Date) return MongoBSONTypes.Date; - if (value instanceof Timestamp) return MongoBSONTypes.Timestamp; - - // Less common types - if (value instanceof Decimal128) return MongoBSONTypes.Decimal128; - if (value instanceof Long) return MongoBSONTypes.Long; - if (value instanceof MinKey) return MongoBSONTypes.MinKey; - if (value instanceof MaxKey) return MongoBSONTypes.MaxKey; - if (value instanceof BSONSymbol) return MongoBSONTypes.Symbol; - if (value instanceof DBRef) return MongoBSONTypes.DBRef; - if (value instanceof Map) return MongoBSONTypes.Map; - if (value instanceof UUID && value.sub_type === Binary.SUBTYPE_UUID) return MongoBSONTypes.UUID; - if (value instanceof UUID && value.sub_type === Binary.SUBTYPE_UUID_OLD) - return MongoBSONTypes.UUID_LEGACY; - if (value instanceof Buffer || value instanceof Binary) return MongoBSONTypes.Binary; - if (value instanceof RegExp) return MongoBSONTypes.RegExp; - if (value instanceof Code) { - if (value.scope) { - return MongoBSONTypes.CodeWithScope; - } else { - return MongoBSONTypes.Code; - } - } - - // Default to Object if none of the above match - return MongoBSONTypes.Object; - default: - // This should never happen, but if it does, 
we'll catch it here - // TODO: add telemetry somewhere to know when it happens (not here, this could get hit too often) - return MongoBSONTypes._UNKNOWN_; - } - } -} diff --git a/src/utils/json/mongo/SchemaAnalyzer.test.ts b/src/utils/json/mongo/SchemaAnalyzer.test.ts deleted file mode 100644 index 731791611..000000000 --- a/src/utils/json/mongo/SchemaAnalyzer.test.ts +++ /dev/null @@ -1,255 +0,0 @@ -/*--------------------------------------------------------------------------------------------- - * Copyright (c) Microsoft Corporation. All rights reserved. - * Licensed under the MIT License. See License.txt in the project root for license information. - *--------------------------------------------------------------------------------------------*/ - -import { type JSONSchema, type JSONSchemaRef } from '../JSONSchema'; -import { getPropertyNamesAtLevel, updateSchemaWithDocument } from './SchemaAnalyzer'; -import { - arraysWithDifferentDataTypes, - complexDocument, - complexDocumentsArray, - complexDocumentWithOddTypes, - embeddedDocumentOnly, - flatDocument, - sparseDocumentsArray, -} from './mongoTestDocuments'; - -describe('DocumentDB Schema Analyzer', () => { - it('prints out schema for testing', () => { - const schema: JSONSchema = {}; - updateSchemaWithDocument(schema, embeddedDocumentOnly); - console.log(JSON.stringify(schema, null, 2)); - expect(schema).toBeDefined(); - }); - - it('supports many documents', () => { - const schema: JSONSchema = {}; - sparseDocumentsArray.forEach((doc) => updateSchemaWithDocument(schema, doc)); - expect(schema).toBeDefined(); - - // Check that 'x-documentsInspected' is correct - expect(schema['x-documentsInspected']).toBe(sparseDocumentsArray.length); - - // Check that the schema has the correct root properties - const expectedRootProperties = new Set(['_id', 'name', 'age', 'email', 'isActive', 'score', 'description']); - - expect(Object.keys(schema.properties || {})).toEqual( - 
expect.arrayContaining(Array.from(expectedRootProperties)), - ); - - // Check that the 'name' field is detected correctly - const nameField: JSONSchema = schema.properties?.['name']; - expect(nameField).toBeDefined(); - expect(nameField?.['x-occurrence']).toBeGreaterThan(0); - - // Access 'anyOf' to get the type entries - const nameFieldTypes = nameField.anyOf?.map((typeEntry) => typeEntry['type']); - expect(nameFieldTypes).toContain('string'); - - // Check that the 'age' field has the correct type - const ageField: JSONSchema = schema.properties?.['age']; - expect(ageField).toBeDefined(); - const ageFieldTypes = ageField.anyOf?.map((typeEntry) => typeEntry['type']); - expect(ageFieldTypes).toContain('number'); - - // Check that the 'isActive' field is a boolean - const isActiveField: JSONSchema = schema.properties?.['isActive']; - expect(isActiveField).toBeDefined(); - const isActiveTypes = isActiveField.anyOf?.map((typeEntry) => typeEntry['type']); - expect(isActiveTypes).toContain('boolean'); - - // Check that the 'description' field is optional (occurs in some documents) - const descriptionField = schema.properties?.['description']; - expect(descriptionField).toBeDefined(); - expect(descriptionField?.['x-occurrence']).toBeLessThan(sparseDocumentsArray.length); - }); - - it('detects all BSON types from flatDocument', () => { - const schema: JSONSchema = {}; - updateSchemaWithDocument(schema, flatDocument); - - // Check that all fields are detected - const expectedFields = Object.keys(flatDocument); - expect(Object.keys(schema.properties || {})).toEqual(expect.arrayContaining(expectedFields)); - - // Helper function to get the 'x-bsonType' from a field - function getBsonType(fieldName: string): string | undefined { - const field = schema.properties?.[fieldName]; - const anyOf = field?.anyOf; - return anyOf && anyOf[0]?.['x-bsonType']; - } - - // Check that specific BSON types are correctly identified - expect(getBsonType('int32Field')).toBe('int32'); - 
expect(getBsonType('doubleField')).toBe('double'); - expect(getBsonType('decimalField')).toBe('decimal128'); - expect(getBsonType('dateField')).toBe('date'); - expect(getBsonType('objectIdField')).toBe('objectid'); - expect(getBsonType('codeField')).toBe('code'); - expect(getBsonType('uuidField')).toBe('uuid'); - expect(getBsonType('uuidLegacyField')).toBe('uuid-legacy'); - }); - - it('detects embedded objects correctly', () => { - const schema: JSONSchema = {}; - updateSchemaWithDocument(schema, embeddedDocumentOnly); - - // Check that the root properties are detected - expect(schema.properties).toHaveProperty('personalInfo'); - expect(schema.properties).toHaveProperty('jobInfo'); - - // Access 'personalInfo' properties - const personalInfoAnyOf = schema.properties && schema.properties['personalInfo']?.anyOf; - const personalInfoProperties = personalInfoAnyOf?.[0]?.properties; - expect(personalInfoProperties).toBeDefined(); - expect(personalInfoProperties).toHaveProperty('name'); - expect(personalInfoProperties).toHaveProperty('age'); - expect(personalInfoProperties).toHaveProperty('married'); - expect(personalInfoProperties).toHaveProperty('address'); - - // Access 'address' properties within 'personalInfo' - const addressAnyOf = personalInfoProperties['address'].anyOf; - const addressProperties = addressAnyOf?.[0]?.properties; - expect(addressProperties).toBeDefined(); - expect(addressProperties).toHaveProperty('street'); - expect(addressProperties).toHaveProperty('city'); - expect(addressProperties).toHaveProperty('zip'); - }); - - it('detects arrays and their element types correctly', () => { - const schema: JSONSchema = {}; - updateSchemaWithDocument(schema, arraysWithDifferentDataTypes); - - // Check that arrays are detected - expect(schema.properties).toHaveProperty('integersArray'); - expect(schema.properties).toHaveProperty('stringsArray'); - expect(schema.properties).toHaveProperty('booleansArray'); - 
expect(schema.properties).toHaveProperty('mixedArray'); - expect(schema.properties).toHaveProperty('datesArray'); - - // Helper function to get item types from an array field - function getArrayItemTypes(fieldName: string): string[] | undefined { - const field = schema.properties?.[fieldName]; - const anyOf = field?.anyOf; - const itemsAnyOf: JSONSchemaRef[] = anyOf?.[0]?.items?.anyOf; - return itemsAnyOf?.map((typeEntry) => typeEntry['type']); - } - - // Check that 'integersArray' has elements of type 'number' - const integerItemTypes = getArrayItemTypes('integersArray'); - expect(integerItemTypes).toContain('number'); - - // Check that 'stringsArray' has elements of type 'string' - const stringItemTypes = getArrayItemTypes('stringsArray'); - expect(stringItemTypes).toContain('string'); - - // Check that 'mixedArray' contains multiple types - const mixedItemTypes = getArrayItemTypes('mixedArray'); - expect(mixedItemTypes).toEqual(expect.arrayContaining(['number', 'string', 'boolean', 'object', 'null'])); - }); - - it('handles arrays within objects and objects within arrays', () => { - const schema: JSONSchema = {}; - updateSchemaWithDocument(schema, complexDocument); - - // Access 'user.profile.hobbies' - const userProfile = schema.properties && schema.properties['user'].anyOf?.[0]?.properties?.['profile']; - const hobbies = userProfile?.anyOf?.[0]?.properties?.['hobbies']; - // eslint-disable-next-line @typescript-eslint/no-unsafe-call - const hobbiesItemTypes = hobbies?.anyOf?.[0]?.items?.anyOf?.map((typeEntry) => typeEntry['type']); - expect(hobbiesItemTypes).toContain('string'); - - // Access 'user.profile.addresses' - const addresses = userProfile?.anyOf?.[0]?.properties?.['addresses']; - // eslint-disable-next-line @typescript-eslint/no-unsafe-call - const addressItemTypes = addresses?.anyOf?.[0]?.items?.anyOf?.map((typeEntry) => typeEntry['type']); - expect(addressItemTypes).toContain('object'); - - // Check that 'orders' is an array - const orders = 
schema.properties && schema.properties['orders']; - expect(orders).toBeDefined(); - const ordersType = orders.anyOf?.[0]?.type; - expect(ordersType).toBe('array'); - - // Access 'items' within 'orders' - const orderItems = orders.anyOf?.[0]?.items?.anyOf?.[0]?.properties?.['items']; - const orderItemsType = orderItems?.anyOf?.[0]?.type; - expect(orderItemsType).toBe('array'); - }); - - it('updates schema correctly when processing multiple documents', () => { - const schema: JSONSchema = {}; - complexDocumentsArray.forEach((doc) => updateSchemaWithDocument(schema, doc)); - - // Check that 'x-documentsInspected' is correct - expect(schema['x-documentsInspected']).toBe(complexDocumentsArray.length); - - // Check that some fields are present from different documents - expect(schema.properties).toHaveProperty('stringField'); - expect(schema.properties).toHaveProperty('personalInfo'); - expect(schema.properties).toHaveProperty('integersArray'); - expect(schema.properties).toHaveProperty('user'); - - // Check that 'integersArray' has correct min and max values - const integersArray = schema.properties && schema.properties['integersArray']; - const integerItemType = integersArray.anyOf?.[0]?.items?.anyOf?.[0]; - expect(integerItemType?.['x-minValue']).toBe(1); - expect(integerItemType?.['x-maxValue']).toBe(5); - - // Check that 'orders.items.price' is detected as Decimal128 - const orders = schema.properties && schema.properties['orders']; - const orderItems = orders.anyOf?.[0]?.items?.anyOf?.[0]?.properties?.['items']; - const priceField = orderItems?.anyOf?.[0]?.items?.anyOf?.[0]?.properties?.['price']; - const priceFieldType = priceField?.anyOf?.[0]; - expect(priceFieldType?.['x-bsonType']).toBe('decimal128'); - }); - - describe('traverses schema', () => { - it('with valid paths', () => { - const schema: JSONSchema = {}; - updateSchemaWithDocument(schema, complexDocument); - - let propertiesAtRoot = getPropertyNamesAtLevel(schema, []); - 
expect(propertiesAtRoot).toHaveLength(4); - - propertiesAtRoot = getPropertyNamesAtLevel(schema, ['user']); - expect(propertiesAtRoot).toHaveLength(3); - - propertiesAtRoot = getPropertyNamesAtLevel(schema, ['user', 'profile']); - expect(propertiesAtRoot).toHaveLength(4); - }); - - it('with broken paths', () => { - const schema: JSONSchema = {}; - updateSchemaWithDocument(schema, complexDocument); - - const propertiesAtRoot = getPropertyNamesAtLevel(schema, []); - expect(propertiesAtRoot).toHaveLength(4); - - expect(() => getPropertyNamesAtLevel(schema, ['no-entry'])).toThrow(); - - expect(() => getPropertyNamesAtLevel(schema, ['user', 'no-entry'])).toThrow(); - }); - - it('with sparse docs and mixed types', () => { - const schema: JSONSchema = {}; - updateSchemaWithDocument(schema, complexDocument); - updateSchemaWithDocument(schema, complexDocumentWithOddTypes); - - let propertiesAtRoot = getPropertyNamesAtLevel(schema, []); - expect(propertiesAtRoot).toHaveLength(4); - - propertiesAtRoot = getPropertyNamesAtLevel(schema, ['user']); - expect(propertiesAtRoot).toHaveLength(3); - expect(propertiesAtRoot).toEqual(['email', 'profile', 'username']); - - propertiesAtRoot = getPropertyNamesAtLevel(schema, ['user', 'profile']); - expect(propertiesAtRoot).toHaveLength(4); - expect(propertiesAtRoot).toEqual(['addresses', 'firstName', 'hobbies', 'lastName']); - - propertiesAtRoot = getPropertyNamesAtLevel(schema, ['history']); - expect(propertiesAtRoot).toHaveLength(6); - }); - }); -}); diff --git a/src/utils/json/mongo/autocomplete/basicMongoFindFilterSchema.json b/src/utils/json/mongo/autocomplete/basicMongoFindFilterSchema.json deleted file mode 100644 index a886411a4..000000000 --- a/src/utils/json/mongo/autocomplete/basicMongoFindFilterSchema.json +++ /dev/null @@ -1,173 +0,0 @@ -{ - "$schema": "http://json-schema.org/draft-07/schema#", - "$id": "mongodb-generic-filter-schema", - "title": "MongoDB Generic Find Filter Schema", - "type": "object", - 
"additionalProperties": { - "oneOf": [ - { - "title": "Direct Value", - "description": "A direct value for equality matching on any field.", - "examples": ["example", 42, true, null] - }, - { - "title": "Operator-Based Query", - "$ref": "#/definitions/operatorObject", - "examples": [ - { "$gt": 10 }, - { "$lt": 100 }, - { "$gte": 5 }, - { "$lte": 50 }, - { "$in": ["red", "blue", "green"] }, - { "$nin": ["yellow", "purple"] }, - { "$exists": false }, - { "$regex": "^start.*end$" }, - { "$gt": 10, "$lt": 20 }, - { "$in": [1, 2, 3], "$nin": [4, 5] } - ] - } - ] - }, - "properties": { - "$or": { - "type": "array", - "items": { "$ref": "#" }, - "description": "Joins query clauses with a logical OR.", - "examples": [ - [{ "status": "A" }, { "qty": { "$lt": 30 } }], - [{ "age": { "$gte": 18 } }, { "membership": "gold" }], - [{ "category": { "$in": ["electronics", "books"] } }, { "onSale": true }] - ] - }, - "$and": { - "type": "array", - "items": { "$ref": "#" }, - "description": "Joins query clauses with a logical AND.", - "examples": [ - [{ "status": "A" }, { "qty": { "$gt": 20, "$lt": 50 } }], - [{ "verified": true }, { "email": { "$exists": true } }], - [{ "price": { "$gte": 100 } }, { "stock": { "$lte": 500 } }] - ] - }, - "$not": { - "oneOf": [{ "$ref": "#" }], - "description": "Inverts the effect of a query expression.", - "examples": [ - { "price": { "$gt": 100 } }, - { "status": { "$eq": "inactive" } }, - { "category": { "$in": ["outdated", "discontinued"] } } - ] - }, - "$nor": { - "type": "array", - "items": { "$ref": "#" }, - "description": "Joins query clauses with a logical NOR.", - "examples": [ - [{ "price": 1.99 }, { "qty": { "$lt": 20 } }], - [{ "status": "A" }, { "onSale": true }], - [{ "rating": { "$gte": 4.5 } }, { "reviews": { "$gt": 100 } }] - ] - } - }, - "definitions": { - "operatorObject": { - "type": "object", - "properties": { - "$eq": { - "description": "Matches values that are equal to a specified value.", - "examples": ["active", 100, true] 
- }, - "$ne": { - "description": "Matches all values that are not equal to a specified value.", - "examples": ["inactive", 0, false] - }, - "$gt": { - "description": "Matches values that are greater than a specified value.", - "examples": [10, 100] - }, - "$gte": { - "description": "Matches values that are greater than or equal to a specified value.", - "examples": [5, 50] - }, - "$lt": { - "description": "Matches values that are less than a specified value.", - "examples": [20, 80] - }, - "$lte": { - "description": "Matches values that are less than or equal to a specified value.", - "examples": [15, 75] - }, - "$in": { - "type": "array", - "description": "Matches any of the values specified in an array.", - "examples": [ - ["red", "green", "blue"], - [1, 2, 3], - ["small", "medium", "large"] - ] - }, - "$nin": { - "type": "array", - "description": "Matches none of the values specified in an array.", - "examples": [ - ["yellow", "purple"], - [4, 5, 6], - ["extra-large", "xxl"] - ] - }, - "$exists": { - "type": "boolean", - "description": "Matches documents that have the specified field.", - "examples": [true, false] - }, - "$regex": { - "description": "Provides regular expression capabilities for pattern matching strings.", - "examples": ["^start", "end$", ".*pattern.*", "^[A-Z]{3}[0-9]{2}$"] - }, - "$size": { - "type": "integer", - "description": "Matches any array with the specified number of elements.", - "examples": [0, 5, 10] - }, - "$type": { - "description": "Matches values based on their BSON type.", - "examples": [1, "string", "object"] - }, - "$all": { - "type": "array", - "description": "Matches arrays that contain all elements specified in the query.", - "examples": [ - ["red", "blue"], - [10, 20], - ["feature1", "feature2"] - ] - }, - "$elemMatch": { - "type": "object", - "description": "Matches documents that contain an array field with at least one element that matches the specified query criteria.", - "examples": [ - { "score": { "$gt": 80 } }, - { 
"dimensions": { "$lt": 50, "$gt": 20 } }, - { "attributes": { "color": "red", "size": "M" } } - ] - } - }, - "additionalProperties": false, - "description": "An object containing MongoDB query operators and their corresponding values.", - "minProperties": 1, - "examples": [ - { "$gt": 10 }, - { "$lt": 100 }, - { "$gte": 5 }, - { "$lte": 50 }, - { "$in": ["value1", "value2"] }, - { "$gt": 10, "$lt": 20 }, - { "$exists": true }, - { "$regex": "^[a-z]+$" }, - { "$in": [1, 2, 3], "$nin": [4, 5, 6] }, - { "$elemMatch": { "score": { "$gte": 80 } } } - ] - } - }, - "description": "Generic schema for MongoDB find query filters without knowledge of specific fields." -} diff --git a/src/utils/json/mongo/autocomplete/generateMongoFindJsonSchema.ts b/src/utils/json/mongo/autocomplete/generateMongoFindJsonSchema.ts deleted file mode 100644 index 0f0fa7bbe..000000000 --- a/src/utils/json/mongo/autocomplete/generateMongoFindJsonSchema.ts +++ /dev/null @@ -1,270 +0,0 @@ -/*--------------------------------------------------------------------------------------------- - * Copyright (c) Microsoft Corporation. All rights reserved. - * Licensed under the MIT License. See License.txt in the project root for license information. - *--------------------------------------------------------------------------------------------*/ - -import { type FieldEntry } from './getKnownFields'; - -/** - * Generates a JSON schema for MongoDB find filter queries. - * - * This function is a short-term solution for providing autocompletion for MongoDB find filter queries. - * A MongoDB find filter query is a JSON document that can range from simple to complex structures. - * Basic autocompletion can be provided using a modified JSON schema, which is what we've done here. - * - * The long-term plan is to provide a more sophisticated auto-completion using, for example, - * the suggestion API that Monaco provides. This will be looked at in the future. 
- * - * @param fieldEntries - An array of field entries where each entry contains: - * - path: A string representing the full path of the field in the dataset (e.g., "age", "address.city"). - * - type: The most common or expected data type for that field (e.g., "number", "string"). - * - * The data provided is supposed to contain all known data paths from the expected dataset, - * focusing only on leaf nodes. - * - * The returned JSON schema can be directly added to the Monaco editor to activate autocompletion. - * - * @returns A JSON schema object that can be used for autocompletion in the Monaco editor. - */ -export function generateMongoFindJsonSchema(fieldEntries: FieldEntry[]) { - // Initialize the base schema object - const schema = { - $schema: 'http://json-schema.org/draft-07/schema#', - $id: 'mongodb-filter-schema', - title: 'MongoDB Find Filter Schema', - type: 'object', - properties: {}, - additionalProperties: { - oneOf: [ - { - title: 'Direct Value', - description: 'A direct value for equality matching on an unknown field.', - examples: ['value', 123, true, null], - }, - { - title: 'Operator-Based Query', - $ref: '#/definitions/operatorObjectUnknown', - examples: [{ $ne: 'inactive' }, { $exists: true }], - }, - ], - }, - definitions: { - operatorObject: { - type: 'object', - properties: { - $eq: { - description: 'Matches values that are equal to a specified value.', - examples: [21, 'active', true], - }, - $ne: { - description: 'Matches all values that are not equal to a specified value.', - examples: [30, 'inactive', false], - }, - $gt: { - description: 'Matches values that are greater than a specified value.', - examples: [25, 100], - }, - $gte: { - description: 'Matches values that are greater than or equal to a specified value.', - examples: [18, 50], - }, - $lt: { - description: 'Matches values that are less than a specified value.', - examples: [65, 100], - }, - $lte: { - description: 'Matches values that are less than or equal to a specified 
value.', - examples: [30, 75], - }, - $in: { - type: 'array', - description: 'Matches any of the values specified in an array.', - examples: [ - ['red', 'blue'], - [21, 30, 40], - ], - }, - $nin: { - type: 'array', - description: 'Matches none of the values specified in an array.', - examples: [['green'], [50, 60]], - }, - $exists: { - type: 'boolean', - description: 'Matches documents that have the specified field.', - examples: [true, false], - }, - $regex: { - description: 'Provides regular expression capabilities for pattern matching strings.', - examples: ['^re', '.*blue$', '^[A-Z]+'], - }, - }, - additionalProperties: false, - description: 'An object containing a MongoDB query operator and its corresponding value.', - minProperties: 1, - }, - operatorObjectUnknown: { - $ref: '#/definitions/operatorObject', - }, - }, - description: - 'Schema for MongoDB find query filters, supporting known fields with various operators for querying documents.', - }; - - // Set to collect all full paths - const fullPathsSet = new Set(); - - // Function to generate examples based on type - function generateExamples(type: string): unknown[] { - let examples; - if (type === 'number') { - examples = [42, 100]; - // eslint-disable-next-line @typescript-eslint/no-unsafe-call, @typescript-eslint/no-unsafe-member-access - examples.push(false); // odd type - // eslint-disable-next-line @typescript-eslint/no-unsafe-call, @typescript-eslint/no-unsafe-member-access - examples.push(null); - } else if (type === 'string') { - examples = ['red', 'blue']; - // eslint-disable-next-line @typescript-eslint/no-unsafe-call, @typescript-eslint/no-unsafe-member-access - examples.push(null); - } else if (type === 'boolean') { - examples = [true, false]; - // eslint-disable-next-line @typescript-eslint/no-unsafe-call, @typescript-eslint/no-unsafe-member-access - examples.push(null); - } else { - examples = ['value', 123, true, null]; - } - return examples as []; - } - - // Function to generate examples 
for operator-based queries - function generateOperatorExamples(type: string): unknown[] { - let examples; - if (type === 'number') { - examples = [{ $gt: 25 }, { $in: [20, 30, 40] }]; - } else if (type === 'string') { - examples = [{ $regex: '^re' }, { $ne: 'blue' }]; - } else if (type === 'boolean') { - examples = [{ $eq: true }, { $ne: false }]; - } else { - examples = [{ $exists: true }]; - } - return examples as []; - } - - // Function to create nested properties based on path components - function createNestedProperty( - // eslint-disable-next-line @typescript-eslint/no-explicit-any - obj: any, - pathComponents: string[], - type: string, - currentPath: string = '', - ) { - const fieldName = pathComponents[0]; - const newPath = currentPath ? `${currentPath}.${fieldName}` : fieldName; - - fullPathsSet.add(newPath); - - if (pathComponents.length === 1) { - // Leaf node - const examples = generateExamples(type); - const operatorExamples = generateOperatorExamples(type); - // eslint-disable-next-line @typescript-eslint/no-unsafe-member-access - obj[fieldName] = { - oneOf: [ - { - title: 'Direct Value', - description: `A direct value for equality matching on the '${fieldName}' field.`, - examples: examples, - }, - { - title: 'Operator-Based Query', - $ref: '#/definitions/operatorObject', - examples: operatorExamples, - }, - ], - }; - } else { - // Nested object - // eslint-disable-next-line @typescript-eslint/no-unsafe-member-access - if (!obj[fieldName]) { - // eslint-disable-next-line @typescript-eslint/no-unsafe-member-access - obj[fieldName] = { - type: 'object', - properties: {}, - additionalProperties: false, - description: `Embedded '${fieldName}' object containing fields.`, - }; - } - createNestedProperty( - // eslint-disable-next-line @typescript-eslint/no-unsafe-member-access - obj[fieldName]['properties'], - pathComponents.slice(1), - type, - newPath, - ); - } - } - - // Process each fieldEntry - for (const fieldEntry of fieldEntries) { - const 
pathComponents = fieldEntry.path.split('.'); - // eslint-disable-next-line @typescript-eslint/no-unsafe-member-access - createNestedProperty(schema['properties'], pathComponents, fieldEntry.type); - } - - // Function to get type for a full path - function getTypeForFullPath(fullPath: string): string | undefined { - for (const fieldEntry of fieldEntries) { - if (fieldEntry.path === fullPath) { - return fieldEntry.type; - } - } - return undefined; - } - - // Create properties with full paths at the root level - for (const fullPath of fullPathsSet) { - const type = getTypeForFullPath(fullPath) || 'string'; - const examples = generateExamples(type); - const operatorExamples = generateOperatorExamples(type); - - // eslint-disable-next-line @typescript-eslint/no-unsafe-member-access - schema['properties'][fullPath] = { - oneOf: [ - { - title: 'Direct Value', - description: `A direct value for equality matching on the '${fullPath}' field.`, - examples: examples, - }, - { - title: 'Operator-Based Query', - $ref: '#/definitions/operatorObject', - examples: operatorExamples, - }, - ], - }; - } - - // Add logical operators - const logicalOperators = ['$or', '$and', '$not', '$nor']; - for (const operator of logicalOperators) { - if (operator === '$not') { - // eslint-disable-next-line @typescript-eslint/no-unsafe-member-access - schema['properties'][operator] = { - oneOf: [{ $ref: '#' }], - description: `Inverts the effect of a query expression.`, - }; - } else { - // eslint-disable-next-line @typescript-eslint/no-unsafe-member-access - schema['properties'][operator] = { - type: 'array', - items: { $ref: '#' }, - description: `Joins query clauses with a logical ${operator.toUpperCase().substring(1)}.`, - }; - } - } - - // eslint-disable-next-line @typescript-eslint/no-unsafe-return - return schema; -} diff --git a/src/utils/json/mongo/autocomplete/getKnownFields.ts b/src/utils/json/mongo/autocomplete/getKnownFields.ts deleted file mode 100644 index a82277a73..000000000 --- 
a/src/utils/json/mongo/autocomplete/getKnownFields.ts +++ /dev/null @@ -1,95 +0,0 @@ -/*--------------------------------------------------------------------------------------------- - * Copyright (c) Microsoft Corporation. All rights reserved. - * Licensed under the MIT License. See License.txt in the project root for license information. - *--------------------------------------------------------------------------------------------*/ - -import Denque from 'denque'; -import { type JSONSchema } from '../../JSONSchema'; - -export interface FieldEntry { - path: string; - type: string; -} - -/** - * This function traverses our JSON Schema object and collects all leaf property paths - * along with their most common data types. - * - * This information is needed for auto-completion support - * - * The approach is as follows: - * - Initialize a queue with the root properties of the schema to perform a breadth-first traversal. - * - While the queue is not empty: - * - Dequeue the next item, which includes the current schema node and its path. - * - Determine the most common type for the current node by looking at the 'x-typeOccurrence' field. - * - If the most common type is an object with properties: - * - Enqueue its child properties with their updated paths into the queue for further traversal. - * - Else if the most common type is a leaf type (e.g., string, number, boolean): - * - Add the current path and type to the result array as it represents a leaf property. - * - Continue this process until all nodes have been processed. - * - Return the result array containing objects with 'path' and 'type' for each leaf property. 
- */ -export function getKnownFields(schema: JSONSchema): FieldEntry[] { - const result: Array<{ path: string; type: string }> = []; - type QueueItem = { - path: string; - schemaNode: JSONSchema; - }; - - const queue: Denque = new Denque(); - - // Initialize the queue with root properties - if (schema.properties) { - for (const propName of Object.keys(schema.properties)) { - const propSchema = schema.properties[propName] as JSONSchema; - queue.push({ path: propName, schemaNode: propSchema }); - } - } - - while (queue.length > 0) { - const item = queue.shift(); - if (!item) continue; - - const { path, schemaNode } = item; - const mostCommonTypeEntry = getMostCommonTypeEntry(schemaNode); - - if (mostCommonTypeEntry) { - if (mostCommonTypeEntry.type === 'object' && mostCommonTypeEntry.properties) { - // Not a leaf node, enqueue its properties - for (const childName of Object.keys(mostCommonTypeEntry.properties)) { - const childSchema = mostCommonTypeEntry.properties[childName] as JSONSchema; - queue.push({ path: `${path}.${childName}`, schemaNode: childSchema }); - } - } else { - // Leaf node, add to result - result.push({ path: path, type: mostCommonTypeEntry.type as string }); - } - } - } - - return result; -} - -/** - * Helper function to get the most common type entry from a schema node. - * It looks for the 'anyOf' array and selects the type with the highest 'x-typeOccurrence'. 
- */ -function getMostCommonTypeEntry(schemaNode: JSONSchema): JSONSchema | null { - if (schemaNode.anyOf && schemaNode.anyOf.length > 0) { - let maxOccurrence = -1; - let mostCommonTypeEntry: JSONSchema | null = null; - - for (const typeEntry of schemaNode.anyOf as JSONSchema[]) { - const occurrence = typeEntry['x-typeOccurrence'] || 0; - if (occurrence > maxOccurrence) { - maxOccurrence = occurrence; - mostCommonTypeEntry = typeEntry; - } - } - return mostCommonTypeEntry; - } else if (schemaNode.type) { - // If 'anyOf' is not present, use the 'type' field directly - return schemaNode; - } - return null; -} diff --git a/src/utils/slickgrid/mongo/toSlickGridTable.test.ts b/src/utils/slickgrid/mongo/toSlickGridTable.test.ts index 69156bf1b..4b1d0af3f 100644 --- a/src/utils/slickgrid/mongo/toSlickGridTable.test.ts +++ b/src/utils/slickgrid/mongo/toSlickGridTable.test.ts @@ -76,7 +76,6 @@ describe('toSlickGridTable', () => { it('at a nested level', () => { const tableData = getDataAtPath(mongoDocuments, ['nestedDocument']); - console.log(tableData); expect(tableData).toHaveLength(5); expect(tableData[0]['key']).toBeDefined(); diff --git a/src/utils/slickgrid/mongo/toSlickGridTable.ts b/src/utils/slickgrid/mongo/toSlickGridTable.ts index 737fcb7c0..5deb51fe0 100644 --- a/src/utils/slickgrid/mongo/toSlickGridTable.ts +++ b/src/utils/slickgrid/mongo/toSlickGridTable.ts @@ -3,11 +3,10 @@ * Licensed under the MIT License. See License.txt in the project root for license information. 
*--------------------------------------------------------------------------------------------*/ +import { BSONTypes, valueToDisplayString } from '@vscode-documentdb/schema-analyzer'; import { EJSON } from 'bson'; import { type Document, type WithId } from 'mongodb'; import { type TableDataEntry } from '../../../documentdb/ClusterSession'; -import { MongoBSONTypes } from '../../json/mongo/MongoBSONTypes'; -import { valueToDisplayString } from '../../json/mongo/MongoValueFormatters'; /** * Extracts data from a list of MongoDB documents at a specified path. @@ -45,8 +44,8 @@ export function getDataAtPath(documents: WithId[], path: string[]): Ta // we also make sure that the '_id' field is always included in the data! if (doc._id) { row['_id'] = { - value: valueToDisplayString(doc._id, MongoBSONTypes.inferType(doc._id)), - type: MongoBSONTypes.inferType(doc._id), + value: valueToDisplayString(doc._id, BSONTypes.inferType(doc._id)), + type: BSONTypes.inferType(doc._id), }; // TODO: problem here -> what if the user has a field with this name... 
row['x-objectid'] = EJSON.stringify(doc._id, { relaxed: false }); // this is crucial, we need to retain the _id field for future queries from the table view @@ -72,13 +71,13 @@ export function getDataAtPath(documents: WithId[], path: string[]): Ta continue; } else { const value: unknown = subdocument[key]; - const type: MongoBSONTypes = MongoBSONTypes.inferType(value); + const type: BSONTypes = BSONTypes.inferType(value); // eslint-disable-next-line if (value instanceof Array) { row[key] = { value: `array[${value.length}]`, - type: MongoBSONTypes.Array, + type: BSONTypes.Array, }; } else { row[key] = { value: valueToDisplayString(value, type), type: type }; diff --git a/src/utils/slickgrid/mongo/toSlickGridTree.ts b/src/utils/slickgrid/mongo/toSlickGridTree.ts index 9d3742cfe..849ad42b6 100644 --- a/src/utils/slickgrid/mongo/toSlickGridTree.ts +++ b/src/utils/slickgrid/mongo/toSlickGridTree.ts @@ -3,9 +3,8 @@ * Licensed under the MIT License. See License.txt in the project root for license information. *--------------------------------------------------------------------------------------------*/ +import { BSONTypes, valueToDisplayString } from '@vscode-documentdb/schema-analyzer'; import { type Document, type ObjectId, type WithId } from 'mongodb'; -import { MongoBSONTypes } from '../../json/mongo/MongoBSONTypes'; -import { valueToDisplayString } from '../../json/mongo/MongoValueFormatters'; /** * The data structure for a single node entry in the tree data structure for SlickGrid. 
@@ -113,10 +112,10 @@ export function documentToSlickGridTree(document: WithId, idPrefix?: s continue; } - const dataType: MongoBSONTypes = MongoBSONTypes.inferType(stackEntry.value); + const dataType: BSONTypes = BSONTypes.inferType(stackEntry.value); switch (dataType) { - case MongoBSONTypes.Object: { + case BSONTypes.Object: { tree.push({ id: globalEntryId, field: `${stackEntry.key}`, @@ -131,7 +130,7 @@ export function documentToSlickGridTree(document: WithId, idPrefix?: s }); break; } - case MongoBSONTypes.Array: { + case BSONTypes.Array: { const value = stackEntry.value as unknown[]; tree.push({ @@ -157,7 +156,7 @@ export function documentToSlickGridTree(document: WithId, idPrefix?: s id: globalEntryId, field: `${stackEntry.key}`, value: valueToDisplayString(stackEntry.value, dataType), - type: MongoBSONTypes.toDisplayString(MongoBSONTypes.inferType(stackEntry.value)), + type: BSONTypes.toDisplayString(BSONTypes.inferType(stackEntry.value)), parentId: stackEntry.parentId, }); break; diff --git a/src/webviews/components/MonacoAutoHeight.tsx b/src/webviews/components/MonacoAutoHeight.tsx index 9625d8f01..e151b230b 100644 --- a/src/webviews/components/MonacoAutoHeight.tsx +++ b/src/webviews/components/MonacoAutoHeight.tsx @@ -196,14 +196,21 @@ export const MonacoAutoHeight = (props: MonacoAutoHeightProps) => { /** * Configures the Tab key behavior for the Monaco editor. * - * When called, this function sets up or removes a keydown handler for the Tab key. - * If `shouldTrap` is true, Tab/Shift+Tab are trapped within the editor (focus remains in editor). - * If `shouldTrap` is false, Tab/Shift+Tab move focus to the next/previous focusable element outside the editor. + * When `shouldTrap` is true, Tab/Shift+Tab are trapped within the editor + * (default Monaco behavior for code indentation). 
+ * + * When `shouldTrap` is false, Tab/Shift+Tab move focus to the next/previous + * focusable element outside the editor β€” UNLESS the editor is in snippet + * tab-stop mode (`inSnippetMode`), in which case Tab navigates between + * snippet placeholders. After the snippet session ends (final tab stop or + * ESC), Tab reverts to moving focus out of the editor. + * + * Uses `editor.addAction` with a precondition context key expression + * (`!inSnippetMode`) rather than `onKeyDown` interception, so Monaco's + * built-in snippet navigation takes priority when a snippet is active. * * @param {monacoEditor.editor.IStandaloneCodeEditor} editor - The Monaco editor instance. * @param {boolean} shouldTrap - Whether to trap Tab key in the editor. - * - true: Tab/Shift+Tab are trapped in the editor. - * - false: Tab/Shift+Tab move focus to next/previous element. */ const configureTabKeyMode = (editor: monacoEditor.editor.IStandaloneCodeEditor, shouldTrap: boolean) => { if (tabKeyDisposerRef.current) { @@ -215,17 +222,30 @@ export const MonacoAutoHeight = (props: MonacoAutoHeightProps) => { return; } - tabKeyDisposerRef.current = editor.onKeyDown((event) => { - if (event.keyCode !== monacoEditor.KeyCode.Tab) { - return; - } - - event.preventDefault(); - event.stopPropagation(); + // Register Tab and Shift+Tab actions that only fire when NOT in snippet mode. + // When inSnippetMode is true, Monaco's built-in snippet Tab handler takes over. + const tabAction = editor.addAction({ + id: 'documentdb.tab.moveFocusNext', + label: 'Move Focus to Next Element', + keybindings: [monacoEditor.KeyCode.Tab], + precondition: '!inSnippetMode', + run: () => moveFocus(editor, 'next'), + }); - const direction = event.browserEvent.shiftKey ? 
'previous' : 'next'; - moveFocus(editor, direction); + const shiftTabAction = editor.addAction({ + id: 'documentdb.tab.moveFocusPrevious', + label: 'Move Focus to Previous Element', + keybindings: [monacoEditor.KeyMod.Shift | monacoEditor.KeyCode.Tab], + precondition: '!inSnippetMode', + run: () => moveFocus(editor, 'previous'), }); + + tabKeyDisposerRef.current = { + dispose: () => { + tabAction.dispose(); + shiftTabAction.dispose(); + }, + }; }; /** diff --git a/src/webviews/components/MonacoEditor.tsx b/src/webviews/components/MonacoEditor.tsx index c08e2087d..7e6f84530 100644 --- a/src/webviews/components/MonacoEditor.tsx +++ b/src/webviews/components/MonacoEditor.tsx @@ -75,11 +75,20 @@ export const MonacoEditor = ({ onEscapeEditor, onMount, ...props }: MonacoEditor disposablesRef.current.forEach((d) => d.dispose()); disposablesRef.current = []; - // Register Escape key handler to exit the editor + // Register Escape key handler to exit the editor. + // The context expression ensures ESC is only handled when: + // - The suggest (autocomplete) widget is NOT visible + // - The editor is NOT in snippet tab-stop mode + // This allows Monaco's built-in handlers to dismiss the suggest + // widget or exit snippet mode first, before our handler fires. 
if (onEscapeEditor) { - editor.addCommand(monacoInstance.KeyCode.Escape, () => { - onEscapeEditor(); - }); + editor.addCommand( + monacoInstance.KeyCode.Escape, + () => { + onEscapeEditor(); + }, + '!suggestWidgetVisible && !inSnippetMode', + ); } // Announce escape hint once when editor gains focus diff --git a/src/webviews/documentdb/collectionView/CollectionView.tsx b/src/webviews/documentdb/collectionView/CollectionView.tsx index 23908bb93..863a07bed 100644 --- a/src/webviews/documentdb/collectionView/CollectionView.tsx +++ b/src/webviews/documentdb/collectionView/CollectionView.tsx @@ -12,6 +12,7 @@ import { Announcer } from '../../api/webview-client/accessibility'; import { useConfiguration } from '../../api/webview-client/useConfiguration'; import { useTrpcClient } from '../../api/webview-client/useTrpcClient'; import { useSelectiveContextMenuPrevention } from '../../api/webview-client/utils/useSelectiveContextMenuPrevention'; +import { setCompletionContext } from '../../documentdbQuery'; import './collectionView.scss'; import { CollectionViewContext, @@ -351,17 +352,24 @@ export const CollectionView = (): JSX.Element => { } function updateAutoCompletionData(): void { - trpcClient.mongoClusters.collectionView.getAutocompletionSchema + trpcClient.mongoClusters.collectionView.getFieldCompletionData .query() - .then(async (schema) => { - void (await currentContextRef.current.queryEditor?.setJsonSchema(schema)); + .then((fields) => { + setCompletionContext(configuration.sessionId, { fields }); }) .catch((error) => { - void trpcClient.common.displayErrorMessage.mutate({ - message: l10n.t('Error while loading the autocompletion data'), - modal: false, - cause: error instanceof Error ? 
error.message : String(error), - }); + console.debug('Failed to update field completion data:', error); + // Non-blocking — completion will work without fields + trpcClient.common.reportEvent + .mutate({ + eventName: 'fieldCompletionDataFetchFailed', + properties: { + error: error instanceof Error ? error.message : String(error), + }, + }) + .catch(() => { + // best-effort telemetry, swallow errors + }); }); } diff --git a/src/webviews/documentdb/collectionView/collectionViewContext.ts b/src/webviews/documentdb/collectionView/collectionViewContext.ts index 435396ce6..e3a64fa63 100644 --- a/src/webviews/documentdb/collectionView/collectionViewContext.ts +++ b/src/webviews/documentdb/collectionView/collectionViewContext.ts @@ -97,7 +97,6 @@ export type CollectionViewContextType = { skip: number; limit: number; }; - setJsonSchema(schema: object): Promise; //monacoEditor.languages.json.DiagnosticsOptions, but we don't want to import monacoEditor here }; isAiRowVisible: boolean; // Controls visibility of the AI prompt row in QueryEditor queryInsights: QueryInsightsState; // Query insights state for progressive loading diff --git a/src/webviews/documentdb/collectionView/collectionViewRouter.ts b/src/webviews/documentdb/collectionView/collectionViewRouter.ts index fec8eb05d..8e00cfa70 100644 --- a/src/webviews/documentdb/collectionView/collectionViewRouter.ts +++ b/src/webviews/documentdb/collectionView/collectionViewRouter.ts @@ -4,15 +4,14 @@ *--------------------------------------------------------------------------------------------*/ import { callWithTelemetryAndErrorHandling, type IActionContext } from '@microsoft/vscode-azext-utils'; +import { type FieldEntry } from '@vscode-documentdb/schema-analyzer'; import * as fs from 'fs'; import { type Document } from 'mongodb'; import * as path from 'path'; import * as vscode from 'vscode'; -import { type JSONSchema } from 'vscode-json-languageservice'; import { z } from 'zod'; import { ClusterSession } from 
'../../../documentdb/ClusterSession'; import { getConfirmationAsInSettings } from '../../../utils/dialogs/getConfirmation'; -import { getKnownFields, type FieldEntry } from '../../../utils/json/mongo/autocomplete/getKnownFields'; import { publicProcedureWithTelemetry, router, type WithTelemetry } from '../../api/extension-server/trpc'; import * as l10n from '@vscode/l10n'; @@ -39,9 +38,7 @@ import { Views } from '../../../documentdb/Views'; import { ext } from '../../../extensionVariables'; import { QueryInsightsAIService } from '../../../services/ai/QueryInsightsAIService'; import { type CollectionItem } from '../../../tree/documentdb/CollectionItem'; -// eslint-disable-next-line import/no-internal-modules -import basicFindQuerySchema from '../../../utils/json/mongo/autocomplete/basicMongoFindFilterSchema.json'; -import { generateMongoFindJsonSchema } from '../../../utils/json/mongo/autocomplete/generateMongoFindJsonSchema'; +import { toFieldCompletionItems } from '../../../utils/json/data-api/autocomplete/toFieldCompletionItems'; import { promptAfterActionEventually } from '../../../utils/survey'; import { UsageImpact } from '../../../utils/surveyTypes'; import { type BaseRouterContext } from '../../api/configuration/appRouter'; @@ -234,25 +231,16 @@ export const collectionsViewRouter = router({ return { documentCount: size }; }), - getAutocompletionSchema: publicProcedureWithTelemetry + getFieldCompletionData: publicProcedureWithTelemetry // procedure type .query(({ ctx }) => { const myCtx = ctx as WithTelemetry; const session: ClusterSession = ClusterSession.getSession(myCtx.sessionId); - const _currentJsonSchema = session.getCurrentSchema(); - const autoCompletionData: FieldEntry[] = getKnownFields(_currentJsonSchema); + const fieldEntries: FieldEntry[] = session.getKnownFields(); - let querySchema: JSONSchema; - - if (autoCompletionData.length > 0) { - querySchema = generateMongoFindJsonSchema(autoCompletionData); - } else { - querySchema = 
basicFindQuerySchema; - } - - return querySchema; + return toFieldCompletionItems(fieldEntries); }), getCurrentPageAsTable: publicProcedureWithTelemetry // parameters diff --git a/src/webviews/documentdb/collectionView/components/queryEditor/QueryEditor.tsx b/src/webviews/documentdb/collectionView/components/queryEditor/QueryEditor.tsx index 7b6d7e8ec..8bfaabc77 100644 --- a/src/webviews/documentdb/collectionView/components/queryEditor/QueryEditor.tsx +++ b/src/webviews/documentdb/collectionView/components/queryEditor/QueryEditor.tsx @@ -10,9 +10,16 @@ import { useContext, useEffect, useRef, useState, type JSX } from 'react'; import { InputWithProgress } from '../../../../components/InputWithProgress'; // eslint-disable-next-line import/no-internal-modules import type * as monacoEditor from 'monaco-editor/esm/vs/editor/editor.api'; -// eslint-disable-next-line import/no-internal-modules -import basicFindQuerySchema from '../../../../../utils/json/mongo/autocomplete/basicMongoFindFilterSchema.json'; import { useConfiguration } from '../../../../api/webview-client/useConfiguration'; +import { + buildEditorUri, + clearCompletionContext, + EditorType, + LANGUAGE_ID, + registerDocumentDBQueryLanguage, + validateExpression, + type Diagnostic, +} from '../../../../documentdbQuery'; import { type CollectionViewWebviewConfigurationType } from '../../collectionViewController'; import { ArrowResetRegular, SendRegular, SettingsFilled, SettingsRegular } from '@fluentui/react-icons'; @@ -24,6 +31,31 @@ import { CollectionViewContext } from '../../collectionViewContext'; import { useHideScrollbarsDuringResize } from '../../hooks/useHideScrollbarsDuringResize'; import './queryEditor.scss'; +/** + * Convert a Diagnostic from the documentdb-query validator to a Monaco marker. 
+ */ +function toMonacoMarker( + diagnostic: Diagnostic, + model: monacoEditor.editor.ITextModel, + monaco: typeof monacoEditor, +): monacoEditor.editor.IMarkerData { + const startPos = model.getPositionAt(diagnostic.startOffset); + const endPos = model.getPositionAt(diagnostic.endOffset); + return { + severity: + diagnostic.severity === 'error' + ? monaco.MarkerSeverity.Error + : diagnostic.severity === 'warning' + ? monaco.MarkerSeverity.Warning + : monaco.MarkerSeverity.Info, + message: diagnostic.message, + startLineNumber: startPos.lineNumber, + startColumn: startPos.column, + endLineNumber: endPos.lineNumber, + endColumn: endPos.column, + }; +} + interface QueryEditorProps { onExecuteRequest: () => void; } @@ -46,7 +78,6 @@ export const QueryEditor = ({ onExecuteRequest }: QueryEditorProps): JSX.Element // AI prompt history (survives hide/show of AI input) const [aiPromptHistory, setAiPromptHistory] = useState([]); - const schemaAbortControllerRef = useRef(null); const aiGenerationAbortControllerRef = useRef(null); const aiInputRef = useRef(null); @@ -57,12 +88,162 @@ export const QueryEditor = ({ onExecuteRequest }: QueryEditorProps): JSX.Element const hideScrollbarsTemporarily = useHideScrollbarsDuringResize(); - const handleEditorDidMount = (editor: monacoEditor.editor.IStandaloneCodeEditor, monaco: typeof monacoEditor) => { - editor.setValue('{ }'); + /** + * Creates a Monaco model with a URI scheme for the given editor type. + * This enables the completion provider to identify which editor the request is for. 
+ */ + const createEditorModel = ( + editor: monacoEditor.editor.IStandaloneCodeEditor, + monaco: typeof monacoEditor, + editorType: EditorType, + initialValue: string, + ): monacoEditor.editor.ITextModel => { + const uri = monaco.Uri.parse(buildEditorUri(editorType, configuration.sessionId)); + let model = monaco.editor.getModel(uri); + if (!model) { + model = monaco.editor.createModel(initialValue, LANGUAGE_ID, uri); + } + editor.setModel(model); + return model; + }; + + /** + * Sets up debounced validation on editor content changes. + * Returns a cleanup function to clear any pending timeout. + */ + const setupValidation = ( + editor: monacoEditor.editor.IStandaloneCodeEditor, + monaco: typeof monacoEditor, + model: monacoEditor.editor.ITextModel, + ): (() => void) => { + let validationTimeout: ReturnType; + const disposable = editor.onDidChangeModelContent(() => { + clearTimeout(validationTimeout); + validationTimeout = setTimeout(() => { + const diagnostics = validateExpression(editor.getValue()); + const markers = diagnostics.map((d) => toMonacoMarker(d, model, monaco)); + monaco.editor.setModelMarkers(model, 'documentdb-query', markers); + }, 300); + }); + return () => { + clearTimeout(validationTimeout); + disposable.dispose(); + }; + }; + + /** + * Cancels any active snippet session on the given editor. + * + * After a snippet completion (e.g., `fieldName: $1`), Monaco keeps the + * snippet session alive and highlights the tab-stop placeholder. If the + * user continues typing, the highlight grows — the "ghost selection" + * bug. Calling this function ends the snippet session cleanly. + */ + const cancelSnippetSession = (editor: monacoEditor.editor.IStandaloneCodeEditor): void => { + const controller = editor.getContribution('snippetController2') as { cancel: () => void } | null | undefined; + controller?.cancel(); + }; + + /** Characters that signal the end of a field-value pair and should exit snippet mode. 
*/ + const SNIPPET_EXIT_CHARS = new Set([',', '}', ']']); + + /** + * Sets up pattern-based auto-trigger of completions. + * When a content change results in a trigger character followed by a + * space (`: `, `, `, `{ `, `[ `) at the end of the inserted text, + * completions are triggered automatically after a short delay. This + * handles both manual typing and completion acceptance. + * + * Also cancels any active snippet session when a delimiter character + * (`,`, `}`, `]`) is typed, preventing the "ghost selection" bug + * where the tab-stop highlight expands as the user continues typing. + * + * Returns a cleanup function. + */ + const setupSmartTrigger = (editor: monacoEditor.editor.IStandaloneCodeEditor): (() => void) => { + let triggerTimeout: ReturnType; + const contentDisposable = editor.onDidChangeModelContent((e) => { + clearTimeout(triggerTimeout); + + const change = e.changes[0]; + if (!change || change.text.length === 0) return; + + // Cancel snippet session when the user *types* a delimiter character. + // Only applies to single-character edits (user keystrokes), not to + // multi-character completion insertions which may legitimately + // contain commas or braces as part of the snippet text. 
+ if (change.text.length === 1 && SNIPPET_EXIT_CHARS.has(change.text)) { + cancelSnippetSession(editor); + } + + const model = editor.getModel(); + if (!model) return; + + // Calculate the offset at the end of the inserted text in the new model + const endOffset = change.rangeOffset + change.text.length; + + // We need at least 2 chars to check for ": " or ", " + if (endOffset < 2) return; + + const fullText = model.getValue(); + const lastTwo = fullText.substring(endOffset - 2, endOffset); + if (lastTwo === ': ' || lastTwo === ', ' || lastTwo === '{ ' || lastTwo === '[ ') { + triggerTimeout = setTimeout(() => { + editor.trigger('smart-trigger', 'editor.action.triggerSuggest', {}); + }, 50); + } + }); + + // Cancel snippet session when the editor loses focus (Option D). + // If the user clicks away while a tab-stop is highlighted, the + // highlight should not persist when they return. + const blurDisposable = editor.onDidBlurEditorText(() => { + cancelSnippetSession(editor); + }); + + // Cancel snippet session on Enter or Ctrl+Enter / Cmd+Enter. + // Enter commits the current line and should exit snippet mode. + // Ctrl+Enter triggers query execution and should also exit snippet mode + // so the tab-stop highlight doesn't persist after running a query. 
+ const keyDownDisposable = editor.onKeyDown((e) => { + if (e.browserEvent.key === 'Enter') { + cancelSnippetSession(editor); + } + }); + + return () => { + clearTimeout(triggerTimeout); + contentDisposable.dispose(); + blurDisposable.dispose(); + keyDownDisposable.dispose(); + }; + }; + // Track validation cleanup functions + const filterValidationCleanupRef = useRef<(() => void) | null>(null); + const projectValidationCleanupRef = useRef<(() => void) | null>(null); + const sortValidationCleanupRef = useRef<(() => void) | null>(null); + const filterSmartTriggerCleanupRef = useRef<(() => void) | null>(null); + const projectSmartTriggerCleanupRef = useRef<(() => void) | null>(null); + const sortSmartTriggerCleanupRef = useRef<(() => void) | null>(null); + const handleEditorDidMount = (editor: monacoEditor.editor.IStandaloneCodeEditor, monaco: typeof monacoEditor) => { // Store the filter editor reference filterEditorRef.current = editor; + // Register the documentdb-query language (idempotent — safe to call on every mount). + // Pass the tRPC openUrl handler so hover links can be opened via the extension host, + // bypassing the webview sandbox's popup restrictions. 
+ void registerDocumentDBQueryLanguage(monaco, (url) => void trpcClient.common.openUrl.mutate({ url })); + + // Create model with URI scheme for contextual completions + const model = createEditorModel(editor, monaco, EditorType.Filter, '{ }'); + + // Set up debounced validation + filterValidationCleanupRef.current = setupValidation(editor, monaco, model); + + // Set up smart-trigger for completions after ": " and ", " + filterSmartTriggerCleanupRef.current = setupSmartTrigger(editor); + const getCurrentQueryFunction = () => ({ filter: filterValue, project: projectValue, @@ -76,78 +257,8 @@ export const QueryEditor = ({ onExecuteRequest }: QueryEditorProps): JSX.Element ...prev, queryEditor: { getCurrentQuery: getCurrentQueryFunction, - /** - * Dynamically sets the JSON schema for the Monaco editor's validation and autocompletion. - * - * NOTE: This function can encounter network errors if called immediately after the - * editor mounts, as the underlying JSON web worker may not have finished loading. - * To mitigate this, a delay is introduced before attempting to set the schema. - * - * A more robust long-term solution should be implemented to programmatically - * verify that the JSON worker is initialized before this function proceeds. - * - * An AbortController is used to prevent race conditions when this function is - * called in quick succession (e.g., rapid "refresh" clicks). It ensures that - * any pending schema update is cancelled before a new one begins, guaranteeing - * a clean, predictable state and allowing the Monaco worker to initialize correctly. 
- */ - setJsonSchema: async (schema) => { - // Use the ref to cancel the previous operation - if (schemaAbortControllerRef.current) { - schemaAbortControllerRef.current.abort(); - } - - // Create and store the new AbortController in the ref - const abortController = new AbortController(); - schemaAbortControllerRef.current = abortController; - const signal = abortController.signal; - - try { - // Wait for 2 seconds to give the worker time to initialize - await new Promise((resolve) => setTimeout(resolve, 2000)); - - // If the operation was cancelled during the delay, abort early - if (signal.aborted) { - return; - } - - // Check if JSON language features are available and set the schema - if (monaco.languages.json?.jsonDefaults) { - monaco.languages.json.jsonDefaults.setDiagnosticsOptions({ - validate: false, - schemas: [ - { - uri: 'mongodb-filter-query-schema.json', - fileMatch: ['*'], - schema: schema, - }, - ], - }); - } - } catch (error) { - // The error is likely an uncaught exception in the worker, - // but we catch here just in case. - console.warn('Error setting JSON schema:', error); - } - }, }, })); - - // initialize the monaco editor with the schema that's basic - // as we don't know the schema of the collection available - // this is a fallback for the case when the autocompletion feature fails. - monaco.languages.json.jsonDefaults.setDiagnosticsOptions({ - validate: true, - schemas: [ - { - uri: 'mongodb-filter-query-schema.json', // Unique identifier - fileMatch: ['*'], // Apply to all JSON files or specify as needed - - schema: basicFindQuerySchema, - // schema: generateMongoFindJsonSchema(fieldEntries) - }, - ], - }); }; const monacoOptions: editor.IStandaloneEditorConstructionOptions = { @@ -173,19 +284,58 @@ export const QueryEditor = ({ onExecuteRequest }: QueryEditorProps): JSX.Element automaticLayout: false, }; + // Intercept link clicks in Monaco hover tooltips. 
+ // Monaco renders hover markdown links as tags, but the webview CSP + // blocks direct navigation. Capture clicks and route through tRPC. + const editorContainerRef = useRef(null); + useEffect(() => { + const container = editorContainerRef.current; + if (!container) return; + + const handleLinkClick = (e: MouseEvent): void => { + const target = e.target as HTMLElement; + const anchor = target.closest('a'); + if (!anchor) return; + + const href = anchor.getAttribute('href'); + if (href && (href.startsWith('https://') || href.startsWith('http://'))) { + e.preventDefault(); + e.stopPropagation(); + void trpcClient.common.openUrl.mutate({ url: href }); + } + }; + + container.addEventListener('click', handleLinkClick, true); + return () => container.removeEventListener('click', handleLinkClick, true); + }, [trpcClient]); + // Cleanup any pending operations when component unmounts useEffect(() => { return () => { - if (schemaAbortControllerRef.current) { - schemaAbortControllerRef.current.abort(); - schemaAbortControllerRef.current = null; - } if (aiGenerationAbortControllerRef.current) { aiGenerationAbortControllerRef.current.abort(); aiGenerationAbortControllerRef.current = null; } + + // Clean up validation timeouts + filterValidationCleanupRef.current?.(); + projectValidationCleanupRef.current?.(); + sortValidationCleanupRef.current?.(); + + // Clean up smart-trigger listeners + filterSmartTriggerCleanupRef.current?.(); + projectSmartTriggerCleanupRef.current?.(); + sortSmartTriggerCleanupRef.current?.(); + + // Dispose Monaco models + filterEditorRef.current?.getModel()?.dispose(); + projectEditorRef.current?.getModel()?.dispose(); + sortEditorRef.current?.getModel()?.dispose(); + + // Clear completion store for this session + clearCompletionContext(configuration.sessionId); }; - }, []); + }, [configuration.sessionId]); // Update getCurrentQuery function whenever state changes useEffect(() => { @@ -342,7 +492,7 @@ export const QueryEditor = ({ onExecuteRequest }: 
QueryEditorProps): JSX.Element }; return ( -
+
{/* Optional AI prompt row */}
@@ -397,7 +547,7 @@ export const QueryEditor = ({ onExecuteRequest }: QueryEditorProps): JSX.Element { handleEditorDidMount(editor, monaco); - // Sync initial value + // Sync editor content to state editor.onDidChangeModelContent(() => { setFilterValue(editor.getValue()); }); }} options={{ ...monacoOptions, - ariaLabel: l10n.t('Filter: Enter the DocumentDB query filter in JSON format'), + ariaLabel: l10n.t('Filter: Enter the DocumentDB query filter'), }} />
@@ -508,16 +658,31 @@ export const QueryEditor = ({ onExecuteRequest }: QueryEditorProps): JSX.Element { + onMount={(editor, monaco) => { + // Register language (idempotent) + void registerDocumentDBQueryLanguage( + monaco, + (url) => void trpcClient.common.openUrl.mutate({ url }), + ); + projectEditorRef.current = editor; - editor.setValue(projectValue); + + // Create model with URI scheme for project completions + const model = createEditorModel(editor, monaco, EditorType.Project, projectValue); + + // Set up validation + projectValidationCleanupRef.current = setupValidation(editor, monaco, model); + + // Set up smart-trigger + projectSmartTriggerCleanupRef.current = setupSmartTrigger(editor); + editor.onDidChangeModelContent(() => { setProjectValue(editor.getValue()); }); @@ -539,16 +704,31 @@ export const QueryEditor = ({ onExecuteRequest }: QueryEditorProps): JSX.Element { + onMount={(editor, monaco) => { + // Register language (idempotent) + void registerDocumentDBQueryLanguage( + monaco, + (url) => void trpcClient.common.openUrl.mutate({ url }), + ); + sortEditorRef.current = editor; - editor.setValue(sortValue); + + // Create model with URI scheme for sort completions + const model = createEditorModel(editor, monaco, EditorType.Sort, sortValue); + + // Set up validation + sortValidationCleanupRef.current = setupValidation(editor, monaco, model); + + // Set up smart-trigger + sortSmartTriggerCleanupRef.current = setupSmartTrigger(editor); + editor.onDidChangeModelContent(() => { setSortValue(editor.getValue()); }); diff --git a/src/webviews/documentdbQuery/completionStore.test.ts b/src/webviews/documentdbQuery/completionStore.test.ts new file mode 100644 index 000000000..fde71ed1b --- /dev/null +++ b/src/webviews/documentdbQuery/completionStore.test.ts @@ -0,0 +1,126 @@ +/*--------------------------------------------------------------------------------------------- + * Copyright (c) Microsoft Corporation. All rights reserved. 
+ * Licensed under the MIT License. See License.txt in the project root for license information. + *--------------------------------------------------------------------------------------------*/ + +import { + clearAllCompletionContexts, + clearCompletionContext, + getCompletionContext, + setCompletionContext, +} from './completionStore'; + +describe('completionStore', () => { + beforeEach(() => { + clearAllCompletionContexts(); + }); + + test('setCompletionContext then getCompletionContext round-trips correctly', () => { + const context = { + fields: [ + { + fieldName: 'name', + displayType: 'String', + bsonType: 'string', + isSparse: false, + insertText: 'name', + referenceText: '$name', + }, + ], + }; + + setCompletionContext('session-1', context); + expect(getCompletionContext('session-1')).toEqual(context); + }); + + test('getCompletionContext returns undefined for unknown session', () => { + expect(getCompletionContext('unknown')).toBeUndefined(); + }); + + test('clearCompletionContext removes the entry', () => { + setCompletionContext('session-1', { fields: [] }); + expect(getCompletionContext('session-1')).toBeDefined(); + + clearCompletionContext('session-1'); + expect(getCompletionContext('session-1')).toBeUndefined(); + }); + + test('clearCompletionContext is a no-op for unknown session', () => { + expect(() => clearCompletionContext('unknown')).not.toThrow(); + }); + + test('clearAllCompletionContexts removes all entries', () => { + setCompletionContext('session-1', { fields: [] }); + setCompletionContext('session-2', { fields: [] }); + + clearAllCompletionContexts(); + + expect(getCompletionContext('session-1')).toBeUndefined(); + expect(getCompletionContext('session-2')).toBeUndefined(); + }); + + test('setCompletionContext overwrites existing data', () => { + const original = { + fields: [ + { + fieldName: 'old', + displayType: 'String', + bsonType: 'string', + isSparse: false, + insertText: 'old', + referenceText: '$old', + }, + ], + }; + const 
updated = { + fields: [ + { + fieldName: 'new', + displayType: 'Number', + bsonType: 'double', + isSparse: true, + insertText: 'new', + referenceText: '$new', + }, + ], + }; + + setCompletionContext('session-1', original); + setCompletionContext('session-1', updated); + + expect(getCompletionContext('session-1')).toEqual(updated); + }); + + test('multiple sessions are independent', () => { + const ctx1 = { + fields: [ + { + fieldName: 'a', + displayType: 'String', + bsonType: 'string', + isSparse: false, + insertText: 'a', + referenceText: '$a', + }, + ], + }; + const ctx2 = { + fields: [ + { + fieldName: 'b', + displayType: 'Number', + bsonType: 'int32', + isSparse: true, + insertText: 'b', + referenceText: '$b', + }, + ], + }; + + setCompletionContext('session-1', ctx1); + setCompletionContext('session-2', ctx2); + + expect(getCompletionContext('session-1')).toEqual(ctx1); + expect(getCompletionContext('session-2')).toEqual(ctx2); + }); +}); diff --git a/src/webviews/documentdbQuery/completionStore.ts b/src/webviews/documentdbQuery/completionStore.ts new file mode 100644 index 000000000..b97ed859d --- /dev/null +++ b/src/webviews/documentdbQuery/completionStore.ts @@ -0,0 +1,36 @@ +/*--------------------------------------------------------------------------------------------- + * Copyright (c) Microsoft Corporation. All rights reserved. + * Licensed under the MIT License. See License.txt in the project root for license information. + *--------------------------------------------------------------------------------------------*/ + +import { type FieldCompletionData } from '../../utils/json/data-api/autocomplete/toFieldCompletionItems'; + +/** + * Completion context for a single editor session. + * Holds dynamic field data fetched from the extension host after query execution. + */ +export interface CompletionContext { + fields: FieldCompletionData[]; +} + +const store = new Map(); + +/** Update field data for a session (called after query execution). 
*/ +export function setCompletionContext(sessionId: string, context: CompletionContext): void { + store.set(sessionId, context); +} + +/** Get field data for a session. */ +export function getCompletionContext(sessionId: string): CompletionContext | undefined { + return store.get(sessionId); +} + +/** Remove a session's data (called on tab close / dispose). */ +export function clearCompletionContext(sessionId: string): void { + store.delete(sessionId); +} + +/** Clear all sessions (for testing). */ +export function clearAllCompletionContexts(): void { + store.clear(); +} diff --git a/src/webviews/documentdbQuery/completions/README.md b/src/webviews/documentdbQuery/completions/README.md new file mode 100644 index 000000000..bb060d369 --- /dev/null +++ b/src/webviews/documentdbQuery/completions/README.md @@ -0,0 +1,110 @@ +# Completions Module + +Context-sensitive completion items for the `documentdb-query` Monaco language. + +## Architecture + +``` +registerLanguage.ts + └─ provideCompletionItems() + │ + ├─ cursorContext.ts ← detect semantic cursor position + │ + └─ completions/ + ├─ createCompletionItems.ts ← main entry, context routing + ├─ mapCompletionItems.ts ← operator/field → CompletionItem + ├─ typeSuggestions.ts ← type-aware value suggestions + ├─ snippetUtils.ts ← snippet text manipulation + └─ completionKnowledge.ts ← curated domain rules & constants +``` + +### Flow + +1. Monaco calls `provideCompletionItems()` (registered in `registerLanguage.ts`) +2. `detectCursorContext()` scans backward from the cursor to determine the semantic position +3. 
`createCompletionItems()` routes to the appropriate builder: + - **key / array-element** β†’ field names + key-position operators + - **value** β†’ type suggestions + operators (with braces) + BSON constructors + JS globals + - **operator** β†’ operators only (braces stripped, type-aware sorting) + - **empty** (unknown + needsWrapping) β†’ key-position completions with `{ }` wrapping + - **unknown** (ambiguous, no wrapping) β†’ all completions (fields, all operators, BSON constructors, JS globals) + +## Sorting + +Completion items use `sortText` prefixes so Monaco displays them in the intended order. Lower prefixes appear higher in the list. + +### Empty position (no braces) + +Same as key position. All insertions wrapped with `{ }`. + +| Prefix | Content | Example | +|--------|---------|---------| +| `0_fieldName` | Schema field names (wrapped) | `{ age: $1 }`, `{ name: $1 }` | +| `1_$and` | Key-position operators (with braces) | `{ $and: [...] }` | + +### Value position + +| Prefix | Content | Example | +|--------|---------|---------| +| `00_00` – `00_99` | Type suggestions | `true` / `false` for boolean fields | +| `0_$eq` – `2_$op` | Query operators (type-aware) | `{ $eq: … }`, `{ $gt: … }` | +| `3_ObjectId` | BSON constructors | `ObjectId(…)`, `ISODate(…)` | +| `4_Date` | JS globals | `Date`, `Math`, `RegExp`, `Infinity` | + +### Key position + +| Prefix | Content | Example | +|--------|---------|---------| +| `0_fieldName` | Schema field names | `age`, `name`, `_id` | +| `1_$and` | Key-position operators | `$and`, `$or`, `$nor` | + +### Operator position (type-aware) + +When the field's BSON type is known, operators are tiered by relevance: + +| Prefix | Tier | Meaning | +|--------|------|---------| +| `0_` | Type-relevant | Operator's `applicableBsonTypes` matches the field | +| `1a_` | Comparison (universal) | `$eq`, `$ne`, `$gt`, `$in`, etc. 
β€” no type restriction, most commonly used | +| `1b_` | Other universal | Element/evaluation/geospatial operators with no type restriction | +| `2_` | Non-matching | Operator has type restrictions that don't match the field | + +Within each tier, operators sort alphabetically by name (`$eq` < `$gt` < `$in`). + +**Example β€” boolean field `isActive`:** +- Tier `1a_`: `$eq`, `$gt`, `$gte`, `$in`, `$lt`, `$lte`, `$ne`, `$nin` (comparison) +- Tier `1b_`: `$exists`, `$type`, `$mod`, `$expr`, `$jsonSchema` (other universal) +- Tier `2_`: `$regex` (string-only), `$elemMatch` (array-only), `$bitsAllSet` (int/long-only) + +### Decision matrix + +``` +Has field type info? +β”œβ”€ NO β†’ no sortText override (Monaco default alphabetical) +β”œβ”€ YES +β”‚ β”œβ”€ Operator has applicableBsonTypes matching field? β†’ "0_" +β”‚ β”œβ”€ Operator has no applicableBsonTypes? +β”‚ β”‚ β”œβ”€ Is comparison operator (meta = query:comparison)? β†’ "1a_" +β”‚ β”‚ └─ Other category? β†’ "1b_" +β”‚ └─ Operator has applicableBsonTypes NOT matching field? β†’ "2_" +``` + +## Key concepts + +### `completionKnowledge.ts` + +Curated domain rules that go beyond the auto-generated operator registry in `documentdb-constants`. Contains: + +- **`KEY_POSITION_OPERATORS`** β€” operators valid only at query root level (`$and`, `$or`, etc.) +- **`LABEL_PLACEHOLDER`** β€” the `…` character used in display labels +- **`INFO_INDICATOR`** β€” the `β„Ή` character prepended to example descriptions + +### Snippet handling + +Operator snippets in `documentdb-constants` include outer braces: `{ $gt: ${1:value} }`. + +- **Empty position**: operators keep full braces (user has no braces); fields wrapped with `{ ... 
}` +- **Value position**: inserted as-is (user is replacing the entire value) +- **Operator position**: outer `{ }` stripped via `stripOuterBraces()` (user is already inside braces) +- **Key position**: outer `{ }` stripped (user is already inside the query object) +- **`$` escaping**: `escapeSnippetDollars()` prevents Monaco from treating `$gt` as a variable reference diff --git a/src/webviews/documentdbQuery/completions/completionKnowledge.ts b/src/webviews/documentdbQuery/completions/completionKnowledge.ts new file mode 100644 index 000000000..148539e60 --- /dev/null +++ b/src/webviews/documentdbQuery/completions/completionKnowledge.ts @@ -0,0 +1,98 @@ +/*--------------------------------------------------------------------------------------------- + * Copyright (c) Microsoft Corporation. All rights reserved. + * Licensed under the MIT License. See License.txt in the project root for license information. + *--------------------------------------------------------------------------------------------*/ + +/** + * Completion knowledge β€” curated domain rules for the completion provider. + * + * This file centralises "knowledge" that is **not** part of the generic + * DocumentDB operator registry (`documentdb-constants`) but is essential for + * producing high-quality, context-sensitive completions in the query editor. + * + * ### Why this file exists + * + * The `documentdb-constants` package is auto-generated from the official + * operator reference and is intentionally kept generic β€” it describes *what* + * operators exist, not *where* they are syntactically valid. + * + * However the completion provider needs to know additional rules: + * + * 1. **Which operators are only valid at key (root) position?** + * `$and`, `$or`, `$nor`, etc. accept sub-queries, not field values. 
+ * Showing them inside a field's operator list (`{ age: { $and … } }`) is + * misleading, so we need an explicit list to filter them out of + * operator-position completions and include them in key-position completions. + * + * 2. **Placeholder character for labels** + * A single Unicode character used in completion-list labels to represent + * "user fills this in". Must render well in all editors and at any font size. + * + * Adding new knowledge here keeps the completion provider self-documented and + * avoids magic values scattered across multiple files. + */ + +/** + * Operators that are syntactically valid only at the **key position** (the + * root level of a query document, or inside a `$and`/`$or`/`$nor` array + * element). + * + * These operators accept sub-expressions or arrays of sub-queries as their + * values — they do **not** operate on a specific field's BSON value. For + * example: + * + * ```js + * // ✅ Valid — key position + * { $and: [{ age: { $gt: 18 } }, { name: "Alice" }] } + * + * // ❌ Invalid — operator position on field 'age' + * { age: { $and: … } } + * ``` + * + * **`$not` is intentionally excluded** — despite being a logical operator, + * `$not` is a field-level operator that wraps a single field's expression: + * `{ price: { $not: { $gt: 1.99 } } }`. It does NOT work at query root. + * + * The completion provider uses this set to: + * - **Include** these operators at key position and array-element position + * - **Exclude** them from operator position (inside `{ field: { … } }`) + * - **Exclude** them from value position + * + * Source: DocumentDB query language specification — logical and meta operators. + */ +export const KEY_POSITION_OPERATORS = new Set([ + '$and', + '$or', + '$nor', + '$comment', + '$expr', + '$jsonSchema', + '$text', + '$where', +]); + +/** + * Placeholder character used in completion-list **labels** to indicate where + * the user should type a value. 
/**
 * Placeholder character used in completion-list **labels** to mark the spot
 * where the user is expected to type a value.
 *
 * This is purely cosmetic: it only appears in the label shown in the
 * completion picker. The inserted text uses Monaco snippet tab stops
 * (`${1:placeholder}`) instead.
 *
 * The horizontal ellipsis (U+2026) was chosen because it is widely understood
 * as "something goes here", renders reliably in monospace and proportional
 * fonts, and is visually lightweight. It replaced U+25AA (Black Small Square),
 * which was too subtle at small font sizes and less semantically clear.
 */
export const LABEL_PLACEHOLDER = '\u2026'; // … (horizontal ellipsis)

/**
 * Info indicator for completion descriptions that contain usage examples.
 *
 * Intended to be prepended to description strings that show example values,
 * to tell them apart from plain type labels such as `"string literal"`.
 */
export const INFO_INDICATOR = '\u2139'; // ℹ (information source)
It uses + * cursor context detection to determine which completions to show and delegates + * to specialized functions for each context (key, value, operator, etc.). + */ + +import { + FILTER_COMPLETION_META, + getFilteredCompletions, + PROJECTION_COMPLETION_META, +} from '@vscode-documentdb/documentdb-constants'; +// eslint-disable-next-line import/no-internal-modules +import type * as monacoEditor from 'monaco-editor/esm/vs/editor/editor.api'; +import { getCompletionContext } from '../completionStore'; +import { type CursorContext } from '../cursorContext'; +import { EditorType } from '../languageConfig'; +import { KEY_POSITION_OPERATORS } from './completionKnowledge'; +import { createJsGlobalCompletionItems } from './jsGlobals'; +import { mapFieldToCompletionItem, mapOperatorToCompletionItem } from './mapCompletionItems'; +import { createTypeSuggestions } from './typeSuggestions'; + +/** + * Parameters for creating completion items. + */ +export interface CreateCompletionItemsParams { + /** The editor type parsed from the model URI (undefined if URI doesn't match). */ + editorType: EditorType | undefined; + /** The session ID for looking up dynamic field completions. */ + sessionId: string | undefined; + /** The range to insert completions at. */ + range: monacoEditor.IRange; + /** Whether the cursor is immediately after a '$' character. */ + isDollarPrefix: boolean; + /** The Monaco editor API. */ + monaco: typeof monacoEditor; + /** + * Optional BSON types of the field the cursor is operating on. + * When provided, operators are sorted by type relevance. + */ + fieldBsonTypes?: readonly string[]; + /** + * When true, completion snippets should include outer `{ }` wrapping. + * Set when the editor content has no braces (user cleared the editor), + * so that inserted completions produce valid query syntax. + */ + needsWrapping?: boolean; + /** + * Optional cursor context from the heuristic cursor position detector. 
/**
 * Picks the completion meta tags to query for an editor type.
 *
 * The Project and Sort editors use `PROJECTION_COMPLETION_META`. The Filter
 * editor, and any undefined or unrecognised editor type, use
 * `FILTER_COMPLETION_META`.
 *
 * Exported for testing.
 */
export function getMetaTagsForEditorType(editorType: EditorType | undefined): readonly string[] {
    if (editorType === EditorType.Project || editorType === EditorType.Sort) {
        return PROJECTION_COMPLETION_META;
    }
    return FILTER_COMPLETION_META;
}

/**
 * Builds the Monaco completion items for the current editor state.
 *
 * This is the entry point used by the completion item provider. It routes on
 * the detected cursor position:
 * - no context, or `unknown` with `needsWrapping`: key-position items wrapped in `{ }`
 * - no context, or `unknown` without `needsWrapping`: every completion (discovery fallback)
 * - `key` / `array-element`: field names plus key-position operators
 * - `value`: value-position completions for the field's BSON type
 * - `operator`: operators without outer braces, sorted by type relevance
 * - anything else: every completion
 *
 * @param params - editor type, session, insertion range, Monaco API and optional cursor context
 * @returns the completion items for the detected context
 */
export function createCompletionItems(params: CreateCompletionItemsParams): monacoEditor.languages.CompletionItem[] {
    const { editorType, sessionId, range, monaco, fieldBsonTypes, cursorContext, needsWrapping } = params;

    if (!cursorContext || cursorContext.position === 'unknown') {
        // An editor without braces gets wrapped key-position items so that the
        // inserted text is valid on its own; otherwise show everything.
        return needsWrapping
            ? createEmptyEditorCompletions(editorType, sessionId, range, monaco)
            : createAllCompletions(editorType, sessionId, range, monaco);
    }

    if (cursorContext.position === 'key' || cursorContext.position === 'array-element') {
        return createKeyPositionCompletions(editorType, sessionId, range, monaco);
    }

    if (cursorContext.position === 'value') {
        return createValuePositionCompletions(editorType, range, monaco, cursorContext.fieldBsonType);
    }

    if (cursorContext.position === 'operator') {
        // The type detected at the cursor wins over the caller-supplied types.
        const bsonTypes = cursorContext.fieldBsonType ? [cursorContext.fieldBsonType] : fieldBsonTypes;
        return createOperatorPositionCompletions(editorType, range, monaco, bsonTypes);
    }

    return createAllCompletions(editorType, sessionId, range, monaco);
}
/**
 * Discovery fallback for an ambiguous (UNKNOWN) cursor context.
 *
 * Returns, in this order: field names, standalone operators, BSON
 * constructors (sort prefix `3_`) and JS globals. Entries tagged `variable`
 * and entries with `standalone === false` are left out of the operator group.
 */
function createAllCompletions(
    editorType: EditorType | undefined,
    sessionId: string | undefined,
    range: monacoEditor.IRange,
    monaco: typeof monacoEditor,
): monacoEditor.languages.CompletionItem[] {
    const entries = getFilteredCompletions({ meta: [...getMetaTagsForEditorType(editorType)] });
    const fieldItems = getFieldCompletionItems(sessionId, range, monaco);

    const operatorItems: monacoEditor.languages.CompletionItem[] = [];
    const bsonItems: monacoEditor.languages.CompletionItem[] = [];
    for (const entry of entries) {
        if (entry.meta === 'bson') {
            const bsonItem = mapOperatorToCompletionItem(entry, range, monaco);
            bsonItem.sortText = `3_${entry.value}`;
            bsonItems.push(bsonItem);
        } else if (entry.meta !== 'variable' && entry.standalone !== false) {
            operatorItems.push(mapOperatorToCompletionItem(entry, range, monaco));
        }
    }

    const jsGlobalItems = createJsGlobalCompletionItems(range, monaco);
    return [...fieldItems, ...operatorItems, ...bsonItems, ...jsGlobalItems];
}

/**
 * Completions for the key position (also used for array elements).
 *
 * Returns field names followed by the key-position operators (sort prefix
 * `1_`). Operator snippets have their outer braces stripped, because the
 * cursor is already inside `{ }` and inserting `{ $and: [...] }` would
 * produce double braces.
 */
function createKeyPositionCompletions(
    editorType: EditorType | undefined,
    sessionId: string | undefined,
    range: monacoEditor.IRange,
    monaco: typeof monacoEditor,
): monacoEditor.languages.CompletionItem[] {
    const entries = getFilteredCompletions({ meta: [...getMetaTagsForEditorType(editorType)] });

    const operatorItems: monacoEditor.languages.CompletionItem[] = [];
    for (const entry of entries) {
        if (!KEY_POSITION_OPERATORS.has(entry.value)) {
            continue;
        }
        const operatorItem = mapOperatorToCompletionItem(entry, range, monaco, undefined, true);
        operatorItem.sortText = `1_${entry.value}`;
        operatorItems.push(operatorItem);
    }

    return [...getFieldCompletionItems(sessionId, range, monaco), ...operatorItems];
}
/**
 * Value completions for the **project** editor.
 *
 * Offers the two plain projection values: `1` (include the field, preselected)
 * and `0` (exclude the field). Both sort under the `00_` prefix.
 *
 * @param range - the insertion range
 * @param monaco - the Monaco API
 */
function createProjectValueCompletions(
    range: monacoEditor.IRange,
    monaco: typeof monacoEditor,
): monacoEditor.languages.CompletionItem[] {
    const valueKind = monaco.languages.CompletionItemKind.Value;

    const includeItem: monacoEditor.languages.CompletionItem = {
        label: { label: '1', description: 'include field' },
        kind: valueKind,
        insertText: '1',
        sortText: '00_1',
        preselect: true,
        range,
    };
    const excludeItem: monacoEditor.languages.CompletionItem = {
        label: { label: '0', description: 'exclude field' },
        kind: valueKind,
        insertText: '0',
        sortText: '00_0',
        range,
    };

    return [includeItem, excludeItem];
}
/**
 * Value completions for the **sort** editor.
 *
 * Offers `1` (ascending, preselected) and `-1` (descending), both under the
 * `00_` sort prefix.
 *
 * @param range - the insertion range
 * @param monaco - the Monaco API
 */
function createSortValueCompletions(
    range: monacoEditor.IRange,
    monaco: typeof monacoEditor,
): monacoEditor.languages.CompletionItem[] {
    const valueKind = monaco.languages.CompletionItemKind.Value;

    const ascendingItem: monacoEditor.languages.CompletionItem = {
        label: { label: '1', description: 'ascending' },
        kind: valueKind,
        insertText: '1',
        sortText: '00_1',
        preselect: true,
        range,
    };
    const descendingItem: monacoEditor.languages.CompletionItem = {
        label: { label: '-1', description: 'descending' },
        kind: valueKind,
        insertText: '-1',
        sortText: '00_-1',
        range,
    };

    return [ascendingItem, descendingItem];
}

/**
 * Completions for the operator position (inside `{ field: { … } }`).
 *
 * Keeps standalone operators only: BSON constructors, variables, entries with
 * `standalone === false` and key-position operators are dropped. Snippets are
 * mapped with outer braces stripped and with type-aware sorting based on
 * `fieldBsonTypes`.
 */
function createOperatorPositionCompletions(
    editorType: EditorType | undefined,
    range: monacoEditor.IRange,
    monaco: typeof monacoEditor,
    fieldBsonTypes: readonly string[] | undefined,
): monacoEditor.languages.CompletionItem[] {
    const entries = getFilteredCompletions({ meta: [...getMetaTagsForEditorType(editorType)] });

    const items: monacoEditor.languages.CompletionItem[] = [];
    for (const entry of entries) {
        const isConstructorOrVariable = entry.meta === 'bson' || entry.meta === 'variable';
        if (isConstructorOrVariable || entry.standalone === false || KEY_POSITION_OPERATORS.has(entry.value)) {
            continue;
        }
        items.push(mapOperatorToCompletionItem(entry, range, monaco, fieldBsonTypes, true));
    }
    return items;
}

/**
 * Field-name completions for a session.
 *
 * Returns an empty list when there is no session id or when no completion
 * context is stored for that session; otherwise maps every known field to a
 * completion item.
 */
function getFieldCompletionItems(
    sessionId: string | undefined,
    range: monacoEditor.IRange,
    monaco: typeof monacoEditor,
): monacoEditor.languages.CompletionItem[] {
    if (!sessionId) {
        return [];
    }

    const storedContext = getCompletionContext(sessionId);
    if (!storedContext) {
        return [];
    }

    return Array.from(storedContext.fields, (field) => mapFieldToCompletionItem(field, range, monaco));
}
Copyright (c) Microsoft Corporation. All rights reserved. + * Licensed under the MIT License. See License.txt in the project root for license information. + *--------------------------------------------------------------------------------------------*/ + +/** + * Completion items for the `documentdb-query` language. + * + * This folder contains context-sensitive completion logic: + * - `createCompletionItems.ts` β€” main entry point, context branching + * - `mapCompletionItems.ts` β€” operator/field β†’ CompletionItem mapping + * - `typeSuggestions.ts` β€” type-aware value suggestions (bool β†’ true/false, etc.) + * - `jsGlobals.ts` β€” JS globals available in the shell-bson-parser sandbox (Date, Math, etc.) + * - `snippetUtils.ts` β€” snippet text manipulation (brace stripping, $ escaping) + */ + +export { INFO_INDICATOR, LABEL_PLACEHOLDER } from './completionKnowledge'; +export { + KEY_POSITION_OPERATORS, + createCompletionItems, + getMetaTagsForEditorType, + type CreateCompletionItemsParams, +} from './createCompletionItems'; +export { createJsGlobalCompletionItems } from './jsGlobals'; +export { + getCategoryLabel, + getCompletionKindForMeta, + getOperatorSortPrefix, + mapFieldToCompletionItem, + mapOperatorToCompletionItem, +} from './mapCompletionItems'; +export { escapeSnippetDollars, stripOuterBraces } from './snippetUtils'; +export { createTypeSuggestions } from './typeSuggestions'; diff --git a/src/webviews/documentdbQuery/completions/jsGlobals.ts b/src/webviews/documentdbQuery/completions/jsGlobals.ts new file mode 100644 index 000000000..483485f54 --- /dev/null +++ b/src/webviews/documentdbQuery/completions/jsGlobals.ts @@ -0,0 +1,172 @@ +/*--------------------------------------------------------------------------------------------- + * Copyright (c) Microsoft Corporation. All rights reserved. + * Licensed under the MIT License. See License.txt in the project root for license information. 
+ *--------------------------------------------------------------------------------------------*/ + +/** + * JavaScript global completions for the `documentdb-query` language. + * + * The `documentdb-query` language uses `@mongodb-js/shell-bson-parser` to + * execute queries. That parser runs in a sandboxed scope that exposes a + * limited set of JavaScript globals beyond the BSON constructors (which are + * already registered in `documentdb-constants`). + * + * This module provides completion items for those JS globals so they appear + * in the value-position completion list. They are NOT added to + * `documentdb-constants` because they are runtime JS constructs, not + * DocumentDB API operators. + * + * ### Supported JS globals (from shell-bson-parser's sandbox scope) + * + * **Class expressions** (object with whitelisted methods): + * - `Date` β€” `new Date()`, `Date()`, `Date.now()`, plus instance methods + * - `Math` β€” `Math.floor()`, `Math.min()`, `Math.max()`, etc. + * + * **Globals** (primitive values): + * - `Infinity`, `NaN`, `undefined` + * + * **Constructor functions** (SCOPE_ANY / SCOPE_NEW / SCOPE_CALL): + * - `RegExp` β€” already handled by the JS tokenizer, but listed for completeness + * + * Source: `node_modules/@mongodb-js/shell-bson-parser/dist/scope.js` + * (SCOPE_ANY, SCOPE_NEW, SCOPE_CALL, GLOBALS, ALLOWED_CLASS_EXPRESSIONS) + */ + +// eslint-disable-next-line import/no-internal-modules +import type * as monacoEditor from 'monaco-editor/esm/vs/editor/editor.api'; +import { escapeSnippetDollars } from './snippetUtils'; + +/** A JS global completion definition. 
*/ +interface JsGlobalDef { + /** Display label (e.g., "Date") */ + label: string; + /** Optional snippet to insert (otherwise label is used) */ + snippet?: string; + /** Short description shown right-aligned in the completion list */ + description: string; + /** Documentation shown in the details panel */ + documentation: string; +} + +/** + * JS globals available in shell-bson-parser's sandbox. + * + * These are the class expressions and global values that the parser's + * sandboxed eval supports. BSON constructors (ObjectId, ISODate, etc.) + * are already provided by `documentdb-constants` and are NOT duplicated here. + */ +const JS_GLOBALS: readonly JsGlobalDef[] = [ + // -- Class constructors -- + { + label: 'Date', + snippet: 'new Date(${1})', + description: 'JS global', + documentation: + 'JavaScript Date constructor.\n\n' + + 'Usages:\n' + + '- `new Date()` β€” current time\n' + + '- `new Date("2025-01-01")` β€” specific date\n' + + '- `new Date(Date.now() - 14 * 24 * 60 * 60 * 1000)` β€” 14 days ago', + }, + { + label: 'Date.now()', + snippet: 'Date.now()', + description: 'JS global', + documentation: + 'Returns milliseconds since Unix epoch (Jan 1, 1970).\n\nUseful for relative date queries:\n```\n{ $gt: new Date(Date.now() - 7 * 24 * 60 * 60 * 1000) }\n```', + }, + { + label: 'RegExp', + snippet: 'RegExp("${1:pattern}")', + description: 'JS global', + documentation: + 'JavaScript RegExp constructor.\n\nExample: `RegExp("^test")`\n\nPrefer regex literals: `/^test/`', + }, + + // -- Math methods -- + { + label: 'Math.floor()', + snippet: 'Math.floor(${1:value})', + description: 'JS global', + documentation: 'Round down to the nearest integer.\n\nExample: `Math.floor(3.7)` β†’ `3`', + }, + { + label: 'Math.ceil()', + snippet: 'Math.ceil(${1:value})', + description: 'JS global', + documentation: 'Round up to the nearest integer.\n\nExample: `Math.ceil(3.2)` β†’ `4`', + }, + { + label: 'Math.round()', + snippet: 'Math.round(${1:value})', + description: 'JS 
/**
 * Builds one completion item per entry of `JS_GLOBALS`.
 *
 * Entries that define a snippet are inserted as Monaco snippets (kind
 * `Constructor`) after escaping literal `$` signs; entries without a snippet
 * insert their label verbatim (kind `Constant`). Every item sorts under the
 * `4_` prefix, i.e. after the BSON constructors at `3_`.
 *
 * @param range - the insertion range
 * @param monaco - the Monaco API
 */
export function createJsGlobalCompletionItems(
    range: monacoEditor.IRange,
    monaco: typeof monacoEditor,
): monacoEditor.languages.CompletionItem[] {
    const items: monacoEditor.languages.CompletionItem[] = [];

    for (const { label, snippet, description, documentation } of JS_GLOBALS) {
        const asSnippet = !!snippet;

        let kind = monaco.languages.CompletionItemKind.Constant;
        let insertText = label;
        let insertTextRules: monacoEditor.languages.CompletionItemInsertTextRule | undefined = undefined;
        if (snippet && asSnippet) {
            kind = monaco.languages.CompletionItemKind.Constructor;
            insertText = escapeSnippetDollars(snippet);
            insertTextRules = monaco.languages.CompletionItemInsertTextRule.InsertAsSnippet;
        }

        items.push({
            label: { label, description },
            kind,
            insertText,
            insertTextRules,
            documentation: { value: documentation },
            sortText: `4_${label}`,
            range,
        });
    }

    return items;
}
/**
 * Translates a meta tag into the matching Monaco `CompletionItemKind` value.
 *
 * Tags starting with `query` map to `Operator` and tags starting with `expr`
 * map to `Function`. The remaining tags are matched exactly; anything
 * unrecognised falls back to `Text`.
 *
 * @param meta - the meta tag of a completion entry (e.g. `'query:comparison'`, `'bson'`)
 * @param kinds - Monaco's `CompletionItemKind` enum object
 */
export function getCompletionKindForMeta(
    meta: string,
    kinds: typeof monacoEditor.languages.CompletionItemKind,
): number {
    if (meta.startsWith('query')) {
        return kinds.Operator;
    }
    if (meta.startsWith('expr')) {
        return kinds.Function;
    }

    switch (meta) {
        case 'bson':
            return kinds.Constructor;
        case 'stage':
            return kinds.Module;
        case 'accumulator':
            return kinds.Method;
        case 'update':
            return kinds.Property;
        case 'variable':
            return kinds.Variable;
        case 'window':
            return kinds.Event;
        case 'field:identifier':
            return kinds.Field;
        default:
            return kinds.Text;
    }
}

/**
 * Computes the sortText prefix that ranks an operator by how relevant it is
 * to the BSON types of the field being edited.
 *
 * Tiers (lower sorts first):
 * - `"0_"`  - the operator's `applicableBsonTypes` contains one of `fieldBsonTypes`
 * - `"1a_"` - universal operator (no `applicableBsonTypes`) tagged `query:comparison`
 * - `"1b_"` - any other universal operator
 * - `"2_"`  - the operator declares `applicableBsonTypes` but none of them match
 *
 * @param entry - the operator entry being ranked
 * @param fieldBsonTypes - BSON types of the field, if known
 * @returns the prefix, or `undefined` when no field type information is available
 */
export function getOperatorSortPrefix(
    entry: OperatorEntry,
    fieldBsonTypes: readonly string[] | undefined,
): string | undefined {
    if (!fieldBsonTypes?.length) {
        return undefined;
    }

    const applicableTypes = entry.applicableBsonTypes;
    if (!applicableTypes?.length) {
        // Universal operators: comparison operators are promoted above the rest.
        if (entry.meta === 'query:comparison') {
            return '1a_';
        }
        return '1b_';
    }

    for (const bsonType of applicableTypes) {
        if (fieldBsonTypes.includes(bsonType)) {
            return '0_';
        }
    }
    return '2_';
}
/**
 * Extracts the human-readable category from a meta tag: everything after the
 * first `:`, or the whole tag when it has no colon.
 *
 * `'query:comparison'` becomes `'comparison'`; `'bson'` stays `'bson'`.
 */
export function getCategoryLabel(meta: string): string {
    const separatorIndex = meta.indexOf(':');
    if (separatorIndex < 0) {
        return meta;
    }
    return meta.slice(separatorIndex + 1);
}

/**
 * Maps an OperatorEntry from documentdb-constants to a Monaco CompletionItem.
 *
 * When the entry has a snippet it is inserted as a Monaco snippet, optionally
 * without its outer `{ }`, with literal `$` signs escaped. Otherwise the plain
 * operator name is inserted. The documentation is the entry's description,
 * followed by a documentation link when the entry has one.
 *
 * @param entry - the operator entry to map
 * @param range - the insertion range
 * @param monaco - the Monaco API
 * @param fieldBsonTypes - optional BSON types of the field, used for type-aware sorting
 * @param shouldStripBraces - when true, strip outer `{ }` from snippets (for operator position)
 * @returns the completion item; `sortText` is `undefined` when no type-aware prefix applies
 */
export function mapOperatorToCompletionItem(
    entry: OperatorEntry,
    range: monacoEditor.IRange,
    monaco: typeof monacoEditor,
    fieldBsonTypes?: readonly string[],
    shouldStripBraces?: boolean,
): monacoEditor.languages.CompletionItem {
    const snippet = entry.snippet;

    let insertText = entry.value;
    if (snippet) {
        const snippetBody = shouldStripBraces ? stripOuterBraces(snippet) : snippet;
        insertText = escapeSnippetDollars(snippetBody);
    }

    const documentationValue = entry.link
        ? `${entry.description}\n\n[ⓘ Documentation](${entry.link})`
        : entry.description;

    const sortPrefix = getOperatorSortPrefix(entry, fieldBsonTypes);

    return {
        label: {
            label: entry.value,
            description: getCategoryLabel(entry.meta),
        },
        kind: getCompletionKindForMeta(entry.meta, monaco.languages.CompletionItemKind),
        insertText,
        insertTextRules: snippet ? monaco.languages.CompletionItemInsertTextRule.InsertAsSnippet : undefined,
        documentation: {
            value: documentationValue,
            isTrusted: true,
        },
        sortText: sortPrefix ? `${sortPrefix}${entry.value}` : undefined,
        range,
    };
}
/**
 * Maps a FieldCompletionData entry to a Monaco CompletionItem.
 *
 * Fields get the sort prefix `"0_"`. The insert text is the field's insert
 * text followed by `: $1`, inserted as a snippet, so that selecting a field
 * name places the cursor at the value position.
 *
 * Because the result is a snippet, the characters that are special to the
 * snippet syntax (`\`, `$` and `}`) are escaped in the field text first.
 * Without this a field such as `price$usd` would have `$usd` parsed as a
 * snippet variable and lose its `$`.
 * NOTE(review): this assumes `field.insertText` is plain query text and not
 * already snippet-escaped by its producer; confirm against toFieldCompletionItems.
 *
 * @param field - the field description (name, insert text, display type, sparse flag)
 * @param range - the insertion range
 * @param monaco - the Monaco API
 */
export function mapFieldToCompletionItem(
    field: FieldCompletionData,
    range: monacoEditor.IRange,
    monaco: typeof monacoEditor,
): monacoEditor.languages.CompletionItem {
    const sparseIndicator = field.isSparse ? ' (sparse)' : '';
    // Escape snippet metacharacters; `$&` re-inserts the matched character.
    const literalFieldText = field.insertText.replace(/[\\$}]/g, '\\$&');
    return {
        label: {
            label: field.fieldName,
            description: `${field.displayType}${sparseIndicator}`,
        },
        kind: monaco.languages.CompletionItemKind.Field,
        insertText: `${literalFieldText}: $1`,
        insertTextRules: monaco.languages.CompletionItemInsertTextRule.InsertAsSnippet,
        sortText: `0_${field.fieldName}`,
        range,
    };
}
/**
 * Strips the outermost `{ ` and ` }` from an operator snippet.
 *
 * Operator snippets are written for value position (for example
 * `{ $gt: ${1:value} }`). At operator or key position the cursor is already
 * inside braces, so the outer pair has to go to avoid double nesting.
 *
 * The snippet is only changed when it starts with `'{ '` and ends with
 * `' }'`; otherwise it is returned as is. Inner brackets and braces are kept:
 * - `{ $in: [${1:value}] }` becomes `$in: [${1:value}]`
 * - `{ $gt: ${1:value} }` becomes `$gt: ${1:value}`
 *
 * @param snippet - the snippet text, with or without outer braces
 * @returns the snippet without its outer `{ ` / ` }` pair
 */
export function stripOuterBraces(snippet: string): string {
    if (snippet.startsWith('{ ') && snippet.endsWith(' }')) {
        return snippet.slice(2, -2);
    }
    return snippet;
}

/**
 * Escapes literal `$` signs in snippet text that Monaco would otherwise read
 * as snippet variable references.
 *
 * In snippet syntax `$name` is a variable reference, so an operator name such
 * as `$gt` would not be inserted literally. Variable names may start with a
 * letter or an underscore, so a `$` followed by either is escaped
 * (`$gt` becomes `\$gt`, `$_id` becomes `\$_id`).
 *
 * Tab stops and placeholders are left alone: `$1` and `${1:value}` are
 * followed by a digit or `{`, which the look-ahead does not match.
 *
 * @param snippet - snippet text containing literal operator names
 * @returns the snippet with variable-like `$` occurrences escaped
 */
export function escapeSnippetDollars(snippet: string): string {
    return snippet.replace(/\$(?=[a-zA-Z_])/g, '\\$');
}
/**
 * A type suggestion definition: one curated value suggestion offered for a
 * field of a given BSON type.
 */
interface TypeSuggestionDef {
    /** Display label shown in the completion list */
    label: string;
    /** Text or snippet to insert when the suggestion is accepted */
    insertText: string;
    /** Whether insertText is a snippet (has tab stops) rather than plain text */
    isSnippet: boolean;
    /** Short description shown in the label area */
    description: string;
    /** Optional documentation shown in the details panel */
    documentation?: string;
}
+ */ +const TYPE_SUGGESTIONS: Record = { + // BSONTypes.Boolean = 'boolean' + boolean: [ + { + label: 'true', + insertText: 'true', + isSnippet: false, + description: 'boolean literal', + documentation: `Boolean literal \`true\`.\n\nExample: \`{ field: true }\``, + }, + { + label: 'false', + insertText: 'false', + isSnippet: false, + description: 'boolean literal', + documentation: `Boolean literal \`false\`.\n\nExample: \`{ field: false }\``, + }, + ], + // BSONTypes.Int32 = 'int32' + int32: numberSuggestions(), + // BSONTypes.Double = 'double' + double: numberSuggestions(), + // BSONTypes.Long = 'long' + long: numberSuggestions(), + // BSONTypes.Decimal128 = 'decimal128' + decimal128: numberSuggestions(), + // BSONTypes.Number = 'number' (generic number without specific subtype) + number: numberSuggestions(), + string: [ + { + label: `{ $regex: /${LABEL_PLACEHOLDER}/ }`, + insertText: '{ $regex: /${1:pattern}/ }', + isSnippet: true, + description: 'pattern match', + documentation: + 'Match string fields with a regex pattern.\n\n' + + 'Example β€” ends with `.com`:\n```\n{ $regex: /\\.com$/ }\n```', + }, + { + label: '{ $regex: /\\.com$/ }', + insertText: '{ $regex: /${1:\\.com$}/ }', + isSnippet: true, + description: `ends with .com - pattern match`, + documentation: 'Example pattern match for: ends with `.com`:\n```\n{ $regex: /\\.com$/ }\n```', + }, + { + label: '""', + insertText: '"${1:text}"', + isSnippet: true, + description: 'string literal', + documentation: `Exact string match.\n\nExample: \`"active"\`, \`"pending"\``, + }, + ], + date: [ + { + label: `ISODate("${LABEL_PLACEHOLDER}")`, + insertText: `ISODate("\${1:${twoWeeksAgo()}}")`, + isSnippet: true, + description: 'date value', + documentation: `Match a specific date.\n\nExample: \`ISODate("${twoWeeksAgo()}")\``, + }, + { + label: `{ $gt: ISODate("${LABEL_PLACEHOLDER}"), $lt: ISODate("${LABEL_PLACEHOLDER}") }`, + insertText: `{ $gt: ISODate("\${1:${twoWeeksAgo()}}"), $lt: 
ISODate("\${2:${todayISO()}}") }`, + isSnippet: true, + description: 'date range', + documentation: `Match dates within a range.\n\nExample: last 2 weeks β€” \`{ $gt: ISODate("${twoWeeksAgo()}"), $lt: ISODate("${todayISO()}") }\``, + }, + { + label: `{ $gt: new Date(Date.now() - ${LABEL_PLACEHOLDER}) }`, + insertText: '{ $gt: new Date(Date.now() - ${1:14} * 24 * 60 * 60 * 1000) }', + isSnippet: true, + description: 'last N days', + documentation: `Match dates in the last N days relative to now.\n\nExample: last 14 days β€” \`{ $gt: new Date(Date.now() - 14 * 24 * 60 * 60 * 1000) }\``, + }, + ], + objectid: [ + { + label: `ObjectId("${LABEL_PLACEHOLDER}")`, + insertText: 'ObjectId("${1:hex}")', + isSnippet: true, + description: 'ObjectId value', + documentation: `Match by ObjectId.\n\nExample: \`ObjectId("507f1f77bcf86cd799439011")\``, + }, + ], + null: [ + { + label: 'null', + insertText: 'null', + isSnippet: false, + description: 'null literal', + documentation: `Match null or missing fields.\n\nExample: \`{ field: null }\``, + }, + ], + array: [ + { + label: `{ $elemMatch: { ${LABEL_PLACEHOLDER} } }`, + insertText: '{ $elemMatch: { ${1:query} } }', + isSnippet: true, + description: 'match element', + documentation: `Match arrays with at least one element satisfying the query.\n\nExample: \`{ $elemMatch: { status: "urgent" } }\``, + }, + { + label: `{ $size: ${LABEL_PLACEHOLDER} }`, + insertText: '{ $size: ${1:length} }', + isSnippet: true, + description: 'array length', + documentation: `Match arrays with exactly N elements.\n\nExample: \`{ $size: 3 }\``, + }, + ], +}; + +/** Shared number-type suggestions (int, double, long, decimal). 
*/ +function numberSuggestions(): readonly TypeSuggestionDef[] { + return [ + { + label: `{ $gt: ${LABEL_PLACEHOLDER}, $lt: ${LABEL_PLACEHOLDER} }`, + insertText: '{ $gt: ${1:min}, $lt: ${2:max} }', + isSnippet: true, + description: 'range query', + documentation: `Match numbers within a range.\n\nExample: between 18 and 65 β€” \`{ $gt: 18, $lt: 65 }\``, + }, + { + label: `{ $gte: ${LABEL_PLACEHOLDER} }`, + insertText: '{ $gte: ${1:value} }', + isSnippet: true, + description: 'minimum value', + documentation: `Match numbers greater than or equal to a value.\n\nExample: at least 100 β€” \`{ $gte: 100 }\``, + }, + ]; +} + +/** + * Returns an ISO 8601 timestamp for two weeks ago (UTC, midnight). + * Used as a sensible default date placeholder β€” recent enough to be practical. + */ +function twoWeeksAgo(): string { + const d = new Date(); + d.setUTCDate(d.getUTCDate() - 14); + d.setUTCHours(0, 0, 0, 0); + return d.toISOString().replace('.000Z', 'Z'); +} + +/** + * Returns an ISO 8601 timestamp for today (UTC, end of day). + */ +function todayISO(): string { + const d = new Date(); + d.setUTCHours(23, 59, 59, 0); + return d.toISOString().replace('.000Z', 'Z'); +} + +/** + * Creates type-aware value suggestions based on the field's BSON type. + * + * Returns an array of high-priority completion items (sort prefix `00_`) + * that appear at the top of the value-position completion list. + * + * Returns an empty array when the BSON type is unknown or has no specific suggestions. 
+ * + * @param fieldBsonType - BSON type string from the schema (e.g., 'int32', 'string', 'boolean') + * @param range - the insertion range + * @param monaco - the Monaco API + */ +export function createTypeSuggestions( + fieldBsonType: string | undefined, + range: monacoEditor.IRange, + monaco: typeof monacoEditor, +): monacoEditor.languages.CompletionItem[] { + if (!fieldBsonType) { + return []; + } + + const suggestions = TYPE_SUGGESTIONS[fieldBsonType]; + if (!suggestions) { + return []; + } + + return suggestions.map((def, index) => { + let insertText = def.insertText; + if (def.isSnippet) { + insertText = escapeSnippetDollars(insertText); + } + + return { + label: { + label: def.label, + description: def.description, + }, + kind: def.isSnippet + ? monaco.languages.CompletionItemKind.Snippet + : monaco.languages.CompletionItemKind.Value, + insertText, + insertTextRules: def.isSnippet ? monaco.languages.CompletionItemInsertTextRule.InsertAsSnippet : undefined, + documentation: def.documentation ? { value: def.documentation } : undefined, + sortText: `00_${String(index).padStart(2, '0')}`, + preselect: index === 0, + range, + }; + }); +} diff --git a/src/webviews/documentdbQuery/cursorContext.test.ts b/src/webviews/documentdbQuery/cursorContext.test.ts new file mode 100644 index 000000000..4ee6f57fe --- /dev/null +++ b/src/webviews/documentdbQuery/cursorContext.test.ts @@ -0,0 +1,271 @@ +/*--------------------------------------------------------------------------------------------- + * Copyright (c) Microsoft Corporation. All rights reserved. + * Licensed under the MIT License. See License.txt in the project root for license information. + *--------------------------------------------------------------------------------------------*/ + +import { detectCursorContext, type CursorContext, type FieldTypeLookup } from './cursorContext'; + +/** + * Helper: place cursor at the `|` marker in the input string. + * Returns { text, offset } with the `|` removed. 
+ */ +function parseCursor(input: string): { text: string; offset: number } { + const idx = input.indexOf('|'); + if (idx === -1) { + throw new Error(`Test input must contain a '|' cursor marker: "${input}"`); + } + return { + text: input.slice(0, idx) + input.slice(idx + 1), + offset: idx, + }; +} + +/** Shorthand to detect context from a `|`-marked string. */ +function detect(input: string, fieldLookup?: FieldTypeLookup): CursorContext { + const { text, offset } = parseCursor(input); + return detectCursorContext(text, offset, fieldLookup); +} + +describe('detectCursorContext', () => { + // --------------------------------------------------------------- + // Step 1: Core context detection (complete expressions) + // --------------------------------------------------------------- + describe('Step 1: Core context detection', () => { + describe('key position (root)', () => { + it('detects key position in empty object', () => { + const result = detect('{ | }'); + expect(result).toEqual({ position: 'key', depth: 1 }); + }); + + it('detects key position after opening brace', () => { + const result = detect('{|}'); + expect(result).toEqual({ position: 'key', depth: 1 }); + }); + + it('detects key position after comma in root object', () => { + const result = detect('{ name: "Alice", | }'); + expect(result).toEqual({ position: 'key', depth: 1 }); + }); + }); + + describe('value position', () => { + it('detects value position after colon', () => { + const result = detect('{ _id: | }'); + expect(result).toEqual({ position: 'value', fieldName: '_id' }); + }); + + it('detects value position for quoted key', () => { + const result = detect('{ "my.field": | }'); + expect(result).toEqual({ position: 'value', fieldName: 'my.field' }); + }); + + it('detects value position for single-quoted key', () => { + const result = detect("{ 'address.city': | }"); + expect(result).toEqual({ position: 'value', fieldName: 'address.city' }); + }); + + it('includes bsonType when fieldLookup 
provides it', () => { + const lookup: FieldTypeLookup = (name) => (name === 'age' ? 'int32' : undefined); + const result = detect('{ age: | }', lookup); + expect(result).toEqual({ position: 'value', fieldName: 'age', fieldBsonType: 'int32' }); + }); + + it('omits bsonType when fieldLookup returns undefined', () => { + const lookup: FieldTypeLookup = () => undefined; + const result = detect('{ age: | }', lookup); + expect(result).toEqual({ position: 'value', fieldName: 'age' }); + }); + }); + + describe('operator position (nested object)', () => { + it('detects operator position inside nested object', () => { + const result = detect('{ age: { | } }'); + expect(result).toEqual({ position: 'operator', fieldName: 'age' }); + }); + + it('detects operator position with bsonType', () => { + const lookup: FieldTypeLookup = (name) => (name === 'age' ? 'int32' : undefined); + const result = detect('{ age: { | } }', lookup); + expect(result).toEqual({ position: 'operator', fieldName: 'age', fieldBsonType: 'int32' }); + }); + + it('detects operator position after comma in nested object', () => { + const result = detect('{ age: { $gt: 5, | } }'); + expect(result).toEqual({ position: 'operator', fieldName: 'age' }); + }); + }); + + describe('array-element position', () => { + it('detects array-element inside $and', () => { + const result = detect('{ $and: [ | ] }'); + expect(result).toEqual({ position: 'array-element', parentOperator: '$and' }); + }); + + it('detects array-element inside $or', () => { + const result = detect('{ $or: [ | ] }'); + expect(result).toEqual({ position: 'array-element', parentOperator: '$or' }); + }); + + it('detects array-element inside $nor', () => { + const result = detect('{ $nor: [ | ] }'); + expect(result).toEqual({ position: 'array-element', parentOperator: '$nor' }); + }); + }); + + describe('key inside logical operator array element', () => { + it('detects key inside $and array element object', () => { + const result = detect('{ $and: [ { | } 
] }'); + expect(result.position).toBe('key'); + }); + + it('detects key inside $or array element object after comma', () => { + const result = detect('{ $or: [ { x: 1 }, { | } ] }'); + expect(result.position).toBe('key'); + }); + }); + + describe('edge cases', () => { + it('returns unknown for empty string', () => { + expect(detectCursorContext('', 0)).toEqual({ position: 'unknown' }); + }); + + it('returns unknown for cursor at offset 0', () => { + expect(detectCursorContext('{ age: 1 }', 0)).toEqual({ position: 'unknown' }); + }); + + it('returns unknown for null-ish text', () => { + expect(detectCursorContext('', 5)).toEqual({ position: 'unknown' }); + }); + + it('clamps cursor offset to text length', () => { + // Cursor past end of text β€” should still work + const result = detectCursorContext('{ age: ', 100); + expect(result).toEqual({ position: 'value', fieldName: 'age' }); + }); + }); + }); + + // --------------------------------------------------------------- + // Step 1.5: Incomplete / broken input (mid-typing states) + // --------------------------------------------------------------- + describe('Step 1.5: Incomplete / broken input', () => { + it('{ age: | β€” colon just typed, no closing brace', () => { + const result = detect('{ age: |'); + expect(result).toEqual({ position: 'value', fieldName: 'age' }); + }); + + it('{ age: $| β€” started typing BSON constructor', () => { + const result = detect('{ age: $|'); + expect(result).toEqual({ position: 'value', fieldName: 'age' }); + }); + + it('{ age: $ |} β€” dollar with closing brace', () => { + const result = detect('{ age: $ |}'); + expect(result).toEqual({ position: 'value', fieldName: 'age' }); + }); + + it('{ age: {| β€” opened nested object, no close', () => { + const result = detect('{ age: {|'); + expect(result).toEqual({ position: 'operator', fieldName: 'age' }); + }); + + it('{ age: { $| β€” partially typed operator', () => { + const result = detect('{ age: { $|'); + expect(result).toEqual({ 
position: 'operator', fieldName: 'age' }); + }); + + it('{ age: { $ |} β€” incomplete operator inside nested object', () => { + const result = detect('{ age: { $ |}'); + expect(result).toEqual({ position: 'operator', fieldName: 'age' }); + }); + + it('{ age: { $g| β€” partially typed $gt', () => { + const result = detect('{ age: { $g|'); + expect(result).toEqual({ position: 'operator', fieldName: 'age' }); + }); + + it('{ | β€” opened root object, no field name yet', () => { + const result = detect('{ |'); + expect(result).toEqual({ position: 'key', depth: 1 }); + }); + + it('{ a| β€” partially typed field name', () => { + const result = detect('{ a|'); + expect(result).toEqual({ position: 'key', depth: 1 }); + }); + + it('{ name: "Alice", | β€” comma after first pair, new key expected', () => { + const result = detect('{ name: "Alice", |'); + expect(result).toEqual({ position: 'key', depth: 1 }); + }); + + it('{ name: "Alice", a| β€” partially typed second field name', () => { + const result = detect('{ name: "Alice", a|'); + expect(result).toEqual({ position: 'key', depth: 1 }); + }); + + it('{ $and: [| β€” opened array for logical operator', () => { + const result = detect('{ $and: [|'); + expect(result).toEqual({ position: 'array-element', parentOperator: '$and' }); + }); + + it('{ $and: [ {| β€” inside $and array element object', () => { + const result = detect('{ $and: [ {|'); + expect(result.position).toBe('key'); + }); + + it('{ age: { $gt: 5, | β€” after comma inside nested operator object', () => { + const result = detect('{ age: { $gt: 5, |'); + expect(result).toEqual({ position: 'operator', fieldName: 'age' }); + }); + + it('{| β€” just the opening brace', () => { + const result = detect('{|'); + expect(result).toEqual({ position: 'key', depth: 1 }); + }); + + it('empty string β†’ unknown', () => { + expect(detectCursorContext('', 0)).toEqual({ position: 'unknown' }); + }); + + it('handles fieldLookup with incomplete input', () => { + const lookup: 
FieldTypeLookup = (name) => (name === 'age' ? 'int32' : undefined); + const result = detect('{ age: { $|', lookup); + expect(result).toEqual({ position: 'operator', fieldName: 'age', fieldBsonType: 'int32' }); + }); + + it('{ $or: [ { name: "x" }, {| β€” second element in $or array', () => { + const result = detect('{ $or: [ { name: "x" }, {|'); + expect(result.position).toBe('key'); + }); + }); + + // --------------------------------------------------------------- + // Multi-line expressions + // --------------------------------------------------------------- + describe('multi-line expressions', () => { + it('key position in multi-line object', () => { + const result = detect(`{ + name: "Alice", + | +}`); + expect(result).toEqual({ position: 'key', depth: 1 }); + }); + + it('value position in multi-line object', () => { + const result = detect(`{ + age: | +}`); + expect(result).toEqual({ position: 'value', fieldName: 'age' }); + }); + + it('operator position in multi-line nested object', () => { + const result = detect(`{ + age: { + | + } +}`); + expect(result).toEqual({ position: 'operator', fieldName: 'age' }); + }); + }); +}); diff --git a/src/webviews/documentdbQuery/cursorContext.ts b/src/webviews/documentdbQuery/cursorContext.ts new file mode 100644 index 000000000..606db0f39 --- /dev/null +++ b/src/webviews/documentdbQuery/cursorContext.ts @@ -0,0 +1,393 @@ +/*--------------------------------------------------------------------------------------------- + * Copyright (c) Microsoft Corporation. All rights reserved. + * Licensed under the MIT License. See License.txt in the project root for license information. + *--------------------------------------------------------------------------------------------*/ + +/** + * Cursor context detection for the `documentdb-query` language. 
+ * + * Determines the semantic position of the cursor within a DocumentDB query + * expression (e.g., key position, value position, operator position) using + * a heuristic character-scanning approach. + * + * This module is a pure function with no Monaco or VS Code dependencies, + * making it fully unit-testable. + */ + +/** + * The semantic position of the cursor within a query expression. + * + * Used by the completion provider to determine which completions to show. + */ +export type CursorContext = + | { position: 'key'; depth: number } + | { position: 'value'; fieldName: string; fieldBsonType?: string } + | { position: 'operator'; fieldName: string; fieldBsonType?: string } + | { position: 'array-element'; parentOperator: string } + | { position: 'unknown' }; + +/** + * A callback that resolves a field name to its BSON type string. + * Used to enrich cursor context with type information from the completion store. + */ +export type FieldTypeLookup = (fieldName: string) => string | undefined; + +/** + * Detects the semantic cursor context within a DocumentDB query expression. + * + * Uses a heuristic backward-scanning approach from the cursor position to + * determine whether the cursor is at a key, value, operator, or array-element + * position. Falls back to `{ position: 'unknown' }` when context cannot be + * determined. 
+ * + * @param text - the full text of the editor + * @param cursorOffset - the 0-based character offset of the cursor + * @param fieldLookup - optional callback to resolve field names to BSON types + * @returns the detected cursor context + */ +export function detectCursorContext(text: string, cursorOffset: number, fieldLookup?: FieldTypeLookup): CursorContext { + if (!text || cursorOffset <= 0) { + return { position: 'unknown' }; + } + + // Clamp cursor to text length + const offset = Math.min(cursorOffset, text.length); + + // Find the nearest structural character before the cursor + const scanResult = scanBackward(text, offset); + + if (!scanResult) { + return { position: 'unknown' }; + } + + switch (scanResult.char) { + case ':': + return resolveValueContext(text, scanResult.index, fieldLookup); + + case '{': + return resolveOpenBraceContext(text, scanResult.index, fieldLookup); + + case ',': + return resolveCommaContext(text, scanResult.index, fieldLookup); + + case '[': + return resolveOpenBracketContext(text, scanResult.index); + + default: + return { position: 'unknown' }; + } +} + +// ---------- Internal helpers ---------- + +/** Structural characters that define context boundaries. */ +const STRUCTURAL_CHARS = new Set([':', '{', ',', '[']); + +interface ScanResult { + char: string; + index: number; +} + +// Known edge case: the backward scanner does not track whether characters +// are inside quoted strings. A structural character that appears within a +// string literal is still treated as structural. For example, in +// { msg: "{", | } +// the `{` inside the string `"{"` would be found before the real opening +// brace, causing a misclassification. This is acceptable for a completion +// heuristic where rare edge cases degrade gracefully rather than break. + +/** + * Scans backward from the cursor, skipping whitespace and identifier characters + * (letters, digits, `_`, `$`, `.`, quotes), to find the nearest structural character. 
+ * + * Identifier characters are skipped because the cursor may be mid-word + * (e.g., `{ ag|` β€” cursor is after 'g', but context is 'key' from the `{`). + */ +function scanBackward(text: string, offset: number): ScanResult | undefined { + let i = offset - 1; + while (i >= 0) { + const ch = text[i]; + if (STRUCTURAL_CHARS.has(ch)) { + return { char: ch, index: i }; + } + // Skip whitespace and identifier-like characters + if (isSkippable(ch)) { + i--; + continue; + } + // Hit something unexpected (e.g., '}', ']', ')') β€” stop scanning + // '}' and ']' indicate we've exited the current expression + return undefined; + } + return undefined; +} + +/** + * Characters to skip during backward scanning. + * These are characters that can appear between a structural char and the cursor: + * - whitespace + * - identifier chars (a-z, A-Z, 0-9, _, $, .) + * - quote marks (the user may be inside a quoted key) + * - minus sign (for negative numbers) + */ +function isSkippable(ch: string): boolean { + return /[\s\w.$"'`\-/]/.test(ch); +} + +/** + * Resolves context when ':' is found β€” cursor is in a value position. + * + * Examples: + * - `{ _id: | }` β†’ value with fieldName '_id' + * - `{ age: | }` β†’ value with fieldName 'age' + */ +function resolveValueContext(text: string, colonIndex: number, fieldLookup?: FieldTypeLookup): CursorContext { + const fieldName = extractKeyBeforeColon(text, colonIndex); + if (!fieldName) { + return { position: 'unknown' }; + } + const fieldBsonType = fieldLookup?.(fieldName); + return { + position: 'value', + fieldName, + ...(fieldBsonType !== undefined && { fieldBsonType }), + }; +} + +/** + * Resolves context when '{' is found. + * + * Two sub-cases: + * 1. Root or top-level: `{ | }` β†’ key position + * 2. 
After a colon: `{ age: { | } }` β†’ operator position for field 'age' + */ +function resolveOpenBraceContext(text: string, braceIndex: number, fieldLookup?: FieldTypeLookup): CursorContext { + // Look backward from the '{' to find what precedes it + const beforeBrace = scanBackwardFrom(text, braceIndex); + + if (beforeBrace && beforeBrace.char === ':') { + // Pattern: `fieldName: { | }` β†’ operator position + const fieldName = extractKeyBeforeColon(text, beforeBrace.index); + if (fieldName) { + // If the field name starts with '$', this is a nested query object + // inside a logical operator like $and: [ { | } ], but the immediate + // '{' is after a ':' which makes it an operator context + const fieldBsonType = fieldLookup?.(fieldName); + return { + position: 'operator', + fieldName, + ...(fieldBsonType !== undefined && { fieldBsonType }), + }; + } + } + + if (beforeBrace && beforeBrace.char === '[') { + // Pattern: `$and: [ { | } ]` β†’ key at depth 1 + return resolveKeyInsideArray(text, beforeBrace.index); + } + + if (beforeBrace && beforeBrace.char === ',') { + // Pattern: `$and: [ {...}, { | } ]` β€” inside an array after another element + return resolveCommaInsideArrayForBrace(text, beforeBrace.index); + } + + // Root object or can't determine parent + // +1 because the brace at braceIndex is the one we're inside + const depth = computeDepth(text, braceIndex) + 1; + return { position: 'key', depth }; +} + +/** + * Resolves context when ',' is found. + * + * Sub-cases: + * 1. Inside an object: `{ name: "x", | }` β†’ key position + * 2. Inside an operator object: `{ age: { $gt: 5, | } }` β†’ operator position + * 3. 
Inside an array: `{ $and: [ {...}, | ] }` β†’ array-element position + */ +function resolveCommaContext(text: string, commaIndex: number, fieldLookup?: FieldTypeLookup): CursorContext { + // Determine if comma is inside an array or an object by finding the + // nearest unmatched '[' or '{' + const enclosing = findEnclosingBracket(text, commaIndex); + + if (!enclosing) { + return { position: 'unknown' }; + } + + if (enclosing.char === '[') { + // Inside an array β€” determine parent operator + return resolveOpenBracketContext(text, enclosing.index); + } + + if (enclosing.char === '{') { + // Inside an object β€” is this a root-level object or a nested operator object? + return resolveOpenBraceContext(text, enclosing.index, fieldLookup); + } + + return { position: 'unknown' }; +} + +/** + * Resolves context when '[' is found. + * + * Example: `{ $and: [ | ] }` β†’ array-element with parentOperator '$and' + */ +function resolveOpenBracketContext(text: string, bracketIndex: number): CursorContext { + // Look backward from '[' to find the parent key via ':' + const beforeBracket = scanBackwardFrom(text, bracketIndex); + + if (beforeBracket && beforeBracket.char === ':') { + const parentKey = extractKeyBeforeColon(text, beforeBracket.index); + if (parentKey && parentKey.startsWith('$')) { + return { position: 'array-element', parentOperator: parentKey }; + } + } + + return { position: 'unknown' }; +} + +/** + * Resolves key context when '{' is found immediately after '['. 
+ * Pattern: `$and: [ { | } ]` β†’ key at depth 1 + */ +function resolveKeyInsideArray(text: string, bracketIndex: number): CursorContext { + // Check if this array belongs to a logical operator + const beforeBracket = scanBackwardFrom(text, bracketIndex); + if (beforeBracket && beforeBracket.char === ':') { + const parentKey = extractKeyBeforeColon(text, beforeBracket.index); + if (parentKey && parentKey.startsWith('$')) { + // Inside a logical operator array element β€” treat as key context + const depth = computeDepth(text, bracketIndex); + return { position: 'key', depth: depth + 1 }; + } + } + const depth = computeDepth(text, bracketIndex); + return { position: 'key', depth: depth + 1 }; +} + +/** + * Resolves context when '{' is preceded by ',' inside an array. + * Pattern: `$and: [ {...}, { | } ]` + */ +function resolveCommaInsideArrayForBrace(text: string, commaIndex: number): CursorContext { + const enclosing = findEnclosingBracket(text, commaIndex); + if (enclosing && enclosing.char === '[') { + return resolveKeyInsideArray(text, enclosing.index); + } + return { position: 'key', depth: 0 }; +} + +// ---------- Character scanning utilities ---------- + +/** + * Scans backward from a given index (exclusive), skipping whitespace + * and identifier characters, to find the nearest structural character. + */ +function scanBackwardFrom(text: string, index: number): ScanResult | undefined { + let i = index - 1; + while (i >= 0) { + const ch = text[i]; + if (STRUCTURAL_CHARS.has(ch) || ch === ']' || ch === '}') { + if (ch === ']' || ch === '}') { + return undefined; // Hit a closing bracket β€” stop + } + return { char: ch, index: i }; + } + if (isSkippable(ch)) { + i--; + continue; + } + return undefined; + } + return undefined; +} + +/** + * Finds the nearest unmatched opening bracket (`{` or `[`) before the given index. + * Properly handles nested brackets by maintaining a balance counter. 
+ */ +function findEnclosingBracket(text: string, index: number): ScanResult | undefined { + let braceDepth = 0; + let bracketDepth = 0; + + for (let i = index - 1; i >= 0; i--) { + const ch = text[i]; + switch (ch) { + case '}': + braceDepth++; + break; + case '{': + if (braceDepth > 0) { + braceDepth--; + } else { + return { char: '{', index: i }; + } + break; + case ']': + bracketDepth++; + break; + case '[': + if (bracketDepth > 0) { + bracketDepth--; + } else { + return { char: '[', index: i }; + } + break; + } + } + return undefined; +} + +/** + * Extracts the key name immediately before a colon. + * + * Handles: + * - Unquoted keys: `age:` β†’ 'age' + * - Single-quoted keys: `'my.field':` β†’ 'my.field' + * - Double-quoted keys: `"my.field":` β†’ 'my.field' + * - Dollar-prefixed: `$and:` β†’ '$and' + */ +function extractKeyBeforeColon(text: string, colonIndex: number): string | undefined { + let i = colonIndex - 1; + + // Skip whitespace before the colon + while (i >= 0 && /\s/.test(text[i])) { + i--; + } + + if (i < 0) return undefined; + + // Check if the key is quoted + const quoteChar = text[i]; + if (quoteChar === '"' || quoteChar === "'") { + // Find the matching opening quote + const closeQuoteIndex = i; + i--; + while (i >= 0 && text[i] !== quoteChar) { + i--; + } + if (i < 0) return undefined; // Unmatched quote + return text.substring(i + 1, closeQuoteIndex); + } + + // Unquoted key β€” collect identifier characters (including $ and .) + const end = i + 1; + while (i >= 0 && /[\w$.]/.test(text[i])) { + i--; + } + const key = text.substring(i + 1, end); + return key.length > 0 ? key : undefined; +} + +/** + * Computes the brace nesting depth at a given position. + * Counts unmatched `{` before the index. 
+ */ +function computeDepth(text: string, index: number): number { + let depth = 0; + for (let i = 0; i < index; i++) { + if (text[i] === '{') depth++; + if (text[i] === '}') depth--; + } + return Math.max(0, depth); +} diff --git a/src/webviews/documentdbQuery/documentdbQueryCompletionProvider.test.ts b/src/webviews/documentdbQuery/documentdbQueryCompletionProvider.test.ts new file mode 100644 index 000000000..13521e3a0 --- /dev/null +++ b/src/webviews/documentdbQuery/documentdbQueryCompletionProvider.test.ts @@ -0,0 +1,1998 @@ +/*--------------------------------------------------------------------------------------------- + * Copyright (c) Microsoft Corporation. All rights reserved. + * Licensed under the MIT License. See License.txt in the project root for license information. + *--------------------------------------------------------------------------------------------*/ + +import { + FILTER_COMPLETION_META, + getFilteredCompletions, + PROJECTION_COMPLETION_META, + type OperatorEntry, +} from '@vscode-documentdb/documentdb-constants'; +// eslint-disable-next-line import/no-internal-modules +import type * as monacoEditor from 'monaco-editor/esm/vs/editor/editor.api'; +import { clearAllCompletionContexts, setCompletionContext } from './completionStore'; +import { type CursorContext } from './cursorContext'; +import { + createCompletionItems, + createTypeSuggestions, + escapeSnippetDollars, + getCategoryLabel, + getCompletionKindForMeta, + getMetaTagsForEditorType, + getOperatorSortPrefix, + mapFieldToCompletionItem, + mapOperatorToCompletionItem, + stripOuterBraces, +} from './documentdbQueryCompletionProvider'; +import { EditorType } from './languageConfig'; + +/** + * Minimal mock of `monaco.languages.CompletionItemKind` for testing. + * Uses distinct numeric values matching Monaco's enum. 
+ */ +const mockCompletionItemKind: typeof monacoEditor.languages.CompletionItemKind = { + Method: 0, + Function: 1, + Constructor: 2, + Field: 3, + Variable: 4, + Class: 5, + Struct: 6, + Interface: 7, + Module: 8, + Property: 9, + Event: 10, + Operator: 11, + Unit: 12, + Value: 13, + Constant: 14, + Enum: 15, + EnumMember: 16, + Keyword: 17, + Text: 18, + Color: 19, + File: 20, + Reference: 21, + Customcolor: 22, + Folder: 23, + TypeParameter: 24, + User: 25, + Issue: 26, + Snippet: 27, +}; + +/** Minimal mock of `monaco.languages.CompletionItemInsertTextRule`. */ +const mockInsertTextRule = { + InsertAsSnippet: 4, // Same value as Monaco + KeepWhitespace: 1, + None: 0, +} as typeof monacoEditor.languages.CompletionItemInsertTextRule; + +/** + * Creates a minimal Monaco API mock for testing completion provider functions. + */ +function createMockMonaco(): typeof monacoEditor { + return { + languages: { + CompletionItemKind: mockCompletionItemKind, + CompletionItemInsertTextRule: mockInsertTextRule, + }, + } as unknown as typeof monacoEditor; +} + +/** + * Extracts the label string from a CompletionItem's label, + * which may be a plain string or a CompletionItemLabel object. + */ +function getLabelText(label: string | monacoEditor.languages.CompletionItemLabel): string { + return typeof label === 'string' ? label : label.label; +} + +/** Standard test range for all completion items. 
*/ +const testRange: monacoEditor.IRange = { + startLineNumber: 1, + endLineNumber: 1, + startColumn: 1, + endColumn: 1, +}; + +describe('documentdbQueryCompletionProvider', () => { + describe('getCompletionKindForMeta', () => { + const kinds = mockCompletionItemKind; + + test('maps query operators to Operator kind', () => { + expect(getCompletionKindForMeta('query', kinds)).toBe(kinds.Operator); + expect(getCompletionKindForMeta('query:comparison', kinds)).toBe(kinds.Operator); + expect(getCompletionKindForMeta('query:logical', kinds)).toBe(kinds.Operator); + }); + + test('maps expression operators to Function kind', () => { + expect(getCompletionKindForMeta('expr:arith', kinds)).toBe(kinds.Function); + expect(getCompletionKindForMeta('expr:string', kinds)).toBe(kinds.Function); + }); + + test('maps BSON constructors to Constructor kind', () => { + expect(getCompletionKindForMeta('bson', kinds)).toBe(kinds.Constructor); + }); + + test('maps stages to Module kind', () => { + expect(getCompletionKindForMeta('stage', kinds)).toBe(kinds.Module); + }); + + test('maps accumulators to Method kind', () => { + expect(getCompletionKindForMeta('accumulator', kinds)).toBe(kinds.Method); + }); + + test('maps update operators to Property kind', () => { + expect(getCompletionKindForMeta('update', kinds)).toBe(kinds.Property); + }); + + test('maps variables to Variable kind', () => { + expect(getCompletionKindForMeta('variable', kinds)).toBe(kinds.Variable); + }); + + test('maps window operators to Event kind', () => { + expect(getCompletionKindForMeta('window', kinds)).toBe(kinds.Event); + }); + + test('maps field identifiers to Field kind', () => { + expect(getCompletionKindForMeta('field:identifier', kinds)).toBe(kinds.Field); + }); + + test('maps unknown meta to Text kind', () => { + expect(getCompletionKindForMeta('unknown', kinds)).toBe(kinds.Text); + }); + }); + + describe('mapOperatorToCompletionItem', () => { + const mockMonaco = createMockMonaco(); + + test('maps a 
simple operator entry without snippet', () => { + const entry: OperatorEntry = { + value: '$eq', + meta: 'query:comparison', + description: 'Matches values equal to a specified value.', + }; + + const item = mapOperatorToCompletionItem(entry, testRange, mockMonaco); + + expect(getLabelText(item.label)).toBe('$eq'); + expect(item.kind).toBe(mockCompletionItemKind.Operator); + expect(item.insertText).toBe('$eq'); + expect(item.insertTextRules).toBeUndefined(); + expect((item.documentation as { value: string }).value).toContain( + 'Matches values equal to a specified value.', + ); + expect(item.range).toBe(testRange); + }); + + test('maps an operator entry with snippet', () => { + const entry: OperatorEntry = { + value: '$gt', + meta: 'query:comparison', + description: 'Greater than', + snippet: '{ $gt: ${1:value} }', + }; + + const item = mapOperatorToCompletionItem(entry, testRange, mockMonaco); + + expect(getLabelText(item.label)).toBe('$gt'); + expect(item.insertText).toBe('{ \\$gt: ${1:value} }'); + expect(item.insertTextRules).toBe(mockInsertTextRule.InsertAsSnippet); + }); + + test('maps a BSON constructor with link', () => { + const entry: OperatorEntry = { + value: 'ObjectId', + meta: 'bson', + description: 'Creates a new ObjectId value.', + snippet: 'ObjectId("${1:hex}")', + link: 'https://docs.example.com/objectid', + }; + + const item = mapOperatorToCompletionItem(entry, testRange, mockMonaco); + + expect(getLabelText(item.label)).toBe('ObjectId'); + expect(item.kind).toBe(mockCompletionItemKind.Constructor); + expect(item.insertText).toBe('ObjectId("${1:hex}")'); + expect(item.insertTextRules).toBe(mockInsertTextRule.InsertAsSnippet); + const docValue = (item.documentation as { value: string }).value; + expect(docValue).toContain('Creates a new ObjectId value.'); + expect(docValue).toContain('https://docs.example.com/objectid'); + }); + + test('uses the provided range', () => { + const customRange: monacoEditor.IRange = { + startLineNumber: 3, + 
endLineNumber: 3, + startColumn: 5, + endColumn: 10, + }; + + const entry: OperatorEntry = { + value: '$in', + meta: 'query:comparison', + description: 'Matches any value in an array.', + }; + + const item = mapOperatorToCompletionItem(entry, customRange, mockMonaco); + expect(item.range).toBe(customRange); + }); + }); + + describe('getMetaTagsForEditorType', () => { + test('returns FILTER_COMPLETION_META for Filter editor type', () => { + const tags = getMetaTagsForEditorType(EditorType.Filter); + expect(tags).toBe(FILTER_COMPLETION_META); + }); + + test('returns PROJECTION_COMPLETION_META for Project editor type', () => { + const tags = getMetaTagsForEditorType(EditorType.Project); + expect(tags).toBe(PROJECTION_COMPLETION_META); + }); + + test('returns PROJECTION_COMPLETION_META for Sort editor type', () => { + const tags = getMetaTagsForEditorType(EditorType.Sort); + expect(tags).toBe(PROJECTION_COMPLETION_META); + }); + + test('returns FILTER_COMPLETION_META for undefined (fallback)', () => { + const tags = getMetaTagsForEditorType(undefined); + expect(tags).toBe(FILTER_COMPLETION_META); + }); + }); + + describe('createCompletionItems', () => { + const mockMonaco = createMockMonaco(); + + afterEach(() => { + clearAllCompletionContexts(); + }); + + test('returns items for filter context using documentdb-constants', () => { + const items = createCompletionItems({ + editorType: EditorType.Filter, + sessionId: undefined, + range: testRange, + isDollarPrefix: false, + monaco: mockMonaco, + }); + + // Should return the filter completions from documentdb-constants + expect(items.length).toBeGreaterThan(0); + + // All items should have required CompletionItem properties + for (const item of items) { + expect(item.label).toBeDefined(); + expect(getLabelText(item.label)).toBeDefined(); + expect(item.kind).toBeDefined(); + expect(item.insertText).toBeDefined(); + expect(item.range).toBe(testRange); + } + }); + + test('filter completions include query operators like $eq, 
$gt, $match at value position', () => { + const items = createCompletionItems({ + editorType: EditorType.Filter, + sessionId: undefined, + range: testRange, + isDollarPrefix: false, + monaco: mockMonaco, + cursorContext: { position: 'value', fieldName: 'x' }, + }); + + const labels = items.map((item) => getLabelText(item.label)); + expect(labels).toContain('$eq'); + expect(labels).toContain('$gt'); + expect(labels).toContain('$in'); + }); + + test('filter completions include BSON constructors like ObjectId at value position', () => { + const items = createCompletionItems({ + editorType: EditorType.Filter, + sessionId: undefined, + range: testRange, + isDollarPrefix: false, + monaco: mockMonaco, + cursorContext: { position: 'value', fieldName: 'x' }, + }); + + const labels = items.map((item) => getLabelText(item.label)); + expect(labels).toContain('ObjectId'); + expect(labels).toContain('UUID'); + expect(labels).toContain('ISODate'); + }); + + test('filter completions do NOT include JS globals like console, Math, function', () => { + const items = createCompletionItems({ + editorType: EditorType.Filter, + sessionId: undefined, + range: testRange, + isDollarPrefix: false, + monaco: mockMonaco, + }); + + const labels = items.map((item) => getLabelText(item.label)); + expect(labels).not.toContain('console'); + expect(labels).not.toContain('Math'); + expect(labels).not.toContain('function'); + expect(labels).not.toContain('window'); + expect(labels).not.toContain('document'); + expect(labels).not.toContain('Array'); + expect(labels).not.toContain('Object'); + expect(labels).not.toContain('String'); + }); + + test('filter completions do NOT include aggregation stages', () => { + const items = createCompletionItems({ + editorType: EditorType.Filter, + sessionId: undefined, + range: testRange, + isDollarPrefix: false, + monaco: mockMonaco, + }); + + const labels = items.map((item) => getLabelText(item.label)); + // $match is a query operator AND a stage, but $group/$unwind 
are stage-only + expect(labels).not.toContain('$group'); + expect(labels).not.toContain('$unwind'); + expect(labels).not.toContain('$lookup'); + }); + + test('filter completions at value position match getFilteredCompletions count for FILTER_COMPLETION_META', () => { + const items = createCompletionItems({ + editorType: EditorType.Filter, + sessionId: undefined, + range: testRange, + isDollarPrefix: false, + monaco: mockMonaco, + cursorContext: { position: 'value', fieldName: 'x' }, + }); + + const expected = getFilteredCompletions({ meta: [...FILTER_COMPLETION_META] }); + // Value position includes operators + BSON constructors (minus key-position operators) + expect(items.length).toBeGreaterThan(0); + expect(items.length).toBeLessThanOrEqual(expected.length); + }); + + test('default (undefined editor type) matches filter completions', () => { + const filterItems = createCompletionItems({ + editorType: EditorType.Filter, + sessionId: undefined, + range: testRange, + isDollarPrefix: false, + monaco: mockMonaco, + }); + + const defaultItems = createCompletionItems({ + editorType: undefined, + sessionId: undefined, + range: testRange, + isDollarPrefix: false, + monaco: mockMonaco, + }); + + expect(defaultItems).toHaveLength(filterItems.length); + }); + }); + + describe('mapFieldToCompletionItem', () => { + const mockMonaco = createMockMonaco(); + + test('maps a simple field to a CompletionItem', () => { + const field = { + fieldName: 'age', + displayType: 'Number', + bsonType: 'int32', + isSparse: false, + insertText: 'age', + referenceText: '$age', + }; + + const item = mapFieldToCompletionItem(field, testRange, mockMonaco); + + expect(item.label).toEqual({ label: 'age', description: 'Number' }); + expect(item.kind).toBe(mockCompletionItemKind.Field); + expect(item.insertText).toBe('age: $1'); + expect(item.insertTextRules).toBe(mockInsertTextRule.InsertAsSnippet); + expect(item.sortText).toBe('0_age'); + expect(item.range).toBe(testRange); + }); + + test('includes 
(sparse) indicator for sparse fields', () => { + const field = { + fieldName: 'optionalField', + displayType: 'String', + bsonType: 'string', + isSparse: true, + insertText: 'optionalField', + referenceText: '$optionalField', + }; + + const item = mapFieldToCompletionItem(field, testRange, mockMonaco); + + expect((item.label as { description: string }).description).toBe('String (sparse)'); + }); + + test('uses pre-escaped insertText for special field names', () => { + const field = { + fieldName: 'address.city', + displayType: 'String', + bsonType: 'string', + isSparse: false, + insertText: '"address.city"', + referenceText: '$address.city', + }; + + const item = mapFieldToCompletionItem(field, testRange, mockMonaco); + + expect((item.label as { label: string }).label).toBe('address.city'); + expect(item.insertText).toBe('"address.city": $1'); + }); + }); + + describe('field completions via store', () => { + const mockMonaco = createMockMonaco(); + + afterEach(() => { + clearAllCompletionContexts(); + }); + + test('field completions appear when store has data', () => { + setCompletionContext('test-session', { + fields: [ + { + fieldName: 'name', + displayType: 'String', + bsonType: 'string', + isSparse: false, + insertText: 'name', + referenceText: '$name', + }, + { + fieldName: 'age', + displayType: 'Number', + bsonType: 'int32', + isSparse: false, + insertText: 'age', + referenceText: '$age', + }, + ], + }); + + const items = createCompletionItems({ + editorType: EditorType.Filter, + sessionId: 'test-session', + range: testRange, + isDollarPrefix: false, + monaco: mockMonaco, + }); + + const labels = items.map((i) => getLabelText(i.label)); + expect(labels).toContain('name'); + expect(labels).toContain('age'); + }); + + test('field completions have sortText prefix so they sort first', () => { + setCompletionContext('test-session', { + fields: [ + { + fieldName: 'name', + displayType: 'String', + bsonType: 'string', + isSparse: false, + insertText: 'name', + 
referenceText: '$name', + }, + ], + }); + + const items = createCompletionItems({ + editorType: EditorType.Filter, + sessionId: 'test-session', + range: testRange, + isDollarPrefix: false, + monaco: mockMonaco, + }); + + const fieldItem = items.find((i) => getLabelText(i.label) === 'name'); + expect(fieldItem?.sortText).toBe('0_name'); + }); + + test('empty store returns all operator completions', () => { + const items = createCompletionItems({ + editorType: EditorType.Filter, + sessionId: 'nonexistent-session', + range: testRange, + isDollarPrefix: false, + monaco: mockMonaco, + }); + + // Without cursorContext, falls back to all completions + const labels = items.map((i) => getLabelText(i.label)); + expect(labels).toContain('$and'); + expect(labels).toContain('$or'); + expect(labels).toContain('$gt'); + }); + + test('undefined sessionId returns all operator completions', () => { + const items = createCompletionItems({ + editorType: EditorType.Filter, + sessionId: undefined, + range: testRange, + isDollarPrefix: false, + monaco: mockMonaco, + }); + + // Without cursorContext, falls back to all completions + const labels = items.map((i) => getLabelText(i.label)); + expect(labels).toContain('$and'); + expect(labels).toContain('$or'); + expect(labels).toContain('$gt'); + }); + }); + + describe('getOperatorSortPrefix', () => { + test('returns undefined when no fieldBsonTypes provided', () => { + const entry: OperatorEntry = { + value: '$eq', + meta: 'query:comparison', + description: 'Equals', + }; + expect(getOperatorSortPrefix(entry, undefined)).toBeUndefined(); + expect(getOperatorSortPrefix(entry, [])).toBeUndefined(); + }); + + test('returns "1a_" for universal comparison operator (no applicableBsonTypes)', () => { + const entry: OperatorEntry = { + value: '$eq', + meta: 'query:comparison', + description: 'Equals', + }; + expect(getOperatorSortPrefix(entry, ['string'])).toBe('1a_'); + }); + + test('returns "1b_" for universal non-comparison operator', () => { + 
const entry: OperatorEntry = { + value: '$exists', + meta: 'query:element', + description: 'Exists', + }; + expect(getOperatorSortPrefix(entry, ['string'])).toBe('1b_'); + }); + + test('returns "0_" for type-relevant operator (applicableBsonTypes matches)', () => { + const entry: OperatorEntry = { + value: '$regex', + meta: 'query:evaluation', + description: 'Regex match', + applicableBsonTypes: ['string'], + }; + expect(getOperatorSortPrefix(entry, ['string'])).toBe('0_'); + }); + + test('returns "2_" for non-matching operator (applicableBsonTypes does not match)', () => { + const entry: OperatorEntry = { + value: '$regex', + meta: 'query:evaluation', + description: 'Regex match', + applicableBsonTypes: ['string'], + }; + expect(getOperatorSortPrefix(entry, ['int32'])).toBe('2_'); + }); + + test('handles polymorphic fields (multiple bsonTypes)', () => { + const regexEntry: OperatorEntry = { + value: '$regex', + meta: 'query:evaluation', + description: 'Regex match', + applicableBsonTypes: ['string'], + }; + // Field is sometimes string, sometimes int32 β€” $regex should match + expect(getOperatorSortPrefix(regexEntry, ['int32', 'string'])).toBe('0_'); + }); + + test('returns "2_" when operator types and field types have no intersection', () => { + const sizeEntry: OperatorEntry = { + value: '$size', + meta: 'query:array', + description: 'Array size', + applicableBsonTypes: ['array'], + }; + expect(getOperatorSortPrefix(sizeEntry, ['string', 'int32'])).toBe('2_'); + }); + }); + + describe('type-aware operator sorting in mapOperatorToCompletionItem', () => { + const mockMonaco = createMockMonaco(); + + test('sortText is undefined when no fieldBsonTypes provided', () => { + const entry: OperatorEntry = { + value: '$eq', + meta: 'query:comparison', + description: 'Equals', + }; + const item = mapOperatorToCompletionItem(entry, testRange, mockMonaco); + expect(item.sortText).toBeUndefined(); + }); + + test('sortText is undefined when empty fieldBsonTypes provided', () 
=> { + const entry: OperatorEntry = { + value: '$eq', + meta: 'query:comparison', + description: 'Equals', + }; + const item = mapOperatorToCompletionItem(entry, testRange, mockMonaco, []); + expect(item.sortText).toBeUndefined(); + }); + + test('universal comparison operator gets "1a_" prefix when fieldBsonTypes provided', () => { + const entry: OperatorEntry = { + value: '$eq', + meta: 'query:comparison', + description: 'Equals', + }; + const item = mapOperatorToCompletionItem(entry, testRange, mockMonaco, ['int32']); + expect(item.sortText).toBe('1a_$eq'); + }); + + test('type-relevant operator gets "0_" prefix', () => { + const entry: OperatorEntry = { + value: '$regex', + meta: 'query:evaluation', + description: 'Regex match', + applicableBsonTypes: ['string'], + }; + const item = mapOperatorToCompletionItem(entry, testRange, mockMonaco, ['string']); + expect(item.sortText).toBe('0_$regex'); + }); + + test('non-matching operator gets "2_" prefix (demoted, not hidden)', () => { + const entry: OperatorEntry = { + value: '$regex', + meta: 'query:evaluation', + description: 'Regex match', + applicableBsonTypes: ['string'], + }; + const item = mapOperatorToCompletionItem(entry, testRange, mockMonaco, ['int32']); + expect(item.sortText).toBe('2_$regex'); + }); + }); + + describe('type-aware sorting via createCompletionItems', () => { + const mockMonaco = createMockMonaco(); + + afterEach(() => { + clearAllCompletionContexts(); + }); + + test('without fieldBsonTypes, operators have no sortText at value position', () => { + const items = createCompletionItems({ + editorType: EditorType.Filter, + sessionId: undefined, + range: testRange, + isDollarPrefix: false, + monaco: mockMonaco, + cursorContext: { position: 'value', fieldName: 'x' }, + }); + + const regexItem = items.find((i) => getLabelText(i.label) === '$regex'); + // At value position, operators get sort prefix 0_ (not type-aware) + expect(regexItem?.sortText).toBe('0_$regex'); + + const eqItem = items.find((i) 
=> getLabelText(i.label) === '$eq'); + expect(eqItem?.sortText).toBe('0_$eq'); + }); + + test('with fieldBsonTypes=["string"] at operator position, $regex gets "0_" and $size gets "2_"', () => { + const items = createCompletionItems({ + editorType: EditorType.Filter, + sessionId: undefined, + range: testRange, + isDollarPrefix: false, + monaco: mockMonaco, + fieldBsonTypes: ['string'], + cursorContext: { position: 'operator', fieldName: 'x' }, + }); + + const regexItem = items.find((i) => getLabelText(i.label) === '$regex'); + expect(regexItem?.sortText).toBe('0_$regex'); + + const sizeItem = items.find((i) => getLabelText(i.label) === '$size'); + expect(sizeItem?.sortText).toBe('2_$size'); + + // Comparison operators like $eq get "1a_" (promoted over other universals) + const eqItem = items.find((i) => getLabelText(i.label) === '$eq'); + expect(eqItem?.sortText).toBe('1a_$eq'); + }); + + test('with fieldBsonTypes=["int32"] at operator position, $regex gets "2_" (demoted, still present)', () => { + const context: CursorContext = { position: 'operator', fieldName: 'x' }; + const items = createCompletionItems({ + editorType: EditorType.Filter, + sessionId: undefined, + range: testRange, + isDollarPrefix: false, + monaco: mockMonaco, + fieldBsonTypes: ['int32'], + cursorContext: context, + }); + + const labels = items.map((i) => getLabelText(i.label)); + // $regex is still in the list, just demoted + expect(labels).toContain('$regex'); + + const regexItem = items.find((i) => getLabelText(i.label) === '$regex'); + expect(regexItem?.sortText).toBe('2_$regex'); + + // Bitwise operators should match int + const bitsAllSetItem = items.find((i) => getLabelText(i.label) === '$bitsAllSet'); + expect(bitsAllSetItem?.sortText).toBe('0_$bitsAllSet'); + }); + + test('all operators still present regardless of fieldBsonTypes at operator position', () => { + const itemsWithoutType = createCompletionItems({ + editorType: EditorType.Filter, + sessionId: undefined, + range: testRange, 
+ isDollarPrefix: false, + monaco: mockMonaco, + cursorContext: { position: 'operator', fieldName: 'x' }, + }); + + const itemsWithType = createCompletionItems({ + editorType: EditorType.Filter, + sessionId: undefined, + range: testRange, + isDollarPrefix: false, + monaco: mockMonaco, + fieldBsonTypes: ['int32'], + cursorContext: { position: 'operator', fieldName: 'x' }, + }); + + // Same number of items β€” nothing filtered out + expect(itemsWithType).toHaveLength(itemsWithoutType.length); + }); + + test('field items still get "0_" prefix even when fieldBsonTypes is set', () => { + setCompletionContext('test-session', { + fields: [ + { + fieldName: 'age', + displayType: 'Number', + bsonType: 'int32', + isSparse: false, + insertText: 'age', + referenceText: '$age', + }, + ], + }); + + const items = createCompletionItems({ + editorType: EditorType.Filter, + sessionId: 'test-session', + range: testRange, + isDollarPrefix: false, + monaco: mockMonaco, + fieldBsonTypes: ['int32'], + cursorContext: { position: 'key', depth: 1 }, + }); + + const fieldItem = items.find((i) => getLabelText(i.label) === 'age'); + expect(fieldItem?.sortText).toBe('0_age'); + }); + }); + + describe('stripOuterBraces', () => { + test('strips outer { } from operator snippets', () => { + expect(stripOuterBraces('{ $gt: ${1:value} }')).toBe('$gt: ${1:value}'); + }); + + test('preserves inner brackets', () => { + expect(stripOuterBraces('{ $in: [${1:value}] }')).toBe('$in: [${1:value}]'); + }); + + test('preserves inner braces', () => { + expect(stripOuterBraces('{ $elemMatch: { ${1:query} } }')).toBe('$elemMatch: { ${1:query} }'); + }); + + test('returns unchanged if not wrapped', () => { + expect(stripOuterBraces('ObjectId("${1:hex}")')).toBe('ObjectId("${1:hex}")'); + }); + + test('returns unchanged for non-matching patterns', () => { + expect(stripOuterBraces('$gt')).toBe('$gt'); + }); + }); + + describe('getCategoryLabel', () => { + test('extracts sub-category from qualified meta tag', () => 
{ + expect(getCategoryLabel('query:comparison')).toBe('comparison'); + expect(getCategoryLabel('query:logical')).toBe('logical'); + expect(getCategoryLabel('query:element')).toBe('element'); + expect(getCategoryLabel('query:array')).toBe('array'); + }); + + test('returns whole tag when no colon', () => { + expect(getCategoryLabel('bson')).toBe('bson'); + expect(getCategoryLabel('variable')).toBe('variable'); + }); + }); + + describe('escapeSnippetDollars', () => { + test('escapes $ before operator names in snippets', () => { + expect(escapeSnippetDollars('{ $gt: ${1:value} }')).toBe('{ \\$gt: ${1:value} }'); + }); + + test('preserves tab stop syntax', () => { + expect(escapeSnippetDollars('${1:value}')).toBe('${1:value}'); + expect(escapeSnippetDollars('$1')).toBe('$1'); + }); + + test('escapes multiple operator names', () => { + expect(escapeSnippetDollars('{ $and: [{ $gt: ${1:value} }] }')).toBe('{ \\$and: [{ \\$gt: ${1:value} }] }'); + }); + + test('does not escape BSON constructor snippets', () => { + expect(escapeSnippetDollars('ObjectId("${1:hex}")')).toBe('ObjectId("${1:hex}")'); + }); + + test('escapes stripped operator snippets', () => { + expect(escapeSnippetDollars('$gt: ${1:value}')).toBe('\\$gt: ${1:value}'); + expect(escapeSnippetDollars('$in: [${1:value}]')).toBe('\\$in: [${1:value}]'); + }); + }); + + // --------------------------------------------------------------- + // Context-sensitive completions (Step 4.5) + // --------------------------------------------------------------- + describe('context-sensitive completions', () => { + const mockMonaco = createMockMonaco(); + + afterEach(() => { + clearAllCompletionContexts(); + }); + + describe('key position', () => { + const keyContext: CursorContext = { position: 'key', depth: 1 }; + + test('shows field names when store has data', () => { + setCompletionContext('test-session', { + fields: [ + { + fieldName: 'name', + displayType: 'String', + bsonType: 'string', + isSparse: false, + insertText: 
'name', + referenceText: '$name', + }, + ], + }); + + const items = createCompletionItems({ + editorType: EditorType.Filter, + sessionId: 'test-session', + range: testRange, + isDollarPrefix: false, + monaco: mockMonaco, + cursorContext: keyContext, + }); + + const labels = items.map((i) => getLabelText(i.label)); + expect(labels).toContain('name'); + }); + + test('shows key-position operators ($and, $or, $nor)', () => { + const items = createCompletionItems({ + editorType: EditorType.Filter, + sessionId: undefined, + range: testRange, + isDollarPrefix: false, + monaco: mockMonaco, + cursorContext: keyContext, + }); + + const labels = items.map((i) => getLabelText(i.label)); + expect(labels).toContain('$and'); + expect(labels).toContain('$or'); + expect(labels).toContain('$nor'); + expect(labels).toContain('$comment'); + expect(labels).toContain('$expr'); + // $not is a field-level operator, NOT a key-position operator + expect(labels).not.toContain('$not'); + }); + + test('does NOT show value-level operators ($gt, $lt, $regex, $eq)', () => { + const items = createCompletionItems({ + editorType: EditorType.Filter, + sessionId: undefined, + range: testRange, + isDollarPrefix: false, + monaco: mockMonaco, + cursorContext: keyContext, + }); + + const labels = items.map((i) => getLabelText(i.label)); + expect(labels).not.toContain('$gt'); + expect(labels).not.toContain('$lt'); + expect(labels).not.toContain('$regex'); + expect(labels).not.toContain('$eq'); + expect(labels).not.toContain('$in'); + expect(labels).not.toContain('$exists'); + }); + + test('does NOT show BSON constructors', () => { + const items = createCompletionItems({ + editorType: EditorType.Filter, + sessionId: undefined, + range: testRange, + isDollarPrefix: false, + monaco: mockMonaco, + cursorContext: keyContext, + }); + + const labels = items.map((i) => getLabelText(i.label)); + expect(labels).not.toContain('ObjectId'); + expect(labels).not.toContain('UUID'); + 
expect(labels).not.toContain('ISODate'); + }); + + test('fields sort before operators', () => { + setCompletionContext('test-session', { + fields: [ + { + fieldName: 'age', + displayType: 'Number', + bsonType: 'int32', + isSparse: false, + insertText: 'age', + referenceText: '$age', + }, + ], + }); + + const items = createCompletionItems({ + editorType: EditorType.Filter, + sessionId: 'test-session', + range: testRange, + isDollarPrefix: false, + monaco: mockMonaco, + cursorContext: keyContext, + }); + + const fieldItem = items.find((i) => getLabelText(i.label) === 'age'); + const andItem = items.find((i) => getLabelText(i.label) === '$and'); + expect(fieldItem?.sortText).toBe('0_age'); + expect(andItem?.sortText).toBe('1_$and'); + }); + }); + + describe('value position', () => { + const valueContext: CursorContext = { position: 'value', fieldName: 'age' }; + + test('shows BSON constructors', () => { + const items = createCompletionItems({ + editorType: EditorType.Filter, + sessionId: undefined, + range: testRange, + isDollarPrefix: false, + monaco: mockMonaco, + cursorContext: valueContext, + }); + + const labels = items.map((i) => getLabelText(i.label)); + expect(labels).toContain('ObjectId'); + expect(labels).toContain('UUID'); + expect(labels).toContain('ISODate'); + }); + + test('shows query operators (with brace-wrapping snippets)', () => { + const items = createCompletionItems({ + editorType: EditorType.Filter, + sessionId: undefined, + range: testRange, + isDollarPrefix: false, + monaco: mockMonaco, + cursorContext: valueContext, + }); + + const labels = items.map((i) => getLabelText(i.label)); + expect(labels).toContain('$gt'); + expect(labels).toContain('$eq'); + expect(labels).toContain('$in'); + + // Operators should have their full brace-wrapping snippets at value position + const gtItem = items.find((i) => getLabelText(i.label) === '$gt'); + expect(gtItem?.insertText).toBe('{ \\$gt: ${1:value} }'); + }); + + test('operators sort before BSON 
constructors', () => { + const items = createCompletionItems({ + editorType: EditorType.Filter, + sessionId: undefined, + range: testRange, + isDollarPrefix: false, + monaco: mockMonaco, + cursorContext: valueContext, + }); + + const gtItem = items.find((i) => getLabelText(i.label) === '$gt'); + const objectIdItem = items.find((i) => getLabelText(i.label) === 'ObjectId'); + expect(gtItem?.sortText).toBe('0_$gt'); + expect(objectIdItem?.sortText).toBe('3_ObjectId'); + }); + + test('includes JS globals and common methods after BSON constructors', () => { + const items = createCompletionItems({ + editorType: EditorType.Filter, + sessionId: undefined, + range: testRange, + isDollarPrefix: false, + monaco: mockMonaco, + cursorContext: valueContext, + }); + + const labels = items.map((i) => getLabelText(i.label)); + // Class constructors + expect(labels).toContain('Date'); + expect(labels).toContain('RegExp'); + // Static methods + expect(labels).toContain('Date.now()'); + expect(labels).toContain('Math.floor()'); + expect(labels).toContain('Math.min()'); + expect(labels).toContain('Math.max()'); + // Primitives + expect(labels).toContain('Infinity'); + + // JS globals sort after BSON constructors (4_ > 3_) + const dateItem = items.find((i) => getLabelText(i.label) === 'Date'); + expect(dateItem?.sortText).toBe('4_Date'); + }); + + test('does NOT show key-position operators ($and, $or)', () => { + const items = createCompletionItems({ + editorType: EditorType.Filter, + sessionId: undefined, + range: testRange, + isDollarPrefix: false, + monaco: mockMonaco, + cursorContext: valueContext, + }); + + const labels = items.map((i) => getLabelText(i.label)); + expect(labels).not.toContain('$and'); + expect(labels).not.toContain('$or'); + }); + + test('does NOT show field names', () => { + setCompletionContext('test-session', { + fields: [ + { + fieldName: 'name', + displayType: 'String', + bsonType: 'string', + isSparse: false, + insertText: 'name', + referenceText: '$name', + 
}, + ], + }); + + const items = createCompletionItems({ + editorType: EditorType.Filter, + sessionId: 'test-session', + range: testRange, + isDollarPrefix: false, + monaco: mockMonaco, + cursorContext: valueContext, + }); + + const labels = items.map((i) => getLabelText(i.label)); + expect(labels).not.toContain('name'); + }); + }); + + describe('operator position', () => { + const operatorContext: CursorContext = { position: 'operator', fieldName: 'age' }; + + test('shows comparison operators ($gt, $lt, $eq, $in) and $not', () => { + const items = createCompletionItems({ + editorType: EditorType.Filter, + sessionId: undefined, + range: testRange, + isDollarPrefix: false, + monaco: mockMonaco, + cursorContext: operatorContext, + }); + + const labels = items.map((i) => getLabelText(i.label)); + expect(labels).toContain('$gt'); + expect(labels).toContain('$lt'); + expect(labels).toContain('$eq'); + expect(labels).toContain('$in'); + expect(labels).toContain('$exists'); + expect(labels).toContain('$regex'); + // $not is a field-level operator, valid at operator position + expect(labels).toContain('$not'); + }); + + test('does NOT show key-position operators ($and, $or)', () => { + const items = createCompletionItems({ + editorType: EditorType.Filter, + sessionId: undefined, + range: testRange, + isDollarPrefix: false, + monaco: mockMonaco, + cursorContext: operatorContext, + }); + + const labels = items.map((i) => getLabelText(i.label)); + expect(labels).not.toContain('$and'); + expect(labels).not.toContain('$or'); + expect(labels).not.toContain('$nor'); + }); + + test('does NOT show BSON constructors', () => { + const items = createCompletionItems({ + editorType: EditorType.Filter, + sessionId: undefined, + range: testRange, + isDollarPrefix: false, + monaco: mockMonaco, + cursorContext: operatorContext, + }); + + const labels = items.map((i) => getLabelText(i.label)); + expect(labels).not.toContain('ObjectId'); + expect(labels).not.toContain('UUID'); + }); + + 
test('does NOT show field names', () => { + setCompletionContext('test-session', { + fields: [ + { + fieldName: 'name', + displayType: 'String', + bsonType: 'string', + isSparse: false, + insertText: 'name', + referenceText: '$name', + }, + ], + }); + + const items = createCompletionItems({ + editorType: EditorType.Filter, + sessionId: 'test-session', + range: testRange, + isDollarPrefix: false, + monaco: mockMonaco, + cursorContext: operatorContext, + }); + + const labels = items.map((i) => getLabelText(i.label)); + expect(labels).not.toContain('name'); + }); + + test('applies type-aware sorting when fieldBsonType is available', () => { + const typedContext: CursorContext = { + position: 'operator', + fieldName: 'age', + fieldBsonType: 'int32', + }; + + const items = createCompletionItems({ + editorType: EditorType.Filter, + sessionId: undefined, + range: testRange, + isDollarPrefix: false, + monaco: mockMonaco, + cursorContext: typedContext, + }); + + // $regex has applicableBsonTypes=['string'], doesn't match 'int32' β†’ demoted + const regexItem = items.find((i) => getLabelText(i.label) === '$regex'); + expect(regexItem?.sortText).toBe('2_$regex'); + + // $bitsAllSet has applicableBsonTypes containing 'int32' β†’ promoted + const bitsItem = items.find((i) => getLabelText(i.label) === '$bitsAllSet'); + expect(bitsItem?.sortText).toBe('0_$bitsAllSet'); + + // $eq is universal comparison β†’ promoted tier + const eqItem = items.find((i) => getLabelText(i.label) === '$eq'); + expect(eqItem?.sortText).toBe('1a_$eq'); + }); + + test('strips outer braces from operator snippets (Issue A fix)', () => { + const items = createCompletionItems({ + editorType: EditorType.Filter, + sessionId: undefined, + range: testRange, + isDollarPrefix: false, + monaco: mockMonaco, + cursorContext: operatorContext, + }); + + // At operator position, snippets should NOT have outer { } + const gtItem = items.find((i) => getLabelText(i.label) === '$gt'); + 
expect(gtItem?.insertText).toBe('\\$gt: ${1:value}'); + + const inItem = items.find((i) => getLabelText(i.label) === '$in'); + expect(inItem?.insertText).toBe('\\$in: [${1:value}]'); + + const regexItem = items.find((i) => getLabelText(i.label) === '$regex'); + expect(regexItem?.insertText).toBe('\\$regex: /${1:pattern}/'); + }); + }); + + describe('array-element position', () => { + const arrayContext: CursorContext = { position: 'array-element', parentOperator: '$and' }; + + test('behaves like key position (shows fields + key operators)', () => { + setCompletionContext('test-session', { + fields: [ + { + fieldName: 'age', + displayType: 'Number', + bsonType: 'int32', + isSparse: false, + insertText: 'age', + referenceText: '$age', + }, + ], + }); + + const items = createCompletionItems({ + editorType: EditorType.Filter, + sessionId: 'test-session', + range: testRange, + isDollarPrefix: false, + monaco: mockMonaco, + cursorContext: arrayContext, + }); + + const labels = items.map((i) => getLabelText(i.label)); + // Should include fields + expect(labels).toContain('age'); + // Should include key-position operators + expect(labels).toContain('$and'); + expect(labels).toContain('$or'); + // Should NOT include value-level operators + expect(labels).not.toContain('$gt'); + expect(labels).not.toContain('$regex'); + // Should NOT include BSON constructors + expect(labels).not.toContain('ObjectId'); + }); + }); + + describe('unknown position', () => { + const unknownContext: CursorContext = { position: 'unknown' }; + + test('falls back to all completions', () => { + const itemsWithContext = createCompletionItems({ + editorType: EditorType.Filter, + sessionId: undefined, + range: testRange, + isDollarPrefix: false, + monaco: mockMonaco, + cursorContext: unknownContext, + }); + + const itemsWithoutContext = createCompletionItems({ + editorType: EditorType.Filter, + sessionId: undefined, + range: testRange, + isDollarPrefix: false, + monaco: mockMonaco, + }); + + // Both 
should produce the same all-completions list + expect(itemsWithContext).toHaveLength(itemsWithoutContext.length); + const labels = itemsWithContext.map((i) => getLabelText(i.label)); + // All completions include key-position operators + expect(labels).toContain('$and'); + expect(labels).toContain('$or'); + // Also include value-position operators and BSON constructors + expect(labels).toContain('$gt'); + expect(labels).toContain('ObjectId'); + }); + }); + + describe('no cursorContext (undefined)', () => { + test('falls back to all completions (fields + operators + BSON + JS globals)', () => { + const items = createCompletionItems({ + editorType: EditorType.Filter, + sessionId: undefined, + range: testRange, + isDollarPrefix: false, + monaco: mockMonaco, + cursorContext: undefined, + }); + + // Without cursorContext, shows all completions + const labels = items.map((i) => getLabelText(i.label)); + expect(labels).toContain('$and'); + expect(labels).toContain('$or'); + expect(labels).toContain('$gt'); + expect(labels).toContain('ObjectId'); + }); + }); + + describe('needsWrapping (empty editor, no braces)', () => { + test('field insertText is wrapped with { } when needsWrapping is true', () => { + setCompletionContext('test-session', { + fields: [ + { + fieldName: 'name', + displayType: 'String', + bsonType: 'string', + isSparse: false, + insertText: 'name', + referenceText: '$name', + }, + ], + }); + + const items = createCompletionItems({ + editorType: EditorType.Filter, + sessionId: 'test-session', + range: testRange, + isDollarPrefix: false, + monaco: mockMonaco, + cursorContext: { position: 'unknown' }, + needsWrapping: true, + }); + + const fieldItem = items.find((i) => getLabelText(i.label) === 'name'); + expect(fieldItem?.insertText).toBe('{ name: $1 }'); + }); + + test('field insertText is NOT wrapped when needsWrapping is false', () => { + setCompletionContext('test-session', { + fields: [ + { + fieldName: 'name', + displayType: 'String', + bsonType: 
'string', + isSparse: false, + insertText: 'name', + referenceText: '$name', + }, + ], + }); + + const items = createCompletionItems({ + editorType: EditorType.Filter, + sessionId: 'test-session', + range: testRange, + isDollarPrefix: false, + monaco: mockMonaco, + cursorContext: { position: 'unknown' }, + needsWrapping: false, + }); + + const fieldItem = items.find((i) => getLabelText(i.label) === 'name'); + expect(fieldItem?.insertText).toBe('name: $1'); + }); + + test('operators keep full brace-wrapping snippets when needsWrapping is true', () => { + const items = createCompletionItems({ + editorType: EditorType.Filter, + sessionId: undefined, + range: testRange, + isDollarPrefix: false, + monaco: mockMonaco, + cursorContext: { position: 'unknown' }, + needsWrapping: true, + }); + + // Operator snippets include { } already β€” they should NOT be stripped + const andItem = items.find((i) => getLabelText(i.label) === '$and'); + expect(andItem?.insertText).toContain('{'); + expect(andItem?.insertText).toContain('}'); + }); + }); + + // --------------------------------------------------------------- + // Category coverage: verify operator categories at each position + // --------------------------------------------------------------- + describe('operator category coverage by position', () => { + test('key position: only key-position operators, no field-level operators', () => { + const items = createCompletionItems({ + editorType: EditorType.Filter, + sessionId: undefined, + range: testRange, + isDollarPrefix: false, + monaco: mockMonaco, + cursorContext: { position: 'key', depth: 1 }, + }); + + const labels = items.map((i) => getLabelText(i.label)); + // Key-position: logical combinators and meta operators + expect(labels).toContain('$and'); // query:logical + expect(labels).toContain('$or'); // query:logical + expect(labels).toContain('$nor'); // query:logical + expect(labels).toContain('$comment'); // query:comment + expect(labels).toContain('$expr'); // 
query:expr + // Field-level operators must NOT appear at key position + expect(labels).not.toContain('$all'); // query:array β€” field-level + expect(labels).not.toContain('$elemMatch'); // query:array β€” field-level + expect(labels).not.toContain('$size'); // query:array β€” field-level + expect(labels).not.toContain('$gt'); // query:comparison + expect(labels).not.toContain('$regex'); // query:evaluation + expect(labels).not.toContain('$exists'); // query:element + expect(labels).not.toContain('$not'); // query:logical β€” field-level + }); + + test('value position: includes operators from all field-level categories', () => { + const items = createCompletionItems({ + editorType: EditorType.Filter, + sessionId: undefined, + range: testRange, + isDollarPrefix: false, + monaco: mockMonaco, + cursorContext: { position: 'value', fieldName: 'x' }, + }); + + const labels = items.map((i) => getLabelText(i.label)); + // Should include field-level operators from every category + expect(labels).toContain('$gt'); // query:comparison + expect(labels).toContain('$eq'); // query:comparison + expect(labels).toContain('$in'); // query:comparison + expect(labels).toContain('$regex'); // query:evaluation + expect(labels).toContain('$exists'); // query:element + expect(labels).toContain('$type'); // query:element + expect(labels).toContain('$all'); // query:array + expect(labels).toContain('$elemMatch'); // query:array + expect(labels).toContain('$size'); // query:array + expect(labels).toContain('$not'); // query:logical (field-level) + // Key-position operators should NOT be at value position + expect(labels).not.toContain('$and'); + expect(labels).not.toContain('$or'); + }); + + test('operator position: same field-level categories as value position', () => { + const items = createCompletionItems({ + editorType: EditorType.Filter, + sessionId: undefined, + range: testRange, + isDollarPrefix: false, + monaco: mockMonaco, + cursorContext: { position: 'operator', fieldName: 'x' }, + 
}); + + const labels = items.map((i) => getLabelText(i.label)); + expect(labels).toContain('$gt'); // query:comparison + expect(labels).toContain('$regex'); // query:evaluation + expect(labels).toContain('$exists'); // query:element + expect(labels).toContain('$all'); // query:array + expect(labels).toContain('$not'); // query:logical (field-level) + expect(labels).not.toContain('$and'); // key-position only + }); + }); + }); + + // --------------------------------------------------------------- + // Type-aware value suggestions + // --------------------------------------------------------------- + describe('createTypeSuggestions', () => { + const mockMonaco = createMockMonaco(); + + test('returns empty array for undefined bsonType', () => { + const items = createTypeSuggestions(undefined, testRange, mockMonaco); + expect(items).toHaveLength(0); + }); + + test('returns empty array for unknown bsonType', () => { + const items = createTypeSuggestions('unknownType', testRange, mockMonaco); + expect(items).toHaveLength(0); + }); + + test('returns true/false for boolean fields', () => { + const items = createTypeSuggestions('boolean', testRange, mockMonaco); + expect(items).toHaveLength(2); + + const labels = items.map((i) => getLabelText(i.label)); + expect(labels).toContain('true'); + expect(labels).toContain('false'); + + // Plain text, not snippets + const trueItem = items.find((i) => getLabelText(i.label) === 'true'); + expect(trueItem?.insertText).toBe('true'); + expect(trueItem?.insertTextRules).toBeUndefined(); + expect(trueItem?.kind).toBe(mockCompletionItemKind.Value); + }); + + test('returns range query for int fields', () => { + const items = createTypeSuggestions('int32', testRange, mockMonaco); + expect(items.length).toBeGreaterThanOrEqual(1); + + const labels = items.map((i) => getLabelText(i.label)); + expect(labels[0]).toContain('$gt'); + expect(labels[0]).toContain('$lt'); + + // Should be a snippet + 
expect(items[0].kind).toBe(mockCompletionItemKind.Snippet); + }); + + test('returns regex and empty string for string fields', () => { + const items = createTypeSuggestions('string', testRange, mockMonaco); + expect(items.length).toBeGreaterThanOrEqual(1); + + const labels = items.map((i) => getLabelText(i.label)); + expect(labels).toContain('{ $regex: /…/ }'); + }); + + test('returns ISODate for date fields', () => { + const items = createTypeSuggestions('date', testRange, mockMonaco); + expect(items.length).toBeGreaterThanOrEqual(1); + + const labels = items.map((i) => getLabelText(i.label)); + expect(labels).toContain('ISODate("…")'); + }); + + test('returns ObjectId for objectid fields', () => { + const items = createTypeSuggestions('objectid', testRange, mockMonaco); + expect(items).toHaveLength(1); + + expect(getLabelText(items[0].label)).toBe('ObjectId("…")'); + }); + + test('returns null for null fields', () => { + const items = createTypeSuggestions('null', testRange, mockMonaco); + expect(items).toHaveLength(1); + + expect(getLabelText(items[0].label)).toBe('null'); + expect(items[0].insertText).toBe('null'); + }); + + test('returns elemMatch and size for array fields', () => { + const items = createTypeSuggestions('array', testRange, mockMonaco); + expect(items.length).toBeGreaterThanOrEqual(2); + + const labels = items.map((i) => getLabelText(i.label)); + expect(labels).toContain('{ $elemMatch: { … } }'); + expect(labels).toContain('{ $size: … }'); + }); + + test('suggestions have sort prefix 00_ (highest priority)', () => { + const items = createTypeSuggestions('boolean', testRange, mockMonaco); + for (const item of items) { + expect(item.sortText).toMatch(/^00_/); + } + }); + + test('first suggestion is preselected', () => { + const items = createTypeSuggestions('int32', testRange, mockMonaco); + expect(items[0].preselect).toBe(true); + }); + }); + + describe('type suggestions in value position integration', () => { + const mockMonaco = 
createMockMonaco(); + + test('boolean field at value position shows true/false first', () => { + const context: CursorContext = { position: 'value', fieldName: 'isActive', fieldBsonType: 'boolean' }; + const items = createCompletionItems({ + editorType: EditorType.Filter, + sessionId: undefined, + range: testRange, + isDollarPrefix: false, + monaco: mockMonaco, + cursorContext: context, + }); + + const labels = items.map((i) => getLabelText(i.label)); + // true/false should be present + expect(labels).toContain('true'); + expect(labels).toContain('false'); + + // Operators should also be present + expect(labels).toContain('$eq'); + expect(labels).toContain('$gt'); + + // true/false should sort before operators (00_ < 0_) + const trueItem = items.find((i) => getLabelText(i.label) === 'true'); + const eqItem = items.find((i) => getLabelText(i.label) === '$eq'); + expect(trueItem!.sortText! < eqItem!.sortText!).toBe(true); + }); + + test('int field at value position shows range query first', () => { + const context: CursorContext = { position: 'value', fieldName: 'age', fieldBsonType: 'int32' }; + const items = createCompletionItems({ + editorType: EditorType.Filter, + sessionId: undefined, + range: testRange, + isDollarPrefix: false, + monaco: mockMonaco, + cursorContext: context, + }); + + // Range query suggestion should be first (sort 00_00) + const first = items[0]; + expect(getLabelText(first.label)).toContain('$gt'); + expect(first.sortText).toBe('00_00'); + }); + + test('unknown type at value position has no type suggestions', () => { + const context: CursorContext = { position: 'value', fieldName: 'data' }; + const items = createCompletionItems({ + editorType: EditorType.Filter, + sessionId: undefined, + range: testRange, + isDollarPrefix: false, + monaco: mockMonaco, + cursorContext: context, + }); + + // No type suggestions, but operators and BSON should still be present + const labels = items.map((i) => getLabelText(i.label)); + 
expect(labels).toContain('$eq'); + expect(labels).toContain('ObjectId'); + + // No items with 00_ sort prefix + expect(items.filter((i) => i.sortText?.startsWith('00_'))).toHaveLength(0); + }); + }); + + // --------------------------------------------------------------- + // Project and Sort value completions + // --------------------------------------------------------------- + describe('project editor value completions', () => { + const mockMonaco = createMockMonaco(); + + test('shows 1 (include) and 0 (exclude) at value position', () => { + const context: CursorContext = { position: 'value', fieldName: 'name' }; + const items = createCompletionItems({ + editorType: EditorType.Project, + sessionId: undefined, + range: testRange, + isDollarPrefix: false, + monaco: mockMonaco, + cursorContext: context, + }); + + expect(items).toHaveLength(2); + const labels = items.map((i) => getLabelText(i.label)); + expect(labels).toContain('1'); + expect(labels).toContain('0'); + }); + + test('1 (include) has description "include field"', () => { + const context: CursorContext = { position: 'value', fieldName: 'name' }; + const items = createCompletionItems({ + editorType: EditorType.Project, + sessionId: undefined, + range: testRange, + isDollarPrefix: false, + monaco: mockMonaco, + cursorContext: context, + }); + + const includeItem = items.find((i) => getLabelText(i.label) === '1'); + expect((includeItem?.label as { description: string }).description).toBe('include field'); + }); + + test('does NOT show operators, BSON constructors, or JS globals', () => { + const context: CursorContext = { position: 'value', fieldName: 'name' }; + const items = createCompletionItems({ + editorType: EditorType.Project, + sessionId: undefined, + range: testRange, + isDollarPrefix: false, + monaco: mockMonaco, + cursorContext: context, + }); + + const labels = items.map((i) => getLabelText(i.label)); + expect(labels).not.toContain('$gt'); + expect(labels).not.toContain('ObjectId'); + 
expect(labels).not.toContain('Date'); + }); + + test('1 is preselected', () => { + const context: CursorContext = { position: 'value', fieldName: 'name' }; + const items = createCompletionItems({ + editorType: EditorType.Project, + sessionId: undefined, + range: testRange, + isDollarPrefix: false, + monaco: mockMonaco, + cursorContext: context, + }); + + const includeItem = items.find((i) => getLabelText(i.label) === '1'); + expect(includeItem?.preselect).toBe(true); + }); + }); + + describe('sort editor value completions', () => { + const mockMonaco = createMockMonaco(); + + test('shows 1 (ascending) and -1 (descending) at value position', () => { + const context: CursorContext = { position: 'value', fieldName: 'age' }; + const items = createCompletionItems({ + editorType: EditorType.Sort, + sessionId: undefined, + range: testRange, + isDollarPrefix: false, + monaco: mockMonaco, + cursorContext: context, + }); + + expect(items).toHaveLength(2); + const labels = items.map((i) => getLabelText(i.label)); + expect(labels).toContain('1'); + expect(labels).toContain('-1'); + }); + + test('-1 has description "descending"', () => { + const context: CursorContext = { position: 'value', fieldName: 'age' }; + const items = createCompletionItems({ + editorType: EditorType.Sort, + sessionId: undefined, + range: testRange, + isDollarPrefix: false, + monaco: mockMonaco, + cursorContext: context, + }); + + const descItem = items.find((i) => getLabelText(i.label) === '-1'); + expect((descItem?.label as { description: string }).description).toBe('descending'); + }); + + test('does NOT show operators, BSON constructors, or JS globals', () => { + const context: CursorContext = { position: 'value', fieldName: 'age' }; + const items = createCompletionItems({ + editorType: EditorType.Sort, + sessionId: undefined, + range: testRange, + isDollarPrefix: false, + monaco: mockMonaco, + cursorContext: context, + }); + + const labels = items.map((i) => getLabelText(i.label)); + 
expect(labels).not.toContain('$gt'); + expect(labels).not.toContain('ObjectId'); + expect(labels).not.toContain('Date'); + }); + + test('1 is preselected', () => { + const context: CursorContext = { position: 'value', fieldName: 'age' }; + const items = createCompletionItems({ + editorType: EditorType.Sort, + sessionId: undefined, + range: testRange, + isDollarPrefix: false, + monaco: mockMonaco, + cursorContext: context, + }); + + const ascItem = items.find((i) => getLabelText(i.label) === '1'); + expect(ascItem?.preselect).toBe(true); + }); + }); + + // --------------------------------------------------------------- + // Category-based completion coverage by cursor position + // --------------------------------------------------------------- + describe('completion categories by cursor position', () => { + const mockMonaco = createMockMonaco(); + + /** + * Helper: extracts the description (category label) from a CompletionItem. + * For operator items this is getCategoryLabel(meta), e.g., "comparison", "array". + * For JS globals it is "JS global". + * For fields it is the type, e.g., "Number". + */ + function getDescription(label: string | monacoEditor.languages.CompletionItemLabel): string | undefined { + return typeof label === 'string' ? undefined : label.description; + } + + /** Returns Set of distinct category descriptions from a completion list. 
*/ + function getCategories(items: monacoEditor.languages.CompletionItem[]): Set { + const categories = new Set(); + for (const item of items) { + const desc = getDescription(item.label); + if (desc) categories.add(desc); + } + return categories; + } + + describe('key position ({ })', () => { + const keyContext: CursorContext = { position: 'key', depth: 1 }; + + test('includes "logical" category operators', () => { + const items = createCompletionItems({ + editorType: EditorType.Filter, + sessionId: undefined, + range: testRange, + isDollarPrefix: false, + monaco: mockMonaco, + cursorContext: keyContext, + }); + + const categories = getCategories(items); + expect(categories.has('logical')).toBe(true); + }); + + test('does NOT include purely field-level categories (comparison, array, element)', () => { + const items = createCompletionItems({ + editorType: EditorType.Filter, + sessionId: undefined, + range: testRange, + isDollarPrefix: false, + monaco: mockMonaco, + cursorContext: keyContext, + }); + + const categories = getCategories(items); + // These categories have NO operators in KEY_POSITION_OPERATORS + expect(categories.has('comparison')).toBe(false); + expect(categories.has('array')).toBe(false); + expect(categories.has('element')).toBe(false); + // Note: 'evaluation' IS present because $expr, $jsonSchema, $text are key-position + }); + + test('does NOT include "bson" or "JS global"', () => { + const items = createCompletionItems({ + editorType: EditorType.Filter, + sessionId: undefined, + range: testRange, + isDollarPrefix: false, + monaco: mockMonaco, + cursorContext: keyContext, + }); + + const categories = getCategories(items); + expect(categories.has('bson')).toBe(false); + expect(categories.has('JS global')).toBe(false); + }); + }); + + describe('value position ({ field: })', () => { + const valueContext: CursorContext = { position: 'value', fieldName: 'x' }; + + test('includes field-level categories: comparison, array, evaluation, element', () => { + 
const items = createCompletionItems({ + editorType: EditorType.Filter, + sessionId: undefined, + range: testRange, + isDollarPrefix: false, + monaco: mockMonaco, + cursorContext: valueContext, + }); + + const categories = getCategories(items); + expect(categories.has('comparison')).toBe(true); + expect(categories.has('array')).toBe(true); + expect(categories.has('evaluation')).toBe(true); + expect(categories.has('element')).toBe(true); + }); + + test('includes "bson" and "JS global"', () => { + const items = createCompletionItems({ + editorType: EditorType.Filter, + sessionId: undefined, + range: testRange, + isDollarPrefix: false, + monaco: mockMonaco, + cursorContext: valueContext, + }); + + const categories = getCategories(items); + expect(categories.has('bson')).toBe(true); + expect(categories.has('JS global')).toBe(true); + }); + + test('does NOT include key-position-only operators', () => { + const items = createCompletionItems({ + editorType: EditorType.Filter, + sessionId: undefined, + range: testRange, + isDollarPrefix: false, + monaco: mockMonaco, + cursorContext: valueContext, + }); + + const labels = items.map((i) => getLabelText(i.label)); + expect(labels).not.toContain('$and'); + expect(labels).not.toContain('$or'); + expect(labels).not.toContain('$nor'); + }); + }); + + describe('operator position ({ field: { } })', () => { + const operatorContext: CursorContext = { position: 'operator', fieldName: 'x' }; + + test('includes field-level categories: comparison, array, evaluation, element', () => { + const items = createCompletionItems({ + editorType: EditorType.Filter, + sessionId: undefined, + range: testRange, + isDollarPrefix: false, + monaco: mockMonaco, + cursorContext: operatorContext, + }); + + const categories = getCategories(items); + expect(categories.has('comparison')).toBe(true); + expect(categories.has('array')).toBe(true); + expect(categories.has('evaluation')).toBe(true); + expect(categories.has('element')).toBe(true); + }); + + 
test('does NOT include "bson", "JS global", or key-position operators', () => { + const items = createCompletionItems({ + editorType: EditorType.Filter, + sessionId: undefined, + range: testRange, + isDollarPrefix: false, + monaco: mockMonaco, + cursorContext: operatorContext, + }); + + const categories = getCategories(items); + expect(categories.has('bson')).toBe(false); + expect(categories.has('JS global')).toBe(false); + + const labels = items.map((i) => getLabelText(i.label)); + expect(labels).not.toContain('$and'); + expect(labels).not.toContain('$or'); + }); + }); + + describe('unknown position (genuinely ambiguous β€” shows everything)', () => { + const unknownContext: CursorContext = { position: 'unknown' }; + + test('includes all categories (full discovery fallback)', () => { + const items = createCompletionItems({ + editorType: EditorType.Filter, + sessionId: undefined, + range: testRange, + isDollarPrefix: false, + monaco: mockMonaco, + cursorContext: unknownContext, + }); + + const categories = getCategories(items); + // UNKNOWN shows everything as discovery + expect(categories.has('logical')).toBe(true); + expect(categories.has('comparison')).toBe(true); + expect(categories.has('array')).toBe(true); + expect(categories.has('bson')).toBe(true); + expect(categories.has('JS global')).toBe(true); + }); + + test('includes field names if store has data', () => { + setCompletionContext('test-session', { + fields: [ + { + fieldName: 'name', + displayType: 'String', + bsonType: 'string', + isSparse: false, + insertText: 'name', + referenceText: '$name', + }, + ], + }); + + const items = createCompletionItems({ + editorType: EditorType.Filter, + sessionId: 'test-session', + range: testRange, + isDollarPrefix: false, + monaco: mockMonaco, + cursorContext: unknownContext, + }); + + const labels = items.map((i) => getLabelText(i.label)); + expect(labels).toContain('name'); + }); + }); + }); +}); diff --git 
a/src/webviews/documentdbQuery/documentdbQueryCompletionProvider.ts b/src/webviews/documentdbQuery/documentdbQueryCompletionProvider.ts new file mode 100644 index 000000000..3d3b74643 --- /dev/null +++ b/src/webviews/documentdbQuery/documentdbQueryCompletionProvider.ts @@ -0,0 +1,32 @@ +/*--------------------------------------------------------------------------------------------- + * Copyright (c) Microsoft Corporation. All rights reserved. + * Licensed under the MIT License. See License.txt in the project root for license information. + *--------------------------------------------------------------------------------------------*/ + +/** + * Barrel re-export for the completions module. + * + * The completion provider logic has been refactored into the `completions/` folder: + * - `completions/createCompletionItems.ts` — main entry point, context branching + * - `completions/mapCompletionItems.ts` — operator/field → CompletionItem mapping + * - `completions/typeSuggestions.ts` — type-aware value suggestions + * - `completions/snippetUtils.ts` — snippet text manipulation + * + * This file preserves the original import path for existing consumers.
+ */ + +// eslint-disable-next-line no-restricted-exports +export { + KEY_POSITION_OPERATORS, + createCompletionItems, + createTypeSuggestions, + escapeSnippetDollars, + getCategoryLabel, + getCompletionKindForMeta, + getMetaTagsForEditorType, + getOperatorSortPrefix, + mapFieldToCompletionItem, + mapOperatorToCompletionItem, + stripOuterBraces, + type CreateCompletionItemsParams, +} from './completions'; diff --git a/src/webviews/documentdbQuery/documentdbQueryHoverProvider.test.ts b/src/webviews/documentdbQuery/documentdbQueryHoverProvider.test.ts new file mode 100644 index 000000000..720d7b72a --- /dev/null +++ b/src/webviews/documentdbQuery/documentdbQueryHoverProvider.test.ts @@ -0,0 +1,197 @@ +/*--------------------------------------------------------------------------------------------- + * Copyright (c) Microsoft Corporation. All rights reserved. + * Licensed under the MIT License. See License.txt in the project root for license information. + *--------------------------------------------------------------------------------------------*/ + +import { type FieldCompletionData } from '../../utils/json/data-api/autocomplete/toFieldCompletionItems'; +import { getHoverContent, type FieldDataLookup } from './documentdbQueryHoverProvider'; + +/** Creates a mock field lookup function from an array of fields. 
*/ +function createFieldLookup(fields: FieldCompletionData[]): FieldDataLookup { + return (word: string) => fields.find((f) => f.fieldName === word); +} + +describe('documentdbQueryHoverProvider', () => { + describe('getHoverContent', () => { + test('returns hover for known operator $gt', () => { + const hover = getHoverContent('$gt'); + expect(hover).not.toBeNull(); + expect(hover!.contents).toHaveLength(1); + + const content = (hover!.contents[0] as { value: string }).value; + expect(content).toContain('**$gt**'); + }); + + test('returns hover with description for $eq', () => { + const hover = getHoverContent('$eq'); + expect(hover).not.toBeNull(); + + const content = (hover!.contents[0] as { value: string }).value; + expect(content).toContain('**$eq**'); + expect(content.split('\n').length).toBeGreaterThan(1); + }); + + test('returns hover for BSON constructor ObjectId', () => { + const hover = getHoverContent('ObjectId'); + expect(hover).not.toBeNull(); + + const content = (hover!.contents[0] as { value: string }).value; + expect(content).toContain('**ObjectId**'); + }); + + test('returns null for unknown word', () => { + const hover = getHoverContent('foo'); + expect(hover).toBeNull(); + }); + + test('returns null for arbitrary text that is not an operator', () => { + const hover = getHoverContent('somethingRandom123'); + expect(hover).toBeNull(); + }); + + test('word without $ prefix matches operator when prefixed', () => { + const hover = getHoverContent('gt'); + expect(hover).not.toBeNull(); + + const content = (hover!.contents[0] as { value: string }).value; + expect(content).toContain('**$gt**'); + }); + + test('includes doc link when available', () => { + const hover = getHoverContent('$gt'); + expect(hover).not.toBeNull(); + + const content = (hover!.contents[0] as { value: string }).value; + expect(content).toContain('Documentation]'); + }); + + test('operator hover has isTrusted set for clickable links', () => { + const hover = getHoverContent('$gt'); 
+ expect(hover).not.toBeNull(); + + const hoverContent = hover!.contents[0] as { isTrusted?: boolean }; + expect(hoverContent.isTrusted).toBe(true); + }); + + test('returns hover for UUID constructor', () => { + const hover = getHoverContent('UUID'); + expect(hover).not.toBeNull(); + + const content = (hover!.contents[0] as { value: string }).value; + expect(content).toContain('**UUID**'); + }); + }); + + describe('field hover', () => { + const fields: FieldCompletionData[] = [ + { + fieldName: 'age', + displayType: 'Number', + bsonType: 'int32', + isSparse: false, + insertText: 'age', + referenceText: '$age', + }, + { + fieldName: 'nickname', + displayType: 'String', + bsonType: 'string', + isSparse: true, + insertText: 'nickname', + referenceText: '$nickname', + }, + { + fieldName: 'rating', + displayType: 'Double', + bsonType: 'double', + bsonTypes: ['double', 'int32'], + displayTypes: ['Double', 'Int32'], + isSparse: true, + insertText: 'rating', + referenceText: '$rating', + }, + ]; + + test('returns hover for a known field name', () => { + const hover = getHoverContent('age', createFieldLookup(fields)); + expect(hover).not.toBeNull(); + + const content = (hover!.contents[0] as { value: string }).value; + expect(content).toContain('**age**'); + }); + + test('shows "Inferred Type" section with type list', () => { + const hover = getHoverContent('age', createFieldLookup(fields)); + expect(hover).not.toBeNull(); + + const content = (hover!.contents[0] as { value: string }).value; + expect(content).toContain('Inferred Type'); + expect(content).toContain('Number'); + }); + + test('shows multiple types for polymorphic fields', () => { + const hover = getHoverContent('rating', createFieldLookup(fields)); + expect(hover).not.toBeNull(); + + const content = (hover!.contents[0] as { value: string }).value; + expect(content).toContain('Inferred Type'); + expect(content).toContain('Double'); + expect(content).toContain('Int32'); + }); + + test('shows sparse indicator for 
sparse fields', () => { + const hover = getHoverContent('nickname', createFieldLookup(fields)); + expect(hover).not.toBeNull(); + + const content = (hover!.contents[0] as { value: string }).value; + expect(content).toContain('**nickname**'); + expect(content).toContain('sparse'); + expect(content).toContain('not present in all documents'); + }); + + test('does NOT show sparse indicator for non-sparse fields', () => { + const hover = getHoverContent('age', createFieldLookup(fields)); + expect(hover).not.toBeNull(); + + const content = (hover!.contents[0] as { value: string }).value; + expect(content).not.toContain('sparse'); + }); + + test('field hover does NOT set isTrusted (user data is not trusted)', () => { + const hover = getHoverContent('age', createFieldLookup(fields)); + expect(hover).not.toBeNull(); + + const hoverContent = hover!.contents[0] as { isTrusted?: boolean }; + expect(hoverContent.isTrusted).toBeUndefined(); + }); + + test('returns null for unknown field when no operator match', () => { + const hover = getHoverContent('unknownField', createFieldLookup(fields)); + expect(hover).toBeNull(); + }); + + test('operators take priority over field names', () => { + const fieldsWithOperatorName: FieldCompletionData[] = [ + { + fieldName: 'gt', + displayType: 'String', + bsonType: 'string', + isSparse: false, + insertText: 'gt', + referenceText: '$gt', + }, + ]; + + const hover = getHoverContent('gt', createFieldLookup(fieldsWithOperatorName)); + expect(hover).not.toBeNull(); + + const content = (hover!.contents[0] as { value: string }).value; + expect(content).toContain('**$gt**'); + }); + + test('returns null for field when no fieldLookup provided', () => { + const hover = getHoverContent('age'); + expect(hover).toBeNull(); + }); + }); +}); diff --git a/src/webviews/documentdbQuery/documentdbQueryHoverProvider.ts b/src/webviews/documentdbQuery/documentdbQueryHoverProvider.ts new file mode 100644 index 000000000..23207293a --- /dev/null +++ 
b/src/webviews/documentdbQuery/documentdbQueryHoverProvider.ts @@ -0,0 +1,104 @@ +/*--------------------------------------------------------------------------------------------- + * Copyright (c) Microsoft Corporation. All rights reserved. + * Licensed under the MIT License. See License.txt in the project root for license information. + *--------------------------------------------------------------------------------------------*/ + +/** + * Hover provider logic for the `documentdb-query` language. + * + * Provides inline documentation when hovering over operators, + * BSON constructors, and field names. Uses `documentdb-constants` for + * the operator registry and the completion store for field type info. + */ + +import { getAllCompletions } from '@vscode-documentdb/documentdb-constants'; +// eslint-disable-next-line import/no-internal-modules +import type * as monacoEditor from 'monaco-editor/esm/vs/editor/editor.api'; +import { type FieldCompletionData } from '../../utils/json/data-api/autocomplete/toFieldCompletionItems'; +import { escapeMarkdown } from '../utils/escapeMarkdown'; + +/** + * A callback that resolves a word to field data from the completion store. + */ +export type FieldDataLookup = (word: string) => FieldCompletionData | undefined; + +/** + * Returns hover content for a word under the cursor. + * + * Tries multiple candidates to handle cases where: + * - The cursor is on `gt` after `$` (need to try `$gt`) + * - The cursor is on `ObjectId` (try as-is) + * - The cursor is on a field name like `age` (check field data) + * + * Operators/BSON constructors take priority over field names. 
+ * + * @param word - The word at the cursor position + * @param fieldLookup - optional callback to resolve field names to field data + * @returns A Monaco Hover or null if no match + */ +export function getHoverContent(word: string, fieldLookup?: FieldDataLookup): monacoEditor.languages.Hover | null { + // Try with '$' prefix first (for operators where cursor lands after $) + // Then try the word as-is (for BSON constructors like ObjectId) + const candidates = word.startsWith('$') ? [word] : [`$${word}`, word]; + + const allEntries = getAllCompletions(); + + for (const candidate of candidates) { + const match = allEntries.find((e) => e.value === candidate); + if (match) { + const lines: string[] = [`**${match.value}**`]; + + if (match.description || match.link) { + lines.push('---'); + lines.push('
'); + } + + if (match.description) { + lines.push(match.description); + } + if (match.link) { + lines.push(`[β“˜ Documentation](${match.link})`); + } + + return { + contents: [{ value: lines.join('\n\n'), isTrusted: true, supportHtml: true }], + }; + } + } + + // If no operator match, try field name lookup + if (fieldLookup) { + const fieldData = fieldLookup(word); + if (fieldData) { + return buildFieldHover(fieldData); + } + } + + return null; +} + +/** + * Builds a hover tooltip for a field name. + */ +function buildFieldHover(field: FieldCompletionData): monacoEditor.languages.Hover { + const safeName = escapeMarkdown(field.fieldName); + let header = `**${safeName}**`; + + if (field.isSparse) { + header += '    sparse: not present in all documents'; + } + + const lines: string[] = [header]; + + // Inferred types section + const typeList = field.displayTypes && field.displayTypes.length > 0 ? field.displayTypes : [field.displayType]; + if (typeList && typeList.length > 0) { + lines.push('---'); + lines.push('
'); + lines.push(`Inferred Type: ${typeList.map((type) => `\`${escapeMarkdown(type)}\``).join(', ')}`); + } + + return { + contents: [{ value: lines.join('\n\n'), supportHtml: true }], + }; +} diff --git a/src/webviews/documentdbQuery/documentdbQueryValidator.test.ts b/src/webviews/documentdbQuery/documentdbQueryValidator.test.ts new file mode 100644 index 000000000..40f807e10 --- /dev/null +++ b/src/webviews/documentdbQuery/documentdbQueryValidator.test.ts @@ -0,0 +1,243 @@ +/*--------------------------------------------------------------------------------------------- + * Copyright (c) Microsoft Corporation. All rights reserved. + * Licensed under the MIT License. See License.txt in the project root for license information. + *--------------------------------------------------------------------------------------------*/ + +import { levenshteinDistance, validateExpression } from './documentdbQueryValidator'; + +describe('documentdbQueryValidator', () => { + describe('validateExpression', () => { + test('valid expression { age: { $gt: 25 } } produces no diagnostics', () => { + const diagnostics = validateExpression('{ age: { $gt: 25 } }'); + expect(diagnostics).toHaveLength(0); + }); + + test('valid expression with multiple fields produces no diagnostics', () => { + const diagnostics = validateExpression('{ name: "Alice", age: 30 }'); + expect(diagnostics).toHaveLength(0); + }); + + test('valid expression with BSON constructor produces no diagnostics', () => { + const diagnostics = validateExpression('{ _id: ObjectId("507f1f77bcf86cd799439011") }'); + expect(diagnostics).toHaveLength(0); + }); + + test('valid expression with UUID constructor produces no diagnostics', () => { + const diagnostics = validateExpression('{ id: UUID("123e4567-e89b-12d3-a456-426614174000") }'); + expect(diagnostics).toHaveLength(0); + }); + + test('valid expression with nested objects produces no diagnostics', () => { + const diagnostics = validateExpression('{ a: { b: { c: 1 } } }'); + 
expect(diagnostics).toHaveLength(0); + }); + + test('syntax error { age: { $gt: } produces error diagnostic', () => { + const diagnostics = validateExpression('{ age: { $gt: } }'); + expect(diagnostics.length).toBeGreaterThan(0); + + const errorDiag = diagnostics.find((d) => d.severity === 'error'); + expect(errorDiag).toBeDefined(); + }); + + test('syntax error with unclosed brace produces error diagnostic', () => { + const diagnostics = validateExpression('{ age: 25'); + expect(diagnostics.length).toBeGreaterThan(0); + expect(diagnostics[0].severity).toBe('error'); + }); + + test('typo UUUD("...") produces warning "Did you mean UUID?"', () => { + const diagnostics = validateExpression('{ id: UUUD("abc") }'); + + const warnings = diagnostics.filter((d) => d.severity === 'warning'); + expect(warnings.length).toBeGreaterThan(0); + expect(warnings[0].message).toContain('UUID'); + expect(warnings[0].message).toContain('Did you mean'); + }); + + test('typo Objected produces warning "Did you mean ObjectId?"', () => { + const diagnostics = validateExpression('{ id: ObjctId("abc") }'); + + const warnings = diagnostics.filter((d) => d.severity === 'warning'); + expect(warnings.length).toBeGreaterThan(0); + expect(warnings[0].message).toContain('ObjectId'); + }); + + test('unknown identifier foo used as function produces error', () => { + // "foo" is not close to any known identifier (Levenshtein > 2) + const diagnostics = validateExpression('{ id: foo("abc") }'); + const errors = diagnostics.filter((d) => d.severity === 'error'); + expect(errors).toHaveLength(1); + expect(errors[0].message).toContain("Unknown function 'foo'"); + }); + + test('unknown identifier as field name is not flagged', () => { + // Field names (non-function identifiers) should never produce diagnostics + const diagnostics = validateExpression('{ unknownField: 1 }'); + expect(diagnostics).toHaveLength(0); + }); + + test('unknown field name ___id is not flagged (field validation is out of scope)', () 
=> { + // The validator does not validate field names against the schema. + // That requires integration with the completion store (known fields). + const diagnostics = validateExpression('{ ___id: 1 }'); + expect(diagnostics).toHaveLength(0); + }); + + test('empty string produces no diagnostics', () => { + const diagnostics = validateExpression(''); + expect(diagnostics).toHaveLength(0); + }); + + test('whitespace-only string produces no diagnostics', () => { + const diagnostics = validateExpression(' '); + expect(diagnostics).toHaveLength(0); + }); + + test('valid expression with Math.min produces no diagnostics', () => { + const diagnostics = validateExpression('{ rating: Math.min(1.7, 2) }'); + expect(diagnostics).toHaveLength(0); + }); + + test('valid expression with Date.now produces no diagnostics', () => { + const diagnostics = validateExpression('{ ts: Date.now() }'); + expect(diagnostics).toHaveLength(0); + }); + + test('typo Daate.now() produces warning "Did you mean Date?"', () => { + const diagnostics = validateExpression('{ _id: Daate.now() }'); + + const warnings = diagnostics.filter((d) => d.severity === 'warning'); + expect(warnings.length).toBeGreaterThan(0); + expect(warnings[0].message).toContain('Date'); + expect(warnings[0].message).toContain('Did you mean'); + }); + + test('typo Maht.min() produces warning "Did you mean Math?"', () => { + const diagnostics = validateExpression('{ val: Maht.min(1, 2) }'); + + const warnings = diagnostics.filter((d) => d.severity === 'warning'); + expect(warnings.length).toBeGreaterThan(0); + expect(warnings[0].message).toContain('Math'); + }); + + test('typo Nubmer.parseInt() produces warning "Did you mean Number?"', () => { + const diagnostics = validateExpression('{ x: Nubmer.parseInt("42") }'); + + const warnings = diagnostics.filter((d) => d.severity === 'warning'); + expect(warnings.length).toBeGreaterThan(0); + expect(warnings[0].message).toContain('Number'); + }); + + test('completely unknown member 
call UdddddduaD.now() produces error', () => { + const diagnostics = validateExpression('{ _id: UdddddduaD.now() }'); + const errors = diagnostics.filter((d) => d.severity === 'error'); + expect(errors).toHaveLength(1); + expect(errors[0].message).toContain("Unknown identifier 'UdddddduaD'"); + }); + + test('completely unknown direct call XyzAbc() produces error', () => { + const diagnostics = validateExpression('{ _id: XyzAbc("123") }'); + const errors = diagnostics.filter((d) => d.severity === 'error'); + expect(errors).toHaveLength(1); + expect(errors[0].message).toContain("Unknown function 'XyzAbc'"); + }); + + test('new Daddddte() produces error for unknown constructor', () => { + const diagnostics = validateExpression( + '{ date: { $gt: new Daddddte(Date.now() - 14 * 24 * 60 * 60 * 1000) } }', + ); + const errors = diagnostics.filter((d) => d.severity === 'error'); + expect(errors).toHaveLength(1); + expect(errors[0].message).toContain("Unknown constructor 'Daddddte'"); + }); + + test('new Dae() produces warning for near-miss constructor', () => { + const diagnostics = validateExpression('{ date: new Dae("2025-01-01") }'); + const warnings = diagnostics.filter((d) => d.severity === 'warning'); + expect(warnings).toHaveLength(1); + expect(warnings[0].message).toContain('Date'); + }); + + test('new Date() produces no diagnostics', () => { + const diagnostics = validateExpression('{ date: new Date() }'); + expect(diagnostics).toHaveLength(0); + }); + + test('new RegExp() produces no diagnostics', () => { + const diagnostics = validateExpression('{ name: { $regex: new RegExp("^test") } }'); + expect(diagnostics).toHaveLength(0); + }); + + test('Date.nodw() does NOT produce a warning (method validation is out of scope)', () => { + // We validate the object (Date) but not individual method names. + // Date is a known global, so no warning. The .nodw() method name + // is not validated β€” that would require method-level knowledge. 
+ const diagnostics = validateExpression('{ _id: Date.nodw() }'); + expect(diagnostics).toHaveLength(0); + }); + + test('valid expression with ISODate constructor produces no diagnostics', () => { + const diagnostics = validateExpression('{ ts: ISODate("2024-01-01") }'); + expect(diagnostics).toHaveLength(0); + }); + + test('valid expression with MinKey produces no diagnostics', () => { + const diagnostics = validateExpression('{ start: MinKey() }'); + expect(diagnostics).toHaveLength(0); + }); + + test('valid expression with MaxKey produces no diagnostics', () => { + const diagnostics = validateExpression('{ end: MaxKey() }'); + expect(diagnostics).toHaveLength(0); + }); + + test('valid expression with regex produces no diagnostics', () => { + const diagnostics = validateExpression('{ name: /^alice/i }'); + expect(diagnostics).toHaveLength(0); + }); + + test('valid expression with array produces no diagnostics', () => { + const diagnostics = validateExpression('{ tags: { $in: ["a", "b"] } }'); + expect(diagnostics).toHaveLength(0); + }); + + test('diagnostics have valid offsets within the input range', () => { + const code = '{ age: { $gt: } }'; + const diagnostics = validateExpression(code); + + for (const d of diagnostics) { + expect(d.startOffset).toBeGreaterThanOrEqual(0); + expect(d.endOffset).toBeLessThanOrEqual(code.length); + expect(d.startOffset).toBeLessThanOrEqual(d.endOffset); + } + }); + }); + + describe('levenshteinDistance', () => { + test('identical strings have distance 0', () => { + expect(levenshteinDistance('UUID', 'UUID')).toBe(0); + }); + + test('one character difference has distance 1', () => { + expect(levenshteinDistance('UUID', 'UUUD')).toBe(1); + }); + + test('two character difference has distance 2', () => { + expect(levenshteinDistance('ObjectId', 'ObjctId')).toBeLessThanOrEqual(2); + }); + + test('completely different strings have high distance', () => { + expect(levenshteinDistance('UUID', 'something')).toBeGreaterThan(2); + }); + + 
test('empty string vs non-empty has distance equal to length', () => { + expect(levenshteinDistance('', 'abc')).toBe(3); + expect(levenshteinDistance('abc', '')).toBe(3); + }); + + test('both empty strings have distance 0', () => { + expect(levenshteinDistance('', '')).toBe(0); + }); + }); +}); diff --git a/src/webviews/documentdbQuery/documentdbQueryValidator.ts b/src/webviews/documentdbQuery/documentdbQueryValidator.ts new file mode 100644 index 000000000..95690247c --- /dev/null +++ b/src/webviews/documentdbQuery/documentdbQueryValidator.ts @@ -0,0 +1,320 @@ +/*--------------------------------------------------------------------------------------------- + * Copyright (c) Microsoft Corporation. All rights reserved. + * Licensed under the MIT License. See License.txt in the project root for license information. + *--------------------------------------------------------------------------------------------*/ + +/** + * Validator for `documentdb-query` editor content. + * + * Uses `acorn` to parse the expression and `acorn-walk` to traverse the AST. + * Produces diagnostics for: + * - Syntax errors (severity: error) + * - Near-miss BSON constructor typos (severity: warning) + * + * This module is pure and testable β€” it does not depend on Monaco. + * The mapping from Diagnostic[] to Monaco markers happens in the editor mount handler. + */ + +import { getAllCompletions } from '@vscode-documentdb/documentdb-constants'; +import * as acorn from 'acorn'; +import * as walk from 'acorn-walk'; + +/** + * A diagnostic produced by the validator. + * Offsets are 0-based character positions in the original (unwrapped) input. + */ +export interface Diagnostic { + /** 0-based start character offset in the original input */ + startOffset: number; + /** 0-based end character offset in the original input */ + endOffset: number; + severity: 'error' | 'warning' | 'info'; + message: string; +} + +/** + * Known identifiers that should NOT be flagged as typos. 
/**
 * Levenshtein (edit) distance between two strings: the minimum number of
 * single-character insertions, deletions and substitutions needed to turn
 * `a` into `b`.
 *
 * The comparison is case-sensitive and operates on UTF-16 code units
 * (plain string indexing). The validator uses the result to detect
 * near-miss typos of known identifiers.
 *
 * @param a - first string
 * @param b - second string
 * @returns the edit distance; 0 when both strings are equal
 */
export function levenshteinDistance(a: string, b: string): number {
    if (a.length === 0) {
        return b.length;
    }
    if (b.length === 0) {
        return a.length;
    }

    // previousRow[col] holds the distance between the prefix of `a` handled so far
    // and the first `col` characters of `b`. It starts as the "empty prefix" row.
    let previousRow: number[] = Array.from({ length: b.length + 1 }, (_, col) => col);

    for (let row = 1; row <= a.length; row++) {
        // Column 0: turning `row` characters into the empty string costs `row` deletions.
        const currentRow: number[] = [row];

        for (let col = 1; col <= b.length; col++) {
            const cell =
                a[row - 1] === b[col - 1]
                    ? previousRow[col - 1]
                    : 1 + Math.min(previousRow[col], currentRow[col - 1], previousRow[col - 1]);
            currentRow.push(cell);
        }

        previousRow = currentRow;
    }

    return previousRow[b.length];
}
/**
 * Looks up the known identifier closest to `name`.
 *
 * Candidates are the BSON constructor entries from `documentdb-constants`
 * followed by everything in KNOWN_GLOBALS. Names are compared
 * case-insensitively, and only candidates within an edit distance of 2 qualify.
 *
 * @param name - the identifier as written in the expression
 * @returns the best candidate and its distance, or undefined when nothing is close enough
 */
function findNearMissKnownIdentifier(name: string): { match: string; distance: number } | undefined {
    ensureBsonConstructors();

    const needle = name.toLowerCase();
    const bsonNames = getAllCompletions()
        .filter((entry) => entry.meta === 'bson')
        .map((entry) => entry.value);

    // BSON constructors are visited before KNOWN_GLOBALS, and a candidate only replaces the
    // current best when it is strictly closer, so the earliest candidate wins ties.
    let closest: { match: string; distance: number } | undefined;
    for (const candidate of [...bsonNames, ...KNOWN_GLOBALS]) {
        const distance = levenshteinDistance(needle, candidate.toLowerCase());
        if (distance <= 2 && (closest === undefined || distance < closest.distance)) {
            closest = { match: candidate, distance };
        }
    }

    return closest;
}

/**
 * Validates a documentdb-query expression and returns diagnostics.
 *
 * The text is wrapped in parentheses and parsed with acorn as a single
 * expression. A parse failure yields one `error` diagnostic. Otherwise the AST
 * is walked, and identifiers used as a call target (`foo()`), as the object of
 * a member call (`foo.bar()`) or as a constructor (`new Foo()`) are compared
 * against KNOWN_GLOBALS: a near-miss becomes a `warning` with a suggestion,
 * anything else an `error`. Bare identifiers (field names) are never flagged.
 *
 * @param code - The expression text from the editor (e.g., `{ age: { $gt: 25 } }`)
 * @returns Array of diagnostics (empty if the expression is valid or blank);
 *          offsets refer to `code`, not to the wrapped text
 */
export function validateExpression(code: string): Diagnostic[] {
    ensureBsonConstructors();

    if (code.trim().length === 0) {
        return [];
    }

    const diagnostics: Diagnostic[] = [];

    // acorn needs the parentheses to read `{ ... }` as an object expression.
    // Every offset it reports is therefore shifted by one relative to `code`.
    const wrapped = `(${code})`;

    let ast: acorn.Node;
    try {
        ast = acorn.parseExpressionAt(wrapped, 0, {
            ecmaVersion: 'latest',
            sourceType: 'module',
        });
    } catch (error) {
        if (error instanceof SyntaxError) {
            const { pos, message: rawMessage } = error as SyntaxError & {
                pos?: number;
                loc?: { line: number; column: number };
            };
            // Undo the shift caused by the wrapping parenthesis, then clamp into the input.
            const unwrappedPos = pos !== undefined ? pos - 1 : 0;
            const startOffset = Math.max(0, Math.min(unwrappedPos, code.length));

            diagnostics.push({
                startOffset,
                endOffset: Math.min(startOffset + 1, code.length),
                severity: 'error',
                // Drop acorn's "(line:column)" suffix; the offsets already carry the location.
                message: rawMessage.replace(/\(\d+:\d+\)/, '').trim(),
            });
        }
        return diagnostics;
    }

    type IdentifierLike = acorn.Node & { name?: string; type: string };

    /**
     * Reports `candidate` when it is an Identifier that is not a known global:
     * a warning with a suggestion for a near-miss, otherwise an error built by `describeUnknown`.
     */
    const reportIfUnknown = (
        candidate: IdentifierLike | undefined,
        describeUnknown: (name: string) => string,
    ): void => {
        if (!candidate || candidate.type !== 'Identifier' || !candidate.name) {
            return;
        }

        const name = candidate.name;
        if (KNOWN_GLOBALS.has(name)) {
            return;
        }

        const nearMiss = findNearMissKnownIdentifier(name);
        diagnostics.push({
            startOffset: candidate.start - 1,
            endOffset: candidate.end - 1,
            severity: nearMiss ? 'warning' : 'error',
            message: nearMiss ? `Did you mean '${nearMiss.match}'?` : describeUnknown(name),
        });
    };

    // Walk the AST to check identifiers
    try {
        walk.simple(ast, {
            // Planned no-op: bare identifiers are intentionally not flagged.
            // In DocumentDB queries, most identifiers are field names (e.g. `{ age: 1 }`)
            // which are valid and shouldn't produce diagnostics. Only identifiers in
            // call positions are checked — see the CallExpression and NewExpression
            // handlers below.
            Identifier(_node: acorn.Node & { name: string }) {
                // no-op by design
            },
            CallExpression(
                node: acorn.Node & {
                    callee: acorn.Node & {
                        name?: string;
                        type: string;
                        object?: acorn.Node & { name?: string; type: string };
                    };
                },
            ) {
                const callee = node.callee;

                if (callee.type === 'Identifier') {
                    // Direct call — e.g., ObjctId("abc"); an unknown function will fail at runtime.
                    reportIfUnknown(
                        callee,
                        (name) =>
                            `Unknown function '${name}'. Expected a BSON constructor (e.g., ObjectId, ISODate) or a known global (e.g., Date, Math).`,
                    );
                } else if (callee.type === 'MemberExpression') {
                    // Member call — e.g., Daate.now(), Maht.min(); only the object is checked,
                    // the method name is not validated.
                    reportIfUnknown(
                        callee.object,
                        (objName) => `Unknown identifier '${objName}'. Expected a known global (e.g., Date, Math).`,
                    );
                }
            },
            // NewExpression has the same callee shape as CallExpression.
            // e.g., `new Daddddte()` — the callee is an Identifier node.
            NewExpression(
                node: acorn.Node & {
                    callee: acorn.Node & { name?: string; type: string };
                },
            ) {
                reportIfUnknown(
                    node.callee,
                    (name) =>
                        `Unknown constructor '${name}'. Expected a BSON constructor (e.g., ObjectId, ISODate) or a known global (e.g., Date, RegExp).`,
                );
            },
        });
    } catch {
        // If walking fails, just return the diagnostics collected so far
    }

    return diagnostics;
}
start/end for range highlighting', () => { + const line = '{ "address.street": 1 }'; + // 0123456789012345678 + const col = 10; // somewhere inside the quoted string + const result = extractQuotedKey(line, col); + expect(result).not.toBeNull(); + expect(result!.start).toBe(2); // position of opening " + expect(result!.end).toBe(18); // position after closing " + expect(result!.key).toBe('address.street'); + }); + + test('handles escaped quotes inside key', () => { + const line = '{ "key\\"name": 1 }'; + const col = 5; + const result = extractQuotedKey(line, col); + expect(result).not.toBeNull(); + expect(result!.key).toBe('key\\"name'); + }); + + test('cursor on opening quote still works', () => { + const line = '{ "address.street": 1 }'; + const col = 2; // on the opening " + const result = extractQuotedKey(line, col); + expect(result).not.toBeNull(); + expect(result!.key).toBe('address.street'); + }); + + test('cursor on closing quote still works', () => { + const line = '{ "address.street": 1 }'; + const col = 17; // on the closing " + const result = extractQuotedKey(line, col); + expect(result).not.toBeNull(); + expect(result!.key).toBe('address.street'); + }); + + test('returns null for empty line', () => { + const result = extractQuotedKey('', 0); + expect(result).toBeNull(); + }); +}); diff --git a/src/webviews/documentdbQuery/extractQuotedKey.ts b/src/webviews/documentdbQuery/extractQuotedKey.ts new file mode 100644 index 000000000..b3e412f58 --- /dev/null +++ b/src/webviews/documentdbQuery/extractQuotedKey.ts @@ -0,0 +1,92 @@ +/*--------------------------------------------------------------------------------------------- + * Copyright (c) Microsoft Corporation. All rights reserved. + * Licensed under the MIT License. See License.txt in the project root for license information. + *--------------------------------------------------------------------------------------------*/ + +/** + * Extracts a quoted key string if the cursor is inside one. 
/**
 * Extracts the quoted string under the cursor, if there is one.
 *
 * For `{ "address.street": 1 }`, when the cursor is anywhere from the opening
 * quote to the closing quote (both inclusive), returns the unquoted text
 * `address.street` along with the 0-based start/end positions of the full
 * quoted string (including the quotes themselves, for hover range highlighting).
 *
 * The line is scanned left to right so that quotes are paired in the order
 * they open and close. This keeps three cases correct that a backward scan
 * from the cursor gets wrong:
 * - a cursor between two literals (`{ "a" : "x" }` on the space after `"a"`)
 *   is not inside any string and yields null;
 * - structural characters inside the literal (`"a:b"`, `"a,b"`) do not end it;
 * - the other quote character inside a literal (`"it's"`) does not start a new one.
 *
 * A backslash escapes the character that follows it. The returned key is the
 * raw text between the quotes; escape sequences are not decoded.
 *
 * Returns null if the cursor is outside the line, not inside a quoted string,
 * or inside a string that is not closed on this line.
 *
 * @param line - the full line content
 * @param col0 - 0-based column position of the cursor
 */
export function extractQuotedKey(line: string, col0: number): { key: string; start: number; end: number } | null {
    if (col0 < 0 || col0 >= line.length) return null;

    // A literal that opens after the cursor cannot contain it, so scanning stops at col0.
    let scanIdx = 0;
    while (scanIdx <= col0) {
        const quoteChar = line[scanIdx];
        if (quoteChar !== '"' && quoteChar !== "'") {
            scanIdx++;
            continue;
        }

        const openQuoteIdx = scanIdx;
        const closeQuoteIdx = findClosingQuote(line, openQuoteIdx, quoteChar);

        // Unterminated literal: there is no complete quoted string to report.
        if (closeQuoteIdx < 0) return null;

        if (col0 <= closeQuoteIdx) {
            const key = line.substring(openQuoteIdx + 1, closeQuoteIdx);
            return { key, start: openQuoteIdx, end: closeQuoteIdx + 1 };
        }

        // The cursor lies beyond this literal; continue after its closing quote.
        scanIdx = closeQuoteIdx + 1;
    }

    return null;
}

/**
 * Finds the index of the quote that closes the literal opened at `openQuoteIdx`.
 * Characters preceded by a backslash are skipped. Returns -1 when the literal
 * is not closed before the end of the line.
 */
function findClosingQuote(line: string, openQuoteIdx: number, quoteChar: string): number {
    for (let i = openQuoteIdx + 1; i < line.length; i++) {
        if (line[i] === '\\') {
            // Skip the escaped character so `\"` and `\\` are consumed as pairs.
            i++;
            continue;
        }
        if (line[i] === quoteChar) {
            return i;
        }
    }
    return -1;
}
+ * + * Usage: + * ```typescript + * import { registerDocumentDBQueryLanguage, LANGUAGE_ID } from './documentdbQuery'; + * + * // During Monaco initialization: + * await registerDocumentDBQueryLanguage(monaco); + * + * // In editor props: + * + * ``` + */ + +export { clearCompletionContext, getCompletionContext, setCompletionContext } from './completionStore'; +export { detectCursorContext, type CursorContext, type FieldTypeLookup } from './cursorContext'; +export { validateExpression, type Diagnostic } from './documentdbQueryValidator'; +export { EditorType, LANGUAGE_ID, URI_SCHEME, buildEditorUri, parseEditorUri } from './languageConfig'; +export { registerDocumentDBQueryLanguage } from './registerLanguage'; diff --git a/src/webviews/documentdbQuery/isCursorInsideString.test.ts b/src/webviews/documentdbQuery/isCursorInsideString.test.ts new file mode 100644 index 000000000..1c5468aff --- /dev/null +++ b/src/webviews/documentdbQuery/isCursorInsideString.test.ts @@ -0,0 +1,87 @@ +/*--------------------------------------------------------------------------------------------- + * Copyright (c) Microsoft Corporation. All rights reserved. + * Licensed under the MIT License. See License.txt in the project root for license information. 
import { isCursorInsideString } from './isCursorInsideString';

/**
 * Unit tests for `isCursorInsideString`.
 *
 * Each case passes a text and a cursor offset and asserts whether that offset
 * falls inside a quoted string. Most cases place the cursor at the very end of
 * a partial document (`text.length`), mimicking the state of the editor at the
 * moment a trigger character has just been typed.
 */
describe('isCursorInsideString', () => {
    test('returns false for empty text', () => {
        expect(isCursorInsideString('', 0)).toBe(false);
    });

    test('returns false when cursor is outside any string', () => {
        const text = '{ name: "Alice", age: 30 }';
        // cursor after the comma, outside the string
        const cursorOffset = text.indexOf(',') + 1;
        expect(isCursorInsideString(text, cursorOffset)).toBe(false);
    });

    test('returns true when cursor is inside a double-quoted string', () => {
        const text = '{ name: "Ali';
        expect(isCursorInsideString(text, text.length)).toBe(true);
    });

    test('returns true when cursor is inside a single-quoted string', () => {
        const text = "{ name: 'Ali";
        expect(isCursorInsideString(text, text.length)).toBe(true);
    });

    test('returns false when cursor is after a closed string', () => {
        const text = '{ name: "Alice" }';
        // cursor at the space after closing quote
        const cursorOffset = text.indexOf('"', 9) + 1;
        expect(isCursorInsideString(text, cursorOffset)).toBe(false);
    });

    test('handles escaped quotes inside strings', () => {
        const text = '{ name: "has\\"quote';
        // cursor is still inside the string (the \" is escaped)
        expect(isCursorInsideString(text, text.length)).toBe(true);
    });

    test('returns false after escaped quote followed by closing quote', () => {
        const text = '{ name: "has\\"quote" }';
        // cursor after the closing quote
        const closingQuoteIdx = text.lastIndexOf('"');
        expect(isCursorInsideString(text, closingQuoteIdx + 1)).toBe(false);
    });

    // Edge cases from the plan
    test('{ name: "Alice", | } — cursor outside string after comma', () => {
        const text = '{ name: "Alice", ';
        expect(isCursorInsideString(text, text.length)).toBe(false);
    });

    test('{ name: "has:colon" } — cursor inside string at colon', () => {
        const text = '{ name: "has:';
        expect(isCursorInsideString(text, text.length)).toBe(true);
    });

    test('{ name: "has:colon", | } — cursor outside string after comma', () => {
        const text = '{ name: "has:colon", ';
        expect(isCursorInsideString(text, text.length)).toBe(false);
    });

    test('{ tags: ["a", | ] } — cursor outside string in array', () => {
        const text = '{ tags: ["a", ';
        expect(isCursorInsideString(text, text.length)).toBe(false);
    });

    test('{ msg: "has[bracket" } — cursor inside string at bracket', () => {
        const text = '{ msg: "has[';
        expect(isCursorInsideString(text, text.length)).toBe(true);
    });

    test('{ $and: [ | ] } — cursor outside string in array', () => {
        const text = '{ $and: [ ';
        expect(isCursorInsideString(text, text.length)).toBe(false);
    });

    test('handles mixed quote types correctly', () => {
        const text = '{ name: "it\'s" }';
        // The single quote inside double quotes doesn't close anything
        const cursorAfterClosingDouble = text.indexOf('"', 9) + 1;
        expect(isCursorInsideString(text, cursorAfterClosingDouble)).toBe(false);
    });
});
/**
 * Determines whether the cursor is inside a string literal.
 *
 * Scans `text` from the beginning up to `cursorOffset`, remembering which
 * quote character (single quote, double quote, or backtick) opened the string
 * currently being scanned. Inside a string, a backslash escapes the character
 * that follows it, so an escaped quote never closes the string. A quote of a
 * different kind than the opening one is treated as ordinary content.
 *
 * This is a lightweight heuristic for suppressing auto-trigger completions
 * when the trigger character (`:`, `,`, `[`) appears inside a string value
 * rather than as structural syntax. It is not a tokenizer: comments, regex
 * literals and `${...}` interpolation inside template literals are not
 * recognized.
 *
 * @param text - the full text of the editor
 * @param cursorOffset - the 0-based character offset of the cursor; offsets
 *   past the end of `text` are treated as the end of `text`
 * @returns true if the cursor is inside a string literal
 */
export function isCursorInsideString(text: string, cursorOffset: number): boolean {
    // Never scan past the end of the text, even if the offset is out of range.
    const scanEnd = Math.min(cursorOffset, text.length);

    // The quote character that opened the current string, or undefined when
    // the scan position is outside any string.
    let openQuote: string | undefined;

    for (let i = 0; i < scanEnd; i++) {
        const ch = text[i];

        if (openQuote === undefined) {
            // Outside a string: any of the three quote characters opens one.
            if (ch === '"' || ch === "'" || ch === '`') {
                openQuote = ch;
            }
        } else if (ch === '\\') {
            // Escape sequence: skip the escaped character so that an escaped
            // quote cannot be mistaken for the closing quote.
            i++;
        } else if (ch === openQuote) {
            // Only the same kind of quote that opened the string closes it.
            openQuote = undefined;
        }
    }

    return openQuote !== undefined;
}
import { buildEditorUri, EditorType, LANGUAGE_ID, parseEditorUri, URI_SCHEME } from './languageConfig';

/**
 * Unit tests for the `languageConfig` module: the exported constants, the
 * `EditorType` enum values, and the `buildEditorUri` / `parseEditorUri` pair
 * (including a round trip through both for every editor type).
 */
describe('languageConfig', () => {
    describe('constants', () => {
        test('LANGUAGE_ID is "documentdb-query"', () => {
            expect(LANGUAGE_ID).toBe('documentdb-query');
        });

        test('URI_SCHEME is "documentdb"', () => {
            expect(URI_SCHEME).toBe('documentdb');
        });
    });

    describe('EditorType', () => {
        test('has expected enum values', () => {
            expect(EditorType.Filter).toBe('filter');
            expect(EditorType.Project).toBe('project');
            expect(EditorType.Sort).toBe('sort');
            expect(EditorType.Aggregation).toBe('aggregation');
        });
    });

    describe('buildEditorUri', () => {
        test('builds filter URI with session ID', () => {
            const uri = buildEditorUri(EditorType.Filter, 'session-abc-123');
            expect(uri).toBe('documentdb://filter/session-abc-123');
        });

        test('builds project URI with session ID', () => {
            const uri = buildEditorUri(EditorType.Project, 'my-session');
            expect(uri).toBe('documentdb://project/my-session');
        });

        test('builds sort URI with session ID', () => {
            const uri = buildEditorUri(EditorType.Sort, 'sess-1');
            expect(uri).toBe('documentdb://sort/sess-1');
        });

        test('builds aggregation URI with session ID', () => {
            const uri = buildEditorUri(EditorType.Aggregation, 'agg-session');
            expect(uri).toBe('documentdb://aggregation/agg-session');
        });
    });

    describe('parseEditorUri', () => {
        test('parses valid filter URI', () => {
            const result = parseEditorUri('documentdb://filter/session-abc-123');
            expect(result).toEqual({
                editorType: EditorType.Filter,
                sessionId: 'session-abc-123',
            });
        });

        test('parses valid project URI', () => {
            const result = parseEditorUri('documentdb://project/my-session');
            expect(result).toEqual({
                editorType: EditorType.Project,
                sessionId: 'my-session',
            });
        });

        test('parses valid sort URI', () => {
            const result = parseEditorUri('documentdb://sort/sess-1');
            expect(result).toEqual({
                editorType: EditorType.Sort,
                sessionId: 'sess-1',
            });
        });

        test('parses valid aggregation URI', () => {
            const result = parseEditorUri('documentdb://aggregation/agg-123');
            expect(result).toEqual({
                editorType: EditorType.Aggregation,
                sessionId: 'agg-123',
            });
        });

        test('returns undefined for unrecognized scheme', () => {
            const result = parseEditorUri('vscode://filter/session-1');
            expect(result).toBeUndefined();
        });

        test('returns undefined for unknown editor type', () => {
            const result = parseEditorUri('documentdb://unknown/session-1');
            expect(result).toBeUndefined();
        });

        test('returns undefined for malformed URI (no session)', () => {
            const result = parseEditorUri('documentdb://filter');
            expect(result).toBeUndefined();
        });

        test('returns undefined for empty string', () => {
            const result = parseEditorUri('');
            expect(result).toBeUndefined();
        });

        test('roundtrips with buildEditorUri', () => {
            for (const editorType of Object.values(EditorType)) {
                const sessionId = `test-session-${editorType}`;
                const uri = buildEditorUri(editorType, sessionId);
                const parsed = parseEditorUri(uri);
                expect(parsed).toEqual({ editorType, sessionId });
            }
        });
    });
});
/**
 * Constants and configuration for the `documentdb-query` custom Monaco language.
 *
 * This language reuses the JavaScript Monarch tokenizer for syntax highlighting
 * but does NOT attach the TypeScript/JavaScript language service worker.
 * Completions are driven entirely by custom providers using `documentdb-constants`.
 */

/** The language identifier registered with Monaco. */
export const LANGUAGE_ID = 'documentdb-query';

/** URI scheme used for query editor models. */
export const URI_SCHEME = 'documentdb';

/**
 * Known editor types for URI-based routing.
 * The completion provider inspects `model.uri` to determine which
 * completions to offer.
 */
export enum EditorType {
    Filter = 'filter',
    Project = 'project',
    Sort = 'sort',
    Aggregation = 'aggregation',
}

/**
 * Matches `<scheme>://<editorType>/<sessionId>`.
 * Group 1 is the editor type (no slashes); group 2 is the session id, which is
 * everything after the first slash and may itself contain slashes.
 * Built once at module load instead of on every parse call.
 */
const EDITOR_URI_PATTERN = new RegExp(`^${URI_SCHEME}://([^/]+)/(.+)$`);

/** Type guard: true when `value` is one of the `EditorType` enum values. */
function isEditorType(value: string): value is EditorType {
    return (Object.values(EditorType) as readonly string[]).includes(value);
}

/**
 * Builds a Monaco model URI for a given editor type and session.
 *
 * @param editorType - the type of query editor (filter, project, sort, aggregation)
 * @param sessionId - unique session identifier for this editor instance
 * @returns a URI string like `documentdb://filter/session-abc-123`
 */
export function buildEditorUri(editorType: EditorType, sessionId: string): string {
    return `${URI_SCHEME}://${editorType}/${sessionId}`;
}

/**
 * Parses a Monaco model URI into its editor type and session id.
 *
 * @param uri - the URI string (e.g., `documentdb://filter/session-abc-123`)
 * @returns the editor type and session id, or `undefined` when the URI uses a
 *   different scheme, names an unknown editor type, or has no session segment
 */
export function parseEditorUri(uri: string): { editorType: EditorType; sessionId: string } | undefined {
    // `String(...)` is a no-op for strings; it keeps the original tolerance for
    // callers that hand over a URI-like object with a `toString()` at runtime.
    const match = EDITOR_URI_PATTERN.exec(String(uri));
    if (!match) {
        return undefined;
    }

    const [, editorType, sessionId] = match;

    // Validate before narrowing so an unknown type is never treated as an EditorType.
    if (editorType === undefined || sessionId === undefined || !isEditorType(editorType)) {
        return undefined;
    }

    return { editorType, sessionId };
}
/**
 * Registers the `documentdb-query` custom language with Monaco Editor.
 *
 * This module:
 * 1. Registers the language ID with Monaco
 * 2. Imports the JavaScript Monarch tokenizer for syntax highlighting
 * 3. Registers a custom CompletionItemProvider scoped to `documentdb-query`
 * 4. Registers a HoverProvider for operator/constructor documentation
 *
 * The JS tokenizer provides correct highlighting for:
 * - Unquoted identifiers: `{ name: 1 }`
 * - Single-quoted strings: `{ 'name': 1 }`
 * - Double-quoted strings: `{ "name": 1 }`
 * - BSON constructors: `ObjectId("...")`
 * - Regex literals: `/^alice/i`
 * - Comments, template literals, function bodies (for future $function support)
 *
 * Because this is a custom language ID, the TypeScript worker is NOT loaded,
 * keeping the bundle ~400-600 KB lighter and ensuring a clean completion slate.
 */

// eslint-disable-next-line import/no-internal-modules
import type * as monacoEditor from 'monaco-editor/esm/vs/editor/editor.api';
import { getCompletionContext } from './completionStore';
import { detectCursorContext } from './cursorContext';
import { createCompletionItems } from './documentdbQueryCompletionProvider';
import { getHoverContent } from './documentdbQueryHoverProvider';
import { extractQuotedKey } from './extractQuotedKey';
import { isCursorInsideString } from './isCursorInsideString';
import { LANGUAGE_ID, parseEditorUri } from './languageConfig';

/** Coalesces concurrent registrations into a single promise. */
let registrationPromise: Promise<void> | undefined;

/** Callback used to open external URLs via the extension host. */
let openUrlHandler: ((url: string) => void) | undefined;

/**
 * Registers the `documentdb-query` language with Monaco.
 *
 * Safe to call multiple times — concurrent calls coalesce into one registration.
 * The `openUrl` callback is updated on every call that supplies one, so the
 * tRPC client reference stays current even after hot-reloads; omitting it
 * keeps the previously stored callback.
 *
 * If registration fails (for example the tokenizer chunk cannot be loaded),
 * the cached promise is discarded so that a later call can retry instead of
 * receiving the same rejection forever.
 *
 * @param monaco - the Monaco editor API instance
 * @param openUrl - callback to open a URL via the extension host (avoids webview sandbox restrictions)
 * @returns a promise that resolves once the language and its providers are registered
 */
export function registerDocumentDBQueryLanguage(
    monaco: typeof monacoEditor,
    openUrl?: (url: string) => void,
): Promise<void> {
    openUrlHandler = openUrl ?? openUrlHandler;
    if (!registrationPromise) {
        const pending: Promise<void> = doRegisterLanguage(monaco).catch((error: unknown) => {
            // Only clear the cache if it still holds this attempt; a reset or a
            // newer attempt may have replaced it in the meantime.
            if (registrationPromise === pending) {
                registrationPromise = undefined;
            }
            throw error;
        });
        registrationPromise = pending;
    }
    return registrationPromise;
}

/**
 * Performs the actual registration: language ID, JS Monarch tokenizer and
 * language configuration, link opener, completion provider and hover provider.
 *
 * @param monaco - the Monaco editor API instance
 */
async function doRegisterLanguage(monaco: typeof monacoEditor): Promise<void> {
    // Step 1: Register the language ID
    monaco.languages.register({ id: LANGUAGE_ID });

    // Step 2: Import the JS Monarch tokenizer
    // This path has been stable since Monaco 0.20 and exports { conf, language }
    // eslint-disable-next-line import/no-internal-modules
    const jsLanguage = (await import('monaco-editor/esm/vs/basic-languages/javascript/javascript.js')) as {
        language: monacoEditor.languages.IMonarchLanguage;
        conf: monacoEditor.languages.LanguageConfiguration;
    };

    // Step 3: Apply the JS tokenizer and language configuration to our custom language
    monaco.languages.setMonarchTokensProvider(LANGUAGE_ID, jsLanguage.language);
    monaco.languages.setLanguageConfiguration(LANGUAGE_ID, jsLanguage.conf);

    // Register a link opener so that documentation links in hover tooltips
    // are opened via the extension host (which calls vscode.env.openExternal).
    // VS Code webview sandboxing blocks window.open/popups, so we route through
    // the tRPC openUrl mutation when available, or fall back to window.open.
    monaco.editor.registerLinkOpener({
        open(resource) {
            const url = resource.toString(true);
            if (openUrlHandler) {
                openUrlHandler(url);
            } else {
                window.open(url, '_blank');
            }
            return true;
        },
    });

    // Step 4: Register the completion provider
    monaco.languages.registerCompletionItemProvider(LANGUAGE_ID, {
        triggerCharacters: ['$', '"', "'", '{', '.', ':', ',', '['],
        provideCompletionItems: (
            model: monacoEditor.editor.ITextModel,
            position: monacoEditor.Position,
        ): monacoEditor.languages.CompletionList => {
            // Parse the model URI to determine editor context
            const uriString = model.uri.toString();
            const parsed = parseEditorUri(uriString);

            // Get the word at the current position for range calculation
            const wordInfo = model.getWordUntilPosition(position);
            let range: monacoEditor.IRange = {
                startLineNumber: position.lineNumber,
                endLineNumber: position.lineNumber,
                startColumn: wordInfo.startColumn,
                endColumn: wordInfo.endColumn,
            };

            // Check if cursor is preceded by '$' (for operator completions)
            // Monaco's getWordUntilPosition() does not treat '$' as part of a word boundary.
            // When the user types '$g', wordInfo.startColumn points to 'g', not '$'.
            // Without this fix, selecting '$gt' would insert '$$gt' (double dollar).
            const lineContent = model.getLineContent(position.lineNumber);
            // -2 because columns are 1-based: e.g. startColumn=1 → index -1 → undefined (safe).
            // JS returns undefined for out-of-bounds array access, so (undefined === '$') → false.
            const charBefore = lineContent[wordInfo.startColumn - 2];

            if (charBefore === '$') {
                range = { ...range, startColumn: range.startColumn - 1 };
            }

            // Detect cursor context for context-sensitive completions
            const text = model.getValue();
            const cursorOffset = model.getOffsetAt(position);

            // Suppress completions when the cursor is inside a string literal.
            // This prevents trigger characters like ':', ',', '[' from firing
            // inside strings like { name: "has:colon" } or { msg: "has[bracket" }.
            if (isCursorInsideString(text, cursorOffset)) {
                return { suggestions: [] };
            }

            const sessionId = parsed?.sessionId;

            // Build field lookup from completion store to enrich context with BSON types
            const fieldLookup = (fieldName: string): string | undefined => {
                if (!sessionId) return undefined;
                const ctx = getCompletionContext(sessionId);
                return ctx?.fields.find((f) => f.fieldName === fieldName)?.bsonType;
            };

            const cursorContext = detectCursorContext(text, cursorOffset, fieldLookup);

            // Detect whether the editor content has braces. When the user clears
            // the editor (deleting initial `{ }`), completions need to include
            // wrapping braces so inserted snippets produce valid query syntax.
            const needsWrapping = !text.includes('{');

            // Build completion items based on context
            const items = createCompletionItems({
                editorType: parsed?.editorType,
                sessionId,
                range,
                isDollarPrefix: charBefore === '$',
                monaco,
                cursorContext,
                needsWrapping,
            });

            return { suggestions: items };
        },
    });

    // Step 5: Register the hover provider
    monaco.languages.registerHoverProvider(LANGUAGE_ID, {
        provideHover: (
            model: monacoEditor.editor.ITextModel,
            position: monacoEditor.Position,
        ): monacoEditor.languages.Hover | null => {
            // Build field lookup from completion store for field hover info
            const uriString = model.uri.toString();
            const parsedUri = parseEditorUri(uriString);
            const hoverFieldLookup = parsedUri?.sessionId
                ? (word: string) => {
                      const ctx = getCompletionContext(parsedUri.sessionId);
                      return ctx?.fields.find((f) => f.fieldName === word);
                  }
                : undefined;

            // Try to extract a quoted string key (e.g., "address.street")
            // Monaco's getWordAtPosition treats quotes and dots as word boundaries,
            // so for { "address.street": 1 } hovering on "address" would only match
            // "address", not the full field name "address.street".
            const lineContent = model.getLineContent(position.lineNumber);
            const col0 = position.column - 1; // 0-based

            const quotedResult = extractQuotedKey(lineContent, col0);
            if (quotedResult) {
                const hover = getHoverContent(quotedResult.key, hoverFieldLookup);
                if (hover) {
                    return {
                        ...hover,
                        range: {
                            startLineNumber: position.lineNumber,
                            endLineNumber: position.lineNumber,
                            startColumn: quotedResult.start + 1, // 1-based
                            endColumn: quotedResult.end + 1, // 1-based
                        },
                    };
                }
            }

            // Fall back to standard word-based hover
            const wordAtPosition = model.getWordAtPosition(position);
            if (!wordAtPosition) {
                return null;
            }

            const hover = getHoverContent(wordAtPosition.word, hoverFieldLookup);
            if (!hover) {
                return null;
            }

            // Set the range for the hover highlight
            return {
                ...hover,
                range: {
                    startLineNumber: position.lineNumber,
                    endLineNumber: position.lineNumber,
                    startColumn: wordAtPosition.startColumn,
                    endColumn: wordAtPosition.endColumn,
                },
            };
        },
    });
}

/**
 * Resets the registration state. For testing only.
 * @internal
 */
export function _resetRegistration(): void {
    registrationPromise = undefined;
}
These tests define the expected behavior contract — they +were written BEFORE the implementation and drive the design. + +## Important: TDD Contract + +> **⚠️ LLM/Agent Instruction:** +> These tests represent intentional behavior specifications. If a test in this +> folder fails after a code change, **do NOT automatically fix the test**. +> Instead, alert the user that a TDD behavior contract has been violated. +> The failure likely means the implementation change broke an expected behavior. +> The user must decide whether to update the test (changing the contract) or +> fix the implementation. + +## Test Files + +| File | What it tests | +|------|---------------| +| `completionBehavior.test.ts` | Which completion categories appear at each cursor position, sorting order, and snippet wrapping | + +## Specification + +See [readme.completionBehavior.md](readme.completionBehavior.md) for the full +behavior specification with ASCII art examples. + +## Why a separate folder? + +These tests verify cross-cutting **behavior** (the completion matrix), not a +single class or module. They sit at the `documentdbQuery/tdd/` level because +they test the combined output of `cursorContext`, `createCompletionItems`, +`mapCompletionItems`, and `completionKnowledge` working together. diff --git a/src/webviews/documentdbQuery/tdd/completionBehavior.test.ts b/src/webviews/documentdbQuery/tdd/completionBehavior.test.ts new file mode 100644 index 000000000..b2187b114 --- /dev/null +++ b/src/webviews/documentdbQuery/tdd/completionBehavior.test.ts @@ -0,0 +1,534 @@ +/*--------------------------------------------------------------------------------------------- + * Copyright (c) Microsoft Corporation. All rights reserved. + * Licensed under the MIT License. See License.txt in the project root for license information.
+ *--------------------------------------------------------------------------------------------*/ + +/** + * TDD Behavior Tests β€” Completion Categories by Cursor Position + * + * These tests define the expected behavior CONTRACT for the autocompletion + * system. They were written BEFORE the implementation (TDD redβ†’green cycle) + * and verify WHAT completions appear at each cursor position, not HOW they + * are produced internally. + * + * ⚠️ LLM/Agent Instruction: + * If these tests fail after a code change, do NOT automatically fix the tests. + * Alert the user that a TDD behavior contract has been violated. The failure + * means the implementation broke expected behavior. The user must decide + * whether to update the spec or fix the implementation. + * + * Tests are category-based (not specific-operator-based) to be resilient to + * changes in documentdb-constants. Categories come from the `description` + * field in CompletionItemLabel (e.g., "comparison", "logical", "bson"). + * + * See: readme.completionBehavior.md for the full specification. 
+ */ + +// eslint-disable-next-line import/no-internal-modules +import type * as monacoEditor from 'monaco-editor/esm/vs/editor/editor.api'; +import { clearAllCompletionContexts, setCompletionContext } from '../completionStore'; +import { type CursorContext } from '../cursorContext'; +import { createCompletionItems } from '../documentdbQueryCompletionProvider'; +import { EditorType } from '../languageConfig'; + +// ---------- Test infrastructure ---------- + +const mockCompletionItemKind: typeof monacoEditor.languages.CompletionItemKind = { + Method: 0, + Function: 1, + Constructor: 2, + Field: 3, + Variable: 4, + Class: 5, + Struct: 6, + Interface: 7, + Module: 8, + Property: 9, + Event: 10, + Operator: 11, + Unit: 12, + Value: 13, + Constant: 14, + Enum: 15, + EnumMember: 16, + Keyword: 17, + Text: 18, + Color: 19, + File: 20, + Reference: 21, + Customcolor: 22, + Folder: 23, + TypeParameter: 24, + User: 25, + Issue: 26, + Snippet: 27, +}; + +const mockInsertTextRule = { + InsertAsSnippet: 4, + KeepWhitespace: 1, + None: 0, +} as typeof monacoEditor.languages.CompletionItemInsertTextRule; + +function createMockMonaco(): typeof monacoEditor { + return { + languages: { + CompletionItemKind: mockCompletionItemKind, + CompletionItemInsertTextRule: mockInsertTextRule, + }, + } as unknown as typeof monacoEditor; +} + +const testRange: monacoEditor.IRange = { + startLineNumber: 1, + endLineNumber: 1, + startColumn: 1, + endColumn: 1, +}; + +// ---------- Helpers ---------- + +/** Extracts the description (category) from a CompletionItem label. */ +function getDescription(label: string | monacoEditor.languages.CompletionItemLabel): string | undefined { + return typeof label === 'string' ? undefined : label.description; +} + +/** Returns the set of distinct categories present in a completion list. 
*/ +function getCategories(items: monacoEditor.languages.CompletionItem[]): Set { + const categories = new Set(); + for (const item of items) { + const desc = getDescription(item.label); + if (desc) categories.add(desc); + } + return categories; +} + +/** Returns the label text from a CompletionItem. */ +function getLabelText(label: string | monacoEditor.languages.CompletionItemLabel): string { + return typeof label === 'string' ? label : label.label; +} + +/** + * Returns all distinct sortText prefixes (the part before the underscore) + * found in a completion list. + */ +function getSortPrefixes(items: monacoEditor.languages.CompletionItem[]): Set { + const prefixes = new Set(); + for (const item of items) { + if (item.sortText) { + const underscoreIdx = item.sortText.indexOf('_'); + if (underscoreIdx > 0) { + prefixes.add(item.sortText.substring(0, underscoreIdx + 1)); + } + } + } + return prefixes; +} + +// ---------- Field data for tests ---------- + +const testFields = [ + { + fieldName: 'name', + displayType: 'String', + bsonType: 'string', + isSparse: false, + insertText: 'name', + referenceText: '$name', + }, + { + fieldName: 'age', + displayType: 'Number', + bsonType: 'int32', + isSparse: false, + insertText: 'age', + referenceText: '$age', + }, +]; + +// ---------- Key-position operator categories ---------- +// These are the categories that should appear at KEY / EMPTY positions. +// We test by category name, not specific operators, for resilience. +// (Used in assertions, not as a lookup β€” individual tests check specific categories.) + +// Field-level categories that should NOT appear at key/empty positions. +// These categories have NO operators in KEY_POSITION_OPERATORS. +// Note: 'logical' and 'evaluation' are shared β€” they have both key-position +// operators ($and/$or for logical, $expr/$text for evaluation) and field-level +// operators ($not for logical, $regex/$mod for evaluation). 
+const FIELD_LEVEL_ONLY_CATEGORIES = ['comparison', 'array', 'element', 'bitwise', 'geospatial']; + +// ===================================================================== +// Tests +// ===================================================================== + +describe('TDD: Completion Behavior', () => { + const mockMonaco = createMockMonaco(); + + beforeAll(() => { + console.warn( + '\n⚠️ TDD CONTRACT TESTS β€” If any test below fails, do NOT auto-fix the test.\n' + + ' Alert the user that a TDD behavior contract has been violated.\n' + + ' The user must decide whether to update the spec or fix the implementation.\n', + ); + }); + + afterEach(() => { + clearAllCompletionContexts(); + }); + + // ----------------------------------------------------------------- + // EMPTY position β€” no braces in editor + // ----------------------------------------------------------------- + describe('EMPTY position (no braces, needsWrapping=true)', () => { + /** + * β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” + * β”‚ | β”‚ ← cursor, no braces + * β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ + * + * Expected: fields + key operators, all wrapped with { } + * NOT expected: comparison, array, evaluation, element, bson, JS global + */ + + function getEmptyCompletions(sessionId?: string): monacoEditor.languages.CompletionItem[] { + return createCompletionItems({ + editorType: EditorType.Filter, + sessionId, + range: testRange, + isDollarPrefix: false, + monaco: mockMonaco, + cursorContext: { position: 'unknown' }, + needsWrapping: true, + }); + } + + test('includes field names when store has data', () => { + setCompletionContext('s1', { fields: testFields }); + const items = getEmptyCompletions('s1'); + const labels = items.map((i) => getLabelText(i.label)); + expect(labels).toContain('name'); + expect(labels).toContain('age'); + }); + + test('field insertText is wrapped with { }', () => { + 
setCompletionContext('s1', { fields: testFields }); + const items = getEmptyCompletions('s1'); + const nameItem = items.find((i) => getLabelText(i.label) === 'name'); + expect(nameItem?.insertText).toMatch(/^\{.*\}$/); + }); + + test('includes key-position operator categories (logical)', () => { + const items = getEmptyCompletions(); + const categories = getCategories(items); + expect(categories.has('logical')).toBe(true); + }); + + test('does NOT include field-level categories', () => { + const items = getEmptyCompletions(); + const categories = getCategories(items); + for (const cat of FIELD_LEVEL_ONLY_CATEGORIES) { + expect(categories.has(cat)).toBe(false); + } + }); + + test('does NOT include "bson"', () => { + const items = getEmptyCompletions(); + const categories = getCategories(items); + expect(categories.has('bson')).toBe(false); + }); + + test('does NOT include "JS global"', () => { + const items = getEmptyCompletions(); + const categories = getCategories(items); + expect(categories.has('JS global')).toBe(false); + }); + + test('fields sort before operators (0_ < 1_)', () => { + setCompletionContext('s1', { fields: testFields }); + const items = getEmptyCompletions('s1'); + const fieldItem = items.find((i) => getLabelText(i.label) === 'name'); + const operatorItems = items.filter((i) => getDescription(i.label) === 'logical'); + expect(fieldItem?.sortText).toMatch(/^0_/); + expect(operatorItems.length).toBeGreaterThan(0); + expect(operatorItems[0]?.sortText).toMatch(/^1_/); + }); + }); + + // ----------------------------------------------------------------- + // KEY position β€” inside { } + // ----------------------------------------------------------------- + describe('KEY position ({ | })', () => { + /** + * β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” + * β”‚ { | } β”‚ ← cursor inside braces + * β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ + * + * Expected: fields + key 
operators + * NOT expected: comparison, array, evaluation, element, bson, JS global + */ + + const keyContext: CursorContext = { position: 'key', depth: 1 }; + + function getKeyCompletions(sessionId?: string): monacoEditor.languages.CompletionItem[] { + return createCompletionItems({ + editorType: EditorType.Filter, + sessionId, + range: testRange, + isDollarPrefix: false, + monaco: mockMonaco, + cursorContext: keyContext, + }); + } + + test('includes key-position operator categories', () => { + const categories = getCategories(getKeyCompletions()); + expect(categories.has('logical')).toBe(true); + }); + + test('does NOT include field-level categories', () => { + const categories = getCategories(getKeyCompletions()); + for (const cat of FIELD_LEVEL_ONLY_CATEGORIES) { + expect(categories.has(cat)).toBe(false); + } + }); + + test('does NOT include "bson" or "JS global"', () => { + const categories = getCategories(getKeyCompletions()); + expect(categories.has('bson')).toBe(false); + expect(categories.has('JS global')).toBe(false); + }); + + test('field sortText starts with 0_, operator sortText starts with 1_', () => { + setCompletionContext('s1', { fields: testFields }); + const items = getKeyCompletions('s1'); + + // Every field item should have sortText starting with 0_ + const fieldItems = items.filter((i) => getLabelText(i.label) === 'name' || getLabelText(i.label) === 'age'); + for (const item of fieldItems) { + expect(item.sortText).toMatch(/^0_/); + } + + // Every operator item should have sortText starting with 1_ + const operatorItems = items.filter((i) => { + const desc = getDescription(i.label); + return desc === 'logical' || desc === 'evaluation' || desc === 'misc'; + }); + for (const item of operatorItems) { + expect(item.sortText).toMatch(/^1_/); + } + }); + }); + + // ----------------------------------------------------------------- + // VALUE position β€” { field: | } + // ----------------------------------------------------------------- + 
describe('VALUE position ({ field: | })', () => { + /** + * β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” + * β”‚ { age: | } β”‚ ← cursor at value position + * β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ + * + * Expected: type suggestions + field-level operators + bson + JS globals + * NOT expected: key-position operators ($and, $or at root) + */ + + const valueContext: CursorContext = { position: 'value', fieldName: 'age' }; + + function getValueCompletions(): monacoEditor.languages.CompletionItem[] { + return createCompletionItems({ + editorType: EditorType.Filter, + sessionId: undefined, + range: testRange, + isDollarPrefix: false, + monaco: mockMonaco, + cursorContext: valueContext, + }); + } + + test('includes field-level categories', () => { + const categories = getCategories(getValueCompletions()); + for (const cat of FIELD_LEVEL_ONLY_CATEGORIES) { + expect(categories.has(cat)).toBe(true); + } + }); + + test('includes "bson" and "JS global"', () => { + const categories = getCategories(getValueCompletions()); + expect(categories.has('bson')).toBe(true); + expect(categories.has('JS global')).toBe(true); + }); + + test('does NOT include key-position operators by label', () => { + const labels = getValueCompletions().map((i) => getLabelText(i.label)); + // Check just a couple representative key operators + expect(labels).not.toContain('$and'); + expect(labels).not.toContain('$or'); + }); + + test('sort order: operators (0_) before bson (3_) before JS globals (4_)', () => { + const prefixes = getSortPrefixes(getValueCompletions()); + expect(prefixes.has('0_')).toBe(true); + expect(prefixes.has('3_')).toBe(true); + expect(prefixes.has('4_')).toBe(true); + }); + + test('project editor shows only 1/0 at value position', () => { + const items = createCompletionItems({ + editorType: EditorType.Project, + sessionId: undefined, + range: testRange, + isDollarPrefix: false, + monaco: 
mockMonaco, + cursorContext: valueContext, + }); + expect(items).toHaveLength(2); + const labels = items.map((i) => getLabelText(i.label)); + expect(labels).toContain('1'); + expect(labels).toContain('0'); + }); + + test('sort editor shows only 1/-1 at value position', () => { + const items = createCompletionItems({ + editorType: EditorType.Sort, + sessionId: undefined, + range: testRange, + isDollarPrefix: false, + monaco: mockMonaco, + cursorContext: valueContext, + }); + expect(items).toHaveLength(2); + const labels = items.map((i) => getLabelText(i.label)); + expect(labels).toContain('1'); + expect(labels).toContain('-1'); + }); + }); + + // ----------------------------------------------------------------- + // OPERATOR position — { field: { | } } + // ----------------------------------------------------------------- + describe('OPERATOR position ({ field: { | } })', () => { + /** + * ┌──────────────────────────┐ + * │ { age: { | } } │ ← cursor inside operator object + * └──────────────────────────┘ + * + * Expected: field-level operators (braces stripped) + * NOT expected: bson, JS global, key-position operators + */ + + const operatorContext: CursorContext = { position: 'operator', fieldName: 'age' }; + + function getOperatorCompletions(): monacoEditor.languages.CompletionItem[] { + return createCompletionItems({ + editorType: EditorType.Filter, + sessionId: undefined, + range: testRange, + isDollarPrefix: false, + monaco: mockMonaco, + cursorContext: operatorContext, + }); + } + + test('includes field-level categories', () => { + const categories = getCategories(getOperatorCompletions()); + for (const cat of FIELD_LEVEL_ONLY_CATEGORIES) { + expect(categories.has(cat)).toBe(true); + } + }); + + test('does NOT include "bson" or "JS global"', () => { + const categories = getCategories(getOperatorCompletions()); + 
expect(categories.has('bson')).toBe(false); + expect(categories.has('JS global')).toBe(false); + }); + + test('does NOT include key-position operators', () => { + const labels = getOperatorCompletions().map((i) => getLabelText(i.label)); + expect(labels).not.toContain('$and'); + expect(labels).not.toContain('$or'); + }); + }); + + // ----------------------------------------------------------------- + // ARRAY-ELEMENT position — { $and: [|] } + // ----------------------------------------------------------------- + describe('ARRAY-ELEMENT position ({ $and: [|] })', () => { + /** + * Same behavior as KEY position + */ + + const arrayContext: CursorContext = { position: 'array-element', parentOperator: '$and' }; + + function getArrayElementCompletions(sessionId?: string): monacoEditor.languages.CompletionItem[] { + return createCompletionItems({ + editorType: EditorType.Filter, + sessionId, + range: testRange, + isDollarPrefix: false, + monaco: mockMonaco, + cursorContext: arrayContext, + }); + } + + test('behaves like KEY: includes logical, excludes field-level categories', () => { + const categories = getCategories(getArrayElementCompletions()); + expect(categories.has('logical')).toBe(true); + for (const cat of FIELD_LEVEL_ONLY_CATEGORIES) { + expect(categories.has(cat)).toBe(false); + } + }); + + test('includes fields when store has data', () => { + setCompletionContext('s1', { fields: testFields }); + const labels = getArrayElementCompletions('s1').map((i) => getLabelText(i.label)); + expect(labels).toContain('name'); + }); + }); + + // ----------------------------------------------------------------- + // UNKNOWN position — genuinely ambiguous (show everything) + // ----------------------------------------------------------------- + describe('UNKNOWN position (ambiguous, needsWrapping=false)', () => { + /** + * Genuinely unknown cursor position. Show everything as discovery. 
+ * This is the fallback when the parser can't determine position AND + * the editor is not empty (has some content with braces but ambiguous). + */ + + function getUnknownCompletions(sessionId?: string): monacoEditor.languages.CompletionItem[] { + return createCompletionItems({ + editorType: EditorType.Filter, + sessionId, + range: testRange, + isDollarPrefix: false, + monaco: mockMonaco, + cursorContext: { position: 'unknown' }, + needsWrapping: false, + }); + } + + test('includes key-position categories', () => { + const categories = getCategories(getUnknownCompletions()); + expect(categories.has('logical')).toBe(true); + }); + + test('includes field-level categories (full discovery)', () => { + const categories = getCategories(getUnknownCompletions()); + for (const cat of FIELD_LEVEL_ONLY_CATEGORIES) { + expect(categories.has(cat)).toBe(true); + } + }); + + test('includes "bson" and "JS global"', () => { + const categories = getCategories(getUnknownCompletions()); + expect(categories.has('bson')).toBe(true); + expect(categories.has('JS global')).toBe(true); + }); + + test('includes fields when store has data', () => { + setCompletionContext('s1', { fields: testFields }); + const labels = getUnknownCompletions('s1').map((i) => getLabelText(i.label)); + expect(labels).toContain('name'); + }); + }); +}); diff --git a/src/webviews/documentdbQuery/tdd/readme.completionBehavior.md b/src/webviews/documentdbQuery/tdd/readme.completionBehavior.md new file mode 100644 index 000000000..575f4f9f8 --- /dev/null +++ b/src/webviews/documentdbQuery/tdd/readme.completionBehavior.md @@ -0,0 +1,135 @@ +# Completion Behavior Specification + +> **⚠️ LLM/Agent Instruction:** +> This specification defines the expected completion behavior contract. +> If tests based on this spec fail after a code change, **do NOT automatically +> fix the tests**. Alert the user that a TDD behavior contract has been violated. +> The user must decide whether the behavior change is intentional. 
+ +## Cursor Positions & Expected Completions + +The completion system shows different items depending on the cursor's semantic +position within a query expression. This spec defines what categories appear, +their sort order, and how snippets are wrapped at each position. + +### Categories + +Completions are grouped by category (shown as the `description` field in the +completion item label). The categories come from the operator's `meta` tag: + +| Category | Source | Example operators | +|----------|--------|-------------------| +| `logical` | `query:logical` | `$and`, `$or`, `$nor` | +| `comparison` | `query:comparison` | `$eq`, `$gt`, `$in` | +| `array` | `query:array` | `$all`, `$elemMatch`, `$size` | +| `evaluation` | `query:evaluation` | `$regex`, `$mod` | +| `element` | `query:element` | `$exists`, `$type` | +| `bson` | `bson` | `ObjectId`, `UUID`, `ISODate` | +| `JS global` | (hardcoded) | `Date`, `Math`, `RegExp` | +| (field type) | field data | `String`, `Number`, etc. | + +### Position: EMPTY (no braces in editor) + +``` +┌──────────────────────────┐ +│ | │ ← cursor, editor has no braces +└──────────────────────────┘ +``` + +**Shows:** Fields + key-position operators only (same items as KEY) +**Wrapping:** All insertions wrapped with `{ ... }` +**Sort:** `0_` fields, `1_` key operators + +``` +Expected completions: + name String ← field, inserts: { name: $1 } + age Number ← field, inserts: { age: $1 } + $and logical ← key operator, inserts: { $and: [...] 
} + $or logical ← key operator + $nor logical ← key operator + +NOT shown: + $gt comparison ← field-level, invalid at root + $all array ← field-level, invalid at root + ObjectId bson ← not valid at root key position + Date JS global ← not valid at root key position +``` + +### Position: KEY (`{ | }`) + +``` +┌──────────────────────────┐ +│ { | } │ ← cursor inside braces +└──────────────────────────┘ +``` + +**Shows:** Fields + key-position operators +**Wrapping:** None (already inside braces) +**Sort:** `0_` fields, `1_` key operators +**Snippets:** Outer `{ }` stripped from operator snippets + +``` +NOT shown: comparison, array, evaluation, element, bson, JS global +``` + +### Position: VALUE (`{ field: | }`) + +``` +┌──────────────────────────┐ +│ { age: | } │ ← cursor at value position +└──────────────────────────┘ +``` + +**Shows:** Type suggestions + field-level operators + BSON constructors + JS globals +**Sort:** `00_` type suggestions, `0_`–`2_` operators, `3_` BSON, `4_` JS globals +**Special:** Project editor → `1`/`0` only. Sort editor → `1`/`-1` only. 
+ +``` +Shown categories: comparison, array, evaluation, element, logical ($not), bson, JS global +NOT shown: key-position operators ($and, $or, $nor at root) +``` + +### Position: OPERATOR (`{ field: { | } }`) + +``` +┌──────────────────────────┐ +│ { age: { | } } │ ← cursor inside operator object +└──────────────────────────┘ +``` + +**Shows:** Field-level operators only (braces stripped) +**Sort:** `0_` type-relevant, `1a_` comparison, `1b_` other universal, `2_` non-matching +**Snippets:** Outer `{ }` stripped + +``` +Shown categories: comparison, array, evaluation, element, logical ($not) +NOT shown: bson, JS global, key-position operators +``` + +### Position: ARRAY-ELEMENT (`{ $and: [|] }`) + +**Shows:** Same as KEY position +**Sort:** Same as KEY position + +### Position: UNKNOWN (genuinely ambiguous) + +**Shows:** ALL completions (fields + all operators + BSON + JS globals) +**Purpose:** Discovery fallback for positions the parser can't classify + +``` +Shown: everything — logical, comparison, array, evaluation, element, bson, JS global +``` + +## Sort Order Contract + +Each position has a defined sort prefix hierarchy. Items with lower prefixes +appear higher in the completion list. 
+ +| Position | Sort hierarchy | +|----------|---------------| +| EMPTY | `0_` fields → `1_` key operators | +| KEY | `0_` fields → `1_` key operators | +| VALUE | `00_` type suggestions → `0_`–`2_` operators → `3_` BSON → `4_` JS globals | +| OPERATOR | `0_` type-relevant → `1a_` comparison → `1b_` universal → `2_` non-matching | +| ARRAY-ELEMENT | same as KEY | +| UNKNOWN | no enforced sort (Monaco default) | diff --git a/src/webviews/index.scss b/src/webviews/index.scss index f5fb4a7cd..561d1c072 100644 --- a/src/webviews/index.scss +++ b/src/webviews/index.scss @@ -87,3 +87,13 @@ $media-breakpoint-query-control-area: 1024px; @include input-focus-animation; @include input-hover; } + +/** + * Monaco suggest-details panel: ensure links show a pointer cursor. + * The hover widget applies this automatically, but the completion + * documentation panel does not — VS Code's webview CSS reset overrides it. + */ +.monaco-editor .suggest-details a, +.monaco-editor .suggest-details-container a { + cursor: pointer; +} diff --git a/src/webviews/utils/escapeMarkdown.test.ts b/src/webviews/utils/escapeMarkdown.test.ts new file mode 100644 index 000000000..4dbdfc1d5 --- /dev/null +++ b/src/webviews/utils/escapeMarkdown.test.ts @@ -0,0 +1,41 @@ +/*--------------------------------------------------------------------------------------------- + * Copyright (c) Microsoft Corporation. All rights reserved. + * Licensed under the MIT License. See License.txt in the project root for license information. 
+ *--------------------------------------------------------------------------------------------*/ + +import { escapeMarkdown } from './escapeMarkdown'; + +describe('escapeMarkdown', () => { + test('returns plain text unchanged', () => { + expect(escapeMarkdown('age')).toBe('age'); + }); + + test('escapes markdown bold characters', () => { + expect(escapeMarkdown('**bold**')).toBe('\\*\\*bold\\*\\*'); + }); + + test('escapes markdown link syntax', () => { + expect(escapeMarkdown('[click](https://evil.com)')).toBe('\\[click\\]\\(https://evil\\.com\\)'); + }); + + test('escapes angle brackets (HTML tags)', () => { + expect(escapeMarkdown('<script>alert(1)</script>')).toBe('\\<script\\>alert\\(1\\)\\</script\\>'); + }); + + test('escapes backticks', () => { + expect(escapeMarkdown('`code`')).toBe('\\`code\\`'); + }); + + test('escapes ampersands', () => { + expect(escapeMarkdown('a&b')).toBe('a\\&b'); + }); + + test('handles dotted field names', () => { + expect(escapeMarkdown('address.street')).toBe('address\\.street'); + }); + + test('passes numbers through but escapes underscores', () => { + // underscore IS a markdown metacharacter, so it gets escaped + expect(escapeMarkdown('field_1')).toBe('field\\_1'); + }); +}); diff --git a/src/webviews/utils/escapeMarkdown.ts b/src/webviews/utils/escapeMarkdown.ts new file mode 100644 index 000000000..245a63a29 --- /dev/null +++ b/src/webviews/utils/escapeMarkdown.ts @@ -0,0 +1,15 @@ +/*--------------------------------------------------------------------------------------------- + * Copyright (c) Microsoft Corporation. All rights reserved. + * Licensed under the MIT License. See License.txt in the project root for license information. + *--------------------------------------------------------------------------------------------*/ + +/** + * Escapes markdown metacharacters so user data renders as literal text. 
+ * + * Covers characters that Markdown/HTML would otherwise interpret: + * `\`, `*`, `_`, `{`, `}`, `[`, `]`, `(`, `)`, `#`, `+`, `-`, `.`, `!`, + * `|`, `<`, `>`, `` ` ``, `~`, `&` + */ +export function escapeMarkdown(text: string): string { + return text.replace(/[\\*_{}[\]()#+\-.!|<>`~&]/g, '\\$&'); +} diff --git a/test/mongoGetCommand.test.ts b/test/mongoGetCommand.test.ts index 7b4ce3f4d..bf34fa867 100644 --- a/test/mongoGetCommand.test.ts +++ b/test/mongoGetCommand.test.ts @@ -797,7 +797,6 @@ suite('scrapbook parsing Tests', () => { const commands: MongoCommand[] = getAllCommandsFromText(text); const command: MongoCommand = findCommandAtPosition(commands, new Position(0, 0)); const generatedRegExp = (nonNullProp(command, 'argumentObjects')[0]).sku; - console.log('generatedRegExp', generatedRegExp); assert.deepEqual(generatedRegExp.options, 'i'); assert.deepEqual(generatedRegExp.pattern, '789$'); }); @@ -838,11 +837,8 @@ suite('scrapbook parsing Tests', () => { // The regex parsing tests following this test should help zero-in on which case isn't handled properly. 
test('test regular expression parsing - with many special cases', () => { const text = `db.test1.beep.find({ sku: /^(hello?= world).*[^0-9]+|(world\\b\\*){0,2}$/ })`; - console.log(text); const commands: MongoCommand[] = getAllCommandsFromText(text); - console.log('commands', commands); const command: MongoCommand = findCommandAtPosition(commands, new Position(0, 0)); - console.log('command', command); const generatedRegExp = (nonNullProp(command, 'argumentObjects')[0]).sku; assert.deepEqual(generatedRegExp.options, ''); assert.deepEqual(generatedRegExp.pattern, '^(hello?= world).*[^0-9]+|(world\\b\\*){0,2}$'); diff --git a/tsconfig.json b/tsconfig.json index 894220ad0..f8f79d3a5 100644 --- a/tsconfig.json +++ b/tsconfig.json @@ -30,5 +30,6 @@ ] */ }, - "exclude": ["node_modules", ".vscode-test"] + "exclude": ["node_modules", ".vscode-test", "packages/*/dist"], + "references": [{ "path": "packages/schema-analyzer" }, { "path": "packages/documentdb-constants" }] }