diff --git a/.github/copilot-instructions.md b/.github/copilot-instructions.md
index e5a438ee1..aeb7ed884 100644
--- a/.github/copilot-instructions.md
+++ b/.github/copilot-instructions.md
@@ -1,6 +1,6 @@
 # GitHub Copilot Instructions for vscode-documentdb
 
-VS Code Extension for Azure Cosmos DB and MongoDB. TypeScript (strict mode), React webviews, Jest testing.
+VS Code Extension for Azure Cosmos DB and the MongoDB API. TypeScript (strict mode), React webviews, Jest testing.
 
 ## Critical Build Commands
 
@@ -32,6 +32,12 @@ Before finishing work on a PR, agents **must** run the following steps in order:
 
 > ⚠️ **An agent must not finish or terminate until all three steps above have been run and pass successfully.** Skipping these steps leads to CI failures.
 
+## Git Safety
+
+- **Never use `git add -f` (or `--force`)** to force-add files. If `git add` refuses a file, it is likely in `.gitignore` for a reason (e.g., `docs/plan/`, `docs/analysis/`, build outputs). Do NOT override this with `-f`/`--force`.
+- When `git add` warns that a path is ignored, **stop and inform the user** instead of force-adding.
+- Files in `docs/plan/` and `docs/analysis/` are **local planning documents** that must not be committed to the repository.
+
 ## Project Structure
 
 | Folder          | Purpose                                    |
@@ -178,6 +184,32 @@ For Discovery View, both `treeId` and `clusterId` are sanitized (all `/` replace
 
 See `src/tree/models/BaseClusterModel.ts` and `docs/analysis/08-cluster-model-simplification-plan.md` for details.
 
+## Terminology
+
+This is a **DocumentDB** extension that uses the **MongoDB-compatible wire protocol**.
+
+- Use **"DocumentDB"** when referring to the database service itself.
+- Use **"MongoDB API"** or **"DocumentDB API"** when referring to the wire protocol, query language, or API compatibility layer.
+- **Never use "MongoDB" alone** as a product name in code, comments, docs, or user-facing strings.
+
+| ✅ Do                                                | ❌ Don't                         |
+| ---------------------------------------------------- | -------------------------------- |
+| `// Query operators supported by the DocumentDB API` | `// MongoDB query operators`     |
+| `// BSON types per the MongoDB API spec`             | `// Uses MongoDB's $match stage` |
+| `documentdbQuery` (variable name)                    | `mongoQuery`                     |
+
+This applies to: code comments, JSDoc/TSDoc, naming (prefer `documentdb` prefix), user-facing strings, docs, and test descriptions.
+
+## TDD Contract Tests
+
+Test suites prefixed with `TDD:` (e.g., `describe('TDD: Completion Behavior', ...)`) are **behavior contracts** written before the implementation. If a `TDD:` test fails after a code change:
+
+1. **Do NOT automatically edit the test to make it pass.**
+2. **Stop and ask the user** whether the behavior change is intentional.
+3. The user decides: update the contract (test) or fix the implementation.
+
+This applies to any test whose name starts with `TDD:`, regardless of folder location.
+
 ## Additional Patterns
 
 For detailed patterns, see:
diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml
index ac02668fb..7ea04b6ef 100644
--- a/.github/workflows/main.yml
+++ b/.github/workflows/main.yml
@@ -55,6 +55,9 @@ jobs:
             - name: 📦 Install Dependencies (npm ci)
               run: npm ci --prefer-offline --no-audit --no-fund --progress=false --verbose
 
+            - name: 🔨 Build Workspace Packages
+              run: npm run build --workspaces --if-present
+
             - name: 🌐 Check Localization Files
               run: npm run l10n:check
 
diff --git a/.gitignore b/.gitignore
index e0bf99748..eddc516fd 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,6 +1,9 @@
 ## Ignore Visual Studio temporary files, build results, and
 ## files generated by popular Visual Studio add-ons.
 
+/docs/analysis/
+/docs/plan/
+
 # User-specific files
 *.suo
 *.user
@@ -157,6 +160,9 @@ PublishScripts/
 **/packages/*
 # except build/, which is used as an MSBuild target.
 !**/packages/build/
+# Include our monorepo packages at the root
+!/packages/
+!/packages/**
 # Uncomment if necessary however generally it will be regenerated when needed
 #!**/packages/repositories.config
 # NuGet v3's project.json files produces more ignoreable files
@@ -268,6 +274,7 @@ dist
 stats.json
 *.tgz
 *.zip
+*.tsbuildinfo
 
 # Scrapbooks
 *.mongo
diff --git a/.vscode/settings.json b/.vscode/settings.json
index 9bcd06e4a..a6a027735 100644
--- a/.vscode/settings.json
+++ b/.vscode/settings.json
@@ -1,33 +1,35 @@
 {
-    "editor.codeActionsOnSave": {
-        "source.fixAll.eslint": "explicit",
-        "source.organizeImports": "explicit"
-    },
-    "editor.detectIndentation": false,
-    "editor.formatOnSave": true,
-    "editor.formatOnPaste": false,
-    "editor.insertSpaces": true,
-    "editor.tabSize": 4,
-    "editor.defaultFormatter": "esbenp.prettier-vscode",
-    "files.insertFinalNewline": true,
-    "files.trimTrailingWhitespace": true,
-    "search.exclude": {
-        "out": true,
-        "**/node_modules": true,
-        ".vscode-test": true
-    },
-    "typescript.preferences.importModuleSpecifier": "relative",
-    "typescript.tsdk": "node_modules/typescript/lib",
-    "antlr4.generation": {
-        // Settings for "ANTLR4 grammar syntax support" extension
-        "mode": "internal",
-        "listeners": true,
-        "visitors": false
-    },
-    "vscode-nmake-tools.workspaceBuildDirectories": ["."],
-    "vscode-nmake-tools.installOsRepoRustHelperExtension": false,
-    "sarif-viewer.connectToGithubCodeScanning": "off"
-    // "eslint.workingDirectories": [
-    //     ".", "src"
-    //   ]
+  "editor.codeActionsOnSave": {
+    "source.fixAll.eslint": "explicit",
+    "source.organizeImports": "explicit"
+  },
+  "editor.detectIndentation": false,
+  "editor.formatOnSave": true,
+  "editor.formatOnPaste": false,
+  "editor.insertSpaces": true,
+  "editor.tabSize": 4,
+  "editor.defaultFormatter": "esbenp.prettier-vscode",
+  "files.insertFinalNewline": true,
+  "files.trimTrailingWhitespace": true,
+  "search.exclude": {
+    "out": true,
+    "**/node_modules": true,
+    ".vscode-test": true
+  },
+  "typescript.preferences.importModuleSpecifier": "relative",
+  "typescript.tsdk": "node_modules/typescript/lib",
+  "antlr4.generation": {
+    // Settings for "ANTLR4 grammar syntax support" extension
+    "mode": "internal",
+    "listeners": true,
+    "visitors": false
+  },
+  "vscode-nmake-tools.workspaceBuildDirectories": ["."],
+  "vscode-nmake-tools.installOsRepoRustHelperExtension": false,
+  "sarif-viewer.connectToGithubCodeScanning": "off",
+  "jest.runMode": "deferred",
+  "testing.automaticallyOpenTestResults": "neverOpen"
+  // "eslint.workingDirectories": [
+  //     ".", "src"
+  //   ]
 }
diff --git a/jest.config.js b/jest.config.js
index 7ad26361a..ca22ee433 100644
--- a/jest.config.js
+++ b/jest.config.js
@@ -1,11 +1,18 @@
 /** @type {import('ts-jest').JestConfigWithTsJest} **/
 module.exports = {
-    testEnvironment: 'node',
-    testMatch: ['<rootDir>/src/**/*.test.ts'],
-    transform: {
-        '^.+.tsx?$': ['ts-jest', {}],
-    },
     // Limit workers to avoid OOM kills on machines with many cores.
     // Each ts-jest worker loads the TypeScript compiler and consumes ~500MB+.
     maxWorkers: '50%',
+    projects: [
+        {
+            displayName: 'extension',
+            testEnvironment: 'node',
+            testMatch: ['<rootDir>/src/**/*.test.ts'],
+            transform: {
+                '^.+\\.tsx?$': ['ts-jest', {}],
+            },
+        },
+        '<rootDir>/packages/schema-analyzer',
+        '<rootDir>/packages/documentdb-constants',
+    ],
 };
diff --git a/l10n/bundle.l10n.json b/l10n/bundle.l10n.json
index 3e9519d4e..59a69f54b 100644
--- a/l10n/bundle.l10n.json
+++ b/l10n/bundle.l10n.json
@@ -415,7 +415,6 @@
   "Error running process: ": "Error running process: ",
   "Error saving the document": "Error saving the document",
   "Error validating collection name availability: {0}": "Error validating collection name availability: {0}",
-  "Error while loading the autocompletion data": "Error while loading the autocompletion data",
   "Error while loading the data": "Error while loading the data",
   "Error while loading the document": "Error while loading the document",
   "Error while refreshing the document": "Error while refreshing the document",
@@ -521,7 +520,7 @@
   "Failed to validate source collection: {0}": "Failed to validate source collection: {0}",
   "Failed with code \"{0}\".": "Failed with code \"{0}\".",
   "Fair": "Fair",
-  "Filter: Enter the DocumentDB query filter in JSON format": "Filter: Enter the DocumentDB query filter in JSON format",
+  "Filter: Enter the DocumentDB query filter": "Filter: Enter the DocumentDB query filter",
   "Find Query": "Find Query",
   "Finished importing": "Finished importing",
   "Folder name cannot be empty": "Folder name cannot be empty",
@@ -616,7 +615,7 @@
   "Invalid Connection String: {error}": "Invalid Connection String: {error}",
   "Invalid connection type selected.": "Invalid connection type selected.",
   "Invalid document ID: {0}": "Invalid document ID: {0}",
-  "Invalid filter syntax: {0}. Please use valid JSON, for example: { \"name\": \"value\" }": "Invalid filter syntax: {0}. Please use valid JSON, for example: { \"name\": \"value\" }",
+  "Invalid filter syntax: {0}. Please use valid JSON or a DocumentDB API expression, for example: { name: \"value\" }": "Invalid filter syntax: {0}. Please use valid JSON or a DocumentDB API expression, for example: { name: \"value\" }",
   "Invalid folder type.": "Invalid folder type.",
   "Invalid mongoShell command format": "Invalid mongoShell command format",
   "Invalid node type.": "Invalid node type.",
@@ -624,10 +623,10 @@
   "Invalid payload for drop index action": "Invalid payload for drop index action",
   "Invalid payload for modify index action": "Invalid payload for modify index action",
   "Invalid projection syntax: {0}": "Invalid projection syntax: {0}",
-  "Invalid projection syntax: {0}. Please use valid JSON, for example: { \"fieldName\": 1 }": "Invalid projection syntax: {0}. Please use valid JSON, for example: { \"fieldName\": 1 }",
+  "Invalid projection syntax: {0}. Please use valid JSON or a DocumentDB API expression, for example: { fieldName: 1 }": "Invalid projection syntax: {0}. Please use valid JSON or a DocumentDB API expression, for example: { fieldName: 1 }",
   "Invalid semver \"{0}\".": "Invalid semver \"{0}\".",
   "Invalid sort syntax: {0}": "Invalid sort syntax: {0}",
-  "Invalid sort syntax: {0}. Please use valid JSON, for example: { \"fieldName\": 1 }": "Invalid sort syntax: {0}. Please use valid JSON, for example: { \"fieldName\": 1 }",
+  "Invalid sort syntax: {0}. Please use valid JSON or a DocumentDB API expression, for example: { fieldName: 1 }": "Invalid sort syntax: {0}. Please use valid JSON or a DocumentDB API expression, for example: { fieldName: 1 }",
   "It could be better": "It could be better",
   "It looks like there aren't any other folders to move these items into.\nYou might want to create a new folder first.\n\nNote: You can't move items between 'DocumentDB Local' and regular connections.": "It looks like there aren't any other folders to move these items into.\nYou might want to create a new folder first.\n\nNote: You can't move items between 'DocumentDB Local' and regular connections.",
   "item": "item",
@@ -721,7 +720,6 @@
   "No matching resources found.": "No matching resources found.",
   "No node selected.": "No node selected.",
   "No parent folder selected.": "No parent folder selected.",
-  "No properties found in the schema at path \"{0}\"": "No properties found in the schema at path \"{0}\"",
   "No public connectivity": "No public connectivity",
   "No result returned from the MongoDB shell.": "No result returned from the MongoDB shell.",
   "No results found": "No results found",
diff --git a/package-lock.json b/package-lock.json
index ade59941e..8a951e8fb 100644
--- a/package-lock.json
+++ b/package-lock.json
@@ -8,6 +8,9 @@
       "name": "vscode-documentdb",
       "version": "0.7.2",
       "license": "SEE LICENSE IN LICENSE.md",
+      "workspaces": [
+        "packages/*"
+      ],
       "dependencies": {
         "@azure/arm-compute": "^22.4.0",
         "@azure/arm-cosmosdb": "~16.4.0",
@@ -24,9 +27,14 @@
         "@microsoft/vscode-azureresources-api": "~2.5.0",
         "@monaco-editor/react": "~4.7.0",
         "@mongodb-js/explain-plan-helper": "1.4.24",
+        "@mongodb-js/shell-bson-parser": "^1.5.6",
         "@trpc/client": "~11.10.0",
         "@trpc/server": "~11.10.0",
+        "@vscode-documentdb/documentdb-constants": "*",
+        "@vscode-documentdb/schema-analyzer": "*",
         "@vscode/l10n": "~0.0.18",
+        "acorn": "^8.16.0",
+        "acorn-walk": "^8.3.5",
         "antlr4ts": "^0.5.0-alpha.4",
         "bson": "~7.0.0",
         "denque": "~2.1.0",
@@ -4199,28 +4207,6 @@
         "mongodb-explain-compat": "^3.3.23"
       }
     },
-    "node_modules/@mongodb-js/explain-plan-helper/node_modules/@mongodb-js/shell-bson-parser": {
-      "version": "1.4.0",
-      "resolved": "https://registry.npmjs.org/@mongodb-js/shell-bson-parser/-/shell-bson-parser-1.4.0.tgz",
-      "integrity": "sha512-3HO90liE6pmEuUMi7SWR1HooVk23/jfx5iaBZHo250iYyF5uaqssepBGRF7J/14pmgTSwIGrrDd5rQtBYrY7wA==",
-      "license": "Apache-2.0",
-      "dependencies": {
-        "acorn": "^8.14.1"
-      },
-      "peerDependencies": {
-        "bson": "^4.6.3 || ^5 || ^6"
-      }
-    },
-    "node_modules/@mongodb-js/explain-plan-helper/node_modules/bson": {
-      "version": "6.10.4",
-      "resolved": "https://registry.npmjs.org/bson/-/bson-6.10.4.tgz",
-      "integrity": "sha512-WIsKqkSC0ABoBJuT1LEX+2HEvNmNKKgnTAyd0fL8qzK4SH2i9NXg+t08YtdZp/V9IZ33cxe3iV4yM0qg8lMQng==",
-      "license": "Apache-2.0",
-      "peer": true,
-      "engines": {
-        "node": ">=16.20.1"
-      }
-    },
     "node_modules/@mongodb-js/saslprep": {
       "version": "1.3.2",
       "resolved": "https://registry.npmjs.org/@mongodb-js/saslprep/-/saslprep-1.3.2.tgz",
@@ -4230,6 +4216,18 @@
         "sparse-bitfield": "^3.0.3"
       }
     },
+    "node_modules/@mongodb-js/shell-bson-parser": {
+      "version": "1.5.6",
+      "resolved": "https://registry.npmjs.org/@mongodb-js/shell-bson-parser/-/shell-bson-parser-1.5.6.tgz",
+      "integrity": "sha512-yzVLeOkRSE+r8scrDMJjL9zTSzypU/TLxF+INQLs3yQX9a2R6IfBDDqdSVFyHVWv1FhZN0lVeqEWTsX+Iz5BaA==",
+      "license": "Apache-2.0",
+      "dependencies": {
+        "acorn": "^8.14.1"
+      },
+      "peerDependencies": {
+        "bson": "^4.6.3 || ^5 || ^6.10.3 || ^7.0.0"
+      }
+    },
     "node_modules/@napi-rs/nice": {
       "version": "1.1.1",
       "resolved": "https://registry.npmjs.org/@napi-rs/nice/-/nice-1.1.1.tgz",
@@ -7284,6 +7282,14 @@
         "win32"
       ]
     },
+    "node_modules/@vscode-documentdb/documentdb-constants": {
+      "resolved": "packages/documentdb-constants",
+      "link": true
+    },
+    "node_modules/@vscode-documentdb/schema-analyzer": {
+      "resolved": "packages/schema-analyzer",
+      "link": true
+    },
     "node_modules/@vscode/extension-telemetry": {
       "version": "0.9.9",
       "resolved": "https://registry.npmjs.org/@vscode/extension-telemetry/-/extension-telemetry-0.9.9.tgz",
@@ -8049,9 +8055,9 @@
       }
     },
     "node_modules/acorn": {
-      "version": "8.15.0",
-      "resolved": "https://registry.npmjs.org/acorn/-/acorn-8.15.0.tgz",
-      "integrity": "sha512-NZyJarBfL7nWwIq+FDL6Zp/yHEhePMNnnJ0y3qfieCrmNvYct8uvtiV41UvlSe6apAfk0fY1FbWx+NwfmpvtTg==",
+      "version": "8.16.0",
+      "resolved": "https://registry.npmjs.org/acorn/-/acorn-8.16.0.tgz",
+      "integrity": "sha512-UVJyE9MttOsBQIDKw1skb9nAwQuR5wuGD3+82K6JgJlm/Y+KI92oNsMNGZCYdDsVtRHSak0pcV5Dno5+4jh9sw==",
       "license": "MIT",
       "bin": {
         "acorn": "bin/acorn"
@@ -8084,10 +8090,9 @@
       }
     },
     "node_modules/acorn-walk": {
-      "version": "8.3.4",
-      "resolved": "https://registry.npmjs.org/acorn-walk/-/acorn-walk-8.3.4.tgz",
-      "integrity": "sha512-ueEepnujpqee2o5aIYnvHU6C0A42MNdsIDeqy5BydrkuC5R1ZuUFnm27EeFJGoEHJQgn3uleRvmTXaJgfXbt4g==",
-      "dev": true,
+      "version": "8.3.5",
+      "resolved": "https://registry.npmjs.org/acorn-walk/-/acorn-walk-8.3.5.tgz",
+      "integrity": "sha512-HEHNfbars9v4pgpW6SO1KSPkfoS0xVOM/9UzkJltjlsHZmJasxg8aXkuZa7SMf8vKGIBhpUsPluQSqhJFCqebw==",
       "license": "MIT",
       "dependencies": {
         "acorn": "^8.11.0"
@@ -22232,6 +22237,22 @@
         "type": "github",
         "url": "https://github.com/sponsors/wooorm"
       }
+    },
+    "packages/documentdb-constants": {
+      "name": "@vscode-documentdb/documentdb-constants",
+      "version": "1.0.0",
+      "license": "MIT"
+    },
+    "packages/schema-analyzer": {
+      "name": "@vscode-documentdb/schema-analyzer",
+      "version": "1.0.0",
+      "license": "MIT",
+      "dependencies": {
+        "denque": "~2.1.0"
+      },
+      "peerDependencies": {
+        "mongodb": ">=6.0.0"
+      }
     }
   }
 }
diff --git a/package.json b/package.json
index e02878042..2577d0f69 100644
--- a/package.json
+++ b/package.json
@@ -46,6 +46,9 @@
     "type": "git",
     "url": "https://github.com/microsoft/vscode-documentdb"
   },
+  "workspaces": [
+    "packages/*"
+  ],
   "main": "./main",
   "l10n": "./l10n",
   "activationEvents": [
@@ -55,8 +58,9 @@
     "onUri"
   ],
   "scripts": {
+    "prebuild": "npm run build --workspaces --if-present",
     "build": "tsc",
-    "clean": "git clean -dfx",
+    "clean": "rimraf out dist coverage && npm run clean --workspaces --if-present",
     "compile": "tsc -watch",
     "package": "run-script-os",
     "package:win32": "npm run webpack-prod && cd dist && npm pkg delete \"scripts.vscode:prepublish\" && npx vsce package --no-dependencies --out ../%npm_package_name%-%npm_package_version%.vsix",
@@ -66,10 +70,11 @@
     "package-prerelease:default": "npm run webpack-prod && cd dist && npm pkg delete \"scripts.vscode:prepublish\" && npx vsce package --pre-release --no-dependencies --out ../${npm_package_name}-${npm_package_version}-pre-release.vsix",
     "lint": "eslint --quiet .",
     "lint-fix": "eslint . --fix",
-    "prettier": "prettier -c \"(src|test|l10n|grammar|docs)/**/*.@(js|ts|jsx|tsx|json)\" \"./*.@(js|ts|jsx|tsx|json)\"",
-    "prettier-fix": "prettier -w \"(src|test|l10n|grammar|docs)/**/*.@(js|ts|jsx|tsx|json)\" \"./*.@(js|ts|jsx|tsx|json)\"",
+    "prettier": "prettier -c \"(src|test|l10n|grammar|docs|packages)/**/*.@(js|ts|jsx|tsx|json)\" \"./*.@(js|ts|jsx|tsx|json)\"",
+    "prettier-fix": "prettier -w \"(src|test|l10n|grammar|docs|packages)/**/*.@(js|ts|jsx|tsx|json)\" \"./*.@(js|ts|jsx|tsx|json)\"",
     "pretest": "npm run build",
     "test": "vscode-test",
+    "prejesttest": "npm run build --workspaces --if-present",
     "jesttest": "jest",
     "update-grammar": "antlr4ts -visitor ./grammar/mongo.g4 -o src/documentdb/grammar",
     "webpack-dev": "rimraf ./dist && npm run webpack-dev-ext && npm run webpack-dev-wv",
@@ -162,9 +167,14 @@
     "@microsoft/vscode-azureresources-api": "~2.5.0",
     "@monaco-editor/react": "~4.7.0",
     "@mongodb-js/explain-plan-helper": "1.4.24",
+    "@mongodb-js/shell-bson-parser": "^1.5.6",
     "@trpc/client": "~11.10.0",
     "@trpc/server": "~11.10.0",
+    "@vscode-documentdb/documentdb-constants": "*",
+    "@vscode-documentdb/schema-analyzer": "*",
     "@vscode/l10n": "~0.0.18",
+    "acorn": "^8.16.0",
+    "acorn-walk": "^8.3.5",
     "antlr4ts": "^0.5.0-alpha.4",
     "bson": "~7.0.0",
     "denque": "~2.1.0",
diff --git a/packages/documentdb-constants/README.md b/packages/documentdb-constants/README.md
new file mode 100644
index 000000000..4835cdcc2
--- /dev/null
+++ b/packages/documentdb-constants/README.md
@@ -0,0 +1,85 @@
+# @vscode-documentdb/documentdb-constants
+
+Static operator metadata for all DocumentDB-supported operators, aggregation stages, accumulators, update operators, BSON type constructors, and system variables.
+
+> **Monorepo package** — this package is part of the `vscode-documentdb` workspace.
+> Dev dependencies (Jest, ts-jest, Prettier, ts-node, etc.) are provided by the
+> root `package.json`. Always install from the repository root:
+>
+> ```bash
+> cd <repo-root>
+> npm install
+> ```
+
+## Purpose
+
+This package is the **single source of truth** for operator metadata when the connected database is DocumentDB. It provides:
+
+- `OperatorEntry` objects with value, description, snippet, documentation link, and type metadata
+- Meta-tag based filtering (`getFilteredCompletions()`) for context-aware autocompletion
+- Convenience presets for common completion contexts (filter bar, aggregation pipeline, etc.)
+- Documentation URL generation (`getDocLink()`)
+
+## Data Source
+
+All operator data is derived from the official DocumentDB documentation:
+
+- **Compatibility reference:** [DocumentDB Query Language Compatibility](https://learn.microsoft.com/en-us/azure/documentdb/compatibility-query-language) — lists every operator with its support status across DocumentDB versions 5.0–8.0.
+- **Per-operator docs:** [DocumentDB Operators](https://learn.microsoft.com/en-us/azure/documentdb/operators/) — individual pages with descriptions and syntax for each operator.
+- **Source repository:** [MicrosoftDocs/azure-databases-docs](https://github.com/MicrosoftDocs/azure-databases-docs) — the GitHub repo containing the raw Markdown source for all documentation pages above (under `articles/documentdb/`).
+
+The scraper (`scripts/scrape-operator-docs.ts`) fetches data from these sources and generates the `resources/scraped/operator-reference.md` dump file that serves as the contract between the documentation and the TypeScript implementation.
+
+## Usage
+
+```typescript
+import {
+  getFilteredCompletions,
+  getAllCompletions,
+  FILTER_COMPLETION_META,
+  STAGE_COMPLETION_META,
+} from '@vscode-documentdb/documentdb-constants';
+
+// Get operators for a filter/query context
+const filterOps = getFilteredCompletions({ meta: FILTER_COMPLETION_META });
+
+// Get operators for a specific BSON type
+const stringOps = getFilteredCompletions({
+  meta: FILTER_COMPLETION_META,
+  bsonTypes: ['string'],
+});
+
+// Get all stage names
+const stages = getFilteredCompletions({ meta: STAGE_COMPLETION_META });
+```
+
+## Scraper
+
+The operator data is sourced from the official DocumentDB documentation. To re-scrape:
+
+```bash
+npm run scrape --workspace=@vscode-documentdb/documentdb-constants
+```
+
+This runs the scraper and then formats the output with Prettier. The scraper:
+
+1. **Verifies** upstream doc structure (early fail-fast)
+2. **Extracts** all operators from the [compatibility page](https://learn.microsoft.com/en-us/azure/documentdb/compatibility-query-language)
+3. **Fetches** per-operator documentation (descriptions, syntax) with a global file index fallback for operators filed in unexpected directories
+4. **Generates** `resources/scraped/operator-reference.md` in a structured heading format (`### $operator` with description, syntax, and doc link)
+
+The dump serves as the authoritative reference for the TypeScript implementation. A Jest test (`src/operatorReference.test.ts`) validates that the implementation matches the dump.
+
+## Structure
+
+| File                                        | Purpose                                      |
+| ------------------------------------------- | -------------------------------------------- |
+| `src/types.ts`                              | `OperatorEntry` interface and `MetaTag` type |
+| `src/metaTags.ts`                           | Meta tag constants and completion presets    |
+| `src/docLinks.ts`                           | Documentation URL generation                 |
+| `src/getFilteredCompletions.ts`             | Primary consumer API: filter by meta tags    |
+| `src/index.ts`                              | Barrel exports for all public API            |
+| `resources/scraped/operator-reference.md`   | Auto-generated scraped operator dump         |
+| `resources/overrides/operator-overrides.md` | Hand-maintained overrides                    |
+| `resources/overrides/operator-snippets.md`  | Snippet templates per category               |
+| `scripts/scrape-operator-docs.ts`           | Scraper script                               |
diff --git a/packages/documentdb-constants/jest.config.js b/packages/documentdb-constants/jest.config.js
new file mode 100644
index 000000000..a39810b1f
--- /dev/null
+++ b/packages/documentdb-constants/jest.config.js
@@ -0,0 +1,11 @@
+/** @type {import('ts-jest').JestConfigWithTsJest} **/
+module.exports = {
+    // Limit workers to avoid OOM kills on machines with many cores.
+    // Each ts-jest worker loads the TypeScript compiler and consumes ~500MB+.
+    maxWorkers: '50%',
+    testEnvironment: 'node',
+    testMatch: ['<rootDir>/src/**/*.test.ts'],
+    transform: {
+        '^.+\\.tsx?$': ['ts-jest', {}],
+    },
+};
diff --git a/packages/documentdb-constants/package.json b/packages/documentdb-constants/package.json
new file mode 100644
index 000000000..99f3fb017
--- /dev/null
+++ b/packages/documentdb-constants/package.json
@@ -0,0 +1,25 @@
+{
+  "name": "@vscode-documentdb/documentdb-constants",
+  "version": "1.0.0",
+  "description": "Static operator metadata for DocumentDB-supported operators, stages, accumulators, and BSON constructors",
+  "main": "dist/index.js",
+  "types": "dist/index.d.ts",
+  "files": [
+    "dist"
+  ],
+  "scripts": {
+    "build": "tsc -p . && tsc -p tsconfig.scripts.json --noEmit",
+    "clean": "rimraf dist tsconfig.tsbuildinfo",
+    "test": "jest --config jest.config.js",
+    "prettier-fix": "prettier -w \"(scripts|src)/**/*.@(js|ts|jsx|tsx|json|md)\" \"./*.@(js|ts|jsx|tsx|json|md)\"",
+    "scrape": "ts-node scripts/scrape-operator-docs.ts && prettier --write resources/scraped/operator-reference.md",
+    "generate": "ts-node scripts/generate-from-reference.ts",
+    "evaluate": "ts-node scripts/evaluate-overrides.ts"
+  },
+  "repository": {
+    "type": "git",
+    "url": "https://github.com/microsoft/vscode-documentdb",
+    "directory": "packages/documentdb-constants"
+  },
+  "license": "MIT"
+}
diff --git a/packages/documentdb-constants/resources/overrides/operator-overrides.md b/packages/documentdb-constants/resources/overrides/operator-overrides.md
new file mode 100644
index 000000000..2fe988737
--- /dev/null
+++ b/packages/documentdb-constants/resources/overrides/operator-overrides.md
@@ -0,0 +1,321 @@
+# DocumentDB Operator Reference — Overrides
+
+<!-- MANUALLY MAINTAINED -->
+<!-- This file provides overrides for scraped/operator-reference.md.            -->
+<!-- The generator (scripts/generate-from-reference.ts) merges these on top of -->
+<!-- the scraped data. Any field specified here wins over the scraped value. -->
+<!--                                                                           -->
+<!-- Use cases:                                                                -->
+<!--   1. Fill in descriptions/syntax for operators whose doc pages returned   -->
+<!--      404 during scraping (empty fields in the dump).                      -->
+<!--   2. Replace a scraped description with a better hand-written one.        -->
+<!--   3. Add or override snippets for specific operators.                     -->
+<!--                                                                           -->
+<!-- Format: same as scraped/operator-reference.md                             -->
+<!--   ## Category Name         (must match a category in the dump exactly)    -->
+<!--   ### $operatorName         (must match an operator in that category)     -->
+<!--   - **Description:** ...   (overrides description)                        -->
+<!--   - **Snippet:** ...       (overrides the generated snippet)             -->
+<!--   - **Doc Link:** ...      (overrides the doc link)                       -->
+<!--   - **Standalone:** false  (marks operator as not valid standalone)        -->
+<!--                                                                           -->
+<!-- Only fields you include are overridden; omitted fields keep their         -->
+<!-- scraped or generated values.                                              -->
+
+---
+
+## String Expression Operators
+
+### $concat
+
+- **Description:** Concatenates two or more strings and returns the resulting string.
+
+### $indexOfBytes
+
+- **Description:** Returns the byte index of the first occurrence of a substring within a string.
+
+### $indexOfCP
+
+- **Description:** Returns the code point index of the first occurrence of a substring within a string.
+
+### $ltrim
+
+- **Description:** Removes whitespace or specified characters from the beginning of a string.
+
+### $regexFind
+
+- **Description:** Applies a regular expression to a string and returns the first match.
+
+### $regexFindAll
+
+- **Description:** Applies a regular expression to a string and returns all matches as an array.
+
+### $regexMatch
+
+- **Description:** Applies a regular expression to a string and returns a boolean indicating if a match was found.
+
+### $replaceOne
+
+- **Description:** Replaces the first occurrence of a search string with a replacement string.
+
+### $replaceAll
+
+- **Description:** Replaces all occurrences of a search string with a replacement string.
+
+### $rtrim
+
+- **Description:** Removes whitespace or specified characters from the end of a string.
+
+### $split
+
+- **Description:** Splits a string by a delimiter and returns an array of substrings.
+
+### $strLenBytes
+
+- **Description:** Returns the number of UTF-8 encoded bytes in the specified string.
+
+### $strLenCP
+
+- **Description:** Returns the number of UTF-8 code points in the specified string.
+
+### $strcasecmp
+
+- **Description:** Performs a case-insensitive comparison of two strings and returns an integer.
+
+### $substr
+
+- **Description:** Returns a substring of a string, starting at a specified index for a specified length. Deprecated — use $substrBytes or $substrCP.
+
+### $substrBytes
+
+- **Description:** Returns a substring of a string by byte index, starting at a specified index for a specified number of bytes.
+
+### $substrCP
+
+- **Description:** Returns a substring of a string by code point index, starting at a specified index for a specified number of code points.
+
+### $toLower
+
+- **Description:** Converts a string to lowercase and returns the result.
+
+### $toUpper
+
+- **Description:** Converts a string to uppercase and returns the result.
+
+### $trim
+
+- **Description:** Removes whitespace or specified characters from both ends of a string.
+
+## Trigonometry Expression Operators
+
+### $sin
+
+- **Description:** Returns the sine of a value measured in radians.
+
+### $cos
+
+- **Description:** Returns the cosine of a value measured in radians.
+
+### $tan
+
+- **Description:** Returns the tangent of a value measured in radians.
+
+### $asin
+
+- **Description:** Returns the arcsine (inverse sine) of a value. The result is in radians.
+
+### $acos
+
+- **Description:** Returns the arccosine (inverse cosine) of a value. The result is in radians.
+
+### $atan
+
+- **Description:** Returns the arctangent (inverse tangent) of a value. The result is in radians.
+
+### $atan2
+
+- **Description:** Returns the arctangent of the quotient of two values, using the signs to determine the quadrant.
+
+### $asinh
+
+- **Description:** Returns the inverse hyperbolic sine of a value.
+
+### $acosh
+
+- **Description:** Returns the inverse hyperbolic cosine of a value.
+
+### $atanh
+
+- **Description:** Returns the inverse hyperbolic tangent of a value.
+
+### $sinh
+
+- **Description:** Returns the hyperbolic sine of a value.
+
+### $cosh
+
+- **Description:** Returns the hyperbolic cosine of a value.
+
+### $tanh
+
+- **Description:** Returns the hyperbolic tangent of a value.
+
+### $degreesToRadians
+
+- **Description:** Converts a value from degrees to radians.
+
+### $radiansToDegrees
+
+- **Description:** Converts a value from radians to degrees.
+
+## Aggregation Pipeline Stages
+
+### $bucketAuto
+
+- **Description:** Categorizes documents into a specified number of groups based on a given expression, automatically determining bucket boundaries.
+
+### $graphLookup
+
+- **Description:** Performs a recursive search on a collection to return documents connected by a specified field relationship.
+
+### $limit
+
+- **Description:** Restricts the number of documents passed to the next stage in the pipeline.
+
+### $project
+
+- **Description:** Reshapes documents by including, excluding, or computing new fields.
+
+### $replaceRoot
+
+- **Description:** Replaces the input document with a specified embedded document, promoting it to the top level.
+
+### $search
+
+- **Description:** Performs full-text search on string fields using a compatible search index.
+
+### $searchMeta
+
+- **Description:** Returns metadata about a search query without returning the matching documents.
+
+### $setWindowFields
+
+- **Description:** Adds computed fields to documents using window functions over a specified partition and sort order.
+
+### $unionWith
+
+- **Description:** Combines the results of two collections into a single result set, similar to SQL UNION ALL.
+
+### $currentOp
+
+- **Description:** Returns information on active and queued operations for the database instance.
+
+## Array Update Operators
+
+### $[]
+
+- **Description:** Positional all operator. Acts as a placeholder to update all elements in an array field.
+
+### $[identifier]
+
+- **Description:** Filtered positional operator. Acts as a placeholder to update elements that match an arrayFilters condition.
+
+### $position
+
+- **Description:** Specifies the position in the array at which the $push operator inserts elements. Used with $each.
+
+## Array Expression Operators
+
+### $objectToArray
+
+- **Description:** Converts an object into an array of key-value pair documents.
+
+## Variables in Aggregation Expressions
+
+### $$NOW
+
+- **Description:** Returns the current datetime as a Date object. Constant throughout a single aggregation pipeline.
+
+### $$ROOT
+
+- **Description:** References the root document — the top-level document currently being processed in the pipeline stage.
+
+### $$REMOVE
+
+- **Description:** Removes a field from the output document. Used with $project or $addFields to conditionally exclude fields.
+
+### $$CURRENT
+
+- **Description:** References the start of the field path being processed in the pipeline stage. Equivalent to $$ROOT unless it has been rebound.
+
+### $$DESCEND
+
+- **Description:** Used with $redact. Returns the document fields at the current level and continues descending into subdocuments.
+
+### $$PRUNE
+
+- **Description:** Used with $redact. Excludes all fields at the current document level and stops descending into subdocuments.
+
+### $$KEEP
+
+- **Description:** Used with $redact. Keeps all fields at the current document level without further descending into subdocuments.
+
+## Array Expression Operators
+
+### $minN
+
+- **Doc Link:** https://learn.microsoft.com/en-us/azure/documentdb/operators/accumulators/$minn
+
+## Comparison Expression Operators
+
+### $cmp
+
+- **Doc Link:** https://learn.microsoft.com/en-us/azure/documentdb/operators/comparison-query/$cmp
+
+## Window Operators
+
+### $minN
+
+- **Doc Link:** https://learn.microsoft.com/en-us/azure/documentdb/operators/accumulators/$minn
+
+## Geospatial Operators
+
+### $box
+
+- **Standalone:** false
+
+### $center
+
+- **Standalone:** false
+
+### $centerSphere
+
+- **Standalone:** false
+
+### $geometry
+
+- **Standalone:** false
+
+### $maxDistance
+
+- **Standalone:** false
+
+### $minDistance
+
+- **Standalone:** false
+
+### $polygon
+
+- **Standalone:** false
+
+## Projection Operators
+
+### $
+
+- **Standalone:** false
+
+## Miscellaneous Query Operators
+
+### $natural
+
+- **Standalone:** false
diff --git a/packages/documentdb-constants/resources/overrides/operator-snippets.md b/packages/documentdb-constants/resources/overrides/operator-snippets.md
new file mode 100644
index 000000000..9b3adf63e
--- /dev/null
+++ b/packages/documentdb-constants/resources/overrides/operator-snippets.md
@@ -0,0 +1,810 @@
+# Operator Snippets
+
+<!--
+  Provides snippet templates for all operator categories.
+
+  Format: Same heading structure as the scraped dump and overrides.
+
+  - H2 (##) headings denote categories, resolved via CATEGORY_TO_META.
+  - H3 (###) headings are either operator names (e.g., ### $match) or ### DEFAULT.
+  - '- **Snippet:** `template`' lines provide the snippet template (backtick-wrapped).
+
+  The generator resolves snippets in this order:
+    1. Snippet override from operator-overrides.md (highest priority)
+    2. Per-operator snippet from this file
+    3. DEFAULT snippet from this file (with {{VALUE}} replaced by operator name)
+    4. No snippet
+
+  {{VALUE}} is replaced by the operator name (e.g., $sum) at generation time.
+  Operators not listed here (and with no DEFAULT) receive no snippet.
+
+  Do NOT edit generated src/ files — put corrections here instead.
+-->
+
+## Aggregation Pipeline Stages
+
+### DEFAULT
+
+- **Snippet:** `{ {{VALUE}}: { ${1} } }`
+
+### $match
+
+- **Snippet:** `{ $match: { ${1:query} } }`
+
+### $group
+
+- **Snippet:** `{ $group: { _id: "${1:\$field}", ${2:accumulator}: { ${3:\$sum}: 1 } } }`
+
+### $project
+
+- **Snippet:** `{ $project: { ${1:field}: 1 } }`
+
+### $sort
+
+- **Snippet:** `{ $sort: { ${1:field}: ${2:1} } }`
+
+### $limit
+
+- **Snippet:** `{ $limit: ${1:number} }`
+
+### $skip
+
+- **Snippet:** `{ $skip: ${1:number} }`
+
+### $unwind
+
+- **Snippet:** `{ $unwind: "${1:\$arrayField}" }`
+
+### $lookup
+
+- **Snippet:** `{ $lookup: { from: "${1:collection}", localField: "${2:field}", foreignField: "${3:field}", as: "${4:result}" } }`
+
+### $addFields
+
+- **Snippet:** `{ $addFields: { ${1:newField}: ${2:expression} } }`
+
+### $set
+
+- **Snippet:** `{ $set: { ${1:field}: ${2:expression} } }`
+
+### $unset
+
+- **Snippet:** `{ $unset: "${1:field}" }`
+
+### $replaceRoot
+
+- **Snippet:** `{ $replaceRoot: { newRoot: "${1:\$field}" } }`
+
+### $replaceWith
+
+- **Snippet:** `{ $replaceWith: "${1:\$field}" }`
+
+### $count
+
+- **Snippet:** `{ $count: "${1:countField}" }`
+
+### $out
+
+- **Snippet:** `{ $out: "${1:collection}" }`
+
+### $merge
+
+- **Snippet:** `{ $merge: { into: "${1:collection}" } }`
+
+### $bucket
+
+- **Snippet:** `{ $bucket: { groupBy: "${1:\$field}", boundaries: [${2:values}], default: "${3:Other}" } }`
+
+### $bucketAuto
+
+- **Snippet:** `{ $bucketAuto: { groupBy: "${1:\$field}", buckets: ${2:number} } }`
+
+### $facet
+
+- **Snippet:** `{ $facet: { ${1:outputField}: [{ ${2:stage} }] } }`
+
+### $graphLookup
+
+- **Snippet:** `{ $graphLookup: { from: "${1:collection}", startWith: "${2:\$field}", connectFromField: "${3:field}", connectToField: "${4:field}", as: "${5:result}" } }`
+
+### $sample
+
+- **Snippet:** `{ $sample: { size: ${1:number} } }`
+
+### $sortByCount
+
+- **Snippet:** `{ $sortByCount: "${1:\$field}" }`
+
+### $redact
+
+- **Snippet:** `{ $redact: { \$cond: { if: { ${1:expression} }, then: "${2:\$\$DESCEND}", else: "${3:\$\$PRUNE}" } } }`
+
+### $unionWith
+
+- **Snippet:** `{ $unionWith: { coll: "${1:collection}", pipeline: [${2}] } }`
+
+### $setWindowFields
+
+- **Snippet:** `{ $setWindowFields: { partitionBy: "${1:\$field}", sortBy: { ${2:field}: ${3:1} }, output: { ${4:newField}: { ${5:windowFunc} } } } }`
+
+### $densify
+
+- **Snippet:** `{ $densify: { field: "${1:field}", range: { step: ${2:1}, bounds: "full" } } }`
+
+### $fill
+
+- **Snippet:** `{ $fill: { output: { ${1:field}: { method: "${2:linear}" } } } }`
+
+### $documents
+
+- **Snippet:** `{ $documents: [${1:documents}] }`
+
+### $changeStream
+
+- **Snippet:** `{ $changeStream: {} }`
+
+### $collStats
+
+- **Snippet:** `{ $collStats: { storageStats: {} } }`
+
+### $currentOp
+
+- **Snippet:** `{ $currentOp: { allUsers: true } }`
+
+### $indexStats
+
+- **Snippet:** `{ $indexStats: {} }`
+
+### $listLocalSessions
+
+- **Snippet:** `{ $listLocalSessions: { allUsers: true } }`
+
+### $geoNear
+
+- **Snippet:** `{ $geoNear: { near: { type: "Point", coordinates: [${1:lng}, ${2:lat}] }, distanceField: "${3:distance}" } }`
+
+## Comparison Query Operators
+
+### DEFAULT
+
+- **Snippet:** `{ {{VALUE}}: ${1:value} }`
+
+### $in
+
+- **Snippet:** `{ $in: [${1:value}] }`
+
+### $nin
+
+- **Snippet:** `{ $nin: [${1:value}] }`
+
+## Logical Query Operators
+
+### DEFAULT
+
+- **Snippet:** `{ {{VALUE}}: [{ ${1:expression} }] }`
+
+### $not
+
+- **Snippet:** `{ $not: { ${1:expression} } }`
+
+## Element Query Operators
+
+### $exists
+
+- **Snippet:** `{ $exists: ${1:true} }`
+
+### $type
+
+- **Snippet:** `{ $type: "${1:type}" }`
+
+## Evaluation Query Operators
+
+### $expr
+
+- **Snippet:** `{ $expr: { ${1:expression} } }`
+
+### $regex
+
+- **Snippet:** `{ $regex: /${1:pattern}/ }`
+
+### $mod
+
+- **Snippet:** `{ $mod: [${1:divisor}, ${2:remainder}] }`
+
+### $text
+
+- **Snippet:** `{ $text: { \$search: "${1:text}" } }`
+
+### $jsonSchema
+
+- **Snippet:** `{ $jsonSchema: { bsonType: "${1:object}" } }`
+
+## Array Query Operators
+
+### $all
+
+- **Snippet:** `{ $all: [${1:value}] }`
+
+### $elemMatch
+
+- **Snippet:** `{ $elemMatch: { ${1:query} } }`
+
+### $size
+
+- **Snippet:** `{ $size: ${1:number} }`
+
+## Bitwise Query Operators
+
+### DEFAULT
+
+- **Snippet:** `{ {{VALUE}}: ${1:bitmask} }`
+
+## Geospatial Operators
+
+### $near
+
+- **Snippet:** `{ $near: { \$geometry: { type: "Point", coordinates: [${1:lng}, ${2:lat}] }, \$maxDistance: ${3:distance} } }`
+
+### $nearSphere
+
+- **Snippet:** `{ $nearSphere: { \$geometry: { type: "Point", coordinates: [${1:lng}, ${2:lat}] }, \$maxDistance: ${3:distance} } }`
+
+### $geoIntersects
+
+- **Snippet:** `{ $geoIntersects: { \$geometry: { type: "${1:GeoJSON type}", coordinates: ${2:coordinates} } } }`
+
+### $geoWithin
+
+- **Snippet:** `{ $geoWithin: { \$geometry: { type: "${1:GeoJSON type}", coordinates: ${2:coordinates} } } }`
+
+### $box
+
+- **Snippet:** `[[${1:bottomLeftX}, ${2:bottomLeftY}], [${3:upperRightX}, ${4:upperRightY}]]`
+
+### $center
+
+- **Snippet:** `[[${1:x}, ${2:y}], ${3:radius}]`
+
+### $centerSphere
+
+- **Snippet:** `[[${1:x}, ${2:y}], ${3:radiusInRadians}]`
+
+### $geometry
+
+- **Snippet:** `{ type: "${1:Point}", coordinates: [${2:coordinates}] }`
+
+### $maxDistance
+
+- **Snippet:** `${1:distance}`
+
+### $minDistance
+
+- **Snippet:** `${1:distance}`
+
+### $polygon
+
+- **Snippet:** `[[${1:x1}, ${2:y1}], [${3:x2}, ${4:y2}], [${5:x3}, ${6:y3}]]`
+
+## Projection Operators
+
+### $elemMatch
+
+- **Snippet:** `{ $elemMatch: { ${1:query} } }`
+
+### $slice
+
+- **Snippet:** `{ $slice: ${1:number} }`
+
+## Miscellaneous Query Operators
+
+### $comment
+
+- **Snippet:** `{ $comment: "${1:comment}" }`
+
+### $rand
+
+- **Snippet:** `{ $rand: {} }`
+
+### $natural
+
+- **Snippet:** `{ $natural: ${1:1} }`
+
+## Field Update Operators
+
+### DEFAULT
+
+- **Snippet:** `{ {{VALUE}}: { "${1:field}": ${2:value} } }`
+
+### $rename
+
+- **Snippet:** `{ $rename: { "${1:oldField}": "${2:newField}" } }`
+
+### $currentDate
+
+- **Snippet:** `{ $currentDate: { "${1:field}": true } }`
+
+## Array Update Operators
+
+### $addToSet
+
+- **Snippet:** `{ $addToSet: { "${1:field}": ${2:value} } }`
+
+### $pop
+
+- **Snippet:** `{ $pop: { "${1:field}": ${2:1} } }`
+
+### $pull
+
+- **Snippet:** `{ $pull: { "${1:field}": ${2:condition} } }`
+
+### $push
+
+- **Snippet:** `{ $push: { "${1:field}": ${2:value} } }`
+
+### $pullAll
+
+- **Snippet:** `{ $pullAll: { "${1:field}": [${2:values}] } }`
+
+### $each
+
+- **Snippet:** `{ $each: [${1:values}] }`
+
+### $position
+
+- **Snippet:** `{ $position: ${1:index} }`
+
+### $slice
+
+- **Snippet:** `{ $slice: ${1:number} }`
+
+### $sort
+
+- **Snippet:** `{ $sort: { "${1:field}": ${2:1} } }`
+
+## Bitwise Update Operators
+
+### DEFAULT
+
+- **Snippet:** `{ {{VALUE}}: { "${1:field}": { "${2:and|or|xor}": ${3:value} } } }`
+
+## Accumulators
+
+### DEFAULT
+
+- **Snippet:** `{ {{VALUE}}: "${1:\$field}" }`
+
+### $count
+
+- **Snippet:** `{ $count: {} }`
+
+### $bottom
+
+- **Snippet:** `{ $bottom: { sortBy: { ${1:field}: ${2:1} }, output: "${3:\$field}" } }`
+
+### $top
+
+- **Snippet:** `{ $top: { sortBy: { ${1:field}: ${2:1} }, output: "${3:\$field}" } }`
+
+### $bottomN
+
+- **Snippet:** `{ $bottomN: { n: ${1:number}, sortBy: { ${2:field}: ${3:1} }, output: "${4:\$field}" } }`
+
+### $topN
+
+- **Snippet:** `{ $topN: { n: ${1:number}, sortBy: { ${2:field}: ${3:1} }, output: "${4:\$field}" } }`
+
+### $firstN
+
+- **Snippet:** `{ $firstN: { input: "${1:\$field}", n: ${2:number} } }`
+
+### $lastN
+
+- **Snippet:** `{ $lastN: { input: "${1:\$field}", n: ${2:number} } }`
+
+### $maxN
+
+- **Snippet:** `{ $maxN: { input: "${1:\$field}", n: ${2:number} } }`
+
+### $minN
+
+- **Snippet:** `{ $minN: { input: "${1:\$field}", n: ${2:number} } }`
+
+### $percentile
+
+- **Snippet:** `{ $percentile: { input: "${1:\$field}", p: [${2:0.5}], method: "approximate" } }`
+
+### $median
+
+- **Snippet:** `{ $median: { input: "${1:\$field}", method: "approximate" } }`
+
+## Window Operators
+
+### DEFAULT
+
+- **Snippet:** `{ {{VALUE}}: "${1:\$field}" }`
+
+### $shift
+
+- **Snippet:** `{ $shift: { output: "${1:\$field}", by: ${2:1}, default: ${3:null} } }`
+
+### $rank
+
+- **Snippet:** `{ $rank: {} }`
+
+### $denseRank
+
+- **Snippet:** `{ $denseRank: {} }`
+
+### $documentNumber
+
+- **Snippet:** `{ $documentNumber: {} }`
+
+### $expMovingAvg
+
+- **Snippet:** `{ $expMovingAvg: { input: "${1:\$field}", N: ${2:number} } }`
+
+### $derivative
+
+- **Snippet:** `{ $derivative: { input: "${1:\$field}", unit: "${2:hour}" } }`
+
+### $integral
+
+- **Snippet:** `{ $integral: { input: "${1:\$field}", unit: "${2:hour}" } }`
+
+## Arithmetic Expression Operators
+
+### DEFAULT
+
+- **Snippet:** `{ {{VALUE}}: "${1:\$field}" }`
+
+### $add
+
+- **Snippet:** `{ $add: ["${1:\$field1}", "${2:\$field2}"] }`
+
+### $subtract
+
+- **Snippet:** `{ $subtract: ["${1:\$field1}", "${2:\$field2}"] }`
+
+### $multiply
+
+- **Snippet:** `{ $multiply: ["${1:\$field1}", "${2:\$field2}"] }`
+
+### $divide
+
+- **Snippet:** `{ $divide: ["${1:\$field1}", "${2:\$field2}"] }`
+
+### $mod
+
+- **Snippet:** `{ $mod: ["${1:\$field1}", "${2:\$field2}"] }`
+
+### $pow
+
+- **Snippet:** `{ $pow: ["${1:\$field1}", "${2:\$field2}"] }`
+
+### $log
+
+- **Snippet:** `{ $log: ["${1:\$number}", ${2:base}] }`
+
+### $round
+
+- **Snippet:** `{ $round: ["${1:\$field}", ${2:place}] }`
+
+## Array Expression Operators
+
+### DEFAULT
+
+- **Snippet:** `{ {{VALUE}}: "${1:\$array}" }`
+
+### $arrayElemAt
+
+- **Snippet:** `{ $arrayElemAt: ["${1:\$array}", ${2:index}] }`
+
+### $concatArrays
+
+- **Snippet:** `{ $concatArrays: ["${1:\$array1}", "${2:\$array2}"] }`
+
+### $filter
+
+- **Snippet:** `{ $filter: { input: "${1:\$array}", as: "${2:item}", cond: { ${3:expression} } } }`
+
+### $in
+
+- **Snippet:** `{ $in: ["${1:\$field}", "${2:\$array}"] }`
+
+### $indexOfArray
+
+- **Snippet:** `{ $indexOfArray: ["${1:\$array}", "${2:value}"] }`
+
+### $isArray
+
+- **Snippet:** `{ $isArray: "${1:\$field}" }`
+
+### $map
+
+- **Snippet:** `{ $map: { input: "${1:\$array}", as: "${2:item}", in: { ${3:expression} } } }`
+
+### $objectToArray
+
+- **Snippet:** `{ $objectToArray: "${1:\$object}" }`
+
+### $range
+
+- **Snippet:** `{ $range: [${1:start}, ${2:end}, ${3:step}] }`
+
+### $reduce
+
+- **Snippet:** `{ $reduce: { input: "${1:\$array}", initialValue: ${2:0}, in: { ${3:expression} } } }`
+
+### $slice
+
+- **Snippet:** `{ $slice: ["${1:\$array}", ${2:n}] }`
+
+### $sortArray
+
+- **Snippet:** `{ $sortArray: { input: "${1:\$array}", sortBy: { ${2:field}: ${3:1} } } }`
+
+### $zip
+
+- **Snippet:** `{ $zip: { inputs: ["${1:\$array1}", "${2:\$array2}"] } }`
+
+### $maxN
+
+- **Snippet:** `{ $maxN: { input: "${1:\$array}", n: ${2:number} } }`
+
+### $minN
+
+- **Snippet:** `{ $minN: { input: "${1:\$array}", n: ${2:number} } }`
+
+### $firstN
+
+- **Snippet:** `{ $firstN: { input: "${1:\$array}", n: ${2:number} } }`
+
+### $lastN
+
+- **Snippet:** `{ $lastN: { input: "${1:\$array}", n: ${2:number} } }`
+
+## Boolean Expression Operators
+
+### DEFAULT
+
+- **Snippet:** `{ {{VALUE}}: ["${1:expression1}", "${2:expression2}"] }`
+
+### $not
+
+- **Snippet:** `{ $not: ["${1:expression}"] }`
+
+## Comparison Expression Operators
+
+### DEFAULT
+
+- **Snippet:** `{ {{VALUE}}: ["${1:\$field1}", "${2:\$field2}"] }`
+
+## Conditional Expression Operators
+
+### $cond
+
+- **Snippet:** `{ $cond: { if: { ${1:expression} }, then: ${2:trueValue}, else: ${3:falseValue} } }`
+
+### $ifNull
+
+- **Snippet:** `{ $ifNull: ["${1:\$field}", ${2:replacement}] }`
+
+### $switch
+
+- **Snippet:** `{ $switch: { branches: [{ case: { ${1:expression} }, then: ${2:value} }], default: ${3:defaultValue} } }`
+
+## Date Expression Operators
+
+### DEFAULT
+
+- **Snippet:** `{ {{VALUE}}: "${1:\$dateField}" }`
+
+### $dateAdd
+
+- **Snippet:** `{ $dateAdd: { startDate: "${1:\$dateField}", unit: "${2:day}", amount: ${3:1} } }`
+
+### $dateSubtract
+
+- **Snippet:** `{ $dateSubtract: { startDate: "${1:\$dateField}", unit: "${2:day}", amount: ${3:1} } }`
+
+### $dateDiff
+
+- **Snippet:** `{ $dateDiff: { startDate: "${1:\$startDate}", endDate: "${2:\$endDate}", unit: "${3:day}" } }`
+
+### $dateFromParts
+
+- **Snippet:** `{ $dateFromParts: { year: ${1:2024}, month: ${2:1}, day: ${3:1} } }`
+
+### $dateToParts
+
+- **Snippet:** `{ $dateToParts: { date: "${1:\$dateField}" } }`
+
+### $dateFromString
+
+- **Snippet:** `{ $dateFromString: { dateString: "${1:dateString}" } }`
+
+### $dateToString
+
+- **Snippet:** `{ $dateToString: { format: "${1:%Y-%m-%d}", date: "${2:\$dateField}" } }`
+
+### $dateTrunc
+
+- **Snippet:** `{ $dateTrunc: { date: "${1:\$dateField}", unit: "${2:day}" } }`
+
+### $toDate
+
+- **Snippet:** `{ $toDate: "${1:\$field}" }`
+
+## Object Expression Operators
+
+### DEFAULT
+
+- **Snippet:** `{ {{VALUE}}: "${1:\$object}" }`
+
+### $mergeObjects
+
+- **Snippet:** `{ $mergeObjects: ["${1:\$object1}", "${2:\$object2}"] }`
+
+### $setField
+
+- **Snippet:** `{ $setField: { field: "${1:fieldName}", input: "${2:\$object}", value: ${3:value} } }`
+
+## Set Expression Operators
+
+### DEFAULT
+
+- **Snippet:** `{ {{VALUE}}: ["${1:\$set1}", "${2:\$set2}"] }`
+
+### $anyElementTrue
+
+- **Snippet:** `{ $anyElementTrue: ["${1:\$array}"] }`
+
+### $allElementsTrue
+
+- **Snippet:** `{ $allElementsTrue: ["${1:\$array}"] }`
+
+## String Expression Operators
+
+### DEFAULT
+
+- **Snippet:** `{ {{VALUE}}: "${1:\$string}" }`
+
+### $concat
+
+- **Snippet:** `{ $concat: ["${1:\$string1}", "${2:\$string2}"] }`
+
+### $indexOfBytes
+
+- **Snippet:** `{ $indexOfBytes: ["${1:\$string}", "${2:substring}"] }`
+
+### $indexOfCP
+
+- **Snippet:** `{ $indexOfCP: ["${1:\$string}", "${2:substring}"] }`
+
+### $regexFind
+
+- **Snippet:** `{ $regexFind: { input: "${1:\$string}", regex: "${2:pattern}" } }`
+
+### $regexFindAll
+
+- **Snippet:** `{ $regexFindAll: { input: "${1:\$string}", regex: "${2:pattern}" } }`
+
+### $regexMatch
+
+- **Snippet:** `{ $regexMatch: { input: "${1:\$string}", regex: "${2:pattern}" } }`
+
+### $replaceOne
+
+- **Snippet:** `{ $replaceOne: { input: "${1:\$string}", find: "${2:find}", replacement: "${3:replacement}" } }`
+
+### $replaceAll
+
+- **Snippet:** `{ $replaceAll: { input: "${1:\$string}", find: "${2:find}", replacement: "${3:replacement}" } }`
+
+### $split
+
+- **Snippet:** `{ $split: ["${1:\$string}", "${2:delimiter}"] }`
+
+### $substr
+
+- **Snippet:** `{ $substr: ["${1:\$string}", ${2:start}, ${3:length}] }`
+
+### $substrBytes
+
+- **Snippet:** `{ $substrBytes: ["${1:\$string}", ${2:start}, ${3:length}] }`
+
+### $substrCP
+
+- **Snippet:** `{ $substrCP: ["${1:\$string}", ${2:start}, ${3:length}] }`
+
+### $strcasecmp
+
+- **Snippet:** `{ $strcasecmp: ["${1:\$string1}", "${2:\$string2}"] }`
+
+### $trim
+
+- **Snippet:** `{ $trim: { input: "${1:\$string}" } }`
+
+### $ltrim
+
+- **Snippet:** `{ $ltrim: { input: "${1:\$string}" } }`
+
+### $rtrim
+
+- **Snippet:** `{ $rtrim: { input: "${1:\$string}" } }`
+
+## Trigonometry Expression Operators
+
+### DEFAULT
+
+- **Snippet:** `{ {{VALUE}}: "${1:\$value}" }`
+
+### $degreesToRadians
+
+- **Snippet:** `{ $degreesToRadians: "${1:\$angle}" }`
+
+### $radiansToDegrees
+
+- **Snippet:** `{ $radiansToDegrees: "${1:\$angle}" }`
+
+## Type Expression Operators
+
+### DEFAULT
+
+- **Snippet:** `{ {{VALUE}}: "${1:\$field}" }`
+
+### $convert
+
+- **Snippet:** `{ $convert: { input: "${1:\$field}", to: "${2:type}" } }`
+
+## Data Size Operators
+
+### DEFAULT
+
+- **Snippet:** `{ {{VALUE}}: "${1:\$field}" }`
+
+## Literal Expression Operator
+
+### DEFAULT
+
+- **Snippet:** `{ {{VALUE}}: ${1:value} }`
+
+## Miscellaneous Operators
+
+### DEFAULT
+
+- **Snippet:** `{ {{VALUE}}: ${1:value} }`
+
+### $getField
+
+- **Snippet:** `{ $getField: { field: "${1:fieldName}", input: "${2:\$object}" } }`
+
+### $rand
+
+- **Snippet:** `{ $rand: {} }`
+
+### $sampleRate
+
+- **Snippet:** `{ $sampleRate: ${1:0.5} }`
+
+## Bitwise Operators
+
+### DEFAULT
+
+- **Snippet:** `{ {{VALUE}}: [${1:value1}, ${2:value2}] }`
+
+### $bitNot
+
+- **Snippet:** `{ $bitNot: "${1:\$field}" }`
+
+## Timestamp Expression Operators
+
+### DEFAULT
+
+- **Snippet:** `{ {{VALUE}}: "${1:\$timestampField}" }`
+
+## Variable Expression Operators
+
+### $let
+
+- **Snippet:** `{ $let: { vars: { ${1:var}: ${2:expression} }, in: ${3:expression} } }`
diff --git a/packages/documentdb-constants/resources/scraped/operator-reference.md b/packages/documentdb-constants/resources/scraped/operator-reference.md
new file mode 100644
index 000000000..6ba385cfb
--- /dev/null
+++ b/packages/documentdb-constants/resources/scraped/operator-reference.md
@@ -0,0 +1,4113 @@
+# DocumentDB Operator Reference
+
+<!-- AUTO-GENERATED by scrape-operator-docs.ts -->
+<!-- Last scraped: 2026-02-20 -->
+<!-- Source: https://github.com/MicrosoftDocs/azure-databases-docs -->
+
+## Summary
+
+| Category                                                      | Listed  | Total   |
+| ------------------------------------------------------------- | ------- | ------- |
+| Comparison Query Operators                                    | 8       | 8       |
+| Logical Query Operators                                       | 4       | 4       |
+| Element Query Operators                                       | 2       | 2       |
+| Evaluation Query Operators                                    | 5       | 6       |
+| Geospatial Operators                                          | 11      | 11      |
+| Array Query Operators                                         | 3       | 3       |
+| Bitwise Query Operators                                       | 4       | 4       |
+| Projection Operators                                          | 3       | 4       |
+| Miscellaneous Query Operators                                 | 3       | 3       |
+| Field Update Operators                                        | 9       | 9       |
+| Array Update Operators                                        | 12      | 12      |
+| Bitwise Update Operators                                      | 1       | 1       |
+| Arithmetic Expression Operators                               | 16      | 16      |
+| Array Expression Operators                                    | 20      | 20      |
+| Bitwise Operators                                             | 4       | 4       |
+| Boolean Expression Operators                                  | 3       | 3       |
+| Comparison Expression Operators                               | 7       | 7       |
+| Custom Aggregation Expression Operators                       | 0       | 2       |
+| Data Size Operators                                           | 2       | 2       |
+| Date Expression Operators                                     | 22      | 22      |
+| Literal Expression Operator                                   | 1       | 1       |
+| Miscellaneous Operators                                       | 3       | 3       |
+| Object Expression Operators                                   | 3       | 3       |
+| Set Expression Operators                                      | 7       | 7       |
+| String Expression Operators                                   | 23      | 23      |
+| Text Expression Operator                                      | 0       | 1       |
+| Timestamp Expression Operators                                | 2       | 2       |
+| Trigonometry Expression Operators                             | 15      | 15      |
+| Type Expression Operators                                     | 11      | 11      |
+| Accumulators ($group, $bucket, $bucketAuto, $setWindowFields) | 21      | 22      |
+| Accumulators (in Other Stages)                                | 10      | 10      |
+| Variable Expression Operators                                 | 1       | 1       |
+| Window Operators                                              | 27      | 27      |
+| Conditional Expression Operators                              | 3       | 3       |
+| Aggregation Pipeline Stages                                   | 35      | 42      |
+| Variables in Aggregation Expressions                          | 7       | 10      |
+| **Total**                                                     | **308** | **324** |
+
+## Comparison Query Operators
+
+### $eq
+
+- **Description:** The $eq query operator compares the value of a field to a specified value
+- **Syntax:**
+
+```javascript
+{
+    field: {
+        $eq: <value>
+    }
+}
+```
+
+- **Doc Link:** https://learn.microsoft.com/en-us/azure/documentdb/operators/comparison-query/$eq
+
+### $gt
+
+- **Description:** The $gt query operator retrieves documents where the value of a field is greater than a specified value
+- **Syntax:**
+
+```javascript
+{
+  field: {
+    $gt: <value>
+  }
+}
+```
+
+- **Doc Link:** https://learn.microsoft.com/en-us/azure/documentdb/operators/comparison-query/$gt
+
+### $gte
+
+- **Description:** The $gte operator retrieves documents where the value of a field is greater than or equal to a specified value
+- **Syntax:**
+
+```javascript
+{
+    field: {
+        $gte: <value>
+    }
+}
+```
+
+- **Doc Link:** https://learn.microsoft.com/en-us/azure/documentdb/operators/comparison-query/$gte
+
+### $in
+
+- **Description:** The $in operator matches the value of a field against an array of specified values
+- **Syntax:**
+
+```javascript
+{
+  field: {
+    $in: [ <listOfValues> ]
+  }
+}
+```
+
+- **Doc Link:** https://learn.microsoft.com/en-us/azure/documentdb/operators/comparison-query/$in
+
+### $lt
+
+- **Description:** The $lt operator retrieves documents where the value of a field is less than a specified value
+- **Syntax:**
+
+```javascript
+{
+  field: {
+    $lt: <value>
+  }
+}
+```
+
+- **Doc Link:** https://learn.microsoft.com/en-us/azure/documentdb/operators/comparison-query/$lt
+
+### $lte
+
+- **Description:** The $lte operator retrieves documents where the value of a field is less than or equal to a specified value
+- **Syntax:**
+
+```javascript
+{
+    field: {
+        $lte: <value>
+    }
+}
+```
+
+- **Doc Link:** https://learn.microsoft.com/en-us/azure/documentdb/operators/comparison-query/$lte
+
+### $ne
+
+- **Description:** The $ne operator retrieves documents where the value of a field doesn't equal a specified value
+- **Syntax:**
+
+```javascript
+{
+  field: {
+    $ne: <value>
+  }
+}
+```
+
+- **Doc Link:** https://learn.microsoft.com/en-us/azure/documentdb/operators/comparison-query/$ne
+
+### $nin
+
+- **Description:** The $nin operator retrieves documents where the value of a field doesn't match a list of values
+- **Syntax:**
+
+```javascript
+{
+    field: {
+        $nin: [ < listOfValues > ]
+    }
+}
+```
+
+- **Doc Link:** https://learn.microsoft.com/en-us/azure/documentdb/operators/comparison-query/$nin
+
+## Logical Query Operators
+
+### $and
+
+- **Description:** The $and operator joins multiple query clauses and returns documents that match all specified conditions.
+- **Syntax:**
+
+```javascript
+{
+    $and: [{
+        < expression1 >
+    }, {
+        < expression2 >
+    }, ..., {
+        < expressionN >
+    }]
+}
+```
+
+- **Doc Link:** https://learn.microsoft.com/en-us/azure/documentdb/operators/logical-query/$and
+
+### $not
+
+- **Description:** The $not operator performs a logical NOT operation on a specified expression, selecting documents that don't match the expression.
+- **Syntax:**
+
+```javascript
+{
+    field: {
+        $not: {
+            < operator - expression >
+        }
+    }
+}
+```
+
+- **Doc Link:** https://learn.microsoft.com/en-us/azure/documentdb/operators/logical-query/$not
+
+### $nor
+
+- **Description:** The $nor operator performs a logical NOR on an array of expressions and retrieves documents that fail all the conditions.
+- **Syntax:**
+
+```javascript
+{
+    $nor: [{
+        < expression1 >
+    }, {
+        < expression2 >
+    }, ..., {
+        < expressionN >
+    }]
+}
+```
+
+- **Doc Link:** https://learn.microsoft.com/en-us/azure/documentdb/operators/logical-query/$nor
+
+### $or
+
+- **Description:** The $or operator joins query clauses with a logical OR and returns documents that match at least one of the specified conditions.
+- **Syntax:**
+
+```javascript
+{
+    $or: [{
+        < expression1 >
+    }, {
+        < expression2 >
+    }, ..., {
+        < expressionN >
+    }]
+}
+```
+
+- **Doc Link:** https://learn.microsoft.com/en-us/azure/documentdb/operators/logical-query/$or
+
+## Element Query Operators
+
+### $exists
+
+- **Description:** The $exists operator retrieves documents that contain the specified field in their document structure.
+- **Syntax:**
+
+```javascript
+{
+  <field>: { $exists: <true or false> }
+}
+```
+
+- **Doc Link:** https://learn.microsoft.com/en-us/azure/documentdb/operators/element-query/$exists
+
+### $type
+
+- **Description:** The $type operator retrieves documents if the chosen field is of the specified type.
+- **Syntax:**
+
+```javascript
+{
+  <field>: { $type: <BSON type number> | <string alias> }
+}
+```
+
+- **Doc Link:** https://learn.microsoft.com/en-us/azure/documentdb/operators/element-query/$type
+
+## Evaluation Query Operators
+
+### $expr
+
+- **Description:** The $expr operator allows the use of aggregation expressions within the query language, enabling complex field comparisons and calculations.
+- **Syntax:**
+
+```javascript
+{
+  $expr: { <aggregation expression> }
+}
+```
+
+- **Doc Link:** https://learn.microsoft.com/en-us/azure/documentdb/operators/evaluation-query/$expr
+
+### $jsonSchema
+
+- **Description:** The $jsonSchema operator validates documents against a JSON Schema definition for data validation and structure enforcement. Discover supported features and limitations.
+- **Syntax:**
+
+```javascript
+db.createCollection('collectionName', {
+  validator: {
+    $jsonSchema: {
+      bsonType: 'object',
+      required: ['field1', 'field2'],
+      properties: {
+        field1: {
+          bsonType: 'string',
+        },
+        field2: {
+          bsonType: 'int',
+          minimum: 0,
+          description: 'Description of field2 requirements',
+        },
+      },
+    },
+  },
+  validationLevel: 'strict', // Optional: "strict" or "moderate"
+  validationAction: 'error', // Optional: "error" or "warn"
+});
+```
+
+- **Doc Link:** https://learn.microsoft.com/en-us/azure/documentdb/operators/evaluation-query/$jsonschema
+
+### $mod
+
+- **Description:** The $mod operator performs a modulo operation on the value of a field and selects documents with a specified result.
+- **Syntax:**
+
+```javascript
+{
+  <field>: { $mod: [ <divisor>, <remainder> ] }
+}
+```
+
+- **Doc Link:** https://learn.microsoft.com/en-us/azure/documentdb/operators/evaluation-query/$mod
+
+### $regex
+
+- **Description:** The $regex operator provides regular expression capabilities for pattern matching in queries, allowing flexible string matching and searching.
+- **Syntax:**
+
+```javascript
+{
+  <field>: { $regex: <pattern>, $options: <options> }
+}
+```
+
+- **Doc Link:** https://learn.microsoft.com/en-us/azure/documentdb/operators/evaluation-query/$regex
+
+### $text
+
+- **Description:** The $text operator performs text search on the content of indexed string fields, enabling full-text search capabilities.
+- **Syntax:**
+
+```javascript
+{
+  $text: {
+    $search: <string>,
+    $language: <string>,
+    $caseSensitive: <boolean>,
+    $diacriticSensitive: <boolean>
+  }
+}
+```
+
+- **Doc Link:** https://learn.microsoft.com/en-us/azure/documentdb/operators/evaluation-query/$text
+
+## Geospatial Operators
+
+### $geoIntersects
+
+- **Description:** The $geoIntersects operator selects documents whose location field intersects with a specified GeoJSON object.
+- **Syntax:**
+
+```javascript
+{
+  <location field>: {
+    $geoIntersects: {
+      $geometry: {
+        type: <GeoJSON type>,
+        coordinates: <coordinates>
+      }
+    }
+  }
+}
+```
+
+- **Doc Link:** https://learn.microsoft.com/en-us/azure/documentdb/operators/geospatial/$geointersects
+
+### $geoWithin
+
+- **Description:** The $geoWithin operator selects documents whose location field is completely within a specified geometry.
+- **Syntax:**
+
+```javascript
+// Using $box
+{
+  <location field>: {
+    $geoWithin: {
+      $box: [ [ <bottom left coordinates> ], [ <upper right coordinates> ] ]
+    }
+  }
+}
+
+// Using $center
+{
+  <location field>: {
+    $geoWithin: {
+      $center: [ [ <x>, <y> ], <radius> ]
+    }
+  }
+}
+
+// Using $geometry
+{
+  <location field>: {
+    $geoWithin: {
+      $geometry: {
+        type: <GeoJSON type>,
+        coordinates: <coordinates>
+      }
+    }
+  }
+}
+```
+
+- **Doc Link:** https://learn.microsoft.com/en-us/azure/documentdb/operators/geospatial/$geowithin
+
+### $box
+
+- **Description:** The $box operator defines a rectangular area for geospatial queries using coordinate pairs.
+- **Syntax:**
+
+```javascript
+{
+  <location field>: {
+    $geoWithin: {
+      $box: [
+        [<lower_left_longitude>, <lower_left_latitude>],
+        [<upper_right_longitude>, <upper_right_latitude>]
+      ]
+    }
+  }
+}
+```
+
+- **Doc Link:** https://learn.microsoft.com/en-us/azure/documentdb/operators/geospatial/$box
+
+### $center
+
+- **Description:** The $center operator specifies a circle using legacy coordinate pairs for $geoWithin queries.
+- **Syntax:**
+
+```javascript
+{
+  $geoWithin: {
+    $center: [ [ <x>, <y> ], <radius> ]
+  }
+}
+```
+
+- **Doc Link:** https://learn.microsoft.com/en-us/azure/documentdb/operators/geospatial/$center
+
+### $centerSphere
+
+- **Description:** The $centerSphere operator specifies a circle using spherical geometry for $geoWithin queries.
+- **Syntax:**
+
+```javascript
+{
+  $geoWithin: {
+    $centerSphere: [ [ <x>, <y> ], <radius in radians> ]
+  }
+}
+```
+
+- **Doc Link:** https://learn.microsoft.com/en-us/azure/documentdb/operators/geospatial/$centersphere
+
+### $geometry
+
+- **Description:** The $geometry operator specifies a GeoJSON geometry for geospatial queries.
+- **Syntax:**
+
+```javascript
+{
+  $geometry: {
+    type: <GeoJSON type>,
+    coordinates: <coordinates>
+  }
+}
+```
+
+- **Doc Link:** https://learn.microsoft.com/en-us/azure/documentdb/operators/geospatial/$geometry
+
+### $maxDistance
+
+- **Description:** The $maxDistance operator specifies the maximum distance that can exist between two points in a geospatial query.
+- **Syntax:**
+
+```javascript
+{
+  <location field>: {
+    $near: {
+      $geometry: {
+        type: "Point",
+        coordinates: [<longitude>, <latitude>]
+      },
+      $maxDistance: <distance in meters>
+    }
+  }
+}
+```
+
+- **Doc Link:** https://learn.microsoft.com/en-us/azure/documentdb/operators/geospatial/$maxdistance
+
+### $minDistance
+
+- **Description:** The $minDistance operator specifies the minimum distance that must exist between two points in a geospatial query.
+- **Syntax:**
+
+```javascript
+{
+  <location field>: {
+    $near: {
+      $geometry: {
+        type: "Point",
+        coordinates: [<longitude>, <latitude>]
+      },
+      $minDistance: <distance in meters>
+    }
+  }
+}
+```
+
+- **Doc Link:** https://learn.microsoft.com/en-us/azure/documentdb/operators/geospatial/$mindistance
+
+### $polygon
+
+- **Description:** The $polygon operator defines a polygon for geospatial queries, allowing you to find locations within an irregular shape.
+- **Syntax:**
+
+```javascript
+{
+  <location field>: {
+    $geoWithin: {
+      $geometry: {
+        type: "Polygon",
+        coordinates: [
+          [[<longitude1>, <latitude1>], ..., [<longitudeN>, <latitudeN>], [<longitude1>, <latitude1>]]
+        ]
+      }
+    }
+  }
+}
+```
+
+- **Doc Link:** https://learn.microsoft.com/en-us/azure/documentdb/operators/geospatial/$polygon
+
+### $near
+
+- **Description:** The $near operator returns documents with location fields that are near a specified point, sorted by distance.
+- **Syntax:**
+
+```javascript
+{
+  <location field>: {
+    $near: {
+      $geometry: {
+        type: "Point",
+        coordinates: [<longitude>, <latitude>]
+      },
+      $maxDistance: <distance in meters>,
+      $minDistance: <distance in meters>
+    }
+  }
+}
+```
+
+- **Doc Link:** https://learn.microsoft.com/en-us/azure/documentdb/operators/geospatial/$near
+
+### $nearSphere
+
+- **Description:** The $nearSphere operator returns documents whose location fields are near a specified point on a sphere, sorted by distance on a spherical surface.
+- **Syntax:**
+
+```javascript
+{
+  <location field>: {
+    $nearSphere: {
+      $geometry: {
+        type: "Point",
+        coordinates: [<longitude>, <latitude>]
+      },
+      $maxDistance: <distance in meters>,
+      $minDistance: <distance in meters>
+    }
+  }
+}
+```
+
+- **Doc Link:** https://learn.microsoft.com/en-us/azure/documentdb/operators/geospatial/$nearsphere
+
+## Array Query Operators
+
+### $all
+
+- **Description:** The $all operator matches documents whose array field contains all the specified elements.
+- **Syntax:**
+
+```javascript
+db.collection.find({
+    field : {
+        $all: [ < value1 > , < value2 > ]
+    }
+})
+```
+
+- **Doc Link:** https://learn.microsoft.com/en-us/azure/documentdb/operators/array-query/$all
+
+### $elemMatch
+
+- **Description:** The $elemMatch operator returns the complete array when at least one array element matches all the specified criteria.
+- **Syntax:**
+
+```javascript
+db.collection.find({ <field>: { $elemMatch: { <query1>, <query2>, ... } } })
+```
+
+- **Doc Link:** https://learn.microsoft.com/en-us/azure/documentdb/operators/array-query/$elemmatch
+
+### $size
+
+- **Description:** The $size operator is used to query documents where an array field has a specified number of elements.
+- **Syntax:**
+
+```javascript
+db.collection.find({ <field>: { $size: <number> } })
+```
+
+- **Doc Link:** https://learn.microsoft.com/en-us/azure/documentdb/operators/array-query/$size
+
+## Bitwise Query Operators
+
+### $bitsAllClear
+
+- **Description:** The $bitsAllClear operator is used to match documents where all the bit positions specified in a bitmask are clear.
+- **Syntax:**
+
+```javascript
+{
+  <field>: { $bitsAllClear: <bitmask> }
+}
+```
+
+- **Doc Link:** https://learn.microsoft.com/en-us/azure/documentdb/operators/bitwise-query/$bitsallclear
+
+### $bitsAllSet
+
+- **Description:** The $bitsAllSet operator is used to match documents where all the specified bit positions are set.
+- **Syntax:**
+
+```javascript
+{
+  <field>: { $bitsAllSet: <bitmask> }
+}
+```
+
+- **Doc Link:** https://learn.microsoft.com/en-us/azure/documentdb/operators/bitwise-query/$bitsallset
+
+### $bitsAnyClear
+
+- **Description:** The $bitsAnyClear operator matches documents where any of the specified bit positions in a bitmask are clear.
+- **Syntax:**
+
+```javascript
+{
+  <field>: { $bitsAnyClear: <bitmask> }
+}
+```
+
+- **Doc Link:** https://learn.microsoft.com/en-us/azure/documentdb/operators/bitwise-query/$bitsanyclear
+
+### $bitsAnySet
+
+- **Description:** The $bitsAnySet operator returns documents where any of the specified bit positions are set to 1.
+- **Syntax:**
+
+```javascript
+{
+  <field>: { $bitsAnySet: [ <bit positions> ] }
+}
+```
+
+- **Doc Link:** https://learn.microsoft.com/en-us/azure/documentdb/operators/bitwise-query/$bitsanyset
+
+## Projection Operators
+
+### $
+
+- **Description:** The $ positional operator identifies an element in an array to update without explicitly specifying the position of the element in the array.
+- **Syntax:**
+
+```javascript
+db.collection.updateOne(
+  { <array>: <value> },
+  { <update operator>: { "<array>.$": <value> } }
+)
+```
+
+- **Doc Link:** none
+- **Scraper Comment:** Doc page not found in expected directory 'projection/'. Content scraped from 'array-update/'.
+
+### $elemMatch
+
+- **Description:** The $elemMatch operator returns the complete array when at least one array element matches all the specified criteria.
+- **Syntax:**
+
+```javascript
+db.collection.find({ <field>: { $elemMatch: { <query1>, <query2>, ... } } })
+```
+
+- **Doc Link:** none
+- **Scraper Comment:** Doc page not found in expected directory 'projection/'. Content scraped from 'array-query/'.
+
+### $slice
+
+- **Description:** The $slice operator returns a subset of an array from any element onwards in the array.
+- **Syntax:**
+
+```javascript
+{
+  $slice: [ <array>, <n> ]
+}
+```
+
+- **Doc Link:** none
+- **Scraper Comment:** Doc page not found in expected directory 'projection/'. Content scraped from 'array-expression/'.
+
+## Miscellaneous Query Operators
+
+### $comment
+
+- **Description:** The $comment operator adds a comment to a query to help identify the query in logs and profiler output.
+- **Syntax:**
+
+```javascript
+{
+  $comment: <string>
+}
+```
+
+- **Doc Link:** https://learn.microsoft.com/en-us/azure/documentdb/operators/miscellaneous-query/$comment
+
+### $rand
+
+- **Description:** The $rand operator generates a random float value between 0 and 1.
+- **Syntax:**
+
+```javascript
+{
+  $rand: {
+  }
+}
+```
+
+- **Doc Link:** https://learn.microsoft.com/en-us/azure/documentdb/operators/miscellaneous-query/$rand
+
+### $natural
+
+- **Description:** The $natural operator forces the query to use the natural order of documents in a collection, providing control over document ordering and retrieval.
+- **Syntax:**
+
+```javascript
+{
+  $natural: <1 | -1>
+}
+```
+
+- **Doc Link:** https://learn.microsoft.com/en-us/azure/documentdb/operators/miscellaneous-query/$natural
+
+## Field Update Operators
+
+### $currentDate
+
+- **Description:** The $currentDate operator sets the value of a field to the current date, either as a Date or a timestamp.
+- **Syntax:**
+
+```javascript
+{
+  $currentDate: {
+    <field1>: <typeSpecification1>,
+    <field2>: <typeSpecification2>,
+    ...
+  }
+}
+```
+
+- **Doc Link:** https://learn.microsoft.com/en-us/azure/documentdb/operators/field-update/$currentdate
+
+### $inc
+
+- **Description:** The $inc operator increments the value of a field by a specified amount.
+- **Syntax:**
+
+```javascript
+{
+  $inc: {
+    <field1>: <amount1>,
+    <field2>: <amount2>,
+    ...
+  }
+}
+```
+
+- **Doc Link:** https://learn.microsoft.com/en-us/azure/documentdb/operators/field-update/$inc
+
+### $min
+
+- **Description:** Retrieves the minimum value for a specified field
+- **Syntax:**
+
+```javascript
+$min: <expression>
+```
+
+- **Doc Link:** none
+- **Scraper Comment:** Doc page not found in expected directory 'field-update/'. Content scraped from 'accumulators/'.
+
+### $max
+
+- **Description:** The $max operator returns the maximum value from a set of input values.
+- **Syntax:**
+
+```javascript
+$max: <expression>
+```
+
+- **Doc Link:** none
+- **Scraper Comment:** Doc page not found in expected directory 'field-update/'. Content scraped from 'accumulators/'.
+
+### $mul
+
+- **Description:** The $mul operator multiplies the value of a field by a specified number.
+- **Syntax:**
+
+```javascript
+{
+  $mul: {
+    <field1>: <number1>,
+    <field2>: <number2>,
+    ...
+  }
+}
+```
+
+- **Doc Link:** https://learn.microsoft.com/en-us/azure/documentdb/operators/field-update/$mul
+
+### $rename
+
+- **Description:** The $rename operator allows renaming fields in documents during update operations.
+- **Syntax:**
+
+```javascript
+{
+  $rename: {
+    <field1>: <newName1>,
+    <field2>: <newName2>,
+    ...
+  }
+}
+```
+
+- **Doc Link:** https://learn.microsoft.com/en-us/azure/documentdb/operators/field-update/$rename
+
+### $set
+
+- **Description:** The $set operator in Azure DocumentDB updates or creates a new field with a specified value
+- **Syntax:**
+
+```javascript
+{
+  $set: {
+    newField: <expression>
+  }
+}
+```
+
+- **Doc Link:** none
+- **Scraper Comment:** Doc page not found in expected directory 'field-update/'. Content scraped from 'aggregation/'.
+
+### $setOnInsert
+
+- **Description:** The $setOnInsert operator sets field values only when an upsert operation results in an insert of a new document.
+- **Syntax:**
+
+```javascript
+{
+  $setOnInsert: {
+    <field1>: <value1>,
+    <field2>: <value2>,
+    ...
+  }
+}
+```
+
+- **Doc Link:** https://learn.microsoft.com/en-us/azure/documentdb/operators/field-update/$setoninsert
+
+### $unset
+
+- **Description:** The $unset stage in the aggregation pipeline is used to remove specified fields from documents.
+- **Syntax:**
+
+```javascript
+{
+    $unset: "<field1>" | ["<field1>", "<field2>", ...]
+}
+```
+
+- **Doc Link:** none
+- **Scraper Comment:** Doc page not found in expected directory 'field-update/'. Content scraped from 'aggregation/'.
+
+## Array Update Operators
+
+### $
+
+- **Description:** The $ positional operator identifies an element in an array to update without explicitly specifying the position of the element in the array.
+- **Syntax:**
+
+```javascript
+db.collection.updateOne(
+  { <array>: <value> },
+  { <update operator>: { "<array>.$": <value> } }
+)
+```
+
+- **Doc Link:** https://learn.microsoft.com/en-us/azure/documentdb/operators/array-update/$
+
+### $[]
+
+### $[identifier]
+
+### $addToSet
+
+- **Description:** The $addToSet operator adds elements to an array if they don't already exist, while ensuring uniqueness of elements within the set.
+- **Syntax:**
+
+```javascript
+{
+  $addToSet: { <field1>: <value1> }
+}
+```
+
+- **Doc Link:** https://learn.microsoft.com/en-us/azure/documentdb/operators/array-update/$addtoset
+
+### $pop
+
+- **Description:** Removes the first or last element of an array.
+- **Syntax:**
+
+```javascript
+{
+  $pop: {
+    <field>: <value>
+  }
+}
+```
+
+- **Doc Link:** https://learn.microsoft.com/en-us/azure/documentdb/operators/array-update/$pop
+
+### $pull
+
+- **Description:** Removes all instances of a value from an array.
+- **Syntax:**
+
+```javascript
+{
+  $pull: { <field>: <value|condition> }
+}
+```
+
+- **Doc Link:** https://learn.microsoft.com/en-us/azure/documentdb/operators/array-update/$pull
+
+### $push
+
+- **Description:** The $push operator adds a specified value to an array within a document.
+- **Syntax:**
+
+```javascript
+db.collection.update({
+    < query >
+}, {
+    $push: {
+        < field >: < value >
+    }
+}, {
+    < options >
+})
+```
+
+- **Doc Link:** https://learn.microsoft.com/en-us/azure/documentdb/operators/array-update/$push
+
+### $pullAll
+
+- **Description:** The $pullAll operator is used to remove all instances of the specified values from an array.
+- **Syntax:**
+
+```javascript
+{
+  $pullAll: { <field1>: [ <value1>, <value2>] }
+}
+```
+
+- **Doc Link:** https://learn.microsoft.com/en-us/azure/documentdb/operators/array-update/$pullall
+
+### $each
+
+- **Description:** The $each operator is used within an `$addToSet` or `$push` operation to add multiple elements to an array field in a single update operation.
+- **Syntax:**
+
+```javascript
+{
+  $push: {
+    <field>: {
+      $each: [ <value1>, <value2>],
+      <modifier1>: <value1>,
+      <modifier2>: <value2>
+    }
+  }
+}
+```
+
+- **Doc Link:** https://learn.microsoft.com/en-us/azure/documentdb/operators/array-update/$each
+
+### $position
+
+### $slice
+
+- **Description:** The $slice operator returns a subset of an array from any element onwards in the array.
+- **Syntax:**
+
+```javascript
+{
+  $slice: [ <array>, <n> ]
+}
+```
+
+- **Doc Link:** none
+- **Scraper Comment:** Doc page not found in expected directory 'array-update/'. Content scraped from 'array-expression/'.
+
+### $sort
+
+- **Description:** The $sort stage in the aggregation pipeline is used to order the documents in the pipeline by a specified field or fields.
+- **Syntax:**
+
+```javascript
+{
+    $sort: {
+        < field1 >: < sort order > ,
+        < field2 >: < sort order >
+    }
+}
+```
+
+- **Doc Link:** none
+- **Scraper Comment:** Doc page not found in expected directory 'array-update/'. Content scraped from 'aggregation/'.
+
+## Bitwise Update Operators
+
+### $bit
+
+- **Description:** The `$bit` operator is used to perform bitwise operations on integer values.
+- **Syntax:**
+
+```javascript
+{
+    $bit: {
+        < field >: {
+            < operator >: < number >
+        }
+    }
+}
+```
+
+- **Doc Link:** https://learn.microsoft.com/en-us/azure/documentdb/operators/bitwise-update/$bit
+
+## Arithmetic Expression Operators
+
+### $abs
+
+- **Description:** The $abs operator returns the absolute value of a number.
+- **Syntax:**
+
+```javascript
+{
+  $abs: <expression>
+}
+```
+
+- **Doc Link:** https://learn.microsoft.com/en-us/azure/documentdb/operators/arithmetic-expression/$abs
+
+### $add
+
+- **Description:** The $add operator returns the sum of two numbers or the sum of a date and numbers.
+- **Syntax:**
+
+```javascript
+{
+  $add: [ <listOfExpressions> ]
+}
+```
+
+- **Doc Link:** https://learn.microsoft.com/en-us/azure/documentdb/operators/arithmetic-expression/$add
+
+### $ceil
+
+- **Description:** The $ceil operator returns the smallest integer greater than or equal to the specified number.
+- **Syntax:**
+
+```javascript
+{
+  $ceil: <number>
+}
+```
+
+- **Doc Link:** https://learn.microsoft.com/en-us/azure/documentdb/operators/arithmetic-expression/$ceil
+
+### $divide
+
+- **Description:** The $divide operator divides two numbers and returns the quotient.
+- **Syntax:**
+
+```javascript
+{
+  $divide: [ <dividend>, <divisor> ]
+}
+```
+
+- **Doc Link:** https://learn.microsoft.com/en-us/azure/documentdb/operators/arithmetic-expression/$divide
+
+### $exp
+
+- **Description:** The $exp operator raises e to the specified exponent and returns the result
+- **Syntax:**
+
+```javascript
+{
+  $exp: <exponent>
+}
+```
+
+- **Doc Link:** https://learn.microsoft.com/en-us/azure/documentdb/operators/arithmetic-expression/$exp
+
+### $floor
+
+- **Description:** The $floor operator returns the largest integer less than or equal to the specified number
+- **Syntax:**
+
+```javascript
+{
+  $floor: <number>
+}
+```
+
+- **Doc Link:** https://learn.microsoft.com/en-us/azure/documentdb/operators/arithmetic-expression/$floor
+
+### $ln
+
+- **Description:** The $ln operator calculates the natural logarithm of the input
+- **Syntax:**
+
+```javascript
+{
+  $ln: <number>
+}
+```
+
+- **Doc Link:** https://learn.microsoft.com/en-us/azure/documentdb/operators/arithmetic-expression/$ln
+
+### $log
+
+- **Description:** The $log operator calculates the logarithm of a number in the specified base
+- **Syntax:**
+
+```javascript
+{
+  $log: [ <number>, <base> ]
+}
+```
+
+- **Doc Link:** https://learn.microsoft.com/en-us/azure/documentdb/operators/arithmetic-expression/$log
+
+### $log10
+
+- **Description:** The $log10 operator calculates the log of a specified number in base 10
+- **Syntax:**
+
+```javascript
+{
+  $log10: <number>
+}
+```
+
+- **Doc Link:** https://learn.microsoft.com/en-us/azure/documentdb/operators/arithmetic-expression/$log10
+
+### $mod
+
+- **Description:** The $mod operator performs a modulo operation on the value of a field and selects documents with a specified result.
+- **Syntax:**
+
+```javascript
+{
+  <field>: { $mod: [ <divisor>, <remainder> ] }
+}
+```
+
+- **Doc Link:** none
+- **Scraper Comment:** Doc page not found in expected directory 'arithmetic-expression/'. Content scraped from 'evaluation-query/'.
+
+### $multiply
+
+- **Description:** The $multiply operator multiplies the input numerical values
+- **Syntax:**
+
+```javascript
+{
+  $multiply: [ <listOfValues> ]
+}
+```
+
+- **Doc Link:** https://learn.microsoft.com/en-us/azure/documentdb/operators/arithmetic-expression/$multiply
+
+### $pow
+
+- **Description:** The `$pow` operator calculates the value of a numerical value raised to the power of a specified exponent.
+- **Syntax:**
+
+```javascript
+{
+  $pow: [ <number>, <exponent> ]
+}
+```
+
+- **Doc Link:** https://learn.microsoft.com/en-us/azure/documentdb/operators/arithmetic-expression/$pow
+
+### $round
+
+- **Description:** The $round operator rounds a number to a specified decimal place.
+- **Syntax:**
+
+```javascript
+{
+  $round: [ <number>, <place> ]
+}
+```
+
+- **Doc Link:** https://learn.microsoft.com/en-us/azure/documentdb/operators/arithmetic-expression/$round
+
+### $sqrt
+
+- **Description:** The $sqrt operator calculates and returns the square root of an input number
+- **Syntax:**
+
+```javascript
+{
+  $sqrt: <expression>
+}
+```
+
+- **Doc Link:** https://learn.microsoft.com/en-us/azure/documentdb/operators/arithmetic-expression/$sqrt
+
+### $subtract
+
+- **Description:** The $subtract operator subtracts two numbers and returns the result.
+- **Syntax:**
+
+```javascript
+{
+  $subtract: [ <expression 1>, <expression 2> ]
+}
+```
+
+- **Doc Link:** https://learn.microsoft.com/en-us/azure/documentdb/operators/arithmetic-expression/$subtract
+
+### $trunc
+
+- **Description:** The $trunc operator truncates a number to a specified decimal place.
+- **Syntax:**
+
+```javascript
+{
+  $trunc: [ <number>, <decimal place> ]
+}
+```
+
+- **Doc Link:** https://learn.microsoft.com/en-us/azure/documentdb/operators/arithmetic-expression/$trunc
+
+## Array Expression Operators
+
+### $arrayElemAt
+
+- **Description:** The $arrayElemAt returns the element at the specified array index.
+- **Syntax:**
+
+```javascript
+{
+  $arrayElemAt: ["<array>", <idx>]
+}
+```
+
+- **Doc Link:** https://learn.microsoft.com/en-us/azure/documentdb/operators/array-expression/$arrayelemat
+
+### $arrayToObject
+
+- **Description:** The $arrayToObject allows converting an array into a single document.
+- **Syntax:**
+
+```javascript
+{
+  $arrayToObject: '<array>';
+}
+```
+
+- **Doc Link:** https://learn.microsoft.com/en-us/azure/documentdb/operators/array-expression/$arraytoobject
+
+### $concatArrays
+
+- **Description:** The $concatArrays is used to combine multiple arrays into a single array.
+- **Syntax:**
+
+```javascript
+{
+  $concatArrays: ['<array1>', '<array2>'];
+}
+```
+
+- **Doc Link:** https://learn.microsoft.com/en-us/azure/documentdb/operators/array-expression/$concatarrays
+
+### $filter
+
+- **Description:** The $filter operator filters for elements from an array based on a specified condition.
+- **Syntax:**
+
+```javascript
+{
+  $filter: {
+    input: "<array>",
+    as: "<string>",
+    cond: "<expression>"
+  }
+}
+```
+
+- **Doc Link:** https://learn.microsoft.com/en-us/azure/documentdb/operators/array-expression/$filter
+
+### $firstN
+
+- **Description:** The $firstN operator sorts documents on one or more fields specified by the query and returns the first N documents matching the filtering criteria
+- **Syntax:**
+
+```javascript
+{
+    $firstN: {
+        input: [listOfFields],
+        sortBy: {
+            <fieldName>: <sortOrder>
+        },
+        n: <numDocumentsToReturn>
+    }
+}
+```
+
+- **Doc Link:** none
+- **Scraper Comment:** Doc page not found in expected directory 'array-expression/'. Content scraped from 'accumulators/'.
+
+### $in
+
+- **Description:** The $in operator matches the value of a field against an array of specified values
+- **Syntax:**
+
+```javascript
+{
+  field: {
+    $in: [listOfValues];
+  }
+}
+```
+
+- **Doc Link:** none
+- **Scraper Comment:** Doc page not found in expected directory 'array-expression/'. Content scraped from 'comparison-query/'.
+
+### $indexOfArray
+
+- **Description:** The $indexOfArray operator is used to search for an element in an array and return the index of the first occurrence of the element.
+- **Syntax:**
+
+```javascript
+{
+    $indexOfArray: [ < array > , < searchElement > , < start > , < end > ]
+}
+```
+
+- **Doc Link:** https://learn.microsoft.com/en-us/azure/documentdb/operators/array-expression/$indexofarray
+
+### $isArray
+
+- **Description:** The $isArray operator is used to determine if a specified value is an array.
+- **Syntax:**
+
+```javascript
+{
+  $isArray: <expression>
+}
+```
+
+- **Doc Link:** https://learn.microsoft.com/en-us/azure/documentdb/operators/array-expression/$isarray
+
+### $lastN
+
+- **Description:** The $lastN accumulator operator returns the last N values in a group of documents.
+- **Syntax:**
+
+```javascript
+{
+    $group: {
+        _id: < expression > ,
+        < field >: {
+            $lastN: {
+                 n: < number >,
+                 input: < expression >
+            }
+        }
+    }
+}
+```
+
+- **Doc Link:** none
+- **Scraper Comment:** Doc page not found in expected directory 'array-expression/'. Content scraped from 'accumulators/'.
+
+### $map
+
+- **Description:** The $map operator allows applying an expression to each element in an array.
+- **Syntax:**
+
+```javascript
+{
+  $map: {
+    input: <array>,
+    as: <variable>,
+    in: <expression>
+  }
+}
+```
+
+- **Doc Link:** https://learn.microsoft.com/en-us/azure/documentdb/operators/array-expression/$map
+
+### $maxN
+
+- **Description:** Retrieves the top N values based on a specified filtering criteria
+- **Syntax:**
+
+```javascript
+$maxN: {
+    input: < field or expression > ,
+    n: < number of values to retrieve >
+}
+```
+
+- **Doc Link:** none
+- **Scraper Comment:** Doc page not found in expected directory 'array-expression/'. Content scraped from 'accumulators/'.
+
+### $minN
+
+- **Description:** Retrieves the bottom N values based on a specified filtering criteria
+- **Syntax:**
+
+```javascript
+$minN: {
+    input: < field or expression > ,
+    n: < number of values to retrieve >
+}
+```
+
+- **Doc Link:** none
+- **Scraper Comment:** Doc page not found in expected directory 'array-expression/'. Content scraped from 'accumulators/'.
+
+### $objectToArray
+
+### $range
+
+- **Description:** The $range operator allows generating an array of sequential integers.
+- **Syntax:**
+
+```javascript
+{
+    $range: [ <start>, <end>, <step> ]
+}
+```
+
+- **Doc Link:** https://learn.microsoft.com/en-us/azure/documentdb/operators/array-expression/$range
+
+### $reduce
+
+- **Description:** The $reduce operator applies an expression to each element in an array and accumulates the result as a single value.
+- **Syntax:**
+
+```javascript
+$reduce: {
+   input: <array>,
+   initialValue: <expression>,
+   in: <expression>
+}
+```
+
+- **Doc Link:** https://learn.microsoft.com/en-us/azure/documentdb/operators/array-expression/$reduce
+
+### $reverseArray
+
+- **Description:** The $reverseArray operator is used to reverse the order of elements in an array.
+- **Syntax:**
+
+```javascript
+{
+  $reverseArray: <array>
+}
+```
+
+- **Doc Link:** https://learn.microsoft.com/en-us/azure/documentdb/operators/array-expression/$reversearray
+
+### $size
+
+- **Description:** The $size operator is used to query documents where an array field has a specified number of elements.
+- **Syntax:**
+
+```javascript
+db.collection.find({ <field>: { $size: <number> } })
+```
+
+- **Doc Link:** none
+- **Scraper Comment:** Doc page not found in expected directory 'array-expression/'. Content scraped from 'array-query/'.
+
+### $slice
+
+- **Description:** The $slice operator returns a subset of an array from any element onwards in the array.
+- **Syntax:**
+
+```javascript
+{
+  $slice: [ <array>, <n> ]
+}
+```
+
+- **Doc Link:** https://learn.microsoft.com/en-us/azure/documentdb/operators/array-expression/$slice
+
+### $sortArray
+
+- **Description:** The $sortArray operator helps in sorting the elements in an array.
+- **Syntax:**
+
+```javascript
+{
+  $sortArray: {
+    input: <arrayExpression>,
+    sortBy: <sortSpecification>
+  }
+}
+```
+
+- **Doc Link:** https://learn.microsoft.com/en-us/azure/documentdb/operators/array-expression/$sortarray
+
+### $zip
+
+- **Description:** The $zip operator allows merging two or more arrays element-wise into a single array of arrays.
+- **Syntax:**
+
+```javascript
+{
+  $zip: {
+    inputs: [ <array1>, <array2>, ... ],
+    useLongestLength: <boolean>, // Optional
+    defaults: <array> // Optional
+  }
+}
+```
+
+- **Doc Link:** https://learn.microsoft.com/en-us/azure/documentdb/operators/array-expression/$zip
+
+## Bitwise Operators
+
+### $bitAnd
+
+- **Description:** The $bitAnd operator performs a bitwise AND operation on integer values and returns the result as an integer.
+- **Syntax:**
+
+```javascript
+{
+  $bitAnd: [ <expression1>, <expression2>, ... ]
+}
+```
+
+- **Doc Link:** https://learn.microsoft.com/en-us/azure/documentdb/operators/bitwise/$bitand
+
+### $bitNot
+
+- **Description:** The $bitNot operator performs a bitwise NOT operation on integer values and returns the result as an integer.
+- **Syntax:**
+
+```javascript
+{
+  $bitNot: <expression>
+}
+```
+
+- **Doc Link:** https://learn.microsoft.com/en-us/azure/documentdb/operators/bitwise/$bitnot
+
+### $bitOr
+
+- **Description:** The $bitOr operator performs a bitwise OR operation on integer values and returns the result as an integer.
+- **Syntax:**
+
+```javascript
+{
+  $bitOr: [ <expression1>, <expression2>, ... ]
+}
+```
+
+- **Doc Link:** https://learn.microsoft.com/en-us/azure/documentdb/operators/bitwise/$bitor
+
+### $bitXor
+
+- **Description:** The $bitXor operator performs a bitwise XOR operation on integer values.
+- **Syntax:**
+
+```javascript
+{
+  $bitXor: [ <expression1>, <expression2>, ... ]
+}
+```
+
+- **Doc Link:** https://learn.microsoft.com/en-us/azure/documentdb/operators/bitwise/$bitxor
+
+## Boolean Expression Operators
+
+### $and
+
+- **Description:** The $and operator joins multiple query clauses and returns documents that match all specified conditions.
+- **Syntax:**
+
+```javascript
+{
+    $and: [{
+        < expression1 >
+    }, {
+        < expression2 >
+    }, ..., {
+        < expressionN >
+    }]
+}
+```
+
+- **Doc Link:** none
+- **Scraper Comment:** Doc page not found in expected directory 'boolean-expression/'. Content scraped from 'logical-query/'.
+
+### $not
+
+- **Description:** The $not operator performs a logical NOT operation on a specified expression, selecting documents that don't match the expression.
+- **Syntax:**
+
+```javascript
+{
+    field: {
+        $not: {
+            < operator - expression >
+        }
+    }
+}
+```
+
+- **Doc Link:** none
+- **Scraper Comment:** Doc page not found in expected directory 'boolean-expression/'. Content scraped from 'logical-query/'.
+
+### $or
+
+- **Description:** The $or operator joins query clauses with a logical OR and returns documents that match at least one of the specified conditions.
+- **Syntax:**
+
+```javascript
+{
+    $or: [{
+        < expression1 >
+    }, {
+        < expression2 >
+    }, ..., {
+        < expressionN >
+    }]
+}
+```
+
+- **Doc Link:** none
+- **Scraper Comment:** Doc page not found in expected directory 'boolean-expression/'. Content scraped from 'logical-query/'.
+
+## Comparison Expression Operators
+
+### $cmp
+
+- **Description:** The $cmp operator compares two values
+- **Syntax:**
+
+```javascript
+{
+  $cmp: [<firstValueToCompare>, <secondValueToCompare>]
+}
+```
+
+- **Doc Link:** none
+- **Scraper Comment:** Doc page not found in expected directory 'comparison-expression/'. Content scraped from 'comparison-query/'.
+
+### $eq
+
+- **Description:** The $eq query operator compares the value of a field to a specified value
+- **Syntax:**
+
+```javascript
+{
+    field: {
+        $eq: <value>
+    }
+}
+```
+
+- **Doc Link:** none
+- **Scraper Comment:** Doc page not found in expected directory 'comparison-expression/'. Content scraped from 'comparison-query/'.
+
+### $gt
+
+- **Description:** The $gt query operator retrieves documents where the value of a field is greater than a specified value
+- **Syntax:**
+
+```javascript
+{
+  field: {
+    $gt: value;
+  }
+}
+```
+
+- **Doc Link:** none
+- **Scraper Comment:** Doc page not found in expected directory 'comparison-expression/'. Content scraped from 'comparison-query/'.
+
+### $gte
+
+- **Description:** The $gte operator retrieves documents where the value of a field is greater than or equal to a specified value
+- **Syntax:**
+
+```javascript
+{
+    field: {
+        $gte: <value>
+    }
+}
+```
+
+- **Doc Link:** none
+- **Scraper Comment:** Doc page not found in expected directory 'comparison-expression/'. Content scraped from 'comparison-query/'.
+
+### $lt
+
+- **Description:** The $lt operator retrieves documents where the value of a field is less than a specified value
+- **Syntax:**
+
+```javascript
+{
+  field: {
+    $lt: value;
+  }
+}
+```
+
+- **Doc Link:** none
+- **Scraper Comment:** Doc page not found in expected directory 'comparison-expression/'. Content scraped from 'comparison-query/'.
+
+### $lte
+
+- **Description:** The $lte operator retrieves documents where the value of a field is less than or equal to a specified value
+- **Syntax:**
+
+```javascript
+{
+    field: {
+        $lte: <value>
+    }
+}
+```
+
+- **Doc Link:** none
+- **Scraper Comment:** Doc page not found in expected directory 'comparison-expression/'. Content scraped from 'comparison-query/'.
+
+### $ne
+
+- **Description:** The $ne operator retrieves documents where the value of a field doesn't equal a specified value
+- **Syntax:**
+
+```javascript
+{
+  field: {
+    $ne: value;
+  }
+}
+```
+
+- **Doc Link:** none
+- **Scraper Comment:** Doc page not found in expected directory 'comparison-expression/'. Content scraped from 'comparison-query/'.
+
+## Data Size Operators
+
+### $bsonSize
+
+- **Description:** The $bsonSize operator returns the size of a document in bytes when encoded as BSON.
+- **Syntax:**
+
+```javascript
+{
+  $bsonSize: <expression>
+}
+```
+
+- **Doc Link:** https://learn.microsoft.com/en-us/azure/documentdb/operators/data-size/$bsonsize
+
+### $binarySize
+
+- **Description:** The $binarySize operator is used to return the size of a binary data field.
+- **Syntax:**
+
+```javascript
+{
+  $binarySize: '<field>';
+}
+```
+
+- **Doc Link:** https://learn.microsoft.com/en-us/azure/documentdb/operators/data-size/$binarysize
+
+## Date Expression Operators
+
+### $dateAdd
+
+- **Description:** The $dateAdd operator adds a specified number of time units (day, hour, month etc) to a date.
+- **Syntax:**
+
+```javascript
+$dateAdd: {
+   startDate: <expression>,
+   unit: <string>,
+   amount: <number>,
+   timezone: <string>  // Optional
+}
+```
+
+- **Doc Link:** https://learn.microsoft.com/en-us/azure/documentdb/operators/date-expression/$dateadd
+
+### $dateDiff
+
+- **Description:** The $dateDiff operator calculates the difference between two dates in various units such as years, months, days, etc.
+- **Syntax:**
+
+```javascript
+$dateDiff: {
+   startDate: <expression>,
+   endDate: <expression>,
+   unit: <string>,
+   timezone: <string>, // Optional
+   startOfWeek: <string> // Optional
+}
+```
+
+- **Doc Link:** https://learn.microsoft.com/en-us/azure/documentdb/operators/date-expression/$datediff
+
+### $dateFromParts
+
+- **Description:** The $dateFromParts operator constructs a date from individual components.
+- **Syntax:**
+
+```javascript
+{
+    $dateFromParts: {
+        year: < year > ,
+        month: < month > ,
+        day: < day > ,
+        hour: < hour > ,
+        minute: < minute > ,
+        second: < second > ,
+        millisecond: < millisecond > ,
+        timezone: < timezone >
+    }
+}
+```
+
+- **Doc Link:** https://learn.microsoft.com/en-us/azure/documentdb/operators/date-expression/$datefromparts
+
+### $dateFromString
+
+- **Description:** The $dateFromString operator converts a date/time string to a date object.
+- **Syntax:**
+
+```javascript
+{
+    $dateFromString: {
+        dateString: < string > ,
+        format: < string > ,
+        timezone: < string > ,
+        onError: < expression > ,
+        onNull: < expression >
+    }
+}
+```
+
+- **Doc Link:** https://learn.microsoft.com/en-us/azure/documentdb/operators/date-expression/$datefromstring
+
+### $dateSubtract
+
+- **Description:** The $dateSubtract operator subtracts a specified amount of time from a date.
+- **Syntax:**
+
+```javascript
+{
+  $dateSubtract: {
+    startDate: <dateExpression>,
+    unit: "<unit>",
+    amount: <number>,
+    timezone: "<timezone>" // optional
+  }
+}
+```
+
+- **Doc Link:** https://learn.microsoft.com/en-us/azure/documentdb/operators/date-expression/$datesubtract
+
+### $dateToParts
+
+- **Description:** The $dateToParts operator decomposes a date into its individual parts such as year, month, day, and more.
+- **Syntax:**
+
+```javascript
+$dateToParts: {
+  date: <dateExpression>,
+  timezone: <string>, // optional
+  iso8601: <boolean> // optional
+}
+```
+
+- **Doc Link:** https://learn.microsoft.com/en-us/azure/documentdb/operators/date-expression/$datetoparts
+
+### $dateToString
+
+- **Description:** The $dateToString operator converts a date object into a formatted string.
+- **Syntax:**
+
+```javascript
+{
+  $dateToString: {
+    format: "<format_string>",
+    date: <date_expression>,
+    timezone: "<timezone>",
+    onNull: "<replacement_value>"
+  }
+}
+```
+
+- **Doc Link:** https://learn.microsoft.com/en-us/azure/documentdb/operators/date-expression/$datetostring
+
+### $dateTrunc
+
+- **Description:** The $dateTrunc operator truncates a date to a specified unit.
+- **Syntax:**
+
+```javascript
+$dateTrunc: {
+    date: <dateExpression>,
+    unit: "<unit>",
+    binSize: <number>,       // optional
+    timezone: "<timezone>",  // optional
+    startOfWeek: "<day>"     // optional (used when unit is "week")
+  }
+```
+
+- **Doc Link:** https://learn.microsoft.com/en-us/azure/documentdb/operators/date-expression/$datetrunc
+
+### $dayOfMonth
+
+- **Description:** The $dayOfMonth operator extracts the day of the month from a date.
+- **Syntax:**
+
+```javascript
+{
+  $dayOfMonth: <dateExpression>
+}
+```
+
+- **Doc Link:** https://learn.microsoft.com/en-us/azure/documentdb/operators/date-expression/$dayofmonth
+
+### $dayOfWeek
+
+- **Description:** The $dayOfWeek operator extracts the day of the week from a date.
+- **Syntax:**
+
+```javascript
+{
+  $dayOfWeek: <dateExpression>
+}
+```
+
+- **Doc Link:** https://learn.microsoft.com/en-us/azure/documentdb/operators/date-expression/$dayofweek
+
+### $dayOfYear
+
+- **Description:** The $dayOfYear operator extracts the day of the year from a date.
+- **Syntax:**
+
+```javascript
+{
+  $dayOfYear: <dateExpression>
+}
+```
+
+- **Doc Link:** https://learn.microsoft.com/en-us/azure/documentdb/operators/date-expression/$dayofyear
+
+### $hour
+
+- **Description:** The $hour operator returns the hour portion of a date as a number between 0 and 23.
+- **Syntax:**
+
+```javascript
+{
+  $hour: <dateExpression>
+}
+```
+
+- **Doc Link:** https://learn.microsoft.com/en-us/azure/documentdb/operators/date-expression/$hour
+
+### $isoDayOfWeek
+
+- **Description:** The $isoDayOfWeek operator returns the weekday number in ISO 8601 format, ranging from 1 (Monday) to 7 (Sunday).
+- **Syntax:**
+
+```javascript
+{
+  $isoDayOfWeek: <dateExpression>
+}
+```
+
+- **Doc Link:** https://learn.microsoft.com/en-us/azure/documentdb/operators/date-expression/$isodayofweek
+
+### $isoWeek
+
+- **Description:** The $isoWeek operator returns the week number of the year in ISO 8601 format, ranging from 1 to 53.
+- **Syntax:**
+
+```javascript
+{
+  $isoWeek: <dateExpression>
+}
+```
+
+- **Doc Link:** https://learn.microsoft.com/en-us/azure/documentdb/operators/date-expression/$isoweek
+
+### $isoWeekYear
+
+- **Description:** The $isoWeekYear operator returns the year number in ISO 8601 format, which can differ from the calendar year for dates at the beginning or end of the year.
+- **Syntax:**
+
+```javascript
+{
+  $isoWeekYear: <dateExpression>
+}
+```
+
+- **Doc Link:** https://learn.microsoft.com/en-us/azure/documentdb/operators/date-expression/$isoweekyear
+
+### $millisecond
+
+- **Description:** The $millisecond operator extracts the milliseconds portion from a date value.
+- **Syntax:**
+
+```javascript
+{
+  $millisecond: <dateExpression>
+}
+```
+
+- **Doc Link:** https://learn.microsoft.com/en-us/azure/documentdb/operators/date-expression/$millisecond
+
+### $minute
+
+- **Description:** The $minute operator extracts the minute portion from a date value.
+- **Syntax:**
+
+```javascript
+{
+  $minute: <dateExpression>
+}
+```
+
+- **Doc Link:** https://learn.microsoft.com/en-us/azure/documentdb/operators/date-expression/$minute
+
+### $month
+
+- **Description:** The $month operator extracts the month portion from a date value.
+- **Syntax:**
+
+```javascript
+{
+  $month: <dateExpression>
+}
+```
+
+- **Doc Link:** https://learn.microsoft.com/en-us/azure/documentdb/operators/date-expression/$month
+
+### $second
+
+- **Description:** The $second operator extracts the seconds portion from a date value.
+- **Syntax:**
+
+```javascript
+{
+  $second: <dateExpression>
+}
+```
+
+- **Doc Link:** https://learn.microsoft.com/en-us/azure/documentdb/operators/date-expression/$second
+
+### $toDate
+
+- **Description:** The $toDate operator converts supported types to a proper Date object.
+- **Syntax:**
+
+```javascript
+{
+  $toDate: <expression>
+}
+```
+
+- **Doc Link:** none
+- **Scraper Comment:** Doc page not found in expected directory 'date-expression/'. Content scraped from 'aggregation/type-expression/'.
+
+### $week
+
+- **Description:** The $week operator returns the week number for a date as a value between 0 and 53.
+- **Syntax:**
+
+```javascript
+{
+  $week: <dateExpression>
+}
+```
+
+- **Doc Link:** https://learn.microsoft.com/en-us/azure/documentdb/operators/date-expression/$week
+
+### $year
+
+- **Description:** The $year operator returns the year for a date as a four-digit number.
+- **Syntax:**
+
+```javascript
+{
+  $year: <dateExpression>
+}
+```
+
+- **Doc Link:** https://learn.microsoft.com/en-us/azure/documentdb/operators/date-expression/$year
+
+## Literal Expression Operator
+
+### $literal
+
+- **Description:** The $literal operator returns the specified value without parsing it as an expression, allowing literal values to be used in aggregation pipelines.
+- **Syntax:**
+
+```javascript
+{
+  $literal: <value>
+}
+```
+
+- **Doc Link:** https://learn.microsoft.com/en-us/azure/documentdb/operators/literal-expression/$literal
+
+## Miscellaneous Operators
+
+### $getField
+
+- **Description:** The $getField operator allows retrieving the value of a specified field from a document.
+- **Syntax:**
+
+```javascript
+{
+  $getField: {
+    field: <string>,
+    input: <document>
+  }
+}
+```
+
+- **Doc Link:** https://learn.microsoft.com/en-us/azure/documentdb/operators/miscellaneous/$getfield
+
+### $rand
+
+- **Description:** The $rand operator generates a random float value between 0 and 1.
+- **Syntax:**
+
+```javascript
+{
+  $rand: {
+  }
+}
+```
+
+- **Doc Link:** none
+- **Scraper Comment:** Doc page not found in expected directory 'miscellaneous/'. Content scraped from 'miscellaneous-query/'.
+
+### $sampleRate
+
+- **Description:** The $sampleRate operator randomly samples documents from a collection based on a specified probability rate, useful for statistical analysis and testing.
+- **Syntax:**
+
+```javascript
+{
+  $match: {
+    $sampleRate: <number>
+  }
+}
+```
+
+- **Doc Link:** https://learn.microsoft.com/en-us/azure/documentdb/operators/miscellaneous/$samplerate
+
+## Object Expression Operators
+
+### $mergeObjects
+
+- **Description:** The $mergeObjects operator merges multiple documents into a single document
+- **Syntax:**
+
+```javascript
+{
+  $mergeObjects: [ < document1 > , < document2 > , ...]
+}
+```
+
+- **Doc Link:** https://learn.microsoft.com/en-us/azure/documentdb/operators/object-expression/$mergeobjects
+
+### $objectToArray
+
+- **Description:** The $objectToArray operator is used to transform a document (object) into an array of key-value pairs.
+- **Syntax:**
+
+```javascript
+{
+  $objectToArray: <object>
+}
+```
+
+- **Doc Link:** https://learn.microsoft.com/en-us/azure/documentdb/operators/object-expression/$objecttoarray
+
+### $setField
+
+- **Description:** The $setField operator is used to add, update, or remove fields in embedded documents.
+- **Syntax:**
+
+```javascript
+{
+  $setField: {
+    field: <fieldName>,
+    input: <expression>,
+    value: <expression>
+  }
+}
+```
+
+- **Doc Link:** https://learn.microsoft.com/en-us/azure/documentdb/operators/object-expression/$setfield
+
+## Set Expression Operators
+
+### $allElementsTrue
+
+- **Description:** The $allElementsTrue operator returns true if all elements in an array evaluate to true.
+- **Syntax:**
+
+```javascript
+{
+  $allElementsTrue: [ <array> ]
+}
+```
+
+- **Doc Link:** https://learn.microsoft.com/en-us/azure/documentdb/operators/set-expression/$allelementstrue
+
+### $anyElementTrue
+
+- **Description:** The $anyElementTrue operator returns true if any element in an array evaluates to a value of true.
+- **Syntax:**
+
+```javascript
+{
+  $anyElementTrue: [ <array> ]
+}
+```
+
+- **Doc Link:** https://learn.microsoft.com/en-us/azure/documentdb/operators/set-expression/$anyelementtrue
+
+### $setDifference
+
+- **Description:** The $setDifference operator returns a set with elements that exist in one set but not in a second set.
+- **Syntax:**
+
+```javascript
+{
+  $setDifference: [ <array1>, <array2> ]
+}
+```
+
+- **Doc Link:** https://learn.microsoft.com/en-us/azure/documentdb/operators/set-expression/$setdifference
+
+### $setEquals
+
+- **Description:** The $setEquals operator returns true if two sets have the same distinct elements.
+- **Syntax:**
+
+```javascript
+{
+  $setEquals: [ <array1>, <array2>, ... ]
+}
+```
+
+- **Doc Link:** https://learn.microsoft.com/en-us/azure/documentdb/operators/set-expression/$setequals
+
+### $setIntersection
+
+- **Description:** The $setIntersection operator returns the common elements that appear in all input arrays.
+- **Syntax:**
+
+```javascript
+{
+  $setIntersection: [ <array1>, <array2>, ... ]
+}
+```
+
+- **Doc Link:** https://learn.microsoft.com/en-us/azure/documentdb/operators/set-expression/$setintersection
+
+### $setIsSubset
+
+- **Description:** The $setIsSubset operator determines if one array is a subset of a second array.
+- **Syntax:**
+
+```javascript
+{
+  $setIsSubset: [ <array1>, <array2> ]
+}
+```
+
+- **Doc Link:** https://learn.microsoft.com/en-us/azure/documentdb/operators/set-expression/$setissubset
+
+### $setUnion
+
+- **Description:** The $setUnion operator returns an array that contains all the unique elements from the input arrays.
+- **Syntax:**
+
+```javascript
+{
+  $setUnion: [ <array1>, <array2>, ... ]
+}
+```
+
+- **Doc Link:** https://learn.microsoft.com/en-us/azure/documentdb/operators/set-expression/$setunion
+
+## String Expression Operators
+
+### $concat
+
+### $dateFromString
+
+- **Description:** The $dateFromString operator converts a date/time string to a date object.
+- **Syntax:**
+
+```javascript
+{
+    $dateFromString: {
+        dateString: < string > ,
+        format: < string > ,
+        timezone: < string > ,
+        onError: < expression > ,
+        onNull: < expression >
+    }
+}
+```
+
+- **Doc Link:** none
+- **Scraper Comment:** Doc page not found in expected directory 'string-expression/'. Content scraped from 'date-expression/'.
+
+### $dateToString
+
+- **Description:** The $dateToString operator converts a date object into a formatted string.
+- **Syntax:**
+
+```javascript
+{
+  $dateToString: {
+    format: "<format_string>",
+    date: <date_expression>,
+    timezone: "<timezone>",
+    onNull: "<replacement_value>"
+  }
+}
+```
+
+- **Doc Link:** none
+- **Scraper Comment:** Doc page not found in expected directory 'string-expression/'. Content scraped from 'date-expression/'.
+
+### $indexOfBytes
+
+### $indexOfCP
+
+### $ltrim
+
+### $regexFind
+
+### $regexFindAll
+
+### $regexMatch
+
+### $replaceOne
+
+### $replaceAll
+
+### $rtrim
+
+### $split
+
+### $strLenBytes
+
+### $strLenCP
+
+### $strcasecmp
+
+### $substr
+
+### $substrBytes
+
+### $substrCP
+
+### $toLower
+
+### $toString
+
+- **Description:** The $toString operator converts an expression into a String
+- **Syntax:**
+
+```javascript
+{
+    $toString: < expression >
+}
+```
+
+- **Doc Link:** none
+- **Scraper Comment:** Doc page not found in expected directory 'string-expression/'. Content scraped from 'aggregation/type-expression/'.
+
+### $trim
+
+### $toUpper
+
+## Timestamp Expression Operators
+
+### $tsIncrement
+
+- **Description:** The $tsIncrement operator extracts the increment portion from a timestamp value.
+- **Syntax:**
+
+```javascript
+{
+  $tsIncrement: <expression>
+}
+```
+
+- **Doc Link:** https://learn.microsoft.com/en-us/azure/documentdb/operators/timestamp-expression/$tsincrement
+
+### $tsSecond
+
+- **Description:** The $tsSecond operator extracts the seconds portion from a timestamp value.
+- **Syntax:**
+
+```javascript
+{
+  $tsSecond: <expression>
+}
+```
+
+- **Doc Link:** https://learn.microsoft.com/en-us/azure/documentdb/operators/timestamp-expression/$tssecond
+
+## Trigonometry Expression Operators
+
+### $sin
+
+### $cos
+
+### $tan
+
+### $asin
+
+### $acos
+
+### $atan
+
+### $atan2
+
+### $asinh
+
+### $acosh
+
+### $atanh
+
+### $sinh
+
+### $cosh
+
+### $tanh
+
+### $degreesToRadians
+
+### $radiansToDegrees
+
+## Type Expression Operators
+
+### $convert
+
+- **Description:** The $convert operator converts an expression into the specified type
+- **Syntax:**
+
+```javascript
+{
+    $convert: {
+        input: < expression > ,
+        to: < type > ,
+        format: < binData format > ,
+        onError: < value to return on error > ,
+        onNull: < value to return on null >
+    }
+}
+```
+
+- **Doc Link:** https://learn.microsoft.com/en-us/azure/documentdb/operators/aggregation/type-expression/$convert
+
+### $isNumber
+
+- **Description:** The $isNumber operator checks if a specified expression is a numerical type
+- **Syntax:**
+
+```javascript
+{
+    $isNumber: < expression >
+}
+```
+
+- **Doc Link:** https://learn.microsoft.com/en-us/azure/documentdb/operators/aggregation/type-expression/$isnumber
+
+### $toBool
+
+- **Description:** The $toBool operator converts an expression into a Boolean type
+- **Syntax:**
+
+```javascript
+{
+    $toBool: < expression >
+}
+```
+
+- **Doc Link:** https://learn.microsoft.com/en-us/azure/documentdb/operators/aggregation/type-expression/$tobool
+
+### $toDate
+
+- **Description:** The $toDate operator converts supported types to a proper Date object.
+- **Syntax:**
+
+```javascript
+{
+  $toDate: <expression>
+}
+```
+
+- **Doc Link:** https://learn.microsoft.com/en-us/azure/documentdb/operators/aggregation/type-expression/$todate
+
+### $toDecimal
+
+- **Description:** The $toDecimal operator converts an expression into a Decimal type
+- **Syntax:**
+
+```javascript
+{
+    $toDecimal: < expression >
+}
+```
+
+- **Doc Link:** https://learn.microsoft.com/en-us/azure/documentdb/operators/aggregation/type-expression/$todecimal
+
+### $toDouble
+
+- **Description:** The $toDouble operator converts an expression into a Double value
+- **Syntax:**
+
+```javascript
+{
+    $toDouble: < expression >
+}
+```
+
+- **Doc Link:** https://learn.microsoft.com/en-us/azure/documentdb/operators/aggregation/type-expression/$todouble
+
+### $toInt
+
+- **Description:** The $toInt operator converts an expression into an Integer
+- **Syntax:**
+
+```javascript
+{
+    $toInt: < expression >
+}
+```
+
+- **Doc Link:** https://learn.microsoft.com/en-us/azure/documentdb/operators/aggregation/type-expression/$toint
+
+### $toLong
+
+- **Description:** The $toLong operator converts an expression into a Long value
+- **Syntax:**
+
+```javascript
+{
+    $toLong: < expression >
+}
+```
+
+- **Doc Link:** https://learn.microsoft.com/en-us/azure/documentdb/operators/aggregation/type-expression/$tolong
+
+### $toObjectId
+
+- **Description:** The $toObjectId operator converts an expression into an ObjectId
+- **Syntax:**
+
+```javascript
+{
+    $toObjectId: < expression >
+}
+```
+
+- **Doc Link:** https://learn.microsoft.com/en-us/azure/documentdb/operators/aggregation/type-expression/$toobjectid
+
+### $toString
+
+- **Description:** The $toString operator converts an expression into a String
+- **Syntax:**
+
+```javascript
+{
+    $toString: < expression >
+}
+```
+
+- **Doc Link:** https://learn.microsoft.com/en-us/azure/documentdb/operators/aggregation/type-expression/$tostring
+
+### $type
+
+- **Description:** The $type operator retrieves documents if the chosen field is of the specified type.
+- **Syntax:**
+
+```javascript
+{
+  <field>: { $type: <BSON type number> | <string alias> }
+}
+```
+
+- **Doc Link:** none
+- **Scraper Comment:** Doc page not found in expected directory 'aggregation/type-expression/'. Content scraped from 'element-query/'.
+
+## Accumulators ($group, $bucket, $bucketAuto, $setWindowFields)
+
+### $addToSet
+
+- **Description:** The $addToSet operator adds elements to an array if they don't already exist, while ensuring uniqueness of elements within the set.
+- **Syntax:**
+
+```javascript
+{
+  $addToSet: { <field1>: <value1> }
+}
+```
+
+- **Doc Link:** none
+- **Scraper Comment:** Doc page not found in expected directory 'accumulators/'. Content scraped from 'array-update/'.
+
+### $avg
+
+- **Description:** Computes the average of numeric values for documents in a group, bucket, or window.
+- **Syntax:**
+
+```javascript
+$avg: <field or expression>
+```
+
+- **Doc Link:** https://learn.microsoft.com/en-us/azure/documentdb/operators/accumulators/$avg
+
+### $bottom
+
+- **Description:** The $bottom operator returns the last document from the query's result set sorted by one or more fields
+- **Syntax:**
+
+```javascript
+{
+    $bottom: {
+        output: [listOfFields],
+        sortBy: {
+            <fieldName>: < sortOrder >
+        }
+    }
+}
+```
+
+- **Doc Link:** https://learn.microsoft.com/en-us/azure/documentdb/operators/accumulators/$bottom
+
+### $bottomN
+
+- **Description:** The $bottomN operator returns the last N documents from the result sorted by one or more fields
+- **Syntax:**
+
+```javascript
+{
+    $bottomN: {
+        output: [listOfFields],
+        sortBy: {
+            <fieldName>: < sortOrder >
+        },
+        n: < numDocumentsToReturn >
+    }
+}
+```
+
+- **Doc Link:** https://learn.microsoft.com/en-us/azure/documentdb/operators/accumulators/$bottomn
+
+### $count
+
+- **Description:** The `$count` operator is used to count the number of documents that match a query filtering criteria.
+- **Syntax:**
+
+```javascript
+{
+  $count: '<fieldName>';
+}
+```
+
+- **Doc Link:** https://learn.microsoft.com/en-us/azure/documentdb/operators/accumulators/$count
+
+### $first
+
+- **Description:** The $first operator returns the first value in a group according to the group's sorting order.
+- **Syntax:**
+
+```javascript
+{
+    $first: <expression>
+}
+```
+
+- **Doc Link:** https://learn.microsoft.com/en-us/azure/documentdb/operators/accumulators/$first
+
+### $firstN
+
+- **Description:** The $firstN operator sorts documents on one or more fields specified by the query and returns the first N documents matching the filtering criteria
+- **Syntax:**
+
+```javascript
+{
+    $firstN: {
+        input: [listOfFields],
+        sortBy: {
+            <fieldName>: <sortOrder>
+        },
+        n: <numDocumentsToReturn>
+    }
+}
+```
+
+- **Doc Link:** https://learn.microsoft.com/en-us/azure/documentdb/operators/accumulators/$firstn
+
+### $last
+
+- **Description:** The $last operator returns the last document from the result sorted by one or more fields
+- **Syntax:**
+
+```javascript
+{
+  "$last": <expression>
+}
+```
+
+- **Doc Link:** https://learn.microsoft.com/en-us/azure/documentdb/operators/accumulators/$last
+
+### $lastN
+
+- **Description:** The $lastN accumulator operator returns the last N values in a group of documents.
+- **Syntax:**
+
+```javascript
+{
+    $group: {
+        _id: < expression > ,
+        < field >: {
+            $lastN: {
+                 n: < number >,
+                 input: < expression >
+            }
+        }
+    }
+}
+```
+
+- **Doc Link:** https://learn.microsoft.com/en-us/azure/documentdb/operators/accumulators/$lastn
+
+### $max
+
+- **Description:** The $max operator returns the maximum value from a set of input values.
+- **Syntax:**
+
+```javascript
+$max: <expression>
+```
+
+- **Doc Link:** https://learn.microsoft.com/en-us/azure/documentdb/operators/accumulators/$max
+
+### $maxN
+
+- **Description:** Retrieves the top N values based on a specified filtering criteria
+- **Syntax:**
+
+```javascript
+$maxN: {
+    input: < field or expression > ,
+    n: < number of values to retrieve >
+}
+```
+
+- **Doc Link:** https://learn.microsoft.com/en-us/azure/documentdb/operators/accumulators/$maxn
+
+### $median
+
+- **Description:** The $median operator calculates the median value of a numeric field in a group of documents.
+- **Syntax:**
+
+```javascript
+{
+    $group: {
+        _id: < expression > ,
+        medianValue: {
+            $median: {
+                input: < field or expression > ,
+                method: < method >
+            }
+        }
+    }
+}
+```
+
+- **Doc Link:** https://learn.microsoft.com/en-us/azure/documentdb/operators/accumulators/$median
+
+### $mergeObjects
+
+- **Description:** The $mergeObjects operator merges multiple documents into a single document
+- **Syntax:**
+
+```javascript
+{
+  $mergeObjects: [ < document1 > , < document2 > , ...]
+}
+```
+
+- **Doc Link:** none
+- **Scraper Comment:** Doc page not found in expected directory 'accumulators/'. Content scraped from 'object-expression/'.
+
+### $min
+
+- **Description:** Retrieves the minimum value for a specified field
+- **Syntax:**
+
+```javascript
+$min: <expression>
+```
+
+- **Doc Link:** https://learn.microsoft.com/en-us/azure/documentdb/operators/accumulators/$min
+
+### $percentile
+
+- **Description:** The $percentile operator calculates the percentile of numerical values that match a filtering criteria
+- **Syntax:**
+
+```javascript
+$percentile: {
+    input: < field or expression > ,
+    p: [ < percentile values > ],
+    method: < method >
+}
+```
+
+- **Doc Link:** https://learn.microsoft.com/en-us/azure/documentdb/operators/accumulators/$percentile
+
+### $push
+
+- **Description:** The $push operator adds a specified value to an array within a document.
+- **Syntax:**
+
+```javascript
+db.collection.update({
+    < query >
+}, {
+    $push: {
+        < field >: < value >
+    }
+}, {
+    < options >
+})
+```
+
+- **Doc Link:** none
+- **Scraper Comment:** Doc page not found in expected directory 'accumulators/'. Content scraped from 'array-update/'.
+
+### $stdDevPop
+
+- **Description:** The $stdDevPop operator calculates the standard deviation of the specified values
+- **Syntax:**
+
+```javascript
+{
+  $stdDevPop: {
+    fieldName;
+  }
+}
+```
+
+- **Doc Link:** https://learn.microsoft.com/en-us/azure/documentdb/operators/accumulators/$stddevpop
+
+### $stdDevSamp
+
+- **Description:** The $stdDevSamp operator calculates the standard deviation of a specified sample of values and not the entire population
+- **Syntax:**
+
+```javascript
+{
+  $stdDevSamp: {
+    fieldName;
+  }
+}
+```
+
+- **Doc Link:** https://learn.microsoft.com/en-us/azure/documentdb/operators/accumulators/$stddevsamp
+
+### $sum
+
+- **Description:** The $sum operator calculates the sum of the values of a field based on a filtering criteria
+- **Syntax:**
+
+```javascript
+{
+  $sum: <field or expression>
+}
+```
+
+- **Doc Link:** https://learn.microsoft.com/en-us/azure/documentdb/operators/accumulators/$sum
+
+### $top
+
+- **Description:** The $top operator returns the first document from the result set sorted by one or more fields
+- **Syntax:**
+
+```javascript
+{
+    $top: {
+      output: [listOfFields],
+      sortBy: {
+          < fieldName >: < sortOrder >
+      }
+  }
+}
+```
+
+- **Doc Link:** https://learn.microsoft.com/en-us/azure/documentdb/operators/accumulators/$top
+
+### $topN
+
+- **Description:** The $topN operator returns the first N documents from the result sorted by one or more fields
+- **Syntax:**
+
+```javascript
+{
+    $topN: {
+        output: [listOfFields],
+        sortBy: {
+            <fieldName>: < sortOrder >
+        },
+        n: < numDocumentsToReturn >
+    }
+}
+```
+
+- **Doc Link:** https://learn.microsoft.com/en-us/azure/documentdb/operators/accumulators/$topn
+
+## Accumulators (in Other Stages)
+
+### $avg
+
+- **Description:** Computes the average of numeric values for documents in a group, bucket, or window.
+- **Syntax:**
+
+```javascript
+$avg: <field or expression>
+```
+
+- **Doc Link:** https://learn.microsoft.com/en-us/azure/documentdb/operators/accumulators/$avg
+
+### $first
+
+- **Description:** The $first operator returns the first value in a group according to the group's sorting order.
+- **Syntax:**
+
+```javascript
+{
+    $first: <expression>
+}
+```
+
+- **Doc Link:** https://learn.microsoft.com/en-us/azure/documentdb/operators/accumulators/$first
+
+### $last
+
+- **Description:** The $last operator returns the last document from the result sorted by one or more fields
+- **Syntax:**
+
+```javascript
+{
+  "$last": <expression>
+}
+```
+
+- **Doc Link:** https://learn.microsoft.com/en-us/azure/documentdb/operators/accumulators/$last
+
+### $max
+
+- **Description:** The $max operator returns the maximum value from a set of input values.
+- **Syntax:**
+
+```javascript
+$max: <expression>
+```
+
+- **Doc Link:** https://learn.microsoft.com/en-us/azure/documentdb/operators/accumulators/$max
+
+### $median
+
+- **Description:** The $median operator calculates the median value of a numeric field in a group of documents.
+- **Syntax:**
+
+```javascript
+{
+    $group: {
+        _id: < expression > ,
+        medianValue: {
+            $median: {
+                input: < field or expression > ,
+                method: < method >
+            }
+        }
+    }
+}
+```
+
+- **Doc Link:** https://learn.microsoft.com/en-us/azure/documentdb/operators/accumulators/$median
+
+### $min
+
+- **Description:** Retrieves the minimum value for a specified field
+- **Syntax:**
+
+```javascript
+$min: <expression>
+```
+
+- **Doc Link:** https://learn.microsoft.com/en-us/azure/documentdb/operators/accumulators/$min
+
+### $percentile
+
+- **Description:** The $percentile operator calculates the percentile of numerical values that match a filtering criteria
+- **Syntax:**
+
+```javascript
+$percentile: {
+    input: < field or expression > ,
+    p: [ < percentile values > ],
+    method: < method >
+}
+```
+
+- **Doc Link:** https://learn.microsoft.com/en-us/azure/documentdb/operators/accumulators/$percentile
+
+### $stdDevPop
+
+- **Description:** The $stdDevPop operator calculates the standard deviation of the specified values
+- **Syntax:**
+
+```javascript
+{
+  $stdDevPop: {
+    fieldName;
+  }
+}
+```
+
+- **Doc Link:** https://learn.microsoft.com/en-us/azure/documentdb/operators/accumulators/$stddevpop
+
+### $stdDevSamp
+
+- **Description:** The $stdDevSamp operator calculates the standard deviation of a specified sample of values and not the entire population
+- **Syntax:**
+
+```javascript
+{
+  $stdDevSamp: {
+    fieldName;
+  }
+}
+```
+
+- **Doc Link:** https://learn.microsoft.com/en-us/azure/documentdb/operators/accumulators/$stddevsamp
+
+### $sum
+
+- **Description:** The $sum operator calculates the sum of the values of a field based on a filtering criteria
+- **Syntax:**
+
+```javascript
+{
+  $sum: <field or expression>
+}
+```
+
+- **Doc Link:** https://learn.microsoft.com/en-us/azure/documentdb/operators/accumulators/$sum
+
+## Variable Expression Operators
+
+### $let
+
+- **Description:** The $let operator allows defining variables for use in a specified expression, enabling complex calculations and reducing code repetition.
+- **Syntax:**
+
+```javascript
+{
+  $let: {
+    vars: {
+      <var1>: <expression1>,
+      <var2>: <expression2>,
+      ...
+    },
+    in: <expression>
+  }
+}
+```
+
+- **Doc Link:** https://learn.microsoft.com/en-us/azure/documentdb/operators/variable-expression/$let
+
+## Window Operators
+
+### $sum
+
+- **Description:** The $sum operator calculates the sum of the values of a field based on a filtering criteria
+- **Syntax:**
+
+```javascript
+{
+  $sum: <field or expression>
+}
+```
+
+- **Doc Link:** none
+- **Scraper Comment:** Doc page not found in expected directory 'window-operators/'. Content scraped from 'accumulators/'.
+
+### $push
+
+- **Description:** The $push operator adds a specified value to an array within a document.
+- **Syntax:**
+
+```javascript
+db.collection.update({
+    < query >
+}, {
+    $push: {
+        < field >: < value >
+    }
+}, {
+    < options >
+})
+```
+
+- **Doc Link:** none
+- **Scraper Comment:** Doc page not found in expected directory 'window-operators/'. Content scraped from 'array-update/'.
+
+### $addToSet
+
+- **Description:** The $addToSet operator adds elements to an array if they don't already exist, while ensuring uniqueness of elements within the set.
+- **Syntax:**
+
+```javascript
+{
+  $addToSet: { <field1>: <value1> }
+}
+```
+
+- **Doc Link:** none
+- **Scraper Comment:** Doc page not found in expected directory 'window-operators/'. Content scraped from 'array-update/'.
+
+### $count
+
+- **Description:** The `$count` operator is used to count the number of documents that match a query filtering criteria.
+- **Syntax:**
+
+```javascript
+{
+  $count: '<fieldName>';
+}
+```
+
+- **Doc Link:** none
+- **Scraper Comment:** Doc page not found in expected directory 'window-operators/'. Content scraped from 'accumulators/'.
+
+### $max
+
+- **Description:** The $max operator returns the maximum value from a set of input values.
+- **Syntax:**
+
+```javascript
+$max: <expression>
+```
+
+- **Doc Link:** none
+- **Scraper Comment:** Doc page not found in expected directory 'window-operators/'. Content scraped from 'accumulators/'.
+
+### $min
+
+- **Description:** Retrieves the minimum value for a specified field
+- **Syntax:**
+
+```javascript
+$min: <expression>
+```
+
+- **Doc Link:** none
+- **Scraper Comment:** Doc page not found in expected directory 'window-operators/'. Content scraped from 'accumulators/'.
+
+### $avg
+
+- **Description:** Computes the average of numeric values for documents in a group, bucket, or window.
+- **Syntax:**
+
+```javascript
+$avg: <field or expression>
+```
+
+- **Doc Link:** none
+- **Scraper Comment:** Doc page not found in expected directory 'window-operators/'. Content scraped from 'accumulators/'.
+
+### $stdDevPop
+
+- **Description:** The $stdDevPop operator calculates the standard deviation of the specified values
+- **Syntax:**
+
+```javascript
+{
+  $stdDevPop: {
+    fieldName;
+  }
+}
+```
+
+- **Doc Link:** none
+- **Scraper Comment:** Doc page not found in expected directory 'window-operators/'. Content scraped from 'accumulators/'.
+
+### $bottom
+
+- **Description:** The $bottom operator returns the last document from the query's result set sorted by one or more fields
+- **Syntax:**
+
+```javascript
+{
+    $bottom: {
+        output: [listOfFields],
+        sortBy: {
+            <fieldName>: < sortOrder >
+        }
+    }
+}
+```
+
+- **Doc Link:** none
+- **Scraper Comment:** Doc page not found in expected directory 'window-operators/'. Content scraped from 'accumulators/'.
+
+### $bottomN
+
+- **Description:** The $bottomN operator returns the last N documents from the result sorted by one or more fields
+- **Syntax:**
+
+```javascript
+{
+    $bottomN: {
+        output: [listOfFields],
+        sortBy: {
+            <fieldName>: < sortOrder >
+        },
+        n: < numDocumentsToReturn >
+    }
+}
+```
+
+- **Doc Link:** none
+- **Scraper Comment:** Doc page not found in expected directory 'window-operators/'. Content scraped from 'accumulators/'.
+
+### $covariancePop
+
+- **Description:** The $covariancePop operator returns the covariance of two numerical expressions
+- **Syntax:**
+
+```javascript
+{
+  $covariancePop: [ < numericalExpression1 > , < numericalExpression2 > ]
+}
+```
+
+- **Doc Link:** https://learn.microsoft.com/en-us/azure/documentdb/operators/window-operators/$covariancepop
+
+### $covarianceSamp
+
+- **Description:** The $covarianceSamp operator returns the covariance of a sample of two numerical expressions
+- **Syntax:**
+
+```javascript
+{
+    $covarianceSamp: [ < numericalExpression1 > , < numericalExpression2 > ]
+}
+```
+
+- **Doc Link:** https://learn.microsoft.com/en-us/azure/documentdb/operators/window-operators/$covariancesamp
+
+### $denseRank
+
+- **Description:** The $denseRank operator assigns and returns a positional ranking for each document within a partition based on a specified sort order
+- **Syntax:**
+
+```javascript
+{
+  $denseRank: {
+  }
+}
+```
+
+- **Doc Link:** https://learn.microsoft.com/en-us/azure/documentdb/operators/window-operators/$denserank
+
+### $derivative
+
+- **Description:** The $derivative operator calculates the average rate of change of the value of a field within a specified window.
+- **Syntax:**
+
+```javascript
+{
+    $derivative: {
+        input: < expression >,
+        unit: < timeWindow >
+    }
+}
+```
+
+- **Doc Link:** https://learn.microsoft.com/en-us/azure/documentdb/operators/window-operators/$derivative
+
+### $documentNumber
+
+- **Description:** The $documentNumber operator assigns and returns a position for each document within a partition based on a specified sort order
+- **Syntax:**
+
+```javascript
+{
+  $documentNumber: {
+  }
+}
+```
+
+- **Doc Link:** https://learn.microsoft.com/en-us/azure/documentdb/operators/window-operators/$documentnumber
+
+### $expMovingAvg
+
+- **Description:** The $expMovingAvg operator calculates the moving average of a field based on the specified number of documents to hold the highest weight
+- **Syntax:**
+
+```javascript
+{
+    $expMovingAvg: {
+        input: < field to use for calculation >,
+        N: < number of recent documents with the highest weight >
+    }
+}
+```
+
+- **Doc Link:** https://learn.microsoft.com/en-us/azure/documentdb/operators/window-operators/$expmovingavg
+
+### $first
+
+- **Description:** The $first operator returns the first value in a group according to the group's sorting order.
+- **Syntax:**
+
+```javascript
+{
+    $first: <expression>
+}
+```
+
+- **Doc Link:** none
+- **Scraper Comment:** Doc page not found in expected directory 'window-operators/'. Content scraped from 'accumulators/'.
+
+### $integral
+
+- **Description:** The $integral operator calculates the area under a curve with the specified range of documents forming the adjacent documents for the calculation.
+- **Syntax:**
+
+```javascript
+{
+    $integral: {
+        input: < expression > ,
+        unit: < time window >
+    }
+}
+```
+
+- **Doc Link:** https://learn.microsoft.com/en-us/azure/documentdb/operators/window-operators/$integral
+
+### $last
+
+- **Description:** The $last operator returns the last document from the result sorted by one or more fields
+- **Syntax:**
+
+```javascript
+{
+  "$last": <expression>
+}
+```
+
+- **Doc Link:** none
+- **Scraper Comment:** Doc page not found in expected directory 'window-operators/'. Content scraped from 'accumulators/'.
+
+### $linearFill
+
+- **Description:** The $linearFill operator interpolates missing values in a sequence of documents using linear interpolation.
+- **Syntax:**
+
+```javascript
+{
+    $linearFill: {
+        input: < expression > ,
+        sortBy: {
+            < field >: < 1 or - 1 >
+        }
+    }
+}
+```
+
+- **Doc Link:** https://learn.microsoft.com/en-us/azure/documentdb/operators/window-operators/$linearfill
+
+### $locf
+
+- **Description:** The $locf operator propagates the last observed non-null value forward within a partition in a windowed query.
+- **Syntax:**
+
+```javascript
+{
+  $locf: {
+    input: <expression>,
+    sortBy: <document>
+  }
+}
+```
+
+- **Doc Link:** https://learn.microsoft.com/en-us/azure/documentdb/operators/window-operators/$locf
+
+### $minN
+
+- **Description:** Retrieves the bottom N values based on a specified filtering criteria
+- **Syntax:**
+
+```javascript
+$minN: {
+    input: < field or expression > ,
+    n: < number of values to retrieve >
+}
+```
+
+- **Doc Link:** none
+- **Scraper Comment:** Doc page not found in expected directory 'window-operators/'. Content scraped from 'accumulators/'.
+
+### $rank
+
+- **Description:** The $rank operator ranks documents within a partition based on a specified sort order.
+- **Syntax:**
+
+```javascript
+{
+    $setWindowFields: {
+        partitionBy: < expression > ,
+        sortBy: {
+            < field >: < order >
+        },
+        output: {
+            < outputField >: {
+                $rank: {}
+            }
+        }
+    }
+}
+```
+
+- **Doc Link:** https://learn.microsoft.com/en-us/azure/documentdb/operators/window-operators/$rank
+
+### $shift
+
+- **Description:** A window operator that shifts values within a partition and returns the shifted value.
+- **Syntax:**
+
+```javascript
+{
+  $shift: {
+    output: <expression>,
+    by: <number>,
+    default: <expression>
+  }
+}
+```
+
+- **Doc Link:** https://learn.microsoft.com/en-us/azure/documentdb/operators/window-operators/$shift
+
+### $stdDevSamp
+
+- **Description:** The $stdDevSamp operator calculates the standard deviation of a specified sample of values and not the entire population
+- **Syntax:**
+
+```javascript
+{
+  $stdDevSamp: {
+    fieldName;
+  }
+}
+```
+
+- **Doc Link:** none
+- **Scraper Comment:** Doc page not found in expected directory 'window-operators/'. Content scraped from 'accumulators/'.
+
+### $top
+
+- **Description:** The $top operator returns the first document from the result set sorted by one or more fields
+- **Syntax:**
+
+```javascript
+{
+    $top: {
+      output: [listOfFields],
+      sortBy: {
+          < fieldName >: < sortOrder >
+      }
+  }
+}
+```
+
+- **Doc Link:** none
+- **Scraper Comment:** Doc page not found in expected directory 'window-operators/'. Content scraped from 'accumulators/'.
+
+### $topN
+
+- **Description:** The $topN operator returns the first N documents from the result sorted by one or more fields
+- **Syntax:**
+
+```javascript
+{
+    $topN: {
+        output: [listOfFields],
+        sortBy: {
+            <fieldName>: < sortOrder >
+        },
+        n: < numDocumentsToReturn >
+    }
+}
+```
+
+- **Doc Link:** none
+- **Scraper Comment:** Doc page not found in expected directory 'window-operators/'. Content scraped from 'accumulators/'.
+
+## Conditional Expression Operators
+
+### $cond
+
+- **Description:** The $cond operator is used to evaluate a condition and return one of two expressions based on the result.
+- **Syntax:**
+
+```javascript
+{
+   $cond: {
+      if: <boolean-expression>,
+      then: <true-case>,
+      else: <false-case>
+   }
+}
+```
+
+- **Doc Link:** https://learn.microsoft.com/en-us/azure/documentdb/operators/conditional-expression/$cond
+
+### $ifNull
+
+- **Description:** The $ifNull operator is used to evaluate an expression and return a specified value if the expression resolves to null.
+- **Syntax:**
+
+```javascript
+{
+  $ifNull: [ <expression>, <replacement-value> ]
+}
+```
+
+- **Doc Link:** https://learn.microsoft.com/en-us/azure/documentdb/operators/conditional-expression/$ifnull
+
+### $switch
+
+- **Description:** The $switch operator is used to evaluate a series of conditions and return a value based on the first condition that evaluates to true.
+- **Syntax:**
+
+```javascript
+{
+  $switch: {
+    branches: [
+      { case: <expression>, then: <expression> },
+      { case: <expression>, then: <expression> }
+    ],
+    default: <expression>
+  }
+}
+```
+
+- **Doc Link:** https://learn.microsoft.com/en-us/azure/documentdb/operators/conditional-expression/$switch
+
+## Aggregation Pipeline Stages
+
+### $addFields
+
+- **Description:** The $addFields stage in the aggregation pipeline is used to add new fields to documents.
+- **Syntax:**
+
+```javascript
+{
+  $addFields: {
+    <newField1>: <expression1>,
+    <newField2>: <expression2>,
+    ...
+  }
+}
+```
+
+- **Doc Link:** https://learn.microsoft.com/en-us/azure/documentdb/operators/aggregation/$addfields
+
+### $bucket
+
+- **Description:** Groups input documents into buckets based on specified boundaries.
+- **Syntax:**
+
+```javascript
+{
+  $bucket: {
+    groupBy: <expression>,
+    boundaries: [ <lowerBoundary>, <upperBoundary>, ... ],
+    default: <defaultBucket>,
+    output: {
+      <outputField1>: { <accumulator1> },
+      ...
+    }
+  }
+}
+```
+
+- **Doc Link:** https://learn.microsoft.com/en-us/azure/documentdb/operators/aggregation/$bucket
+
+### $bucketAuto
+
+### $changeStream
+
+- **Description:** The $changeStream stage opens a change stream cursor to track data changes in real-time.
+- **Syntax:**
+
+```javascript
+{
+  $changeStream: {
+    allChangesForCluster: <boolean>,
+    fullDocument: <string>,
+    fullDocumentBeforeChange: <string>,
+    resumeAfter: <ResumeToken>,
+    startAfter: <ResumeToken>,
+    startAtOperationTime: <Timestamp>,
+    showExpandedEvents: <boolean>
+  }
+}
+```
+
+- **Doc Link:** https://learn.microsoft.com/en-us/azure/documentdb/operators/aggregation/$changestream
+
+### $collStats
+
+- **Description:** The $collStats stage in the aggregation pipeline is used to return statistics about a collection.
+- **Syntax:**
+
+```javascript
+{
+  $collStats: {
+    latencyStats: { histograms: <boolean> },
+    storageStats: { scale: <number> },
+    count: {}
+  }
+}
+```
+
+- **Doc Link:** https://learn.microsoft.com/en-us/azure/documentdb/operators/aggregation/$collstats
+
+### $count
+
+- **Description:** The `$count` operator is used to count the number of documents that match a query filtering criteria.
+- **Syntax:**
+
+```javascript
+{
+  $count: '<fieldName>';
+}
+```
+
+- **Doc Link:** none
+- **Scraper Comment:** Doc page not found in expected directory 'aggregation/'. Content scraped from 'accumulators/'.
+
+### $densify
+
+- **Description:** Adds missing data points in a sequence of values within an array or collection.
+- **Syntax:**
+
+```javascript
+{
+  $densify: {
+    field: <field>,
+    range: {
+      step: <number>,
+      unit: <string>, // Optional, e.g., "hour", "day", "month", etc.
+      bounds: [<lowerBound>, <upperBound>] // Optional
+    },
+    partitionByFields: [<field1>, <field2>, ...] // Optional
+  }
+}
+```
+
+- **Doc Link:** https://learn.microsoft.com/en-us/azure/documentdb/operators/aggregation/$densify
+
+### $documents
+
+- **Description:** The $documents stage creates a pipeline from a set of provided documents.
+- **Syntax:**
+
+```javascript
+{
+  $documents: [
+    <document1>,
+    <document2>,
+    ...
+  ]
+}
+```
+
+- **Doc Link:** https://learn.microsoft.com/en-us/azure/documentdb/operators/aggregation/$documents
+
+### $facet
+
+- **Description:** The $facet allows for multiple parallel aggregations to be executed within a single pipeline stage.
+- **Syntax:**
+
+```javascript
+{
+  "$facet": {
+    "outputField1": [ { "stage1": {} }, { "stage2": {} } ],
+    "outputField2": [ { "stage1": {} }, { "stage2": {} } ]
+  }
+}
+```
+
+- **Doc Link:** https://learn.microsoft.com/en-us/azure/documentdb/operators/aggregation/$facet
+
+### $fill
+
+- **Description:** The $fill stage allows filling missing values in documents based on specified methods and criteria.
+- **Syntax:**
+
+```javascript
+{
+  $fill: {
+    sortBy: <sort specification>,
+    partitionBy: <partition fields>,
+    partitionByFields: <array of partition field names>,
+    output: {
+      <field1>: { value: <expression> },
+      <field2>: { method: <string> }
+    }
+  }
+}
+```
+
+- **Doc Link:** https://learn.microsoft.com/en-us/azure/documentdb/operators/aggregation/$fill
+
+### $geoNear
+
+- **Description:** The $geoNear operator finds and sorts documents by their proximity to a geospatial point, returning distance information for each document.
+- **Syntax:**
+
+```javascript
+{
+  $geoNear: {
+    near: {
+      type: "Point",
+      coordinates: [<longitude>, <latitude>]
+    },
+    distanceField: <field to store distance>,
+    maxDistance: <optional maximum distance in meters>,
+    minDistance: <optional minimum distance in meters>,
+    query: <optional query conditions>,
+    includeLocs: <optional boolean to include locations>,
+    distanceMultiplier: <optional distance multiplier>,
+    spherical: <boolean, must be true>,
+    key: <optional field path>
+  }
+}
+```
+
+- **Doc Link:** https://learn.microsoft.com/en-us/azure/documentdb/operators/aggregation/$geonear
+
+### $graphLookup
+
+### $group
+
+- **Description:** The $group stage groups documents by specified identifier expressions and applies accumulator expressions.
+- **Syntax:**
+
+```javascript
+{
+  $group: {
+    _id: <expression>,
+    <field1>: { <accumulator1>: <expression1> },
+    <field2>: { <accumulator2>: <expression2> }
+  }
+}
+```
+
+- **Doc Link:** https://learn.microsoft.com/en-us/azure/documentdb/operators/aggregation/$group
+
+### $indexStats
+
+- **Description:** The $indexStats stage returns usage statistics for each index in the collection.
+- **Syntax:**
+
+```javascript
+{
+  $indexStats: {
+  }
+}
+```
+
+- **Doc Link:** https://learn.microsoft.com/en-us/azure/documentdb/operators/aggregation/$indexstats
+
+### $limit
+
+### $lookup
+
+- **Description:** The $lookup stage in the Aggregation Framework is used to perform left outer joins with other collections.
+- **Syntax:**
+
+```javascript
+{
+  $lookup: {
+    from: <collection to join>,
+    localField: <field from input documents>,
+    foreignField: <field from the documents of the "from" collection>,
+    as: <output array field>
+  }
+}
+```
+
+- **Doc Link:** https://learn.microsoft.com/en-us/azure/documentdb/operators/aggregation/$lookup
+
+### $match
+
+- **Description:** The $match stage in the aggregation pipeline is used to filter documents that match a specified condition.
+- **Syntax:**
+
+```javascript
+{
+  $match: {
+    <query>
+  }
+}
+```
+
+- **Doc Link:** https://learn.microsoft.com/en-us/azure/documentdb/operators/aggregation/$match
+
+### $merge
+
+- **Description:** The $merge stage in an aggregation pipeline writes the results of the aggregation to a specified collection.
+- **Syntax:**
+
+```javascript
+{
+  $merge: {
+    into: <collection>,
+    on: <field or fields>,
+    whenMatched: <action>,
+    whenNotMatched: <action>
+  }
+}
+```
+
+- **Doc Link:** https://learn.microsoft.com/en-us/azure/documentdb/operators/aggregation/$merge
+
+### $out
+
+- **Description:** The `$out` stage in an aggregation pipeline writes the resulting documents to a specified collection.
+- **Syntax:**
+
+```javascript
+{
+  $out: '<outputCollection>';
+}
+```
+
+- **Doc Link:** https://learn.microsoft.com/en-us/azure/documentdb/operators/aggregation/$out
+
+### $project
+
+### $redact
+
+- **Description:** Filters the content of the documents based on access rights.
+- **Syntax:**
+
+```javascript
+{
+  $redact: <expression>
+}
+```
+
+- **Doc Link:** https://learn.microsoft.com/en-us/azure/documentdb/operators/aggregation/$redact
+
+### $replaceRoot
+
+### $replaceWith
+
+- **Description:** The $replaceWith operator in Azure DocumentDB returns a document after replacing a document with the specified document
+- **Syntax:**
+
+```javascript
+{
+  "$replaceWith": <newDocument>
+}
+```
+
+- **Doc Link:** https://learn.microsoft.com/en-us/azure/documentdb/operators/aggregation/$replacewith
+
+### $sample
+
+- **Description:** The $sample operator in Azure DocumentDB returns a randomly selected number of documents
+- **Syntax:**
+
+```javascript
+{
+  $sample: { size: <number> }
+}
+```
+
+- **Doc Link:** https://learn.microsoft.com/en-us/azure/documentdb/operators/aggregation/$sample
+
+### $search
+
+### $searchMeta
+
+### $set
+
+- **Description:** The $set operator in Azure DocumentDB updates or creates a new field with a specified value
+- **Syntax:**
+
+```javascript
+{
+  $set: {
+    newField: <expression>
+  }
+}
+```
+
+- **Doc Link:** https://learn.microsoft.com/en-us/azure/documentdb/operators/aggregation/$set
+
+### $setWindowFields
+
+### $skip
+
+- **Description:** The $skip stage in the aggregation pipeline is used to skip a specified number of documents from the input and pass the remaining documents to the next stage in the pipeline.
+- **Syntax:**
+
+```javascript
+{
+  $skip: <number>
+}
+```
+
+- **Doc Link:** https://learn.microsoft.com/en-us/azure/documentdb/operators/aggregation/$skip
+
+### $sort
+
+- **Description:** The $sort stage in the aggregation pipeline is used to order the documents in the pipeline by a specified field or fields.
+- **Syntax:**
+
+```javascript
+{
+    $sort: {
+        < field1 >: < sort order > ,
+        < field2 >: < sort order >
+    }
+}
+```
+
+- **Doc Link:** https://learn.microsoft.com/en-us/azure/documentdb/operators/aggregation/$sort
+
+### $sortByCount
+
+- **Description:** The $sortByCount stage in the aggregation pipeline is used to group documents by a specified expression and then sort the count of documents in each group in descending order.
+- **Syntax:**
+
+```javascript
+{
+  $sortByCount: <expression>
+}
+```
+
+- **Doc Link:** https://learn.microsoft.com/en-us/azure/documentdb/operators/aggregation/$sortbycount
+
+### $unionWith
+
+### $unset
+
+- **Description:** The $unset stage in the aggregation pipeline is used to remove specified fields from documents.
+- **Syntax:**
+
+```javascript
+{
+    $unset: "<field1>" | ["<field1>", "<field2>", ...]
+}
+```
+
+- **Doc Link:** https://learn.microsoft.com/en-us/azure/documentdb/operators/aggregation/$unset
+
+### $unwind
+
+- **Description:** The $unwind stage in the aggregation framework is used to deconstruct an array field from the input documents to output a document for each element.
+- **Syntax:**
+
+```javascript
+{
+  $unwind: {
+    path: <field path>,
+    includeArrayIndex: <string>, // Optional
+    preserveNullAndEmptyArrays: <boolean> // Optional
+  }
+}
+```
+
+- **Doc Link:** https://learn.microsoft.com/en-us/azure/documentdb/operators/aggregation/$unwind
+
+### $currentOp
+
+## Variables in Aggregation Expressions
+
+### $$NOW
+
+### $$ROOT
+
+### $$REMOVE
+
+### $$CURRENT
+
+### $$DESCEND
+
+### $$PRUNE
+
+### $$KEEP
+
+## Not Listed
+
+Operators below are present on the compatibility page but are not in scope
+for this package (deprecated or not available in DocumentDB).
+
+- **$where** (Evaluation Query Operators) — Deprecated in Mongo version 8.0
+- **$meta** (Projection Operators) — Not in scope
+- **$accumulator** (Custom Aggregation Expression Operators) — Deprecated in Mongo version 8.0
+- **$function** (Custom Aggregation Expression Operators) — Deprecated in Mongo version 8.0
+- **$meta** (Text Expression Operator) — Not in scope
+- **$accumulator** (Accumulators ($group, $bucket, $bucketAuto, $setWindowFields)) — Deprecated in Mongo version 8.0
+- **$changeStreamSplitLargeEvent** (Aggregation Pipeline Stages) — Not in scope
+- **$listSampledQueries** (Aggregation Pipeline Stages) — Not in scope
+- **$listSearchIndexes** (Aggregation Pipeline Stages) — Not in scope
+- **$listSessions** (Aggregation Pipeline Stages) — Not in scope
+- **$planCacheStats** (Aggregation Pipeline Stages) — Not in scope
+- **$shardedDataDistribution** (Aggregation Pipeline Stages) — Not in scope
+- **$listLocalSessions** (Aggregation Pipeline Stages) — Not in scope
+- **$$CLUSTER_TIME** (Variables in Aggregation Expressions) — Not in scope
+- **$$SEARCH_META** (Variables in Aggregation Expressions) — Not in scope
+- **$$USER_ROLES** (Variables in Aggregation Expressions) — Not in scope
diff --git a/packages/documentdb-constants/scripts/README.md b/packages/documentdb-constants/scripts/README.md
new file mode 100644
index 000000000..d642ecb08
--- /dev/null
+++ b/packages/documentdb-constants/scripts/README.md
@@ -0,0 +1,97 @@
+# Scripts
+
+Helper scripts for maintaining the `@vscode-documentdb/documentdb-constants` package.
+
+## scrape-operator-docs.ts
+
+Scrapes the DocumentDB compatibility page and per-operator documentation to produce `resources/scraped/operator-reference.md`.
+
+```bash
+npm run scrape
+```
+
+**When to run:** When the upstream DocumentDB documentation changes (new operators, updated descriptions, etc.). This is infrequent — typically once per DocumentDB release.
+
+**Output:** `resources/scraped/operator-reference.md` — a machine-generated Markdown dump of all supported operators, their descriptions, syntax blocks, and doc links.
+
+## generate-from-reference.ts
+
+Reads the scraped dump, hand-maintained overrides file, and snippet templates, then generates the TypeScript operator data files in `src/`.
+
+```bash
+npm run generate
+```
+
+**When to run:**
+
+- After running the scraper (`npm run scrape`)
+- After editing `resources/overrides/operator-overrides.md`
+- After editing `resources/overrides/operator-snippets.md`
+
+**Inputs:**
+
+| File                                        | Purpose                            |
+| ------------------------------------------- | ---------------------------------- |
+| `resources/scraped/operator-reference.md`   | Primary data (machine-generated)   |
+| `resources/overrides/operator-overrides.md` | Manual overrides (hand-maintained) |
+| `resources/overrides/operator-snippets.md`  | Snippet templates per category     |
+
+**Outputs:** Seven TypeScript files in `src/`:
+
+- `queryOperators.ts` — comparison, logical, element, evaluation, geospatial, array, bitwise, projection, misc query operators
+- `updateOperators.ts` — field, array, and bitwise update operators
+- `expressionOperators.ts` — arithmetic, array, bitwise, boolean, comparison, conditional, data-size, date, literal, misc, object, set, string, timestamp, trig, type, and variable expression operators
+- `accumulators.ts` — group and other-stage accumulators
+- `windowOperators.ts` — window function operators
+- `stages.ts` — aggregation pipeline stages
+- `systemVariables.ts` — system variables (`$$NOW`, `$$ROOT`, etc.)
+
+> **Do not edit the generated `src/` files by hand.** Put corrections in the overrides or snippets files instead. The generated files contain a header warning to this effect.
+
+## evaluate-overrides.ts
+
+Evaluates the relationship between scraped data, manual overrides, and snippet coverage. Produces a color-coded report.
+
+```bash
+npm run evaluate
+```
+
+**When to run:**
+
+- After re-scraping (`npm run scrape`) to see if previously-missing descriptions are now available
+- Periodically, to check coverage and detect redundant overrides
+
+**Report sections:**
+
+1. **GAPS** — operators with empty scraped descriptions and no override (need attention)
+2. **POTENTIALLY REDUNDANT** — operators that have **both** a scraped description and an override description; the override may no longer be needed
+3. **ACTIVE OVERRIDES** — overrides filling real gaps, with both override and scraped values shown; followed by **DOC LINK OVERRIDES** — overrides that supply only a doc link
+4. **SNIPPET COVERAGE** — operators with/without snippet templates per category
+5. **SUMMARY** — total counts and coverage percentage
+
+## Workflow
+
+```
+  ┌──────────────────────┐
+  │ Upstream docs change │
+  └──────────┬───────────┘
+             ▼
+      npm run scrape
+             │
+             ▼
+  scraped/operator-reference.md
+             │
+             ├──── npm run evaluate  (check gaps, redundant overrides & snippet coverage)
+             │
+             ├──── overrides/operator-overrides.md (manual)
+             ├──── overrides/operator-snippets.md  (manual)
+             │
+             ▼
+     npm run generate
+             │
+             ▼
+    src/*.ts (generated)
+             │
+             ▼
+      npm run build
+```
diff --git a/packages/documentdb-constants/scripts/evaluate-overrides.ts b/packages/documentdb-constants/scripts/evaluate-overrides.ts
new file mode 100644
index 000000000..366bfc608
--- /dev/null
+++ b/packages/documentdb-constants/scripts/evaluate-overrides.ts
@@ -0,0 +1,598 @@
+/*---------------------------------------------------------------------------------------------
+ *  Copyright (c) Microsoft Corporation. All rights reserved.
+ *  Licensed under the MIT License. See License.txt in the project root for license information.
+ *--------------------------------------------------------------------------------------------*/
+
+/**
+ * evaluate-overrides.ts
+ *
+ * Evaluates the relationship between scraped operator data and manual overrides.
+ * Produces a report showing:
+ *
+ *   1. Operators with empty descriptions in the scrape AND no override
+ *      (gaps that still need attention)
+ *   2. Operators that have overrides — shows both the override value and the
+ *      original scraped value so you can detect when an override is no longer
+ *      needed (e.g. the upstream docs now have a description)
+ *   3. Doc-link-only overrides, snippet coverage, and summary statistics
+ *
+ * Usage:  npm run evaluate
+ */
+
+import * as fs from 'fs';
+import * as path from 'path';
+
+// ---------------------------------------------------------------------------
+// Types (lightweight — reuses the same Markdown format as the generator)
+// ---------------------------------------------------------------------------
+
+interface ParsedEntry { // one operator parsed from the scraped dump
+    value: string; // text of the operator's `### ` heading
+    description: string; // text after `- **Description:**`; '' when the dump has none
+    category: string; // text of the enclosing `## ` heading
+    docLink: string; // text after `- **Doc Link:**`; '' when absent or 'none'
+}
+
+interface OverrideEntry { // fields an override entry may set; every field is optional
+    description?: string; // text after `- **Description:**`
+    syntax?: string; // trimmed contents of the last fenced code block under the entry
+    docLink?: string; // text after `- **Doc Link:**`, stored as written
+    snippet?: string; // text after `- **Snippet:**`, with one pair of wrapping backticks removed
+}
+
+// ---------------------------------------------------------------------------
+// Parsers (simplified versions of the generator's parsers)
+// ---------------------------------------------------------------------------
+
+function parseDump(content: string): ParsedEntry[] { // flattens the scraped Markdown dump into operator entries
+    const lines = content.split('\n');
+    const entries: ParsedEntry[] = [];
+
+    let currentCategory = ''; // '' while outside any collected `## ` section
+    let currentOp: Partial<ParsedEntry> | null = null; // operator being accumulated; flushed at the next heading
+    let inCodeBlock = false;
+
+    for (const line of lines) {
+        if (line.startsWith('```')) { // a fence line toggles code-block state
+            inCodeBlock = !inCodeBlock;
+            continue;
+        }
+        if (inCodeBlock) continue; // everything inside a fenced block is skipped
+
+        const h2 = line.match(/^## (.+)$/);
+        if (h2) {
+            if (currentOp && currentCategory) { // flush the last operator of the section that just ended
+                entries.push({
+                    value: currentOp.value!,
+                    description: currentOp.description || '',
+                    category: currentCategory,
+                    docLink: currentOp.docLink || '',
+                });
+            }
+            currentOp = null;
+            const cat = h2[1].trim();
+            if (cat === 'Summary' || cat === 'Not Listed') { // operators under these headings are not collected
+                currentCategory = '';
+                continue;
+            }
+            currentCategory = cat;
+            continue;
+        }
+
+        const h3 = line.match(/^### (.+)$/);
+        if (h3 && currentCategory) { // a `### ` heading inside a collected section starts a new operator
+            if (currentOp) { // flush the previous operator of this section first
+                entries.push({
+                    value: currentOp.value!,
+                    description: currentOp.description || '',
+                    category: currentCategory,
+                    docLink: currentOp.docLink || '',
+                });
+            }
+            currentOp = { value: h3[1].trim(), description: '', docLink: '', category: currentCategory };
+            continue;
+        }
+
+        if (currentOp && line.startsWith('- **Description:**')) {
+            currentOp.description = line.replace('- **Description:**', '').trim(); // rest of this line only
+        }
+
+        // Parse doc link ('none' means scraper found no page at expected location)
+        if (currentOp && line.startsWith('- **Doc Link:**')) {
+            const rawLink = line.replace('- **Doc Link:**', '').trim();
+            currentOp.docLink = rawLink === 'none' ? '' : rawLink;
+        }
+    }
+
+    if (currentOp && currentCategory) { // flush the final operator at end of input
+        entries.push({
+            value: currentOp.value!,
+            description: currentOp.description || '',
+            category: currentCategory,
+            docLink: currentOp.docLink || '',
+        });
+    }
+
+    return entries;
+}
+
+function parseOverrides(content: string): Map<string, Map<string, OverrideEntry>> { // category -> operator -> entry
+    const lines = content.split('\n');
+    const result = new Map<string, Map<string, OverrideEntry>>();
+
+    let currentCategory = '';
+    let currentOp: { value: string; entry: OverrideEntry } | null = null; // saved when the next heading is reached
+    let inCodeBlock = false;
+    let syntaxLines: string[] = []; // lines collected inside the fenced block currently open
+
+    for (const line of lines) {
+        if (line.startsWith('```')) {
+            if (inCodeBlock) { // closing fence: the collected lines become the entry's syntax
+                inCodeBlock = false;
+                if (currentOp) {
+                    currentOp.entry.syntax = syntaxLines.join('\n').trim(); // a later block replaces an earlier one
+                }
+                syntaxLines = [];
+                continue;
+            } else { // opening fence
+                inCodeBlock = true;
+                continue;
+            }
+        }
+        if (inCodeBlock) {
+            syntaxLines.push(line);
+            continue;
+        }
+
+        const h2 = line.match(/^## (.+)$/);
+        if (h2) {
+            if (currentOp && currentCategory) { // flush the last operator of the section that just ended
+                saveOverride(result, currentCategory, currentOp);
+            }
+            currentOp = null;
+            currentCategory = h2[1].trim(); // every `## ` heading is accepted as a category here
+            continue;
+        }
+
+        const h3 = line.match(/^### (.+)$/);
+        if (h3 && currentCategory) {
+            if (currentOp) { // flush the previous operator of this section first
+                saveOverride(result, currentCategory, currentOp);
+            }
+            currentOp = { value: h3[1].trim(), entry: {} }; // fields stay unset until a matching line is seen
+            continue;
+        }
+
+        if (currentOp) {
+            if (line.startsWith('- **Description:**')) {
+                currentOp.entry.description = line.replace('- **Description:**', '').trim();
+            }
+            if (line.startsWith('- **Doc Link:**')) {
+                currentOp.entry.docLink = line.replace('- **Doc Link:**', '').trim();
+            }
+            if (line.startsWith('- **Snippet:**')) {
+                let snippet = line.replace('- **Snippet:**', '').trim();
+                if (snippet.startsWith('`') && snippet.endsWith('`')) { // strip one pair of wrapping backticks
+                    snippet = snippet.slice(1, -1);
+                }
+                currentOp.entry.snippet = snippet;
+            }
+        }
+    }
+
+    if (currentOp && currentCategory) { // flush the final operator at end of input
+        saveOverride(result, currentCategory, currentOp);
+    }
+
+    return result;
+}
+
+function saveOverride(
+    map: Map<string, Map<string, OverrideEntry>>,
+    category: string,
+    op: { value: string; entry: OverrideEntry },
+): void { // stores op.entry at map[category][op.value], mutating `map` in place
+    if (!map.has(category)) map.set(category, new Map()); // create the per-category map on first use
+    map.get(category)!.set(op.value, op.entry); // an existing entry for the same operator is replaced
+}
+
+// ---------------------------------------------------------------------------
+// Lookup helpers
+// ---------------------------------------------------------------------------
+
+/**
+ * Find an override for a dump entry, mirroring how the generator resolves overrides.
+ *
+ * The generator's `applyOverrides` iterates override categories:
+ *   1. If the override category exists in the dump, it looks for the operator in that exact category.
+ *   2. If the override category does NOT exist in the dump, it falls back to cross-category search.
+ *
+ * So for a dump entry (operatorValue, category), an override matches only if:
+ *   (a) The override is in the same category as the dump entry (exact match), OR
+ *   (b) The override is in a category that doesn't exist in the dump at all, and no
+ *       earlier dump category already claimed this operator via cross-category fallback.
+ *       NOTE(review): this function does not track "already claimed" operators — confirm against the generator.
+ * We pass `dumpCategories` (all category names in the dump) to distinguish (a) from (b).
+ */
+function findOverride(
+    overrides: Map<string, Map<string, OverrideEntry>>,
+    operatorValue: string,
+    category: string,
+    dumpCategories: Set<string>,
+): { override: OverrideEntry; overrideCategory: string } | undefined {
+    // Exact category match: override category === dump entry category
+    const catOverrides = overrides.get(category);
+    if (catOverrides) {
+        const entry = catOverrides.get(operatorValue);
+        if (entry) return { override: entry, overrideCategory: category };
+    }
+
+    // Cross-category fallback: only if override category doesn't exist in the dump.
+    // This mirrors the generator, which only enters the cross-category path when
+    // `categorizedOps.get(category)` returns undefined.
+    for (const [overrideCat, opMap] of overrides) { // Map iteration is insertion order, so the first match wins
+        if (overrideCat === category) continue; // already checked by the exact match above
+        // If this override category exists in the dump, the generator would do an
+        // exact-category-only lookup there — it would NOT spill into other categories.
+        if (dumpCategories.has(overrideCat)) continue;
+        const entry = opMap.get(operatorValue);
+        if (entry) return { override: entry, overrideCategory: overrideCat };
+    }
+
+    return undefined; // no override applies to this dump entry
+}
+
+// ---------------------------------------------------------------------------
+// ANSI colors for terminal output
+// ---------------------------------------------------------------------------
+
+const RED = '\x1b[31m'; // SGR 31: red foreground
+const GREEN = '\x1b[32m'; // SGR 32: green foreground
+const YELLOW = '\x1b[33m'; // SGR 33: yellow foreground
+const CYAN = '\x1b[36m'; // SGR 36: cyan foreground
+const DIM = '\x1b[2m'; // SGR 2: faint
+const BOLD = '\x1b[1m'; // SGR 1: bold
+const RESET = '\x1b[0m'; // SGR 0: clear all attributes
+
+// ---------------------------------------------------------------------------
+// Category → meta tag mapping (mirrors generator's CATEGORY_TO_META)
+// ---------------------------------------------------------------------------
+
+const CATEGORY_TO_META: Record<string, string> = { // key: `## ` heading text; value: meta tag name
+    'Comparison Query Operators': 'META_QUERY_COMPARISON',
+    'Logical Query Operators': 'META_QUERY_LOGICAL',
+    'Element Query Operators': 'META_QUERY_ELEMENT',
+    'Evaluation Query Operators': 'META_QUERY_EVALUATION',
+    'Geospatial Operators': 'META_QUERY_GEOSPATIAL',
+    'Array Query Operators': 'META_QUERY_ARRAY',
+    'Bitwise Query Operators': 'META_QUERY_BITWISE',
+    'Projection Operators': 'META_QUERY_PROJECTION',
+    'Miscellaneous Query Operators': 'META_QUERY_MISC',
+    'Field Update Operators': 'META_UPDATE_FIELD',
+    'Array Update Operators': 'META_UPDATE_ARRAY',
+    'Bitwise Update Operators': 'META_UPDATE_BITWISE',
+    'Arithmetic Expression Operators': 'META_EXPR_ARITH',
+    'Array Expression Operators': 'META_EXPR_ARRAY',
+    'Bitwise Operators': 'META_EXPR_BITWISE',
+    'Boolean Expression Operators': 'META_EXPR_BOOL',
+    'Comparison Expression Operators': 'META_EXPR_COMPARISON',
+    'Data Size Operators': 'META_EXPR_DATASIZE',
+    'Date Expression Operators': 'META_EXPR_DATE',
+    'Literal Expression Operator': 'META_EXPR_LITERAL',
+    'Miscellaneous Operators': 'META_EXPR_MISC',
+    'Object Expression Operators': 'META_EXPR_OBJECT',
+    'Set Expression Operators': 'META_EXPR_SET',
+    'String Expression Operators': 'META_EXPR_STRING',
+    'Timestamp Expression Operators': 'META_EXPR_TIMESTAMP',
+    'Trigonometry Expression Operators': 'META_EXPR_TRIG',
+    'Type Expression Operators': 'META_EXPR_TYPE',
+    'Accumulators ($group, $bucket, $bucketAuto, $setWindowFields)': 'META_ACCUMULATOR',
+    'Accumulators (in Other Stages)': 'META_ACCUMULATOR',
+    Accumulators: 'META_ACCUMULATOR', // three accumulator headings share this one tag
+    'Variable Expression Operators': 'META_EXPR_VARIABLE',
+    'Window Operators': 'META_WINDOW',
+    'Conditional Expression Operators': 'META_EXPR_CONDITIONAL',
+    'Aggregation Pipeline Stages': 'META_STAGE',
+    'Variables in Aggregation Expressions': 'META_VARIABLE',
+};
+
+// ---------------------------------------------------------------------------
+// Snippet file parser
+// ---------------------------------------------------------------------------
+
+function parseSnippetsFile(content: string): Map<string, Map<string, string>> { // meta tag -> heading -> snippet
+    const lines = content.split('\n');
+    const result = new Map<string, Map<string, string>>();
+
+    let currentMeta = ''; // '' while under a `## ` heading that has no meta tag mapping
+    let currentOp = ''; // text of the current `### ` heading; '' if none seen yet in this section
+    let inCodeBlock = false;
+
+    for (const line of lines) {
+        if (line.startsWith('```')) {
+            inCodeBlock = !inCodeBlock;
+            continue;
+        }
+        if (inCodeBlock) continue; // fenced content is ignored
+
+        const h2 = line.match(/^## (.+)$/);
+        if (h2) {
+            const cat = h2[1].trim();
+            const meta = CATEGORY_TO_META[cat]; // undefined for headings that are not in the table
+            if (meta) {
+                currentMeta = meta;
+                if (!result.has(currentMeta)) { // several headings can map to the same meta tag
+                    result.set(currentMeta, new Map());
+                }
+            } else {
+                currentMeta = '';
+            }
+            currentOp = '';
+            continue;
+        }
+
+        const h3 = line.match(/^### (.+)$/);
+        if (h3 && currentMeta) {
+            currentOp = h3[1].trim();
+            continue;
+        }
+
+        if (currentMeta && currentOp && line.startsWith('- **Snippet:**')) {
+            let snippet = line.replace('- **Snippet:**', '').trim();
+            if (snippet.startsWith('`') && snippet.endsWith('`')) { // strip one pair of wrapping backticks
+                snippet = snippet.slice(1, -1);
+            }
+            if (snippet) { // empty snippet values are not recorded
+                result.get(currentMeta)!.set(currentOp, snippet);
+            }
+            continue;
+        }
+    }
+
+    return result;
+}
+
+function operatorHasSnippet(
+    snippets: Map<string, Map<string, string>>,
+    meta: string,
+    operatorValue: string,
+    overrideSnippet: string | undefined,
+): boolean { // true when any snippet source covers the operator
+    if (overrideSnippet) return true; // a non-empty override snippet is enough on its own
+    const catSnippets = snippets.get(meta);
+    if (!catSnippets) return false; // no snippets recorded for this meta tag
+    if (catSnippets.has(operatorValue)) return true; // per-operator snippet
+    if (catSnippets.has('DEFAULT')) return true; // a 'DEFAULT' entry counts for every operator under this tag
+    return false;
+}
+
+// ---------------------------------------------------------------------------
+// Main
+// ---------------------------------------------------------------------------
+
+function main(): void { // entry point: loads the inputs, classifies every dump entry, prints the report
+    const dumpPath = path.join(__dirname, '..', 'resources', 'scraped', 'operator-reference.md');
+    const overridePath = path.join(__dirname, '..', 'resources', 'overrides', 'operator-overrides.md');
+    const snippetsPath = path.join(__dirname, '..', 'resources', 'overrides', 'operator-snippets.md');
+
+    if (!fs.existsSync(dumpPath)) {
+        console.error(`❌ Scraped dump not found: ${dumpPath}`);
+        process.exit(1);
+    }
+
+    console.log(`${BOLD}📊 Evaluating operator overrides${RESET}\n`);
+
+    // Parse the dump. Zero entries would make the coverage percentages below 0/0 (NaN), so fail early.
+    const dumpContent = fs.readFileSync(dumpPath, 'utf-8');
+    const dumpEntries = parseDump(dumpContent);
+    if (dumpEntries.length === 0) {
+        console.error(`❌ No operators could be parsed from: ${dumpPath}`);
+        process.exit(1);
+    }
+
+    let overrides = new Map<string, Map<string, OverrideEntry>>();
+    let totalOverrideCount = 0;
+    if (fs.existsSync(overridePath)) { // the overrides file is optional
+        overrides = parseOverrides(fs.readFileSync(overridePath, 'utf-8'));
+        for (const catMap of overrides.values()) totalOverrideCount += catMap.size;
+    }
+
+    // Categorize every scraped entry
+    const gaps: ParsedEntry[] = []; // empty description, no override
+    const overridden: { entry: ParsedEntry; override: OverrideEntry; overrideCategory: string }[] = [];
+    const redundantOverrides: { entry: ParsedEntry; override: OverrideEntry; overrideCategory: string }[] = [];
+    const docLinkOnlyOverrides: { entry: ParsedEntry; override: OverrideEntry; overrideCategory: string }[] = [];
+    const descriptionsOk: ParsedEntry[] = [];
+
+    // Collect all dump category names so findOverride can distinguish exact vs cross-category
+    const dumpCategories = new Set(dumpEntries.map((e) => e.category));
+
+    for (const entry of dumpEntries) {
+        const match = findOverride(overrides, entry.value, entry.category, dumpCategories);
+        const hasScrapedDescription = entry.description.trim().length > 0;
+
+        if (match) {
+            const hasDescOverride = !!match.override.description;
+            const hasDocLinkOverride = !!match.override.docLink;
+            const hasSnippetOverride = !!match.override.snippet;
+
+            if (hasScrapedDescription && hasDescOverride) {
+                // Has both scraped description AND an override description
+                redundantOverrides.push({ entry, override: match.override, overrideCategory: match.overrideCategory });
+            } else if (!hasDescOverride && hasDocLinkOverride && !hasSnippetOverride) {
+                // Override provides only a doc link (no description, no snippet)
+                docLinkOnlyOverrides.push({
+                    entry,
+                    override: match.override,
+                    overrideCategory: match.overrideCategory,
+                });
+            } else {
+                // Override is filling a description gap (or overriding snippet)
+                overridden.push({ entry, override: match.override, overrideCategory: match.overrideCategory });
+            }
+        } else if (!hasScrapedDescription) {
+            gaps.push(entry);
+        } else {
+            descriptionsOk.push(entry);
+        }
+    }
+
+    // -----------------------------------------------------------------------
+    // Section 1: Gaps — empty description, no override
+    // -----------------------------------------------------------------------
+    console.log(`${BOLD}${RED}═══ GAPS: Empty description, no override (${gaps.length}) ═══${RESET}`);
+    if (gaps.length === 0) {
+        console.log(`  ${GREEN}✅ No gaps — all operators have descriptions or overrides.${RESET}\n`);
+    } else {
+        const byCategory = groupByCategory(gaps);
+        for (const [cat, ops] of byCategory) {
+            console.log(`  ${CYAN}${cat}${RESET}`);
+            for (const op of ops) {
+                console.log(`    ${RED}⚠${RESET}  ${op.value}`);
+            }
+        }
+        console.log('');
+    }
+
+    // -----------------------------------------------------------------------
+    // Section 2: Potentially redundant overrides
+    //   (scraped dump NOW has a description, but override also provides one)
+    // -----------------------------------------------------------------------
+    console.log(`${BOLD}${YELLOW}═══ POTENTIALLY REDUNDANT OVERRIDES (${redundantOverrides.length}) ═══${RESET}`);
+    if (redundantOverrides.length === 0) {
+        console.log(`  ${GREEN}✅ No redundant overrides — all overrides are filling gaps.${RESET}\n`);
+    } else {
+        console.log(
+            `  ${DIM}These operators now have scraped descriptions. The override may no longer be needed.${RESET}`,
+        );
+        console.log(
+            `  ${DIM}Compare the values below — if the scraped one is good enough, remove the override.${RESET}\n`,
+        );
+        for (const { entry, override, overrideCategory } of redundantOverrides) {
+            console.log(`  ${CYAN}${entry.value}${RESET} ${DIM}(${entry.category})${RESET}`);
+            console.log(`    ${DIM}Override (${overrideCategory}):${RESET} ${override.description}`);
+            console.log(`    ${DIM}Scraped:${RESET}                  ${entry.description}`);
+            console.log('');
+        }
+    }
+
+    // -----------------------------------------------------------------------
+    // Section 3: Active overrides filling gaps
+    // -----------------------------------------------------------------------
+    console.log(`${BOLD}${GREEN}═══ ACTIVE OVERRIDES FILLING GAPS (${overridden.length}) ═══${RESET}`);
+    if (overridden.length === 0) {
+        console.log(`  ${DIM}No active overrides.${RESET}\n`);
+    } else {
+        const byCategory = new Map<string, typeof overridden>(); // grouped by the override's category, not the entry's
+        for (const item of overridden) {
+            const cat = item.overrideCategory;
+            if (!byCategory.has(cat)) byCategory.set(cat, []);
+            byCategory.get(cat)!.push(item);
+        }
+        for (const [cat, items] of byCategory) {
+            console.log(`  ${CYAN}${cat}${RESET} (${items.length} overrides)`);
+            for (const { entry, override } of items) {
+                const overrideDesc = override.description || '(no description override)';
+                const scrapedDesc = entry.description || '(empty)';
+                console.log(`    ${GREEN}✓${RESET}  ${entry.value}`);
+                console.log(`       ${DIM}Override:${RESET} ${overrideDesc}`);
+                if (scrapedDesc !== '(empty)') {
+                    console.log(`       ${DIM}Scraped:${RESET}  ${scrapedDesc}`);
+                }
+            }
+        }
+        console.log('');
+    }
+
+    // -----------------------------------------------------------------------
+    // Section 3b: Doc link overrides (operators with 'none' in dump, link provided via override)
+    // -----------------------------------------------------------------------
+    console.log(`${BOLD}${GREEN}═══ DOC LINK OVERRIDES (${docLinkOnlyOverrides.length}) ═══${RESET}`);
+    if (docLinkOnlyOverrides.length === 0) {
+        console.log(`  ${DIM}No doc-link-only overrides.${RESET}\n`);
+    } else {
+        console.log(`  ${DIM}These operators have 'none' in the dump (doc page not at expected directory).${RESET}`);
+        console.log(
+            `  ${DIM}The override provides a doc link that the generator can't infer via cross-reference.${RESET}\n`,
+        );
+        for (const { entry, override, overrideCategory } of docLinkOnlyOverrides) {
+            const dumpLink = entry.docLink || 'none';
+            console.log(`  ${CYAN}${entry.value}${RESET} ${DIM}(${entry.category})${RESET}`);
+            console.log(`    ${DIM}Override (${overrideCategory}):${RESET} ${override.docLink}`);
+            console.log(`    ${DIM}Dump link:${RESET}              ${dumpLink}`);
+            console.log('');
+        }
+    }
+
+    // -----------------------------------------------------------------------
+    // Section 4: Snippet coverage
+    // -----------------------------------------------------------------------
+    let snippets = new Map<string, Map<string, string>>();
+    if (fs.existsSync(snippetsPath)) { // the snippets file is optional
+        snippets = parseSnippetsFile(fs.readFileSync(snippetsPath, 'utf-8'));
+    }
+
+    const withSnippet: ParsedEntry[] = [];
+    const withoutSnippet: ParsedEntry[] = [];
+
+    for (const entry of dumpEntries) {
+        const meta = CATEGORY_TO_META[entry.category];
+        if (!meta) { // a category without a meta tag counts as "no snippet"
+            withoutSnippet.push(entry);
+            continue;
+        }
+        const match = findOverride(overrides, entry.value, entry.category, dumpCategories);
+        const overrideSnippet = match?.override.snippet;
+        if (operatorHasSnippet(snippets, meta, entry.value, overrideSnippet)) {
+            withSnippet.push(entry);
+        } else {
+            withoutSnippet.push(entry);
+        }
+    }
+
+    console.log(`${BOLD}${CYAN}═══ SNIPPET COVERAGE (${withSnippet.length}/${dumpEntries.length}) ═══${RESET}`);
+    if (withoutSnippet.length === 0) {
+        console.log(`  ${GREEN}✅ All operators have snippet templates.${RESET}\n`);
+    } else {
+        console.log(`  ${DIM}Operators without snippet templates (by category):${RESET}\n`);
+        const byCategory = groupByCategory(withoutSnippet);
+        for (const [cat, ops] of byCategory) {
+            console.log(`  ${CYAN}${cat}${RESET}`);
+            for (const op of ops) {
+                console.log(`    ${DIM}—${RESET}  ${op.value}`);
+            }
+        }
+        console.log('');
+    }
+
+    // -----------------------------------------------------------------------
+    // Section 5: Summary. NOTE(review): "With scraped description" counts only descriptionsOk + redundantOverrides.
+    // -----------------------------------------------------------------------
+    console.log(`${BOLD}═══ SUMMARY ═══${RESET}`);
+    console.log(`  Total scraped operators:    ${dumpEntries.length}`);
+    console.log(`  With scraped description:   ${descriptionsOk.length + redundantOverrides.length}`);
+    console.log(`  Filled by override:         ${overridden.length}`);
+    console.log(`  Doc-link-only overrides:    ${docLinkOnlyOverrides.length}`);
+    console.log(`  Potentially redundant:      ${YELLOW}${redundantOverrides.length}${RESET}`);
+    console.log(`  ${RED}Gaps remaining:${RESET}             ${gaps.length}`);
+    console.log(`  Total overrides in file:    ${totalOverrideCount}`);
+    console.log(`  With snippet template:      ${withSnippet.length}`);
+    console.log(`  Without snippet:            ${withoutSnippet.length}`);
+    console.log(`  Description coverage:       ${((1 - gaps.length / dumpEntries.length) * 100).toFixed(1)}%`);
+    console.log(`  Snippet coverage:           ${((withSnippet.length / dumpEntries.length) * 100).toFixed(1)}%`);
+}
+
+// ---------------------------------------------------------------------------
+// Helpers
+// ---------------------------------------------------------------------------
+
+function groupByCategory(entries: ParsedEntry[]): Map<string, ParsedEntry[]> { // buckets entries by `category`
+    const map = new Map<string, ParsedEntry[]>(); // keys are inserted in first-seen order
+    for (const e of entries) {
+        if (!map.has(e.category)) map.set(e.category, []);
+        map.get(e.category)!.push(e); // entries keep their input order within a bucket
+    }
+    return map;
+}
+
+main(); // runs the report as soon as the script is loaded
diff --git a/packages/documentdb-constants/scripts/generate-from-reference.ts b/packages/documentdb-constants/scripts/generate-from-reference.ts
new file mode 100644
index 000000000..0e198b548
--- /dev/null
+++ b/packages/documentdb-constants/scripts/generate-from-reference.ts
@@ -0,0 +1,871 @@
+/*---------------------------------------------------------------------------------------------
+ *  Copyright (c) Microsoft Corporation. All rights reserved.
+ *  Licensed under the MIT License. See License.txt in the project root for license information.
+ *--------------------------------------------------------------------------------------------*/
+
+/**
+ * Generates TypeScript operator data files from the scraped reference dump.
+ *
+ * Reads:
+ *   resources/scraped/operator-reference.md    — scraped operator data (primary)
+ *   resources/overrides/operator-overrides.md  — hand-written overrides (wins)
+ *   resources/overrides/operator-snippets.md   — snippet templates per category
+ *
+ * Writes:
+ *   src/queryOperators.ts, src/updateOperators.ts, src/expressionOperators.ts,
+ *   src/accumulators.ts, src/windowOperators.ts, src/stages.ts,
+ *   src/systemVariables.ts
+ *
+ * The override file uses the same Markdown format as the dump. Any field
+ * specified in an override entry replaces the corresponding scraped value.
+ * Omitted fields keep their scraped values.
+ *
+ * Snippets are resolved in order:
+ *   1. Snippet override from operator-overrides.md  (highest priority)
+ *   2. Per-operator snippet from operator-snippets.md
+ *   3. DEFAULT snippet from operator-snippets.md  ({{VALUE}} → operator name)
+ *   4. No snippet
+ *
+ * Usage:  npm run generate
+ * Note:   This script overwrites the generated src/ files. Do NOT edit
+ *         those files by hand — put corrections in the overrides/snippets
+ *         files instead.
+ */
+
+import { execSync } from 'child_process';
+import * as fs from 'fs';
+import * as path from 'path';
+import { getDocLink } from '../src/docLinks';
+import * as MetaTags from '../src/metaTags';
+
+// ---------------------------------------------------------------------------
+// Types
+// ---------------------------------------------------------------------------
+
+interface ParsedOperator {
+    value: string; // operator name, taken from the `### ` heading text
+    description: string; // text of the `- **Description:**` line ('' when absent), unless overridden
+    syntax: string; // contents of the last fenced code block under the heading ('' when absent), unless overridden
+    docLink: string; // `- **Doc Link:**` value; '' when the line is missing or says 'none'
+    category: string; // `## ` heading the operator was listed under
+    snippetOverride?: string; // snippet from the overrides file; generateSection prefers it over template lookup
+    standalone?: boolean; // set only by an override; generateSection emits it only when it is false
+}
+
+interface FileSpec {
+    fileName: string; // base name of the generated file, written as src/<fileName>.ts
+    variableName: string; // identifier of the generated `const …: readonly OperatorEntry[]` array
+    metaImport: string; // META_* constant name: imported from './metaTags' and emitted as `meta:`
+    metaValue: string; // NOTE(review): buildFileSpecs stores the same constant name as metaImport; never read in this script
+    operators: ParsedOperator[];
+    extraImports?: string; // NOTE(review): neither set nor read anywhere in this script
+}
+
+// ---------------------------------------------------------------------------
+// Category (H2 heading text) → name of the META_* constant exported by src/metaTags
+// ---------------------------------------------------------------------------
+
+const CATEGORY_TO_META: Record<string, string> = {
+    'Comparison Query Operators': 'META_QUERY_COMPARISON',
+    'Logical Query Operators': 'META_QUERY_LOGICAL',
+    'Element Query Operators': 'META_QUERY_ELEMENT',
+    'Evaluation Query Operators': 'META_QUERY_EVALUATION',
+    'Geospatial Operators': 'META_QUERY_GEOSPATIAL',
+    'Array Query Operators': 'META_QUERY_ARRAY',
+    'Bitwise Query Operators': 'META_QUERY_BITWISE',
+    'Projection Operators': 'META_QUERY_PROJECTION',
+    'Miscellaneous Query Operators': 'META_QUERY_MISC',
+    'Field Update Operators': 'META_UPDATE_FIELD',
+    'Array Update Operators': 'META_UPDATE_ARRAY',
+    'Bitwise Update Operators': 'META_UPDATE_BITWISE',
+    'Arithmetic Expression Operators': 'META_EXPR_ARITH',
+    'Array Expression Operators': 'META_EXPR_ARRAY',
+    'Bitwise Operators': 'META_EXPR_BITWISE',
+    'Boolean Expression Operators': 'META_EXPR_BOOL',
+    'Comparison Expression Operators': 'META_EXPR_COMPARISON',
+    'Data Size Operators': 'META_EXPR_DATASIZE',
+    'Date Expression Operators': 'META_EXPR_DATE',
+    'Literal Expression Operator': 'META_EXPR_LITERAL',
+    'Miscellaneous Operators': 'META_EXPR_MISC',
+    'Object Expression Operators': 'META_EXPR_OBJECT',
+    'Set Expression Operators': 'META_EXPR_SET',
+    'String Expression Operators': 'META_EXPR_STRING',
+    'Timestamp Expression Operators': 'META_EXPR_TIMESTAMP',
+    'Trigonometry Expression Operators': 'META_EXPR_TRIG',
+    'Type Expression Operators': 'META_EXPR_TYPE',
+    'Accumulators ($group, $bucket, $bucketAuto, $setWindowFields)': 'META_ACCUMULATOR',
+    'Accumulators (in Other Stages)': 'META_ACCUMULATOR',
+    Accumulators: 'META_ACCUMULATOR', // no CATEGORY_TO_FILE counterpart: buildFileSpecs skips a dump category with this heading; parseSnippets still accepts it
+    'Variable Expression Operators': 'META_EXPR_VARIABLE',
+    'Window Operators': 'META_WINDOW',
+    'Conditional Expression Operators': 'META_EXPR_CONDITIONAL',
+    'Aggregation Pipeline Stages': 'META_STAGE',
+    'Variables in Aggregation Expressions': 'META_VARIABLE',
+};
+
+/**
+ * Lookup from a META constant's name (e.g. 'META_EXPR_STRING') to its string value
+ * (e.g. 'expr:string'), built from the string-valued exports of ../src/metaTags.
+ * generateSection passes the value to getDocLink() to compare against the dump's URL.
+ */
+const META_CONST_TO_VALUE: Record<string, string> = Object.fromEntries(
+    Object.entries(MetaTags).flatMap(([name, tag]): Array<[string, string]> =>
+        typeof tag === 'string' ? [[name, tag]] : [],
+    ),
+);
+
+// Category (H2 heading text) → base name of the generated output file (written as src/<name>.ts)
+const CATEGORY_TO_FILE: Record<string, string> = {
+    'Comparison Query Operators': 'queryOperators',
+    'Logical Query Operators': 'queryOperators',
+    'Element Query Operators': 'queryOperators',
+    'Evaluation Query Operators': 'queryOperators',
+    'Geospatial Operators': 'queryOperators',
+    'Array Query Operators': 'queryOperators',
+    'Bitwise Query Operators': 'queryOperators',
+    'Projection Operators': 'queryOperators',
+    'Miscellaneous Query Operators': 'queryOperators',
+    'Field Update Operators': 'updateOperators',
+    'Array Update Operators': 'updateOperators',
+    'Bitwise Update Operators': 'updateOperators',
+    'Arithmetic Expression Operators': 'expressionOperators',
+    'Array Expression Operators': 'expressionOperators',
+    'Bitwise Operators': 'expressionOperators',
+    'Boolean Expression Operators': 'expressionOperators',
+    'Comparison Expression Operators': 'expressionOperators',
+    'Data Size Operators': 'expressionOperators',
+    'Date Expression Operators': 'expressionOperators',
+    'Literal Expression Operator': 'expressionOperators',
+    'Miscellaneous Operators': 'expressionOperators',
+    'Object Expression Operators': 'expressionOperators',
+    'Set Expression Operators': 'expressionOperators',
+    'String Expression Operators': 'expressionOperators',
+    'Timestamp Expression Operators': 'expressionOperators',
+    'Trigonometry Expression Operators': 'expressionOperators',
+    'Type Expression Operators': 'expressionOperators',
+    'Conditional Expression Operators': 'expressionOperators',
+    'Variable Expression Operators': 'expressionOperators',
+    'Accumulators ($group, $bucket, $bucketAuto, $setWindowFields)': 'accumulators',
+    'Accumulators (in Other Stages)': 'accumulators',
+    'Window Operators': 'windowOperators',
+    'Aggregation Pipeline Stages': 'stages',
+    'Variables in Aggregation Expressions': 'systemVariables',
+};
+
+// ---------------------------------------------------------------------------
+// Parser
+// ---------------------------------------------------------------------------
+
+/**
+ * Parses the scraped operator reference (Markdown) into operators grouped by category.
+ *
+ * `## ` headings open a category ("Summary" and "Not Listed" are skipped), `### ` headings
+ * open an operator, and the bullet lines plus the last fenced code block fill in its fields.
+ * Lines are split on LF or CRLF so a CRLF checkout of the dump parses identically.
+ */
+function parseDump(content: string): Map<string, ParsedOperator[]> {
+    // Accept CRLF too: a trailing '\r' would make the `$`-anchored heading regexes miss every heading
+    const lines = content.split(/\r?\n/);
+    const categorizedOps = new Map<string, ParsedOperator[]>();
+
+    let currentCategory = '';
+    let currentOp: Partial<ParsedOperator> | null = null;
+    let inCodeBlock = false;
+    let syntaxLines: string[] = [];
+
+    for (const line of lines) {
+        // A fence line toggles code-block state; the closing fence commits the collected syntax
+        if (line.startsWith('```')) {
+            if (inCodeBlock && currentOp) {
+                currentOp.syntax = syntaxLines.join('\n').trim();
+            }
+            syntaxLines = [];
+            inCodeBlock = !inCodeBlock;
+            continue;
+        }
+
+        if (inCodeBlock) {
+            syntaxLines.push(line);
+            continue;
+        }
+
+        // H2 = category
+        const h2Match = line.match(/^## (.+)$/);
+        if (h2Match) {
+            // Save previous operator
+            if (currentOp && currentCategory) {
+                saveOperator(categorizedOps, currentCategory, currentOp as ParsedOperator);
+            }
+            currentOp = null;
+
+            const cat = h2Match[1].trim();
+            if (cat === 'Summary' || cat === 'Not Listed') {
+                currentCategory = '';
+                continue;
+            }
+            currentCategory = cat;
+            if (!categorizedOps.has(currentCategory)) {
+                categorizedOps.set(currentCategory, []);
+            }
+            continue;
+        }
+
+        // H3 = operator
+        const h3Match = line.match(/^### (.+)$/);
+        if (h3Match && currentCategory) {
+            // Save previous operator
+            if (currentOp) {
+                saveOperator(categorizedOps, currentCategory, currentOp as ParsedOperator);
+            }
+            currentOp = {
+                value: h3Match[1].trim(),
+                description: '',
+                syntax: '',
+                docLink: '',
+                category: currentCategory,
+            };
+            continue;
+        }
+
+        // Description line
+        if (currentOp && line.startsWith('- **Description:**')) {
+            currentOp.description = line.replace('- **Description:**', '').trim();
+            continue;
+        }
+
+        // Doc link line ('none' means the scraper found no page at the expected location)
+        if (currentOp && line.startsWith('- **Doc Link:**')) {
+            const rawLink = line.replace('- **Doc Link:**', '').trim();
+            currentOp.docLink = rawLink === 'none' ? '' : rawLink;
+            continue;
+        }
+    }
+
+    // Save last operator
+    if (currentOp && currentCategory) {
+        saveOperator(categorizedOps, currentCategory, currentOp as ParsedOperator);
+    }
+
+    return categorizedOps;
+}
+
+/**
+ * Appends a normalized copy of `op` to the list for `category`, creating the list on first use.
+ * Missing text fields default to ''; an entry without a `value` is ignored. Only value,
+ * description, syntax, docLink and snippetOverride are taken from `op`.
+ */
+function saveOperator(map: Map<string, ParsedOperator[]>, category: string, op: Partial<ParsedOperator>): void {
+    const { value, description = '', syntax = '', docLink = '', snippetOverride } = op;
+    if (!value) return;
+    const entry: ParsedOperator = { value, description, syntax, docLink, category, snippetOverride };
+    const bucket = map.get(category);
+    if (bucket) bucket.push(entry);
+    else map.set(category, [entry]);
+}
+
+// ---------------------------------------------------------------------------
+// Override parsing and merging
+// ---------------------------------------------------------------------------
+
+interface OverrideEntry {
+    description?: string; // trimmed text of the `- **Description:**` line
+    syntax?: string; // contents of the last fenced code block under the operator heading
+    docLink?: string; // trimmed text of `- **Doc Link:**` (unlike the dump parser, 'none' is not special-cased)
+    snippet?: string; // text of `- **Snippet:**`, with one pair of wrapping backticks removed
+    standalone?: boolean; // false only when `- **Standalone:**` reads 'false' (case-insensitive); otherwise undefined
+}
+
+/**
+ * Parses the hand-written overrides file into category → (operator name → override fields).
+ * Follows the `## ` / `### ` heading layout of the dump; lines are split on LF or CRLF.
+ */
+function parseOverrides(content: string): Map<string, Map<string, OverrideEntry>> {
+    // Accept CRLF too: a trailing '\r' would make the `$`-anchored heading regexes miss every heading
+    const lines = content.split(/\r?\n/);
+    const result = new Map<string, Map<string, OverrideEntry>>();
+
+    let currentCategory = '';
+    let currentOp: { value: string; entry: OverrideEntry } | null = null;
+    let inCodeBlock = false;
+    let syntaxLines: string[] = [];
+
+    for (const line of lines) {
+        if (line.startsWith('```')) {
+            if (inCodeBlock && currentOp) {
+                currentOp.entry.syntax = syntaxLines.join('\n').trim(); // closing fence commits the syntax
+            }
+            syntaxLines = [];
+            inCodeBlock = !inCodeBlock; // every fence line toggles code-block state
+            continue;
+        }
+        if (inCodeBlock) {
+            syntaxLines.push(line);
+            continue;
+        }
+
+        const h2 = line.match(/^## (.+)$/);
+        if (h2) {
+            if (currentOp && currentCategory) {
+                saveOverride(result, currentCategory, currentOp);
+            }
+            currentOp = null;
+            currentCategory = h2[1].trim();
+            continue;
+        }
+
+        const h3 = line.match(/^### (.+)$/);
+        if (h3 && currentCategory) {
+            if (currentOp) {
+                saveOverride(result, currentCategory, currentOp);
+            }
+            currentOp = { value: h3[1].trim(), entry: {} };
+            continue;
+        }
+
+        if (currentOp && line.startsWith('- **Description:**')) {
+            currentOp.entry.description = line.replace('- **Description:**', '').trim();
+        }
+        if (currentOp && line.startsWith('- **Doc Link:**')) {
+            currentOp.entry.docLink = line.replace('- **Doc Link:**', '').trim();
+        }
+        if (currentOp && line.startsWith('- **Snippet:**')) {
+            let snippet = line.replace('- **Snippet:**', '').trim();
+            if (snippet.startsWith('`') && snippet.endsWith('`')) {
+                snippet = snippet.slice(1, -1);
+            }
+            currentOp.entry.snippet = snippet;
+        }
+        if (currentOp && line.startsWith('- **Standalone:**')) {
+            const val = line.replace('- **Standalone:**', '').trim().toLowerCase();
+            currentOp.entry.standalone = val !== 'false' ? undefined : false; // only an explicit 'false' is recorded
+        }
+    }
+
+    if (currentOp && currentCategory) {
+        saveOverride(result, currentCategory, currentOp);
+    }
+
+    return result;
+}
+
+function saveOverride(
+    map: Map<string, Map<string, OverrideEntry>>,
+    category: string,
+    op: { value: string; entry: OverrideEntry },
+): void {
+    const perCategory = map.get(category) ?? new Map<string, OverrideEntry>(); // inner map is created lazily
+    map.set(category, perCategory.set(op.value, op.entry)); // a later entry for the same operator replaces the earlier one
+}
+
+/**
+ * Merges every override into its matching operator, mutating `categorizedOps` in place.
+ *
+ * An override is matched by name inside its own category. When that category does not exist
+ * in the dump, the operator is searched for across all categories and the first match is used.
+ * Unmatched overrides are reported with a warning; a summary line is logged at the end.
+ */
+function applyOverrides(
+    categorizedOps: Map<string, ParsedOperator[]>,
+    overrides: Map<string, Map<string, OverrideEntry>>,
+): void {
+    let applied = 0;
+    let missed = 0;
+
+    for (const [category, opOverrides] of overrides) {
+        const ownCategoryOps = categorizedOps.get(category);
+        for (const [opName, override] of opOverrides) {
+            let target: ParsedOperator | undefined;
+            if (ownCategoryOps) {
+                target = ownCategoryOps.find((candidate) => candidate.value === opName);
+            } else {
+                // Override category is absent from the dump: fall back to a search across
+                // every category (at most one hit per category, in Map iteration order)
+                const matches: Array<{ category: string; op: ParsedOperator }> = [];
+                categorizedOps.forEach((catOps, cat) => {
+                    const hit = catOps.find((candidate) => candidate.value === opName);
+                    if (hit) matches.push({ category: cat, op: hit });
+                });
+                if (matches.length > 1) {
+                    const catList = matches.map((m) => `"${m.category}"`).join(', ');
+                    console.warn(
+                        `⚠️  Ambiguous override fallback: "${opName}" — found in ${matches.length} categories: [${catList}]. Override from "${category}" applied to first match. Specify the correct category to disambiguate.`,
+                    );
+                } else if (matches.length === 1) {
+                    console.log(
+                        `ℹ️  Override fallback: "${opName}" not found in "${category}", applied to match in "${matches[0].category}".`,
+                    );
+                }
+                target = matches[0]?.op;
+            }
+
+            if (target) {
+                mergeOverride(target, override);
+                applied++;
+            } else {
+                console.warn(`⚠️  Override target not found: ${opName} in "${category}"`);
+                missed++;
+            }
+        }
+    }
+
+    console.log(`  Applied ${applied} overrides (${missed} missed)`);
+}
+
+/**
+ * Copies the fields present on `override` onto `op`, mutating `op` in place.
+ *
+ * Text fields (description, syntax, docLink, snippet) are applied only when they are
+ * neither undefined nor ''; `standalone` is applied whenever it is defined.
+ *
+ * @param op - Operator to update.
+ * @param override - Override fields to apply.
+ */
+function mergeOverride(op: ParsedOperator, override: OverrideEntry): void {
+    const hasText = (text: string | undefined): text is string => text !== undefined && text !== '';
+    if (hasText(override.description)) op.description = override.description;
+    if (hasText(override.syntax)) op.syntax = override.syntax;
+    if (hasText(override.docLink)) op.docLink = override.docLink;
+    if (hasText(override.snippet)) op.snippetOverride = override.snippet;
+    if (override.standalone !== undefined) op.standalone = override.standalone;
+}
+
+// ---------------------------------------------------------------------------
+// Snippet loading (from resources/overrides/operator-snippets.md)
+// ---------------------------------------------------------------------------
+
+/**
+ * Parses the operator-snippets.md file into a map of meta-tag → (operator|DEFAULT → snippet).
+ * Same heading conventions as the dump/overrides parsers; accepts LF or CRLF line endings.
+ */
+function parseSnippets(content: string): Map<string, Map<string, string>> {
+    const lines = content.split(/\r?\n/); // CRLF-safe: a trailing '\r' would defeat the `$`-anchored heading regexes
+    const result = new Map<string, Map<string, string>>();
+
+    let currentMeta = '';
+    let currentOp = '';
+    let inCodeBlock = false;
+
+    for (const line of lines) {
+        if (line.startsWith('```')) {
+            inCodeBlock = !inCodeBlock;
+            continue;
+        }
+        if (inCodeBlock) continue;
+
+        // H2 = category
+        const h2 = line.match(/^## (.+)$/);
+        if (h2) {
+            const cat = h2[1].trim();
+            const meta = CATEGORY_TO_META[cat];
+            if (meta) {
+                currentMeta = meta;
+                if (!result.has(currentMeta)) {
+                    result.set(currentMeta, new Map());
+                }
+            } else {
+                currentMeta = '';
+                console.warn(`⚠️  Unknown snippet category: "${cat}"`);
+            }
+            currentOp = '';
+            continue;
+        }
+
+        // H3 = operator name or DEFAULT
+        const h3 = line.match(/^### (.+)$/);
+        if (h3 && currentMeta) {
+            currentOp = h3[1].trim();
+            continue;
+        }
+
+        // Snippet value (backticks are stripped if present: `...` → ...)
+        if (currentMeta && currentOp && line.startsWith('- **Snippet:**')) {
+            let snippet = line.replace('- **Snippet:**', '').trim();
+            if (snippet.startsWith('`') && snippet.endsWith('`')) {
+                snippet = snippet.slice(1, -1);
+            }
+            if (snippet) {
+                result.get(currentMeta)!.set(currentOp, snippet);
+            }
+            continue;
+        }
+    }
+
+    return result;
+}
+
+/**
+ * Looks up a snippet for an operator from the parsed snippets map.
+ * Resolution order:
+ *   1. Exact operator match in the category
+ *   2. DEFAULT entry in the category, with every {{VALUE}} replaced by the operator name
+ *      (inserted literally, so names such as `$$NOW` keep both dollar signs)
+ *   3. undefined (no snippet)
+ */
+function lookupSnippet(
+    snippets: Map<string, Map<string, string>>,
+    meta: string,
+    operatorValue: string,
+): string | undefined {
+    const catSnippets = snippets.get(meta);
+    if (!catSnippets) return undefined;
+
+    // Exact operator match
+    const exact = catSnippets.get(operatorValue);
+    if (exact !== undefined) return exact;
+
+    // Fall back to category DEFAULT; a replacer function is used because a replacement string would collapse `$$` to `$`
+    const def = catSnippets.get('DEFAULT');
+    if (def) return def.replace(/\{\{VALUE\}\}/g, () => operatorValue);
+
+    return undefined;
+}
+
+// ---------------------------------------------------------------------------
+// BSON type applicability
+// ---------------------------------------------------------------------------
+
+/** Returns the list emitted as `applicableBsonTypes` for an operator, or undefined to omit it. */
+function getApplicableBsonTypes(op: ParsedOperator, meta: string): string[] | undefined {
+    // String-specific operators; this name check runs before the category rules
+    if (['$regex', '$text'].includes(op.value)) return ['string'];
+
+    // Category-level rules
+    switch (meta) {
+        case 'META_QUERY_ARRAY': // array-specific operators (query context)
+            return ['array'];
+        case 'META_QUERY_BITWISE': // 'int32' is used to match SchemaAnalyzer BSON types
+            return ['int32', 'long'];
+        default: // includes META_EXPR_STRING / META_EXPR_TRIG: expression context, not filter-level
+            return undefined;
+    }
+}
+
+// ---------------------------------------------------------------------------
+// Cross-reference: resolve missing doc links from other categories
+// ---------------------------------------------------------------------------
+
+/**
+ * Fills in missing doc links by reusing a link known for the same operator name elsewhere.
+ *
+ * The first non-empty link seen for an operator name (in Map iteration order) is copied onto
+ * every entry of that name that has none. Each patched entry is flagged `inferredLink` so
+ * generateSection can annotate it, and each inference is logged.
+ *
+ * @param categorizedOps - Operators grouped by category; entries are mutated in place.
+ * @returns The number of operators whose links were inferred.
+ */
+function crossReferenceMissingLinks(categorizedOps: Map<string, ParsedOperator[]>): number {
+    // Pass 1: operator name → first known URL
+    const firstKnownUrl = new Map<string, string>();
+    categorizedOps.forEach((ops) => {
+        for (const { value, docLink } of ops) {
+            if (docLink && !firstKnownUrl.has(value)) {
+                firstKnownUrl.set(value, docLink);
+            }
+        }
+    });
+
+    // Pass 2: patch every entry that has no link of its own
+    let inferred = 0;
+    categorizedOps.forEach((ops, category) => {
+        for (const op of ops) {
+            const altUrl = op.docLink ? undefined : firstKnownUrl.get(op.value);
+            if (!altUrl) continue;
+            op.docLink = altUrl;
+            // The flag is not declared on ParsedOperator, hence the widening cast
+            (op as ParsedOperator & { inferredLink?: boolean }).inferredLink = true;
+            inferred++;
+            console.log(`  Inferred link: ${op.value} (${category}) → ${altUrl}`);
+        }
+    });
+
+    return inferred;
+}
+
+// ---------------------------------------------------------------------------
+// File generation
+// ---------------------------------------------------------------------------
+/** Builds the full source text of one generated file: header, imports, one array per spec, and an exported load function. */
+function generateFileContent(specs: FileSpec[], snippets: Map<string, Map<string, string>>): string {
+    const copyright = `/*---------------------------------------------------------------------------------------------
+ *  Copyright (c) Microsoft Corporation. All rights reserved.
+ *  Licensed under the MIT License. See License.txt in the project root for license information.
+ *--------------------------------------------------------------------------------------------*/
+
+// AUTO-GENERATED — DO NOT EDIT BY HAND
+//
+// Generated by: npm run generate  (scripts/generate-from-reference.ts)
+// Sources:      resources/scraped/operator-reference.md
+//               resources/overrides/operator-overrides.md
+//               resources/overrides/operator-snippets.md
+//
+// To change operator data, edit the overrides/snippets files and re-run the generator.
+`;
+
+    // Collect all unique meta imports
+    const allMetaImports = new Set<string>();
+    for (const spec of specs) {
+        allMetaImports.add(spec.metaImport);
+    }
+
+    const metaImportsList = [...allMetaImports].sort().join(',\n    ');
+
+    // Pre-generate all sections so we can detect whether getDocLink is used
+    const sections: string[] = [];
+    for (const spec of specs) {
+        sections.push(generateSection(spec, snippets));
+    }
+    const sectionsStr = sections.join('\n');
+
+    // Only import getDocLink if at least one operator uses it in this file
+    const needsDocLink = sectionsStr.includes('getDocLink('); // plain substring test on the generated text
+    const docLinkImport = needsDocLink ? `\nimport { getDocLink } from './docLinks';` : '';
+
+    let content = `${copyright}
+import { type OperatorEntry } from './types';
+import { ${metaImportsList} } from './metaTags';${docLinkImport}
+import { registerOperators } from './getFilteredCompletions';
+
+`;
+
+    content += sectionsStr;
+
+    // Derive the exported load function name from the first spec's fileName
+    // e.g. "queryOperators" → "loadQueryOperators"
+    const fileName = specs[0]?.fileName ?? 'operators'; // buildFileSpecs groups specs by fileName, so all specs here share it
+    const loadFnName = 'load' + fileName.charAt(0).toUpperCase() + fileName.slice(1);
+
+    // Emit an explicit load function instead of a side-effect registration call
+    const allVarNames = specs.map((s) => `...${s.variableName}`).join(',\n        ');
+    content += `// ---------------------------------------------------------------------------\n`;
+    content += `// Registration\n`;
+    content += `// ---------------------------------------------------------------------------\n\n`;
+    content += `export function ${loadFnName}(): void {\n`;
+    content += `    registerOperators([\n        ${allVarNames},\n    ]);\n`;
+    content += `}\n`;
+
+    return content;
+}
+/** Renders one `const <variableName>: readonly OperatorEntry[]` array literal for a spec, with one object per operator. */
+function generateSection(spec: FileSpec, snippets: Map<string, Map<string, string>>): string {
+    let section = `// ---------------------------------------------------------------------------\n`;
+    section += `// ${spec.operators[0]?.category || spec.variableName}\n`;
+    section += `// ---------------------------------------------------------------------------\n\n`;
+
+    section += `const ${spec.variableName}: readonly OperatorEntry[] = [\n`;
+
+    // Resolve the meta tag's string value for runtime getDocLink comparison
+    const metaStringValue = META_CONST_TO_VALUE[spec.metaImport] || '';
+
+    for (const op of spec.operators) {
+        const snippet = op.snippetOverride || lookupSnippet(snippets, spec.metaImport, op.value); // override snippet wins over template lookup
+        const bsonTypes = getApplicableBsonTypes(op, spec.metaImport);
+
+        // Determine the correct link emission strategy:
+        // - If dump has a URL that matches what getDocLink() would produce → use getDocLink() (compact)
+        // - If the URL was inferred via cross-reference → emit hardcoded string with comment
+        // - If dump has a URL that differs from getDocLink() → emit hardcoded string
+        // - If dump has no URL → omit the link property
+        const computedLink = getDocLink(op.value, metaStringValue);
+        const dumpLink = op.docLink || '';
+        const isInferred = (op as ParsedOperator & { inferredLink?: boolean }).inferredLink === true; // flag set by crossReferenceMissingLinks
+        let linkLine: string;
+        if (!dumpLink) {
+            // No documentation page exists — omit the link
+            linkLine = '';
+        } else if (isInferred) {
+            // Link was inferred from another category via cross-reference (scraper confirmed
+            // no page exists at this operator's own category URL — use the real page found)
+            linkLine = `        link: '${escapeString(dumpLink)}', // inferred from another category\n`;
+        } else if (dumpLink === computedLink) {
+            // The computed URL matches — use the compact getDocLink() call
+            linkLine = `        link: getDocLink('${escapeString(op.value)}', ${spec.metaImport}),\n`;
+        } else {
+            // The dump has a verified URL that differs from getDocLink() — emit hardcoded
+            linkLine = `        link: '${escapeString(dumpLink)}',\n`;
+        }
+
+        section += `    {\n`;
+        section += `        value: '${escapeString(op.value)}',\n`;
+        section += `        meta: ${spec.metaImport},\n`;
+        section += `        description: '${escapeString(op.description)}',\n`;
+        if (snippet) {
+            section += `        snippet: '${escapeString(snippet)}',\n`;
+        }
+        if (linkLine) {
+            section += linkLine;
+        }
+        if (bsonTypes) {
+            section += `        applicableBsonTypes: [${bsonTypes.map((t) => `'${t}'`).join(', ')}],\n`;
+        }
+        if (op.standalone === false) {
+            section += `        standalone: false,\n`;
+        }
+        section += `    },\n`;
+    }
+
+    section += `];\n\n`;
+    return section;
+}
+
+function escapeString(s: string): string {
+    return s.replace(/[\\']/g, (ch) => `\\${ch}`); // backslash-prefix every \ and ' so the text fits a single-quoted literal
+}
+
+// ---------------------------------------------------------------------------
+// Group operators by file and generate
+// ---------------------------------------------------------------------------
+
+/**
+ * Groups categories into per-file specs: output file name → one FileSpec per category.
+ * Categories lacking a file or meta mapping are skipped with a warning.
+ */
+function buildFileSpecs(categorizedOps: Map<string, ParsedOperator[]>): Map<string, FileSpec[]> {
+    const fileGroups = new Map<string, FileSpec[]>();
+    // Per file: "<operator>:<META const>" keys already emitted, used to drop duplicates
+    const seenPerFile = new Map<string, Set<string>>();
+
+    categorizedOps.forEach((ops, category) => {
+        const fileName = CATEGORY_TO_FILE[category];
+        const metaConst = CATEGORY_TO_META[category];
+        if (!fileName || !metaConst) {
+            console.warn(`⚠️  No mapping for category: "${category}" (${ops.length} operators)`);
+            return;
+        }
+
+        let seen = seenPerFile.get(fileName);
+        if (!seen) {
+            seen = new Set<string>();
+            seenPerFile.set(fileName, seen);
+        }
+
+        // Keep the first occurrence of each operator/meta pair (e.g., $elemMatch appears in both query:array and projection)
+        const uniqueOps: ParsedOperator[] = [];
+        for (const op of ops) {
+            const key = `${op.value}:${metaConst}`;
+            if (seen.has(key)) continue;
+            seen.add(key);
+            uniqueOps.push(op);
+        }
+        if (uniqueOps.length === 0) return;
+
+        const spec: FileSpec = {
+            fileName,
+            variableName: categoryToVarName(category),
+            metaImport: metaConst,
+            metaValue: metaConst,
+            operators: uniqueOps,
+        };
+
+        const group = fileGroups.get(fileName);
+        if (group) group.push(spec);
+        else fileGroups.set(fileName, [spec]);
+    });
+
+    return fileGroups;
+}
+
+/**
+ * Derives the generated array's variable name from a category heading.
+ * Three headings have fixed names; all others are camelCased after dropping ( ) $ , characters,
+ * e.g. "Comparison Query Operators" → "comparisonQueryOperators".
+ */
+function categoryToVarName(category: string): string {
+    const fixedNames = new Map<string, string>([
+        ['Accumulators ($group, $bucket, $bucketAuto, $setWindowFields)', 'groupAccumulators'],
+        ['Accumulators (in Other Stages)', 'otherStageAccumulators'],
+        ['Variables in Aggregation Expressions', 'systemVariables'],
+    ]);
+    const fixed = fixedNames.get(category);
+    if (fixed !== undefined) return fixed;
+
+    let name = '';
+    for (const word of category.replace(/[()$,]/g, '').split(/\s+/)) {
+        if (word.length === 0) continue;
+        // First word is fully lower-cased; later words are capitalized with a lower-cased tail
+        name += name === '' ? word.toLowerCase() : word.charAt(0).toUpperCase() + word.slice(1).toLowerCase();
+    }
+    return name;
+}
+
+// ---------------------------------------------------------------------------
+// Main
+// ---------------------------------------------------------------------------
+/** Runs the generation pipeline: parse dump → apply overrides → cross-reference links → load snippets → write and format files. */
+function main(): void {
+    const dumpPath = path.join(__dirname, '..', 'resources', 'scraped', 'operator-reference.md');
+    const overridePath = path.join(__dirname, '..', 'resources', 'overrides', 'operator-overrides.md');
+    const snippetsPath = path.join(__dirname, '..', 'resources', 'overrides', 'operator-snippets.md');
+    const srcDir = path.join(__dirname, '..', 'src');
+
+    console.log('📖 Reading operator reference dump...');
+    const content = fs.readFileSync(dumpPath, 'utf-8'); // the dump is required; unlike the two files below there is no existence check
+
+    console.log('🔍 Parsing operators...');
+    const categorizedOps = parseDump(content);
+
+    let totalOps = 0;
+    for (const [cat, ops] of categorizedOps) {
+        console.log(`  ${cat}: ${ops.length} operators`);
+        totalOps += ops.length;
+    }
+    console.log(`  Total: ${totalOps} operators\n`);
+
+    // Apply overrides if the file exists
+    if (fs.existsSync(overridePath)) {
+        console.log('📝 Reading overrides...');
+        const overrideContent = fs.readFileSync(overridePath, 'utf-8');
+        const overrides = parseOverrides(overrideContent);
+        applyOverrides(categorizedOps, overrides);
+        console.log('');
+    } else {
+        console.log('ℹ️  No overrides file found, skipping.\n');
+    }
+
+    // Cross-reference missing doc links from other categories
+    // (runs after overrides, so a doc link supplied by an override can also be reused)
+    console.log('🔗 Cross-referencing missing doc links...');
+    const inferred = crossReferenceMissingLinks(categorizedOps);
+    console.log(`  Inferred ${inferred} links from other categories\n`);
+
+    // Load snippet templates
+    let snippetsMap = new Map<string, Map<string, string>>();
+    if (fs.existsSync(snippetsPath)) {
+        console.log('📋 Reading snippet templates...');
+        const snippetsContent = fs.readFileSync(snippetsPath, 'utf-8');
+        snippetsMap = parseSnippets(snippetsContent);
+        let snippetCount = 0;
+        for (const [, catMap] of snippetsMap) {
+            snippetCount += catMap.size;
+        }
+        console.log(`  Loaded ${snippetCount} snippet entries across ${snippetsMap.size} categories\n`);
+    } else {
+        console.log('ℹ️  No snippets file found, skipping.\n');
+    }
+
+    console.log('📁 Building file specs...');
+    const fileGroups = buildFileSpecs(categorizedOps);
+
+    for (const [fileName, specs] of fileGroups) {
+        const filePath = path.join(srcDir, `${fileName}.ts`);
+        console.log(
+            `✍️  Generating ${fileName}.ts (${specs.reduce((n, s) => n + s.operators.length, 0)} operators)...`,
+        );
+        const fileContent = generateFileContent(specs, snippetsMap);
+        fs.writeFileSync(filePath, fileContent, 'utf-8'); // overwrites any existing generated file
+    }
+
+    // Format generated files with Prettier
+    const generatedFiles = [...fileGroups.keys()].map((f) => path.join(srcDir, `${f}.ts`));
+    console.log('\n🎨 Formatting generated files with Prettier...');
+    execSync(`npx prettier --write ${generatedFiles.map((f) => `"${f}"`).join(' ')}`, {
+        stdio: 'inherit',
+    });
+
+    console.log('\n✅ Done! Generated files:');
+    for (const [fileName, specs] of fileGroups) {
+        const count = specs.reduce((n, s) => n + s.operators.length, 0);
+        console.log(`  src/${fileName}.ts — ${count} operators`);
+    }
+}
+
+main(); // Script entry point: invoked at module top level, so generation starts as soon as the file is executed
diff --git a/packages/documentdb-constants/scripts/scrape-operator-docs.ts b/packages/documentdb-constants/scripts/scrape-operator-docs.ts
new file mode 100644
index 000000000..a4780a1d8
--- /dev/null
+++ b/packages/documentdb-constants/scripts/scrape-operator-docs.ts
@@ -0,0 +1,964 @@
+/*---------------------------------------------------------------------------------------------
+ *  Copyright (c) Microsoft Corporation. All rights reserved.
+ *  Licensed under the MIT License. See License.txt in the project root for license information.
+ *--------------------------------------------------------------------------------------------*/
+
+/**
+ * scrape-operator-docs.ts
+ *
+ * Scrapes the DocumentDB compatibility page and per-operator documentation
+ * to generate the resources/scraped/operator-reference.md dump file.
+ *
+ * Usage:
+ *   npx ts-node packages/documentdb-constants/scripts/scrape-operator-docs.ts
+ *
+ * The scraper has three phases:
+ *   Phase 1: Fetch and parse the compatibility page (operator list + support status)
+ *   Phase 2: Fetch per-operator doc pages (descriptions + syntax)
+ *   Phase 3: Generate the Markdown dump file
+ *
+ * Before doing real work, a verification step checks that the upstream
+ * documentation structure is as expected by fetching a few known URLs.
+ */
+
+import * as fs from 'fs';
+import * as path from 'path';
+
+// ---------------------------------------------------------------------------
+// Types
+// ---------------------------------------------------------------------------
+
+interface OperatorInfo {
+    operator: string;
+    category: string;
+    listed: boolean;
+    /** Why the operator is not listed: the text of its "deprecated" table cell, 'Deprecated', or 'Not in scope' */
+    notListedReason?: string;
+    /** Description from the per-operator doc page YAML frontmatter */
+    description?: string;
+    /** Syntax snippet from the per-operator doc page */
+    syntax?: string;
+    /** Documentation URL for the resolved docs directory, or the literal 'none' when the page was found outside the category's expected directory */
+    docLink?: string;
+    /**
+     * Human-readable note added when the scraper resolves a doc page from a
+     * different directory than the operator's primary category, or when other
+     * notable resolution decisions are made. Written to the dump as
+     * `- **Scraper Comment:**` for traceability.
+     */
+    scraperComment?: string;
+}
+
+// ---------------------------------------------------------------------------
+// Constants
+// ---------------------------------------------------------------------------
+
+const COMPAT_PAGE_URL =
+    'https://raw.githubusercontent.com/MicrosoftDocs/azure-databases-docs/main/articles/documentdb/compatibility-query-language.md';
+
+const OPERATOR_DOC_BASE =
+    'https://raw.githubusercontent.com/MicrosoftDocs/azure-databases-docs/main/articles/documentdb/operators';
+
+const DOC_LINK_BASE = 'https://learn.microsoft.com/en-us/azure/documentdb/operators';
+
+/**
+ * Maps category names (as they appear in column 1 of the compat page table)
+ * to the docs directory used for per-operator doc pages.
+ *
+ * This mapping is derived from the operators TOC.yml in the azure-databases-docs repo.
+ * Category names are trimmed before lookup, so leading/trailing spaces are OK.
+ */
+const CATEGORY_TO_DIR: Record<string, string> = {
+    // Query operators
+    'Comparison Query Operators': 'comparison-query',
+    'Logical Query Operators': 'logical-query',
+    'Element Query Operators': 'element-query',
+    'Evaluation Query Operators': 'evaluation-query',
+    'Array Query Operators': 'array-query',
+    'Bitwise Query Operators': 'bitwise-query',
+    'Geospatial Operators': 'geospatial',
+    'Projection Operators': 'projection',
+    'Miscellaneous Query Operators': 'miscellaneous-query',
+    // Update operators
+    'Field Update Operators': 'field-update',
+    'Array Update Operators': 'array-update',
+    'Bitwise Update Operators': 'bitwise-update',
+    // Aggregation
+    'Aggregation Pipeline Stages': 'aggregation',
+    'Accumulators ($group, $bucket, $bucketAuto, $setWindowFields)': 'accumulators',
+    'Accumulators (in Other Stages)': 'accumulators',
+    // Expression operators
+    'Arithmetic Expression Operators': 'arithmetic-expression',
+    'Array Expression Operators': 'array-expression',
+    'Bitwise Operators': 'bitwise',
+    'Boolean Expression Operators': 'boolean-expression',
+    'Comparison Expression Operators': 'comparison-expression',
+    'Conditional Expression Operators': 'conditional-expression',
+    'Data Size Operators': 'data-size',
+    'Date Expression Operators': 'date-expression',
+    'Literal Expression Operator': 'literal-expression',
+    'Miscellaneous Operators': 'miscellaneous',
+    'Object Expression Operators': 'object-expression',
+    'Set Expression Operators': 'set-expression',
+    'String Expression Operators': 'string-expression',
+    'Trigonometry Expression Operators': 'trigonometry-expression',
+    'Type Expression Operators': 'aggregation/type-expression',
+    'Timestamp Expression Operators': 'timestamp-expression',
+    'Variable Expression Operators': 'variable-expression',
+    'Text Expression Operator': 'miscellaneous',
+    'Custom Aggregation Expression Operators': 'miscellaneous',
+    // Window
+    'Window Operators': 'window-operators',
+    // System variables — no per-operator doc pages
+    'Variables in Aggregation Expressions': '',
+};
+
+/** Delay between batches of concurrent requests (ms) */
+const BATCH_DELAY_MS = 200;
+
+/** Number of concurrent requests per batch */
+const BATCH_SIZE = 10;
+
+/** Maximum number of retry attempts for transient HTTP errors */
+const MAX_RETRIES = 3;
+
+/** Base delay for exponential backoff (ms). Doubled on each retry. */
+const BACKOFF_BASE_MS = 1000;
+
+// ---------------------------------------------------------------------------
+// Utilities
+// ---------------------------------------------------------------------------
+
+interface FetchResult {
+    content: string | null;
+    /** Non-null when content is null — e.g. "404 Not Found" or "NetworkError: ..." */
+    failReason?: string;
+}
+
+/**
+ * Decides whether an HTTP status code signals a transient failure worth retrying:
+ * - 429 Too Many Requests
+ * - any status of 500 or above (server errors)
+ */
+function isRetryableStatus(status: number): boolean {
+    return status >= 500 || status === 429;
+}
+
+/**
+ * Fetches a URL as text, making up to MAX_RETRIES + 1 attempts with exponential backoff.
+ *
+ * Retries on 429, on any status >= 500, and when fetch() or reading the body throws.
+ * A numeric Retry-After header (seconds) replaces the backoff delay; a non-numeric value
+ * is ignored. Non-retryable statuses (e.g., 404) are returned immediately without retry.
+ */
+async function fetchText(url: string): Promise<FetchResult> {
+    let lastError: string | undefined;
+
+    for (let attempt = 0; attempt <= MAX_RETRIES; attempt++) {
+        try {
+            const response = await fetch(url);
+
+            if (response.ok) {
+                return { content: await response.text() };
+            }
+
+            const reason = `${response.status} ${response.statusText}`;
+
+            if (!isRetryableStatus(response.status)) {
+                // Non-retryable (e.g., 404, 403) — fail immediately
+                return { content: null, failReason: reason };
+            }
+
+            lastError = reason;
+
+            // Delay before the next attempt: a numeric Retry-After (seconds) wins;
+            // a missing or non-numeric header falls back to BACKOFF_BASE_MS * 2^attempt
+            const retryAfter = response.headers.get('Retry-After');
+            let delayMs: number;
+            if (retryAfter) {
+                const seconds = Number(retryAfter);
+                delayMs = Number.isNaN(seconds) ? BACKOFF_BASE_MS * 2 ** attempt : seconds * 1000;
+            } else {
+                delayMs = BACKOFF_BASE_MS * 2 ** attempt;
+            }
+
+            if (attempt < MAX_RETRIES) {
+                console.log(
+                    `\n  ⏳ ${reason} for ${url} — retrying in ${delayMs}ms (attempt ${attempt + 1}/${MAX_RETRIES})`,
+                );
+                await sleep(delayMs);
+            }
+        } catch (error) {
+            const msg = error instanceof Error ? error.message : String(error);
+            lastError = `NetworkError: ${msg}`;
+
+            if (attempt < MAX_RETRIES) {
+                const delayMs = BACKOFF_BASE_MS * 2 ** attempt;
+                console.log(`\n  ⏳ ${lastError} — retrying in ${delayMs}ms (attempt ${attempt + 1}/${MAX_RETRIES})`);
+                await sleep(delayMs);
+            }
+        }
+    }
+
+    return { content: null, failReason: lastError };
+}
+
+interface FetchJsonResult<T> {
+    data: T | null;
+    failReason?: string;
+}
+
+/**
+ * Fetches a URL as JSON (cast to T without validation) using the same retry loop as {@link fetchText}.
+ * A body that fails to parse throws inside the try block, so it is retried and labelled "NetworkError".
+ */
+async function fetchJson<T>(url: string): Promise<FetchJsonResult<T>> {
+    let lastError: string | undefined;
+
+    for (let attempt = 0; attempt <= MAX_RETRIES; attempt++) {
+        try {
+            const response = await fetch(url);
+
+            if (response.ok) {
+                return { data: (await response.json()) as T };
+            }
+
+            const reason = `${response.status} ${response.statusText}`;
+
+            if (!isRetryableStatus(response.status)) {
+                return { data: null, failReason: reason };
+            }
+
+            lastError = reason;
+
+            const retryAfter = response.headers.get('Retry-After');
+            let delayMs: number;
+            if (retryAfter) {
+                const seconds = Number(retryAfter);
+                delayMs = Number.isNaN(seconds) ? BACKOFF_BASE_MS * 2 ** attempt : seconds * 1000;
+            } else {
+                delayMs = BACKOFF_BASE_MS * 2 ** attempt;
+            }
+
+            if (attempt < MAX_RETRIES) {
+                console.log(
+                    `\n  ⏳ ${reason} for ${url} — retrying in ${delayMs}ms (attempt ${attempt + 1}/${MAX_RETRIES})`,
+                );
+                await sleep(delayMs);
+            }
+        } catch (error) {
+            const msg = error instanceof Error ? error.message : String(error);
+            lastError = `NetworkError: ${msg}`;
+
+            if (attempt < MAX_RETRIES) {
+                const delayMs = BACKOFF_BASE_MS * 2 ** attempt;
+                console.log(`\n  ⏳ ${lastError} — retrying in ${delayMs}ms (attempt ${attempt + 1}/${MAX_RETRIES})`);
+                await sleep(delayMs);
+            }
+        }
+    }
+
+    return { data: null, failReason: lastError };
+}
+
+function sleep(ms: number): Promise<void> {
+    return new Promise<void>((wake) => setTimeout(wake, ms));
+}
+
+/**
+ * Looks up a category name in CATEGORY_TO_DIR: '' marks a category without doc pages, undefined an unmapped name.
+ */
+function getCategoryDir(category: string): string | undefined {
+    return CATEGORY_TO_DIR[category];
+}
+
+/**
+ * Pulls the `description` value out of a Markdown file's YAML frontmatter.
+ * Line endings (CRLF / lone CR) are converted to LF before matching.
+ */
+function extractDescription(markdown: string): string | undefined {
+    const text = markdown.replace(/\r\n/g, '\n').replace(/\r/g, '\n');
+    const header = /^---\s*\n([\s\S]*?)\n---/.exec(text);
+    if (header === null) {
+        return undefined;
+    }
+    // The key may be indented (e.g. "  description: ..."), so leading whitespace is allowed
+    const field = /^\s*description:\s*(.+)$/m.exec(header[1]);
+    if (field === null) {
+        return undefined;
+    }
+    return field[1].trim().replace(/^['"]|['"]$/g, '');
+}
+
+/**
+ * Extracts the first fenced code block of the "## Syntax" section (the unanchored pattern also accepts
+ * "### Syntax"), trimmed and with CR/CRLF converted to LF; undefined when heading or code block is missing.
+ */
+function extractSyntax(markdown: string): string | undefined {
+    // The section runs to the next "## " heading or to end of input. A bare `$` (not `\n$`) is
+    // used so a Syntax section that ends a file without a trailing newline still matches.
+    const syntaxSectionMatch = markdown.match(/##\s*Syntax\s*\n([\s\S]*?)(?=\n##\s|$)/i);
+    if (!syntaxSectionMatch) return undefined;
+    const section = syntaxSectionMatch[1];
+    // Find first code block in this section
+    const codeBlockMatch = section.match(/```[\w]*\s*\n([\s\S]*?)```/);
+    if (codeBlockMatch) {
+        return codeBlockMatch[1].replace(/\r\n/g, '\n').replace(/\r/g, '\n').trim();
+    }
+    return undefined;
+}
+
+/**
+ * Makes arbitrary text safe for a single Markdown table cell.
+ * Line breaks (LF, CR, CRLF) become spaces, backslashes and pipes are
+ * backslash-escaped, and runs of whitespace collapse to one space.
+ */
+function escapeTableCell(text: string): string {
+    const singleLine = text.replace(/\r\n|\r|\n/g, ' ');
+    // Backslashes go first, so the ones added for pipes are not doubled again
+    const withBackslashes = singleLine.replace(/\\/g, '\\\\');
+    const withPipes = withBackslashes.replace(/\|/g, '\\|');
+    return withPipes.replace(/\s+/g, ' ').trim();
+}
+
+// ---------------------------------------------------------------------------
+// Phase 0: Verification
+// ---------------------------------------------------------------------------
+
+interface VerificationResult {
+    passed: boolean;
+    checks: Array<{ name: string; passed: boolean; detail: string }>;
+}
+
+async function runVerification(): Promise<VerificationResult> {
+    console.log('');
+    console.log('='.repeat(60));
+    console.log('  VERIFICATION STEP');
+    console.log('  Checking that upstream documentation structure is as expected');
+    console.log('='.repeat(60));
+    console.log('');
+
+    const checks: VerificationResult['checks'] = [];
+
+    // Check 1: Compatibility page is fetchable and contains a pipe-delimited row and a $-prefixed token
+    console.log('  [1/4] Fetching compatibility page...');
+    const compatResult = await fetchText(COMPAT_PAGE_URL);
+    if (compatResult.content) {
+        const hasTable = /\|.*\|.*\|/.test(compatResult.content);
+        const hasOperators = /\$\w+/.test(compatResult.content);
+        const passed = hasTable && hasOperators;
+        checks.push({
+            name: 'Compatibility page accessible & has tables + operators',
+            passed,
+            detail: passed
+                ? `OK — ${(compatResult.content.length / 1024).toFixed(1)} KB, tables found`
+                : `FAIL — tables: ${hasTable}, operators: ${hasOperators}`,
+        });
+    } else {
+        checks.push({
+            name: 'Compatibility page accessible',
+            passed: false,
+            detail: `FAIL — could not fetch ${COMPAT_PAGE_URL} (${compatResult.failReason})`,
+        });
+    }
+
+    // Check 2: The $match page (aggregation/) is fetchable and yields a frontmatter description
+    console.log('  [2/4] Fetching known operator page ($match)...');
+    const matchUrl = `${OPERATOR_DOC_BASE}/aggregation/$match.md`;
+    const matchResult = await fetchText(matchUrl);
+    if (matchResult.content) {
+        const hasDescription = extractDescription(matchResult.content) !== undefined;
+        checks.push({
+            name: '$match doc page has YAML frontmatter with description',
+            passed: hasDescription,
+            detail: hasDescription
+                ? `OK — description: "${extractDescription(matchResult.content)}"`
+                : 'FAIL — no description in frontmatter',
+        });
+    } else {
+        checks.push({
+            name: '$match doc page accessible',
+            passed: false,
+            detail: `FAIL — could not fetch ${matchUrl} (${matchResult.failReason})`,
+        });
+    }
+
+    // Check 3: The $eq page (comparison-query/) yields a description; syntax is reported but does not affect the result
+    console.log('  [3/4] Fetching known operator page ($eq)...');
+    const eqUrl = `${OPERATOR_DOC_BASE}/comparison-query/$eq.md`;
+    const eqResult = await fetchText(eqUrl);
+    if (eqResult.content) {
+        const desc = extractDescription(eqResult.content);
+        const syntax = extractSyntax(eqResult.content);
+        const passed = desc !== undefined;
+        checks.push({
+            name: '$eq doc page has frontmatter description',
+            passed,
+            detail: passed
+                ? `OK — description: "${desc}", syntax: ${syntax ? 'found' : 'not found'}`
+                : 'FAIL — no description in frontmatter',
+        });
+    } else {
+        checks.push({
+            name: '$eq doc page accessible',
+            passed: false,
+            detail: `FAIL — could not fetch ${eqUrl} (${eqResult.failReason})`,
+        });
+    }
+
+    // Check 4: The $sum page (accumulators/) is fetchable and yields a frontmatter description
+    console.log('  [4/4] Fetching known operator page ($sum)...');
+    const sumUrl = `${OPERATOR_DOC_BASE}/accumulators/$sum.md`;
+    const sumResult = await fetchText(sumUrl);
+    if (sumResult.content) {
+        const desc = extractDescription(sumResult.content);
+        const passed = desc !== undefined;
+        checks.push({
+            name: '$sum doc page has frontmatter description',
+            passed,
+            detail: passed ? `OK — description: "${desc}"` : 'FAIL — no description in frontmatter',
+        });
+    } else {
+        checks.push({
+            name: '$sum doc page accessible',
+            passed: false,
+            detail: `FAIL — could not fetch ${sumUrl} (${sumResult.failReason})`,
+        });
+    }
+
+    // Print results; the overall verdict is a pass only when every check passed
+    console.log('');
+    console.log('-'.repeat(60));
+    console.log('  Verification Results:');
+    console.log('-'.repeat(60));
+    const allPassed = checks.every((c) => c.passed);
+    for (const check of checks) {
+        const icon = check.passed ? '✅' : '❌';
+        console.log(`  ${icon} ${check.name}`);
+        console.log(`     ${check.detail}`);
+    }
+    console.log('-'.repeat(60));
+    if (allPassed) {
+        console.log('  ✅ VERIFICATION PASSED — all checks succeeded');
+    } else {
+        console.log('  ❌ VERIFICATION FAILED — some checks did not pass');
+        console.log('     The documentation structure may have changed.');
+        console.log('     Review the failures above before proceeding.');
+    }
+    console.log('='.repeat(60));
+    console.log('');
+
+    return { passed: allPassed, checks };
+}
+
+// ---------------------------------------------------------------------------
+// Phase 1: Compatibility table extraction
+// ---------------------------------------------------------------------------
+
+/**
+ * Sections we explicitly skip (not operators — commands, indexes, etc.)
+ */
+const SKIP_SECTIONS = ['Database commands', 'Index types', 'Index properties', 'Related content'];
+
+function parseCompatibilityTables(markdown: string): OperatorInfo[] {
+    const operators: OperatorInfo[] = [];
+    const lines = markdown.split('\n');
+
+    // The compatibility page has a single "## Operators" section with one big table:
+    // | Category | Operator | Supported (v5.0) | Supported (v6.0) | Supported (v7.0) | Supported (v8.0) |
+    // | --- | --- | --- | --- | --- | --- |
+    // | Comparison Query Operators | `$eq` | ✅ Yes | ✅ Yes | ✅ Yes | ✅ Yes |
+
+    let currentSection = '';
+    let inTable = false;
+    let separatorSeen = false;
+
+    for (const line of lines) {
+        // Detect "## " section headings; a new heading also ends any table in progress
+        const h2Match = line.match(/^##\s+(.+)/);
+        if (h2Match) {
+            currentSection = h2Match[1].trim();
+            inTable = false;
+            separatorSeen = false;
+            continue;
+        }
+
+        // Skip sections we don't care about (prefix match against SKIP_SECTIONS)
+        if (SKIP_SECTIONS.some((s) => currentSection.startsWith(s))) {
+            continue;
+        }
+
+        // Only process lines that start with |; any other line closes the current table
+        if (!line.startsWith('|')) {
+            if (inTable) {
+                inTable = false;
+                separatorSeen = false;
+            }
+            continue;
+        }
+
+        // Split the row into trimmed cells; empty cells are dropped, so columns shift left when a cell is blank
+        const cells = line
+            .split('|')
+            .map((c) => c.trim())
+            .filter((c) => c.length > 0);
+
+        if (cells.length < 2) continue;
+
+        // Detect separator row (| --- | --- | ... |), with optional alignment colons
+        if (cells.every((c) => /^-+$/.test(c) || /^:?-+:?$/.test(c))) {
+            separatorSeen = true;
+            inTable = true;
+            continue;
+        }
+
+        // Skip header row (before separator)
+        if (!separatorSeen) {
+            continue;
+        }
+
+        // Data row: | Category | Operator | v5.0 | v6.0 | v7.0 | v8.0 |
+        if (inTable && cells.length >= 2) {
+            const rawCategory = cells[0].replace(/`/g, '').replace(/\*\*/g, '').trim();
+            let rawOperator = cells[1];
+
+            // Extract from markdown links like [`$eq`](...), then strip backticks and trailing asterisks
+            const linkMatch = rawOperator.match(/\[([^\]]+)\]/);
+            if (linkMatch) {
+                rawOperator = linkMatch[1];
+            }
+            rawOperator = rawOperator.replace(/`/g, '').replace(/\*+$/, '').trim();
+
+            // Handle $[identifier] which may be parsed incorrectly
+            // The compat page shows `$[identifier]` — square brackets get stripped by link parsing
+            if (rawOperator === 'identifier' && rawCategory.includes('Array Update')) {
+                rawOperator = '$[identifier]';
+            }
+
+            // For Variables in Aggregation Expressions, add $$ prefix
+            if (rawCategory === 'Variables in Aggregation Expressions' && !rawOperator.startsWith('$')) {
+                rawOperator = '$$' + rawOperator;
+            }
+
+            if (!rawOperator || rawOperator === 'Operator' || rawOperator === 'Command') {
+                continue;
+            }
+
+            // Skip summary table rows where "operator" column contains numbers
+            // (e.g., "| **Aggregation Stages** | 60 | 58 | 96.67% |")
+            if (/^\d+$/.test(rawOperator)) {
+                continue;
+            }
+
+            // Skip rows where category contains percentage or "Total"
+            if (rawCategory.includes('%') || rawCategory === 'Total') {
+                continue;
+            }
+
+            // Support status from version columns (cells 2+); substring tests, so 'No' also matches e.g. "Not"
+            const versionCells = cells.slice(2);
+            const hasYes = versionCells.some((c) => c.includes('Yes') || c.includes('✅') || c.includes('✓'));
+            const hasNo = versionCells.some((c) => c.includes('No') || c.includes('❌') || c.includes('✗'));
+            const hasDeprecated = versionCells.some((c) => c.toLowerCase().includes('deprecated'));
+
+            let listed: boolean;
+            let notListedReason: string | undefined;
+
+            if (hasDeprecated) {
+                listed = false;
+                const depCell = versionCells.find((c) => c.toLowerCase().includes('deprecated'));
+                notListedReason = depCell?.replace(/[*`]/g, '').trim() || 'Deprecated';
+            } else if (hasNo && !hasYes) {
+                listed = false;
+                notListedReason = 'Not in scope';
+            } else {
+                listed = true;
+            }
+
+            operators.push({
+                operator: rawOperator,
+                category: rawCategory,
+                listed,
+                notListedReason,
+            });
+        }
+    }
+
+    return operators;
+}
+
+// ---------------------------------------------------------------------------
+// Phase 2: Per-operator doc fetching
+// ---------------------------------------------------------------------------
+
+/**
+ * Builds a global index of operator doc files by listing, via the GitHub
+ * contents API, every directory under operators/ (except "includes") plus one
+ * level of subdirectories. Maps lowercase filename (e.g. "$eq.md") to its
+ * directory path; if a name occurs in several directories, the last one listed wins.
+ * Directories whose listing fails are skipped silently; if the root listing
+ * fails, an empty map is returned. The index lets the scraper find pages filed
+ * outside the expected directory (e.g. $cmp lives in comparison-query/).
+ */
+async function buildGlobalFileIndex(): Promise<Map<string, string>> {
+    const GITHUB_API_BASE =
+        'https://api.github.com/repos/MicrosoftDocs/azure-databases-docs/contents/articles/documentdb/operators';
+
+    type GithubEntry = { name: string; type: string };
+    const index = new Map<string, string>();
+
+    const rootResult = await fetchJson<GithubEntry[]>(GITHUB_API_BASE);
+    if (!rootResult.data) {
+        console.log(
+            `  ⚠ Could not fetch directory listing from GitHub API — skipping global index (${rootResult.failReason})`,
+        );
+        return index;
+    }
+
+    const dirs = rootResult.data.filter((d) => d.type === 'dir' && d.name !== 'includes');
+
+    for (const dir of dirs) {
+        await sleep(300); // Rate limit GitHub API
+
+        const dirResult = await fetchJson<GithubEntry[]>(`${GITHUB_API_BASE}/${dir.name}`);
+        if (!dirResult.data) continue;
+
+        const files = dirResult.data.filter((f) => f.name.endsWith('.md'));
+        const subdirs = dirResult.data.filter((f) => f.type === 'dir');
+
+        for (const file of files) {
+            index.set(file.name.toLowerCase(), dir.name);
+        }
+
+        // Also check subdirectories (e.g., aggregation/type-expression/)
+        for (const sub of subdirs) {
+            await sleep(300);
+
+            const subResult = await fetchJson<GithubEntry[]>(`${GITHUB_API_BASE}/${dir.name}/${sub.name}`);
+            if (!subResult.data) continue;
+
+            for (const file of subResult.data.filter((f) => f.name.endsWith('.md'))) {
+                index.set(file.name.toLowerCase(), `${dir.name}/${sub.name}`);
+            }
+        }
+    }
+
+    return index;
+}
+
+interface FetchOperatorDocsResult {
+    failureDetails: { operator: string; category: string; reason: string }[];
+}
+
+/** Fetches each listed operator's doc page (expected directory first, then the global-index directory) and fills description, syntax, docLink and scraperComment in place; returns the failures. */
+async function fetchOperatorDocs(operators: OperatorInfo[]): Promise<FetchOperatorDocsResult> {
+    console.log('  Building global file index from GitHub API...');
+    const globalIndex = await buildGlobalFileIndex();
+    console.log(`  Global index: ${globalIndex.size} files found across all directories`);
+    console.log('');
+
+    // Only fetch for listed operators that have a doc directory or are in global index
+    const fetchable = operators.filter((op) => {
+        if (!op.listed) return false;
+        const dir = getCategoryDir(op.category);
+        // Skip operators whose category maps to empty string (e.g. system variables)
+        if (dir === '') return false;
+        // Include if we have a directory mapping OR if the file exists in the global index
+        const opFileName = op.operator.toLowerCase() + '.md';
+        return dir !== undefined || globalIndex.has(opFileName);
+    });
+    const total = fetchable.length;
+    let fetched = 0;
+    let succeeded = 0;
+    let failed = 0;
+    const skipped = operators.filter((op) => op.listed).length - total;
+
+    const failureDetails: { operator: string; category: string; reason: string }[] = [];
+
+    console.log(`  Phase 2: Fetching per-operator doc pages (${total} operators, ${skipped} skipped)...`);
+    console.log('');
+
+    // Process in batches
+    for (let i = 0; i < fetchable.length; i += BATCH_SIZE) {
+        const batch = fetchable.slice(i, i + BATCH_SIZE);
+
+        const promises = batch.map(async (op) => {
+            const primaryDir = getCategoryDir(op.category);
+            const opNameLower = op.operator.toLowerCase();
+            const opNameOriginal = op.operator;
+            const opFileName = opNameLower + '.md';
+
+            // Strategy:
+            // 1. Try primary directory (lowercase filename)
+            // 2. Try primary directory (original casing)
+            // 3. Try global index fallback directory (lowercase filename)
+            // 4. Try global index fallback directory (original casing)
+            let content: string | null = null;
+            let resolvedDir: string | undefined;
+            let lastFailReason: string | undefined;
+
+            if (primaryDir) {
+                const result = await fetchText(`${OPERATOR_DOC_BASE}/${primaryDir}/${opNameLower}.md`);
+                if (result.content) {
+                    content = result.content;
+                    resolvedDir = primaryDir;
+                } else {
+                    lastFailReason = result.failReason;
+                    if (opNameLower !== opNameOriginal) {
+                        const result2 = await fetchText(`${OPERATOR_DOC_BASE}/${primaryDir}/${opNameOriginal}.md`);
+                        if (result2.content) {
+                            content = result2.content;
+                            resolvedDir = primaryDir;
+                        } else {
+                            lastFailReason = result2.failReason;
+                        }
+                    }
+                }
+            }
+
+            // Fallback: a different directory from the global index; the Set yields each distinct casing once
+            const fallbackDir = content ? undefined : globalIndex.get(opFileName);
+            if (fallbackDir !== undefined && fallbackDir !== primaryDir) {
+                for (const name of new Set([opNameLower, opNameOriginal])) {
+                    const result3 = await fetchText(`${OPERATOR_DOC_BASE}/${fallbackDir}/${name}.md`);
+                    if (result3.content) {
+                        content = result3.content;
+                        resolvedDir = fallbackDir;
+                        break;
+                    }
+                    lastFailReason = result3.failReason;
+                }
+            }
+
+            if (content) {
+                op.description = extractDescription(content);
+                op.syntax = extractSyntax(content);
+
+                if (primaryDir && resolvedDir !== primaryDir) {
+                    // Doc page found in a different directory — emit 'none'
+                    // so the generator can cross-reference alternative URLs.
+                    // Description/syntax were still scraped from the fallback page.
+                    op.docLink = 'none';
+                    op.scraperComment =
+                        `Doc page not found in expected directory '${primaryDir}/'. ` +
+                        `Content scraped from '${resolvedDir}/'.`;
+                } else {
+                    op.docLink = `${DOC_LINK_BASE}/${resolvedDir}/${opNameLower}`;
+                }
+                succeeded++;
+            } else {
+                failureDetails.push({
+                    operator: op.operator,
+                    category: op.category,
+                    reason: lastFailReason ?? 'Unknown',
+                });
+                failed++;
+            }
+            fetched++;
+        });
+
+        await Promise.all(promises);
+
+        // Progress output
+        const pct = ((fetched / total) * 100).toFixed(0);
+        process.stdout.write(`\r  Progress: ${fetched}/${total} (${pct}%) — ${succeeded} succeeded, ${failed} failed`);
+
+        // Rate limiting between batches
+        if (i + BATCH_SIZE < fetchable.length) {
+            await sleep(BATCH_DELAY_MS);
+        }
+    }
+
+    console.log(''); // newline after progress
+    console.log(`  Phase 2 complete: ${succeeded}/${total} docs fetched successfully`);
+    if (failed > 0) {
+        console.log(`  ⚠ ${failed} operators could not be fetched (will have empty descriptions)`);
+        console.log('');
+
+        // Group failures by reason for a clear summary
+        const byReason = new Map<string, typeof failureDetails>();
+        for (const f of failureDetails) {
+            const list = byReason.get(f.reason) ?? [];
+            list.push(f);
+            byReason.set(f.reason, list);
+        }
+
+        for (const [reason, ops] of byReason) {
+            console.log(`  [${reason}] (${ops.length} operators):`);
+            for (const f of ops) {
+                const dir = getCategoryDir(f.category) || '???';
+                const fallback = globalIndex.get(f.operator.toLowerCase() + '.md');
+                const extra = fallback && fallback !== dir ? ` (also tried ${fallback})` : '';
+                console.log(`     - ${f.operator} (${f.category} → ${dir}${extra})`);
+            }
+            console.log('');
+        }
+    }
+
+    return { failureDetails };
+}
+
+// ---------------------------------------------------------------------------
+// Phase 3: Dump generation
+// ---------------------------------------------------------------------------
+
+/**
+ * Phase 3: renders the parsed operators as the Markdown reference dump.
+ *
+ * Output layout: title plus provenance comments, a per-category summary table,
+ * one `##` section per category that has listed operators, and a trailing
+ * "Not Listed" section when at least one operator is not listed.
+ * Category names are passed through escapeTableCell in the summary table only.
+ *
+ * @param operators - operators to render; categories keep first-seen order
+ * @returns the whole document as one string, lines joined with "\n"
+ */
+function generateDump(operators: OperatorInfo[]): string {
+    // toISOString() is UTC; keep only the YYYY-MM-DD part before the 'T'.
+    const scrapedOn = new Date().toISOString().split('T')[0];
+    const out: string[] = [
+        '# DocumentDB Operator Reference',
+        '',
+        '<!-- AUTO-GENERATED by scrape-operator-docs.ts -->',
+        `<!-- Last scraped: ${scrapedOn} -->`,
+        '<!-- Source: https://github.com/MicrosoftDocs/azure-databases-docs -->',
+        '',
+    ];
+
+    // Count listed / not-listed operators per category. A Map iterates in
+    // insertion order, so categories appear in the order they are first seen.
+    const tally = new Map<string, { listed: number; notListed: number }>();
+    for (const { category, listed } of operators) {
+        const counts = tally.get(category) ?? { listed: 0, notListed: 0 };
+        counts[listed ? 'listed' : 'notListed'] += 1;
+        tally.set(category, counts);
+    }
+
+    // Summary table: one row per category followed by a bold grand-total row.
+    out.push('## Summary', '', '| Category | Listed | Total |', '| --- | --- | --- |');
+    let listedSum = 0;
+    let overallSum = 0;
+    tally.forEach(({ listed, notListed }, category) => {
+        const rowTotal = listed + notListed;
+        listedSum += listed;
+        overallSum += rowTotal;
+        out.push(`| ${escapeTableCell(category)} | ${listed} | ${rowTotal} |`);
+    });
+    out.push(`| **Total** | **${listedSum}** | **${overallSum}** |`, '');
+
+    // One section per category; categories without listed operators are skipped.
+    for (const category of tally.keys()) {
+        const entries = operators.filter((op) => op.listed && op.category === category);
+        if (entries.length === 0) {
+            continue;
+        }
+
+        out.push(`## ${category}`, '');
+        for (const { operator, description, syntax, docLink, scraperComment } of entries) {
+            out.push(`### ${operator}`, '');
+            // Optional fields are emitted only when they are non-empty.
+            if (description) {
+                out.push(`- **Description:** ${description}`);
+            }
+            if (syntax) {
+                out.push('- **Syntax:**', '', '```javascript', syntax, '```', '');
+            }
+            if (docLink) {
+                out.push(`- **Doc Link:** ${docLink}`);
+            }
+            if (scraperComment) {
+                out.push(`- **Scraper Comment:** ${scraperComment}`);
+            }
+            out.push('');
+        }
+    }
+
+    // Operators that are not listed get a flat bullet list with their reason.
+    const unlisted = operators.filter((op) => !op.listed);
+    if (unlisted.length > 0) {
+        out.push(
+            '## Not Listed',
+            '',
+            'Operators below are present on the compatibility page but are not in scope',
+            'for this package (deprecated or not available in DocumentDB).',
+            '',
+        );
+        for (const { operator, category, notListedReason } of unlisted) {
+            out.push(`- **${operator}** (${category}) — ${notListedReason || 'Not in scope'}`);
+        }
+        out.push('');
+    }
+
+    return out.join('\n');
+}
+
+// ---------------------------------------------------------------------------
+// Main
+// ---------------------------------------------------------------------------
+
+/**
+ * Runs every scraper phase in order and writes ../resources/scraped/operator-reference.md (from __dirname).
+ * Calls process.exit(1) on verification failure, compatibility-page fetch failure, or network errors.
+ */
+async function main(): Promise<void> {
+    console.log('DocumentDB Operator Documentation Scraper');
+    console.log('=========================================');
+    console.log('');
+
+    // Phase 0: Verification
+    const verification = await runVerification();
+    if (!verification.passed) {
+        console.error('Aborting due to verification failure.');
+        console.error('If the documentation structure has changed, update the scraper accordingly.');
+        process.exit(1);
+    }
+
+    // Phase 1: Fetch and parse compatibility page
+    console.log('  Phase 1: Fetching compatibility page...');
+    const compatResult = await fetchText(COMPAT_PAGE_URL);
+    if (!compatResult.content) {
+        console.error(`ERROR: Could not fetch compatibility page (${compatResult.failReason})`);
+        process.exit(1);
+    }
+    console.log(`  Fetched ${(compatResult.content.length / 1024).toFixed(1)} KB`);
+
+    const operators = parseCompatibilityTables(compatResult.content);
+    const listed = operators.filter((op) => op.listed);
+    const notListed = operators.filter((op) => !op.listed);
+    console.log(`  Parsed ${operators.length} operators (${listed.length} listed, ${notListed.length} not listed)`);
+    console.log('');
+
+    // Phase 2: Fetch per-operator docs
+    const { failureDetails } = await fetchOperatorDocs(operators);
+    console.log('');
+
+    // Abort on network errors (transient connectivity problems that exhausted all retries).
+    // 404s are expected for operators without dedicated doc pages and do not abort the run.
+    const networkFailures = failureDetails.filter((f) => f.reason.startsWith('NetworkError:'));
+    if (networkFailures.length > 0) {
+        console.error(`ERROR: ${networkFailures.length} operator(s) failed due to network errors (not 404). Aborting.`);
+        for (const f of networkFailures) {
+            console.error(`  - ${f.operator} (${f.category}): ${f.reason}`);
+        }
+        process.exit(1);
+    }
+
+    // Phase 3: Generate dump
+    console.log('  Phase 3: Generating scraped/operator-reference.md...');
+    const dump = generateDump(operators);
+    const outputDir = path.join(__dirname, '..', 'resources', 'scraped');
+    const outputPath = path.join(outputDir, 'operator-reference.md');
+    fs.mkdirSync(outputDir, { recursive: true }); // no error if the directory already exists
+    fs.writeFileSync(outputPath, dump, 'utf-8');
+
+    console.log(`  Written to: ${outputPath}`);
+    // dump.length counts UTF-16 code units; report the UTF-8 byte size actually written.
+    console.log(`  File size: ${(Buffer.byteLength(dump, 'utf-8') / 1024).toFixed(1)} KB`);
+    console.log('');
+    console.log('Done! Review the generated file and commit it to the repo.');
+}
+
+main().catch((reason: unknown) => {
+    console.error('Scraper failed:', reason);
+    process.exit(1); // non-zero exit code signals the failure to the calling shell
+});
diff --git a/packages/documentdb-constants/src/accumulators.ts b/packages/documentdb-constants/src/accumulators.ts
new file mode 100644
index 000000000..c2d4d97d0
--- /dev/null
+++ b/packages/documentdb-constants/src/accumulators.ts
@@ -0,0 +1,186 @@
+/*---------------------------------------------------------------------------------------------
+ *  Copyright (c) Microsoft Corporation. All rights reserved.
+ *  Licensed under the MIT License. See License.txt in the project root for license information.
+ *--------------------------------------------------------------------------------------------*/
+
+// AUTO-GENERATED — DO NOT EDIT BY HAND
+//
+// Generated by: npm run generate  (scripts/generate-from-reference.ts)
+// Sources:      resources/scraped/operator-reference.md
+//               resources/overrides/operator-overrides.md
+//               resources/overrides/operator-snippets.md
+//
+// To change operator data, edit the overrides/snippets files and re-run the generator.
+
+import { getDocLink } from './docLinks';
+import { registerOperators } from './getFilteredCompletions';
+import { META_ACCUMULATOR } from './metaTags';
+import { type OperatorEntry } from './types';
+
+// ---------------------------------------------------------------------------
+// Accumulators ($group, $bucket, $bucketAuto, $setWindowFields)
+// ---------------------------------------------------------------------------
+// Entry fields: value, meta, description, snippet, link. "\\$" in a snippet evaluates to "\$" (presumably an escaped "$").
+const groupAccumulators: readonly OperatorEntry[] = [
+    {
+        value: '$addToSet',
+        meta: META_ACCUMULATOR,
+        description:
+            "The addToSet operator adds elements to an array if they don't already exist, while ensuring uniqueness of elements within the set.",
+        snippet: '{ $addToSet: "${1:\\$field}" }',
+        link: 'https://learn.microsoft.com/en-us/azure/documentdb/operators/array-update/$addtoset', // inferred from another category
+    },
+    {
+        value: '$avg',
+        meta: META_ACCUMULATOR,
+        description: 'Computes the average of numeric values for documents in a group, bucket, or window.',
+        snippet: '{ $avg: "${1:\\$field}" }',
+        link: getDocLink('$avg', META_ACCUMULATOR),
+    },
+    {
+        value: '$bottom',
+        meta: META_ACCUMULATOR,
+        description:
+            "The $bottom operator returns the last document from the query's result set sorted by one or more fields",
+        snippet: '{ $bottom: { sortBy: { ${1:field}: ${2:1} }, output: "${3:\\$field}" } }',
+        link: getDocLink('$bottom', META_ACCUMULATOR),
+    },
+    {
+        value: '$bottomN',
+        meta: META_ACCUMULATOR,
+        description: 'The $bottomN operator returns the last N documents from the result sorted by one or more fields',
+        snippet: '{ $bottomN: { n: ${1:number}, sortBy: { ${2:field}: ${3:1} }, output: "${4:\\$field}" } }',
+        link: getDocLink('$bottomN', META_ACCUMULATOR),
+    },
+    {
+        value: '$count',
+        meta: META_ACCUMULATOR,
+        description:
+            'The `$count` operator is used to count the number of documents that match a query filtering criteria.',
+        snippet: '{ $count: {} }',
+        link: getDocLink('$count', META_ACCUMULATOR),
+    },
+    {
+        value: '$first',
+        meta: META_ACCUMULATOR,
+        description: "The $first operator returns the first value in a group according to the group's sorting order.",
+        snippet: '{ $first: "${1:\\$field}" }',
+        link: getDocLink('$first', META_ACCUMULATOR),
+    },
+    {
+        value: '$firstN',
+        meta: META_ACCUMULATOR,
+        description:
+            'The $firstN operator sorts documents on one or more fields specified by the query and returns the first N document matching the filtering criteria',
+        snippet: '{ $firstN: { input: "${1:\\$field}", n: ${2:number} } }',
+        link: getDocLink('$firstN', META_ACCUMULATOR),
+    },
+    {
+        value: '$last',
+        meta: META_ACCUMULATOR,
+        description: 'The $last operator returns the last document from the result sorted by one or more fields',
+        snippet: '{ $last: "${1:\\$field}" }',
+        link: getDocLink('$last', META_ACCUMULATOR),
+    },
+    {
+        value: '$lastN',
+        meta: META_ACCUMULATOR,
+        description: 'The $lastN accumulator operator returns the last N values in a group of documents.',
+        snippet: '{ $lastN: { input: "${1:\\$field}", n: ${2:number} } }',
+        link: getDocLink('$lastN', META_ACCUMULATOR),
+    },
+    {
+        value: '$max',
+        meta: META_ACCUMULATOR,
+        description: 'The $max operator returns the maximum value from a set of input values.',
+        snippet: '{ $max: "${1:\\$field}" }',
+        link: getDocLink('$max', META_ACCUMULATOR),
+    },
+    {
+        value: '$maxN',
+        meta: META_ACCUMULATOR,
+        description: 'Retrieves the top N values based on a specified filtering criteria',
+        snippet: '{ $maxN: { input: "${1:\\$field}", n: ${2:number} } }',
+        link: getDocLink('$maxN', META_ACCUMULATOR),
+    },
+    {
+        value: '$median',
+        meta: META_ACCUMULATOR,
+        description: 'The $median operator calculates the median value of a numeric field in a group of documents.',
+        snippet: '{ $median: { input: "${1:\\$field}", method: "approximate" } }',
+        link: getDocLink('$median', META_ACCUMULATOR),
+    },
+    {
+        value: '$mergeObjects',
+        meta: META_ACCUMULATOR,
+        description: 'The $mergeObjects operator merges multiple documents into a single document',
+        snippet: '{ $mergeObjects: "${1:\\$field}" }',
+        link: 'https://learn.microsoft.com/en-us/azure/documentdb/operators/object-expression/$mergeobjects', // inferred from another category
+    },
+    {
+        value: '$min',
+        meta: META_ACCUMULATOR,
+        description: 'Retrieves the minimum value for a specified field',
+        snippet: '{ $min: "${1:\\$field}" }',
+        link: getDocLink('$min', META_ACCUMULATOR),
+    },
+    {
+        value: '$percentile',
+        meta: META_ACCUMULATOR,
+        description:
+            'The $percentile operator calculates the percentile of numerical values that match a filtering criteria',
+        snippet: '{ $percentile: { input: "${1:\\$field}", p: [${2:0.5}], method: "approximate" } }',
+        link: getDocLink('$percentile', META_ACCUMULATOR),
+    },
+    {
+        value: '$push',
+        meta: META_ACCUMULATOR,
+        description: 'The $push operator adds a specified value to an array within a document.',
+        snippet: '{ $push: "${1:\\$field}" }',
+        link: 'https://learn.microsoft.com/en-us/azure/documentdb/operators/array-update/$push', // inferred from another category
+    },
+    {
+        value: '$stdDevPop',
+        meta: META_ACCUMULATOR,
+        description: 'The $stddevpop operator calculates the standard deviation of the specified values',
+        snippet: '{ $stdDevPop: "${1:\\$field}" }',
+        link: getDocLink('$stdDevPop', META_ACCUMULATOR),
+    },
+    {
+        value: '$stdDevSamp',
+        meta: META_ACCUMULATOR,
+        description:
+            'The $stddevsamp operator calculates the standard deviation of a specified sample of values and not the entire population',
+        snippet: '{ $stdDevSamp: "${1:\\$field}" }',
+        link: getDocLink('$stdDevSamp', META_ACCUMULATOR),
+    },
+    {
+        value: '$sum',
+        meta: META_ACCUMULATOR,
+        description: 'The $sum operator calculates the sum of the values of a field based on a filtering criteria',
+        snippet: '{ $sum: "${1:\\$field}" }',
+        link: getDocLink('$sum', META_ACCUMULATOR),
+    },
+    {
+        value: '$top',
+        meta: META_ACCUMULATOR,
+        description: 'The $top operator returns the first document from the result set sorted by one or more fields',
+        snippet: '{ $top: { sortBy: { ${1:field}: ${2:1} }, output: "${3:\\$field}" } }',
+        link: getDocLink('$top', META_ACCUMULATOR),
+    },
+    {
+        value: '$topN',
+        meta: META_ACCUMULATOR,
+        description: 'The $topN operator returns the first N documents from the result sorted by one or more fields',
+        snippet: '{ $topN: { n: ${1:number}, sortBy: { ${2:field}: ${3:1} }, output: "${4:\\$field}" } }',
+        link: getDocLink('$topN', META_ACCUMULATOR),
+    },
+];
+
+// ---------------------------------------------------------------------------
+// Registration
+// ---------------------------------------------------------------------------
+/** Hands a mutable shallow copy of the accumulator entries to registerOperators. */
+export function loadAccumulators(): void {
+    registerOperators(Array.from(groupAccumulators));
+}
diff --git a/packages/documentdb-constants/src/bsonConstructors.ts b/packages/documentdb-constants/src/bsonConstructors.ts
new file mode 100644
index 000000000..5e08a22d7
--- /dev/null
+++ b/packages/documentdb-constants/src/bsonConstructors.ts
@@ -0,0 +1,83 @@
+/*---------------------------------------------------------------------------------------------
+ *  Copyright (c) Microsoft Corporation. All rights reserved.
+ *  Licensed under the MIT License. See License.txt in the project root for license information.
+ *--------------------------------------------------------------------------------------------*/
+
+import { registerOperators } from './getFilteredCompletions';
+import { META_BSON } from './metaTags';
+import { type OperatorEntry } from './types';
+
+// ---------------------------------------------------------------------------
+// BSON Constructor Functions
+// ---------------------------------------------------------------------------
+// Entry fields: value (constructor name), meta, description and snippet; no entry sets a link.
+const bsonConstructors: readonly OperatorEntry[] = [
+    {
+        value: 'ObjectId',
+        meta: META_BSON,
+        description: 'Creates a new ObjectId value, a 12-byte unique identifier.',
+        snippet: 'ObjectId("${1:hex}")',
+    },
+    {
+        value: 'ISODate',
+        meta: META_BSON,
+        description: 'Creates a date object from an ISO 8601 date string.',
+        snippet: 'ISODate("${1:2025-01-01T00:00:00Z}")',
+    },
+    {
+        value: 'NumberLong',
+        meta: META_BSON,
+        description: 'Creates a 64-bit integer (long) value.',
+        snippet: 'NumberLong(${1:value})',
+    },
+    {
+        value: 'NumberInt',
+        meta: META_BSON,
+        description: 'Creates a 32-bit integer value.',
+        snippet: 'NumberInt(${1:value})',
+    },
+    {
+        value: 'NumberDecimal',
+        meta: META_BSON,
+        description: 'Creates a 128-bit decimal value for high-precision calculations.',
+        snippet: 'NumberDecimal("${1:value}")',
+    },
+    {
+        value: 'BinData',
+        meta: META_BSON,
+        description: 'Creates a binary data value with a specified subtype.',
+        snippet: 'BinData(${1:subtype}, "${2:base64}")',
+    },
+    {
+        value: 'UUID',
+        meta: META_BSON,
+        description: 'Creates a UUID (Universally Unique Identifier) value.',
+        snippet: 'UUID("${1:uuid}")',
+    },
+    {
+        value: 'Timestamp',
+        meta: META_BSON,
+        description: 'Creates a BSON timestamp value for internal replication use.',
+        snippet: 'Timestamp(${1:seconds}, ${2:increment})',
+    },
+    {
+        value: 'MinKey',
+        meta: META_BSON,
+        description: 'Represents the lowest possible BSON value, comparing less than all other types.',
+        snippet: 'MinKey()',
+    },
+    {
+        value: 'MaxKey',
+        meta: META_BSON,
+        description: 'Represents the highest possible BSON value, comparing greater than all other types.',
+        snippet: 'MaxKey()',
+    },
+];
+
+// ---------------------------------------------------------------------------
+// Registration
+// ---------------------------------------------------------------------------
+/** Passes the BSON constructor entries to registerOperators as-is (no copy is made). */
+export function loadBsonConstructors(): void {
+    registerOperators(bsonConstructors);
+}
diff --git a/packages/documentdb-constants/src/docLinks.test.ts b/packages/documentdb-constants/src/docLinks.test.ts
new file mode 100644
index 000000000..c79a53da9
--- /dev/null
+++ b/packages/documentdb-constants/src/docLinks.test.ts
@@ -0,0 +1,82 @@
+/*---------------------------------------------------------------------------------------------
+ *  Copyright (c) Microsoft Corporation. All rights reserved.
+ *  Licensed under the MIT License. See License.txt in the project root for license information.
+ *--------------------------------------------------------------------------------------------*/
+
+/**
+ * Unit tests for docLinks.ts — URL generation for DocumentDB operator docs.
+ */
+
+import { getDocBase, getDocLink } from './index';
+
+describe('docLinks', () => {
+    // Root of the operator documentation. Every expected URL below is built
+    // from this constant so that each test only spells out the part that
+    // differs: the docs directory and the operator name.
+    const docsRoot = 'https://learn.microsoft.com/en-us/azure/documentdb/operators';
+
+    test('getDocBase returns the expected base URL', () => {
+        expect(getDocBase()).toBe(docsRoot);
+    });
+
+    describe('getDocLink', () => {
+        // Meta tags that map to a docs directory: the URL is the root, then
+        // the directory, then the lowercased operator name.
+        test('generates correct URL for comparison query operator', () => {
+            expect(getDocLink('$eq', 'query:comparison')).toBe(`${docsRoot}/comparison-query/$eq`);
+        });
+
+        test('generates correct URL for aggregation stage', () => {
+            expect(getDocLink('$match', 'stage')).toBe(`${docsRoot}/aggregation/$match`);
+        });
+
+        test('generates correct URL for accumulator', () => {
+            expect(getDocLink('$sum', 'accumulator')).toBe(`${docsRoot}/accumulators/$sum`);
+        });
+
+        test('generates correct URL for field update operator', () => {
+            expect(getDocLink('$set', 'update:field')).toBe(`${docsRoot}/field-update/$set`);
+        });
+
+        test('generates correct URL for array expression operator', () => {
+            expect(getDocLink('$filter', 'expr:array')).toBe(`${docsRoot}/array-expression/$filter`);
+        });
+
+        // The type-expression directory sits one level below aggregation/.
+        test('generates correct URL for type expression operator (nested dir)', () => {
+            expect(getDocLink('$convert', 'expr:type')).toBe(`${docsRoot}/aggregation/type-expression/$convert`);
+        });
+
+        test('generates correct URL for window operator', () => {
+            expect(getDocLink('$rank', 'window')).toBe(`${docsRoot}/window-operators/$rank`);
+        });
+
+        test('lowercases operator names in URLs', () => {
+            expect(getDocLink('$AddFields', 'stage')).toBe(`${docsRoot}/aggregation/$addfields`);
+        });
+
+        // Meta tags without a docs directory (unknown, BSON, variable) yield
+        // no link at all rather than a guessed URL.
+        test('returns undefined for unknown meta tag', () => {
+            expect(getDocLink('$eq', 'unknown:tag')).toBeUndefined();
+        });
+
+        test('returns undefined for BSON meta tag (no docs directory)', () => {
+            expect(getDocLink('ObjectId', 'bson')).toBeUndefined();
+        });
+
+        test('returns undefined for variable meta tag (no docs directory)', () => {
+            expect(getDocLink('$$NOW', 'variable')).toBeUndefined();
+        });
+
+        // Expression operators use their own directories, distinct from the
+        // query variants of the same operator name (compare the $eq cases).
+        test('generates correct URL for boolean expression operator', () => {
+            expect(getDocLink('$and', 'expr:bool')).toBe(`${docsRoot}/boolean-expression/$and`);
+        });
+
+        test('generates correct URL for comparison expression operator', () => {
+            expect(getDocLink('$eq', 'expr:comparison')).toBe(`${docsRoot}/comparison-expression/$eq`);
+        });
+    });
+});
diff --git a/packages/documentdb-constants/src/docLinks.ts b/packages/documentdb-constants/src/docLinks.ts
new file mode 100644
index 000000000..460112548
--- /dev/null
+++ b/packages/documentdb-constants/src/docLinks.ts
@@ -0,0 +1,77 @@
+/*---------------------------------------------------------------------------------------------
+ *  Copyright (c) Microsoft Corporation. All rights reserved.
+ *  Licensed under the MIT License. See License.txt in the project root for license information.
+ *--------------------------------------------------------------------------------------------*/
+
+/**
+ * URL generation helpers for DocumentDB documentation pages.
+ *
+ * Each operator has a documentation page at:
+ *   https://learn.microsoft.com/en-us/azure/documentdb/operators/{category}/{operatorName}
+ */
+
+const DOC_BASE = 'https://learn.microsoft.com/en-us/azure/documentdb/operators'; // root URL, no trailing slash
+
+/**
+ * Maps a meta tag to the docs directory used in the DocumentDB documentation
+ * URL path. getDocLink looks tags up by exact key, not by prefix.
+ */
+const META_TO_DOC_DIR: Record<string, string> = {
+    'query:comparison': 'comparison-query',
+    'query:logical': 'logical-query',
+    'query:element': 'element-query',
+    'query:evaluation': 'evaluation-query',
+    'query:array': 'array-query',
+    'query:bitwise': 'bitwise-query',
+    'query:geospatial': 'geospatial',
+    'query:projection': 'projection',
+    'query:misc': 'miscellaneous-query',
+    'update:field': 'field-update',
+    'update:array': 'array-update',
+    'update:bitwise': 'bitwise-update',
+    stage: 'aggregation',
+    accumulator: 'accumulators',
+    'expr:arith': 'arithmetic-expression',
+    'expr:array': 'array-expression',
+    'expr:bool': 'boolean-expression',
+    'expr:comparison': 'comparison-expression',
+    'expr:conditional': 'conditional-expression',
+    'expr:date': 'date-expression',
+    'expr:object': 'object-expression',
+    'expr:set': 'set-expression',
+    'expr:string': 'string-expression',
+    'expr:trig': 'trigonometry-expression',
+    'expr:type': 'aggregation/type-expression', // nested below the aggregation directory
+    'expr:datasize': 'data-size',
+    'expr:timestamp': 'timestamp-expression',
+    'expr:bitwise': 'bitwise',
+    'expr:literal': 'literal-expression',
+    'expr:misc': 'miscellaneous',
+    'expr:variable': 'variable-expression',
+    window: 'window-operators',
+};
+
+/**
+ * Generates a documentation URL for a DocumentDB operator.
+ *
+ * @param operatorValue - the operator name, e.g. "$bucket", "$gt"
+ * @param meta - the meta tag, e.g. "stage", "query:comparison"
+ * @returns URL string, or undefined if META_TO_DOC_DIR has no own entry for the meta tag
+ */
+export function getDocLink(operatorValue: string, meta: string): string | undefined {
+    // Own-property lookup only: a bare index would also resolve inherited
+    // Object.prototype members (e.g. meta === 'constructor') into a bogus URL.
+    const dir = Object.prototype.hasOwnProperty.call(META_TO_DOC_DIR, meta) ? META_TO_DOC_DIR[meta] : undefined;
+    if (!dir) {
+        return undefined;
+    }
+    // Operator names in URLs keep their $ prefix and are lowercased
+    return `${DOC_BASE}/${dir}/${operatorValue.toLowerCase()}`;
+}
+
+/**
+ * Returns DOC_BASE, the root URL of the DocumentDB operators documentation.
+ */
+export function getDocBase(): string {
+    return DOC_BASE;
+}
diff --git a/packages/documentdb-constants/src/expressionOperators.ts b/packages/documentdb-constants/src/expressionOperators.ts
new file mode 100644
index 000000000..a75905738
--- /dev/null
+++ b/packages/documentdb-constants/src/expressionOperators.ts
@@ -0,0 +1,1181 @@
+/*---------------------------------------------------------------------------------------------
+ *  Copyright (c) Microsoft Corporation. All rights reserved.
+ *  Licensed under the MIT License. See License.txt in the project root for license information.
+ *--------------------------------------------------------------------------------------------*/
+
+// AUTO-GENERATED — DO NOT EDIT BY HAND
+//
+// Generated by: npm run generate  (scripts/generate-from-reference.ts)
+// Sources:      resources/scraped/operator-reference.md
+//               resources/overrides/operator-overrides.md
+//               resources/overrides/operator-snippets.md
+//
+// To change operator data, edit the overrides/snippets files and re-run the generator.
+
+import { getDocLink } from './docLinks';
+import { registerOperators } from './getFilteredCompletions';
+import {
+    META_EXPR_ARITH,
+    META_EXPR_ARRAY,
+    META_EXPR_BITWISE,
+    META_EXPR_BOOL,
+    META_EXPR_COMPARISON,
+    META_EXPR_CONDITIONAL,
+    META_EXPR_DATASIZE,
+    META_EXPR_DATE,
+    META_EXPR_LITERAL,
+    META_EXPR_MISC,
+    META_EXPR_OBJECT,
+    META_EXPR_SET,
+    META_EXPR_STRING,
+    META_EXPR_TIMESTAMP,
+    META_EXPR_TRIG,
+    META_EXPR_TYPE,
+    META_EXPR_VARIABLE,
+} from './metaTags';
+import { type OperatorEntry } from './types';
+
+// ---------------------------------------------------------------------------
+// Arithmetic Expression Operators
+// ---------------------------------------------------------------------------
+/** Arithmetic expression operator entries (meta: META_EXPR_ARITH); only $mod carries a hard-coded link. */
+const arithmeticExpressionOperators: readonly OperatorEntry[] = [
+    {
+        value: '$abs',
+        meta: META_EXPR_ARITH,
+        description: 'The $abs operator returns the absolute value of a number.',
+        snippet: '{ $abs: "${1:\\$field}" }',
+        link: getDocLink('$abs', META_EXPR_ARITH),
+    },
+    {
+        value: '$add',
+        meta: META_EXPR_ARITH,
+        description: 'The $add operator returns the sum of two numbers or the sum of a date and numbers.',
+        snippet: '{ $add: ["${1:\\$field1}", "${2:\\$field2}"] }',
+        link: getDocLink('$add', META_EXPR_ARITH),
+    },
+    {
+        value: '$ceil',
+        meta: META_EXPR_ARITH,
+        description: 'The $ceil operator returns the smallest integer greater than or equal to the specified number.',
+        snippet: '{ $ceil: "${1:\\$field}" }',
+        link: getDocLink('$ceil', META_EXPR_ARITH),
+    },
+    {
+        value: '$divide',
+        meta: META_EXPR_ARITH,
+        description: 'The $divide operator divides two numbers and returns the quotient.',
+        snippet: '{ $divide: ["${1:\\$field1}", "${2:\\$field2}"] }',
+        link: getDocLink('$divide', META_EXPR_ARITH),
+    },
+    {
+        value: '$exp',
+        meta: META_EXPR_ARITH,
+        description: 'The $exp operator raises e to the specified exponent and returns the result',
+        snippet: '{ $exp: "${1:\\$field}" }',
+        link: getDocLink('$exp', META_EXPR_ARITH),
+    },
+    {
+        value: '$floor',
+        meta: META_EXPR_ARITH,
+        description: 'The $floor operator returns the largest integer less than or equal to the specified number',
+        snippet: '{ $floor: "${1:\\$field}" }',
+        link: getDocLink('$floor', META_EXPR_ARITH),
+    },
+    {
+        value: '$ln',
+        meta: META_EXPR_ARITH,
+        description: 'The $ln operator calculates the natural logarithm of the input',
+        snippet: '{ $ln: "${1:\\$field}" }',
+        link: getDocLink('$ln', META_EXPR_ARITH),
+    },
+    {
+        value: '$log',
+        meta: META_EXPR_ARITH,
+        description: 'The $log operator calculates the logarithm of a number in the specified base',
+        snippet: '{ $log: ["${1:\\$number}", ${2:base}] }',
+        link: getDocLink('$log', META_EXPR_ARITH),
+    },
+    {
+        value: '$log10',
+        meta: META_EXPR_ARITH,
+        description: 'The $log10 operator calculates the log of a specified number in base 10',
+        snippet: '{ $log10: "${1:\\$field}" }',
+        link: getDocLink('$log10', META_EXPR_ARITH),
+    },
+    {
+        value: '$mod',
+        meta: META_EXPR_ARITH,
+        description:
+            'The $mod operator performs a modulo operation on the value of a field and selects documents with a specified result.',
+        snippet: '{ $mod: ["${1:\\$field1}", "${2:\\$field2}"] }',
+        link: 'https://learn.microsoft.com/en-us/azure/documentdb/operators/evaluation-query/$mod', // inferred from another category
+    },
+    {
+        value: '$multiply',
+        meta: META_EXPR_ARITH,
+        description: 'The $multiply operator multiplies the input numerical values',
+        snippet: '{ $multiply: ["${1:\\$field1}", "${2:\\$field2}"] }',
+        link: getDocLink('$multiply', META_EXPR_ARITH),
+    },
+    {
+        value: '$pow',
+        meta: META_EXPR_ARITH,
+        description:
+            'The `$pow` operator calculates the value of a numerical value raised to the power of a specified exponent.',
+        snippet: '{ $pow: ["${1:\\$field1}", "${2:\\$field2}"] }',
+        link: getDocLink('$pow', META_EXPR_ARITH),
+    },
+    {
+        value: '$round',
+        meta: META_EXPR_ARITH,
+        description: 'The $round operator rounds a number to a specified decimal place.',
+        snippet: '{ $round: ["${1:\\$field}", ${2:place}] }',
+        link: getDocLink('$round', META_EXPR_ARITH),
+    },
+    {
+        value: '$sqrt',
+        meta: META_EXPR_ARITH,
+        description: 'The $sqrt operator calculates and returns the square root of an input number',
+        snippet: '{ $sqrt: "${1:\\$field}" }',
+        link: getDocLink('$sqrt', META_EXPR_ARITH),
+    },
+    {
+        value: '$subtract',
+        meta: META_EXPR_ARITH,
+        description: 'The $subtract operator subtracts two numbers and returns the result.',
+        snippet: '{ $subtract: ["${1:\\$field1}", "${2:\\$field2}"] }',
+        link: getDocLink('$subtract', META_EXPR_ARITH),
+    },
+    {
+        value: '$trunc',
+        meta: META_EXPR_ARITH,
+        description: 'The $trunc operator truncates a number to a specified decimal place.',
+        snippet: '{ $trunc: "${1:\\$field}" }',
+        link: getDocLink('$trunc', META_EXPR_ARITH),
+    },
+];
+
+// ---------------------------------------------------------------------------
+// Array Expression Operators
+// ---------------------------------------------------------------------------
+/** Array expression operator entries (meta: META_EXPR_ARRAY); several links are hard-coded to other categories' pages. */
+const arrayExpressionOperators: readonly OperatorEntry[] = [
+    {
+        value: '$arrayElemAt',
+        meta: META_EXPR_ARRAY,
+        description: 'The $arrayElemAt returns the element at the specified array index.',
+        snippet: '{ $arrayElemAt: ["${1:\\$array}", ${2:index}] }',
+        link: getDocLink('$arrayElemAt', META_EXPR_ARRAY),
+    },
+    {
+        value: '$arrayToObject',
+        meta: META_EXPR_ARRAY,
+        description: 'The $arrayToObject allows converting an array into a single document.',
+        snippet: '{ $arrayToObject: "${1:\\$array}" }',
+        link: getDocLink('$arrayToObject', META_EXPR_ARRAY),
+    },
+    {
+        value: '$concatArrays',
+        meta: META_EXPR_ARRAY,
+        description: 'The $concatArrays is used to combine multiple arrays into a single array.',
+        snippet: '{ $concatArrays: ["${1:\\$array1}", "${2:\\$array2}"] }',
+        link: getDocLink('$concatArrays', META_EXPR_ARRAY),
+    },
+    {
+        value: '$filter',
+        meta: META_EXPR_ARRAY,
+        description: 'The $filter operator filters for elements from an array based on a specified condition.',
+        snippet: '{ $filter: { input: "${1:\\$array}", as: "${2:item}", cond: { ${3:expression} } } }',
+        link: getDocLink('$filter', META_EXPR_ARRAY),
+    },
+    {
+        value: '$firstN',
+        meta: META_EXPR_ARRAY,
+        description:
+            'The $firstN operator sorts documents on one or more fields specified by the query and returns the first N document matching the filtering criteria',
+        snippet: '{ $firstN: { input: "${1:\\$array}", n: ${2:number} } }',
+        link: 'https://learn.microsoft.com/en-us/azure/documentdb/operators/accumulators/$firstn', // inferred from another category
+    },
+    {
+        value: '$in',
+        meta: META_EXPR_ARRAY,
+        description: 'The $in operator matches value of a field against an array of specified values',
+        snippet: '{ $in: ["${1:\\$field}", "${2:\\$array}"] }',
+        link: 'https://learn.microsoft.com/en-us/azure/documentdb/operators/comparison-query/$in', // inferred from another category
+    },
+    {
+        value: '$indexOfArray',
+        meta: META_EXPR_ARRAY,
+        description:
+            'The $indexOfArray operator is used to search for an element in an array and return the index of the first occurrence of the element.',
+        snippet: '{ $indexOfArray: ["${1:\\$array}", "${2:value}"] }',
+        link: getDocLink('$indexOfArray', META_EXPR_ARRAY),
+    },
+    {
+        value: '$isArray',
+        meta: META_EXPR_ARRAY,
+        description: 'The $isArray operator is used to determine if a specified value is an array.',
+        snippet: '{ $isArray: "${1:\\$field}" }',
+        link: getDocLink('$isArray', META_EXPR_ARRAY),
+    },
+    {
+        value: '$lastN',
+        meta: META_EXPR_ARRAY,
+        description: 'The $lastN accumulator operator returns the last N values in a group of documents.',
+        snippet: '{ $lastN: { input: "${1:\\$array}", n: ${2:number} } }',
+        link: 'https://learn.microsoft.com/en-us/azure/documentdb/operators/accumulators/$lastn', // inferred from another category
+    },
+    {
+        value: '$map',
+        meta: META_EXPR_ARRAY,
+        description: 'The $map operator allows applying an expression to each element in an array.',
+        snippet: '{ $map: { input: "${1:\\$array}", as: "${2:item}", in: { ${3:expression} } } }',
+        link: getDocLink('$map', META_EXPR_ARRAY),
+    },
+    {
+        value: '$maxN',
+        meta: META_EXPR_ARRAY,
+        description: 'Retrieves the top N values based on a specified filtering criteria',
+        snippet: '{ $maxN: { input: "${1:\\$array}", n: ${2:number} } }',
+        link: 'https://learn.microsoft.com/en-us/azure/documentdb/operators/accumulators/$maxn', // inferred from another category
+    },
+    {
+        value: '$minN',
+        meta: META_EXPR_ARRAY,
+        description: 'Retrieves the bottom N values based on a specified filtering criteria',
+        snippet: '{ $minN: { input: "${1:\\$array}", n: ${2:number} } }',
+        link: 'https://learn.microsoft.com/en-us/azure/documentdb/operators/accumulators/$minn', // accumulators page, as for $firstN/$lastN/$maxN; NOTE(review): "inferred" marker missing — confirm
+    },
+    {
+        value: '$objectToArray',
+        meta: META_EXPR_ARRAY,
+        description: 'Converts an object into an array of key-value pair documents.',
+        snippet: '{ $objectToArray: "${1:\\$object}" }',
+        link: 'https://learn.microsoft.com/en-us/azure/documentdb/operators/object-expression/$objecttoarray', // inferred from another category
+    },
+    {
+        value: '$range',
+        meta: META_EXPR_ARRAY,
+        description: 'The $range operator allows generating an array of sequential integers.',
+        snippet: '{ $range: [${1:start}, ${2:end}, ${3:step}] }',
+        link: getDocLink('$range', META_EXPR_ARRAY),
+    },
+    {
+        value: '$reduce',
+        meta: META_EXPR_ARRAY,
+        description:
+            'The $reduce operator applies an expression to each element in an array & accumulate result as single value.',
+        snippet: '{ $reduce: { input: "${1:\\$array}", initialValue: ${2:0}, in: { ${3:expression} } } }',
+        link: getDocLink('$reduce', META_EXPR_ARRAY),
+    },
+    {
+        value: '$reverseArray',
+        meta: META_EXPR_ARRAY,
+        description: 'The $reverseArray operator is used to reverse the order of elements in an array.',
+        snippet: '{ $reverseArray: "${1:\\$array}" }',
+        link: getDocLink('$reverseArray', META_EXPR_ARRAY),
+    },
+    {
+        value: '$size',
+        meta: META_EXPR_ARRAY,
+        description:
+            'The $size operator is used to query documents where an array field has a specified number of elements.',
+        snippet: '{ $size: "${1:\\$array}" }',
+        link: 'https://learn.microsoft.com/en-us/azure/documentdb/operators/array-query/$size', // inferred from another category
+    },
+    {
+        value: '$slice',
+        meta: META_EXPR_ARRAY,
+        description: 'The $slice operator returns a subset of an array from any element onwards in the array.',
+        snippet: '{ $slice: ["${1:\\$array}", ${2:n}] }',
+        link: getDocLink('$slice', META_EXPR_ARRAY),
+    },
+    {
+        value: '$sortArray',
+        meta: META_EXPR_ARRAY,
+        description: 'The $sortArray operator helps in sorting the elements in an array.',
+        snippet: '{ $sortArray: { input: "${1:\\$array}", sortBy: { ${2:field}: ${3:1} } } }',
+        link: getDocLink('$sortArray', META_EXPR_ARRAY),
+    },
+    {
+        value: '$zip',
+        meta: META_EXPR_ARRAY,
+        description: 'The $zip operator allows merging two or more arrays element-wise into a single array or arrays.',
+        snippet: '{ $zip: { inputs: ["${1:\\$array1}", "${2:\\$array2}"] } }',
+        link: getDocLink('$zip', META_EXPR_ARRAY),
+    },
+];
+
+// ---------------------------------------------------------------------------
+// Bitwise Operators
+// ---------------------------------------------------------------------------
+/** Bitwise expression operator entries (meta: META_EXPR_BITWISE); every link is built by getDocLink. */
+const bitwiseOperators: readonly OperatorEntry[] = [
+    {
+        value: '$bitAnd',
+        meta: META_EXPR_BITWISE,
+        description:
+            'The $bitAnd operator performs a bitwise AND operation on integer values and returns the result as an integer.',
+        snippet: '{ $bitAnd: [${1:value1}, ${2:value2}] }',
+        link: getDocLink('$bitAnd', META_EXPR_BITWISE),
+    },
+    {
+        value: '$bitNot',
+        meta: META_EXPR_BITWISE,
+        description:
+            'The $bitNot operator performs a bitwise NOT operation on integer values and returns the result as an integer.',
+        snippet: '{ $bitNot: "${1:\\$field}" }',
+        link: getDocLink('$bitNot', META_EXPR_BITWISE),
+    },
+    {
+        value: '$bitOr',
+        meta: META_EXPR_BITWISE,
+        description:
+            'The $bitOr operator performs a bitwise OR operation on integer values and returns the result as an integer.',
+        snippet: '{ $bitOr: [${1:value1}, ${2:value2}] }',
+        link: getDocLink('$bitOr', META_EXPR_BITWISE),
+    },
+    {
+        value: '$bitXor',
+        meta: META_EXPR_BITWISE,
+        description: 'The $bitXor operator performs a bitwise XOR operation on integer values.',
+        snippet: '{ $bitXor: [${1:value1}, ${2:value2}] }',
+        link: getDocLink('$bitXor', META_EXPR_BITWISE),
+    },
+];
+
+// ---------------------------------------------------------------------------
+// Boolean Expression Operators
+// ---------------------------------------------------------------------------
+/** Boolean expression operator entries (meta: META_EXPR_BOOL); all links are hard-coded to logical-query pages. */
+const booleanExpressionOperators: readonly OperatorEntry[] = [
+    {
+        value: '$and',
+        meta: META_EXPR_BOOL,
+        description:
+            'The $and operator joins multiple query clauses and returns documents that match all specified conditions.',
+        snippet: '{ $and: ["${1:expression1}", "${2:expression2}"] }',
+        link: 'https://learn.microsoft.com/en-us/azure/documentdb/operators/logical-query/$and', // inferred from another category
+    },
+    {
+        value: '$not',
+        meta: META_EXPR_BOOL,
+        description:
+            "The $not operator performs a logical NOT operation on a specified expression, selecting documents that don't match the expression.",
+        snippet: '{ $not: ["${1:expression}"] }',
+        link: 'https://learn.microsoft.com/en-us/azure/documentdb/operators/logical-query/$not', // inferred from another category
+    },
+    {
+        value: '$or',
+        meta: META_EXPR_BOOL,
+        description:
+            'The $or operator joins query clauses with a logical OR and returns documents that match at least one of the specified conditions.',
+        snippet: '{ $or: ["${1:expression1}", "${2:expression2}"] }',
+        link: 'https://learn.microsoft.com/en-us/azure/documentdb/operators/logical-query/$or', // inferred from another category
+    },
+];
+
+// ---------------------------------------------------------------------------
+// Comparison Expression Operators
+// ---------------------------------------------------------------------------
+/** Comparison expression operator entries (meta: META_EXPR_COMPARISON); all links are hard-coded to comparison-query pages. */
+const comparisonExpressionOperators: readonly OperatorEntry[] = [
+    {
+        value: '$cmp',
+        meta: META_EXPR_COMPARISON,
+        description: 'The $cmp operator compares two values',
+        snippet: '{ $cmp: ["${1:\\$field1}", "${2:\\$field2}"] }',
+        link: 'https://learn.microsoft.com/en-us/azure/documentdb/operators/comparison-query/$cmp', // NOTE(review): hard-coded like its siblings but not marked "inferred" — confirm
+    },
+    {
+        value: '$eq',
+        meta: META_EXPR_COMPARISON,
+        description: 'The $eq query operator compares the value of a field to a specified value',
+        snippet: '{ $eq: ["${1:\\$field1}", "${2:\\$field2}"] }',
+        link: 'https://learn.microsoft.com/en-us/azure/documentdb/operators/comparison-query/$eq', // inferred from another category
+    },
+    {
+        value: '$gt',
+        meta: META_EXPR_COMPARISON,
+        description:
+            'The $gt query operator retrieves documents where the value of a field is greater than a specified value',
+        snippet: '{ $gt: ["${1:\\$field1}", "${2:\\$field2}"] }',
+        link: 'https://learn.microsoft.com/en-us/azure/documentdb/operators/comparison-query/$gt', // inferred from another category
+    },
+    {
+        value: '$gte',
+        meta: META_EXPR_COMPARISON,
+        description:
+            'The $gte operator retrieves documents where the value of a field is greater than or equal to a specified value',
+        snippet: '{ $gte: ["${1:\\$field1}", "${2:\\$field2}"] }',
+        link: 'https://learn.microsoft.com/en-us/azure/documentdb/operators/comparison-query/$gte', // inferred from another category
+    },
+    {
+        value: '$lt',
+        meta: META_EXPR_COMPARISON,
+        description: 'The $lt operator retrieves documents where the value of field is less than a specified value',
+        snippet: '{ $lt: ["${1:\\$field1}", "${2:\\$field2}"] }',
+        link: 'https://learn.microsoft.com/en-us/azure/documentdb/operators/comparison-query/$lt', // inferred from another category
+    },
+    {
+        value: '$lte',
+        meta: META_EXPR_COMPARISON,
+        description:
+            'The $lte operator retrieves documents where the value of a field is less than or equal to a specified value',
+        snippet: '{ $lte: ["${1:\\$field1}", "${2:\\$field2}"] }',
+        link: 'https://learn.microsoft.com/en-us/azure/documentdb/operators/comparison-query/$lte', // inferred from another category
+    },
+    {
+        value: '$ne',
+        meta: META_EXPR_COMPARISON,
+        description: "The $ne operator retrieves documents where the value of a field doesn't equal a specified value",
+        snippet: '{ $ne: ["${1:\\$field1}", "${2:\\$field2}"] }',
+        link: 'https://learn.microsoft.com/en-us/azure/documentdb/operators/comparison-query/$ne', // inferred from another category
+    },
+];
+
+// ---------------------------------------------------------------------------
+// Data Size Operators
+// ---------------------------------------------------------------------------
+/** Data size operator entries (meta: META_EXPR_DATASIZE); every link is built by getDocLink. */
+const dataSizeOperators: readonly OperatorEntry[] = [
+    {
+        value: '$bsonSize',
+        meta: META_EXPR_DATASIZE,
+        description: 'The $bsonSize operator returns the size of a document in bytes when encoded as BSON.',
+        snippet: '{ $bsonSize: "${1:\\$field}" }',
+        link: getDocLink('$bsonSize', META_EXPR_DATASIZE),
+    },
+    {
+        value: '$binarySize',
+        meta: META_EXPR_DATASIZE,
+        description: 'The $binarySize operator is used to return the size of a binary data field.',
+        snippet: '{ $binarySize: "${1:\\$field}" }',
+        link: getDocLink('$binarySize', META_EXPR_DATASIZE),
+    },
+];
+
+// ---------------------------------------------------------------------------
+// Date Expression Operators
+// ---------------------------------------------------------------------------
+/** Date expression operator entries (meta: META_EXPR_DATE); only $toDate carries a hard-coded link. */
+const dateExpressionOperators: readonly OperatorEntry[] = [
+    {
+        value: '$dateAdd',
+        meta: META_EXPR_DATE,
+        description: 'The $dateAdd operator adds a specified number of time units (day, hour, month etc) to a date.',
+        snippet: '{ $dateAdd: { startDate: "${1:\\$dateField}", unit: "${2:day}", amount: ${3:1} } }',
+        link: getDocLink('$dateAdd', META_EXPR_DATE),
+    },
+    {
+        value: '$dateDiff',
+        meta: META_EXPR_DATE,
+        description:
+            'The $dateDiff operator calculates the difference between two dates in various units such as years, months, days, etc.',
+        snippet: '{ $dateDiff: { startDate: "${1:\\$startDate}", endDate: "${2:\\$endDate}", unit: "${3:day}" } }',
+        link: getDocLink('$dateDiff', META_EXPR_DATE),
+    },
+    {
+        value: '$dateFromParts',
+        meta: META_EXPR_DATE,
+        description: 'The $dateFromParts operator constructs a date from individual components.',
+        snippet: '{ $dateFromParts: { year: ${1:2024}, month: ${2:1}, day: ${3:1} } }',
+        link: getDocLink('$dateFromParts', META_EXPR_DATE),
+    },
+    {
+        value: '$dateFromString',
+        meta: META_EXPR_DATE,
+        description: 'The $dateFromString operator converts a date/time string to a date object.',
+        snippet: '{ $dateFromString: { dateString: "${1:dateString}" } }',
+        link: getDocLink('$dateFromString', META_EXPR_DATE),
+    },
+    {
+        value: '$dateSubtract',
+        meta: META_EXPR_DATE,
+        description: 'The $dateSubtract operator subtracts a specified amount of time from a date.',
+        snippet: '{ $dateSubtract: { startDate: "${1:\\$dateField}", unit: "${2:day}", amount: ${3:1} } }',
+        link: getDocLink('$dateSubtract', META_EXPR_DATE),
+    },
+    {
+        value: '$dateToParts',
+        meta: META_EXPR_DATE,
+        description:
+            'The $dateToParts operator decomposes a date into its individual parts such as year, month, day, and more.',
+        snippet: '{ $dateToParts: { date: "${1:\\$dateField}" } }',
+        link: getDocLink('$dateToParts', META_EXPR_DATE),
+    },
+    {
+        value: '$dateToString',
+        meta: META_EXPR_DATE,
+        description: 'The $dateToString operator converts a date object into a formatted string.',
+        snippet: '{ $dateToString: { format: "${1:%Y-%m-%d}", date: "${2:\\$dateField}" } }',
+        link: getDocLink('$dateToString', META_EXPR_DATE),
+    },
+    {
+        value: '$dateTrunc',
+        meta: META_EXPR_DATE,
+        description: 'The $dateTrunc operator truncates a date to a specified unit.',
+        snippet: '{ $dateTrunc: { date: "${1:\\$dateField}", unit: "${2:day}" } }',
+        link: getDocLink('$dateTrunc', META_EXPR_DATE),
+    },
+    {
+        value: '$dayOfMonth',
+        meta: META_EXPR_DATE,
+        description: 'The $dayOfMonth operator extracts the day of the month from a date.',
+        snippet: '{ $dayOfMonth: "${1:\\$dateField}" }',
+        link: getDocLink('$dayOfMonth', META_EXPR_DATE),
+    },
+    {
+        value: '$dayOfWeek',
+        meta: META_EXPR_DATE,
+        description: 'The $dayOfWeek operator extracts the day of the week from a date.',
+        snippet: '{ $dayOfWeek: "${1:\\$dateField}" }',
+        link: getDocLink('$dayOfWeek', META_EXPR_DATE),
+    },
+    {
+        value: '$dayOfYear',
+        meta: META_EXPR_DATE,
+        description: 'The $dayOfYear operator extracts the day of the year from a date.',
+        snippet: '{ $dayOfYear: "${1:\\$dateField}" }',
+        link: getDocLink('$dayOfYear', META_EXPR_DATE),
+    },
+    {
+        value: '$hour',
+        meta: META_EXPR_DATE,
+        description: 'The $hour operator returns the hour portion of a date as a number between 0 and 23.',
+        snippet: '{ $hour: "${1:\\$dateField}" }',
+        link: getDocLink('$hour', META_EXPR_DATE),
+    },
+    {
+        value: '$isoDayOfWeek',
+        meta: META_EXPR_DATE,
+        description:
+            'The $isoDayOfWeek operator returns the weekday number in ISO 8601 format, ranging from 1 (Monday) to 7 (Sunday).',
+        snippet: '{ $isoDayOfWeek: "${1:\\$dateField}" }',
+        link: getDocLink('$isoDayOfWeek', META_EXPR_DATE),
+    },
+    {
+        value: '$isoWeek',
+        meta: META_EXPR_DATE,
+        description:
+            'The $isoWeek operator returns the week number of the year in ISO 8601 format, ranging from 1 to 53.',
+        snippet: '{ $isoWeek: "${1:\\$dateField}" }',
+        link: getDocLink('$isoWeek', META_EXPR_DATE),
+    },
+    {
+        value: '$isoWeekYear',
+        meta: META_EXPR_DATE,
+        description:
+            'The $isoWeekYear operator returns the year number in ISO 8601 format, which can differ from the calendar year for dates at the beginning or end of the year.',
+        snippet: '{ $isoWeekYear: "${1:\\$dateField}" }',
+        link: getDocLink('$isoWeekYear', META_EXPR_DATE),
+    },
+    {
+        value: '$millisecond',
+        meta: META_EXPR_DATE,
+        description: 'The $millisecond operator extracts the milliseconds portion from a date value.',
+        snippet: '{ $millisecond: "${1:\\$dateField}" }',
+        link: getDocLink('$millisecond', META_EXPR_DATE),
+    },
+    {
+        value: '$minute',
+        meta: META_EXPR_DATE,
+        description: 'The $minute operator extracts the minute portion from a date value.',
+        snippet: '{ $minute: "${1:\\$dateField}" }',
+        link: getDocLink('$minute', META_EXPR_DATE),
+    },
+    {
+        value: '$month',
+        meta: META_EXPR_DATE,
+        description: 'The $month operator extracts the month portion from a date value.',
+        snippet: '{ $month: "${1:\\$dateField}" }',
+        link: getDocLink('$month', META_EXPR_DATE),
+    },
+    {
+        value: '$second',
+        meta: META_EXPR_DATE,
+        description: 'The $second operator extracts the seconds portion from a date value.',
+        snippet: '{ $second: "${1:\\$dateField}" }',
+        link: getDocLink('$second', META_EXPR_DATE),
+    },
+    {
+        value: '$toDate',
+        meta: META_EXPR_DATE,
+        description: 'The $toDate operator converts supported types to a proper Date object.',
+        snippet: '{ $toDate: "${1:\\$field}" }',
+        link: 'https://learn.microsoft.com/en-us/azure/documentdb/operators/aggregation/type-expression/$todate', // inferred from another category
+    },
+    {
+        value: '$week',
+        meta: META_EXPR_DATE,
+        description: 'The $week operator returns the week number for a date as a value between 0 and 53.',
+        snippet: '{ $week: "${1:\\$dateField}" }',
+        link: getDocLink('$week', META_EXPR_DATE),
+    },
+    {
+        value: '$year',
+        meta: META_EXPR_DATE,
+        description: 'The $year operator returns the year for a date as a four-digit number.',
+        snippet: '{ $year: "${1:\\$dateField}" }',
+        link: getDocLink('$year', META_EXPR_DATE),
+    },
+];
+
+// ---------------------------------------------------------------------------
+// Literal Expression Operator
+// ---------------------------------------------------------------------------
+/** The single literal expression operator entry (meta: META_EXPR_LITERAL); its link is built by getDocLink. */
+const literalExpressionOperator: readonly OperatorEntry[] = [
+    {
+        value: '$literal',
+        meta: META_EXPR_LITERAL,
+        description:
+            'The $literal operator returns the specified value without parsing it as an expression, allowing literal values to be used in aggregation pipelines.',
+        snippet: '{ $literal: ${1:value} }',
+        link: getDocLink('$literal', META_EXPR_LITERAL),
+    },
+];
+
+// ---------------------------------------------------------------------------
+// Miscellaneous Operators
+// ---------------------------------------------------------------------------
+/** Miscellaneous expression operator entries (meta: META_EXPR_MISC); only $rand carries a hard-coded link. */
+const miscellaneousOperators: readonly OperatorEntry[] = [
+    {
+        value: '$getField',
+        meta: META_EXPR_MISC,
+        description: 'The $getField operator allows retrieving the value of a specified field from a document.',
+        snippet: '{ $getField: { field: "${1:fieldName}", input: "${2:\\$object}" } }',
+        link: getDocLink('$getField', META_EXPR_MISC),
+    },
+    {
+        value: '$rand',
+        meta: META_EXPR_MISC,
+        description: 'The $rand operator generates a random float value between 0 and 1.',
+        snippet: '{ $rand: {} }',
+        link: 'https://learn.microsoft.com/en-us/azure/documentdb/operators/miscellaneous-query/$rand', // inferred from another category
+    },
+    {
+        value: '$sampleRate',
+        meta: META_EXPR_MISC,
+        description:
+            'The $sampleRate operator randomly samples documents from a collection based on a specified probability rate, useful for statistical analysis and testing.',
+        snippet: '{ $sampleRate: ${1:0.5} }',
+        link: getDocLink('$sampleRate', META_EXPR_MISC),
+    },
+];
+
+// ---------------------------------------------------------------------------
+// Object Expression Operators
+// ---------------------------------------------------------------------------
+/** Object expression operator entries (meta: META_EXPR_OBJECT); $objectToArray is also listed in arrayExpressionOperators. */
+const objectExpressionOperators: readonly OperatorEntry[] = [
+    {
+        value: '$mergeObjects',
+        meta: META_EXPR_OBJECT,
+        description: 'The $mergeObjects operator merges multiple documents into a single document',
+        snippet: '{ $mergeObjects: ["${1:\\$object1}", "${2:\\$object2}"] }',
+        link: getDocLink('$mergeObjects', META_EXPR_OBJECT),
+    },
+    {
+        value: '$objectToArray',
+        meta: META_EXPR_OBJECT,
+        description:
+            'The objectToArray command is used to transform a document (object) into an array of key-value pairs.',
+        snippet: '{ $objectToArray: "${1:\\$object}" }',
+        link: getDocLink('$objectToArray', META_EXPR_OBJECT),
+    },
+    {
+        value: '$setField',
+        meta: META_EXPR_OBJECT,
+        description: 'The setField command is used to add, update, or remove fields in embedded documents.',
+        snippet: '{ $setField: { field: "${1:fieldName}", input: "${2:\\$object}", value: ${3:value} } }',
+        link: getDocLink('$setField', META_EXPR_OBJECT),
+    },
+];
+
+// ---------------------------------------------------------------------------
+// Set Expression Operators
+// ---------------------------------------------------------------------------
+/** Set expression operator entries (meta: META_EXPR_SET); every link is built by getDocLink. */
+const setExpressionOperators: readonly OperatorEntry[] = [
+    {
+        value: '$allElementsTrue',
+        meta: META_EXPR_SET,
+        description: 'The $allElementsTrue operator returns true if all elements in an array evaluate to true.',
+        snippet: '{ $allElementsTrue: ["${1:\\$array}"] }',
+        link: getDocLink('$allElementsTrue', META_EXPR_SET),
+    },
+    {
+        value: '$anyElementTrue',
+        meta: META_EXPR_SET,
+        description:
+            'The $anyElementTrue operator returns true if any element in an array evaluates to a value of true.',
+        snippet: '{ $anyElementTrue: ["${1:\\$array}"] }',
+        link: getDocLink('$anyElementTrue', META_EXPR_SET),
+    },
+    {
+        value: '$setDifference',
+        meta: META_EXPR_SET,
+        description:
+            'The $setDifference operator returns a set with elements that exist in one set but not in a second set.',
+        snippet: '{ $setDifference: ["${1:\\$set1}", "${2:\\$set2}"] }',
+        link: getDocLink('$setDifference', META_EXPR_SET),
+    },
+    {
+        value: '$setEquals',
+        meta: META_EXPR_SET,
+        description: 'The $setEquals operator returns true if two sets have the same distinct elements.',
+        snippet: '{ $setEquals: ["${1:\\$set1}", "${2:\\$set2}"] }',
+        link: getDocLink('$setEquals', META_EXPR_SET),
+    },
+    {
+        value: '$setIntersection',
+        meta: META_EXPR_SET,
+        description: 'The $setIntersection operator returns the common elements that appear in all input arrays.',
+        snippet: '{ $setIntersection: ["${1:\\$set1}", "${2:\\$set2}"] }',
+        link: getDocLink('$setIntersection', META_EXPR_SET),
+    },
+    {
+        value: '$setIsSubset',
+        meta: META_EXPR_SET,
+        description: 'The $setIsSubset operator determines if one array is a subset of a second array.',
+        snippet: '{ $setIsSubset: ["${1:\\$set1}", "${2:\\$set2}"] }',
+        link: getDocLink('$setIsSubset', META_EXPR_SET),
+    },
+    {
+        value: '$setUnion',
+        meta: META_EXPR_SET,
+        description:
+            'The $setUnion operator returns an array that contains all the unique elements from the input arrays.',
+        snippet: '{ $setUnion: ["${1:\\$set1}", "${2:\\$set2}"] }',
+        link: getDocLink('$setUnion', META_EXPR_SET),
+    },
+];
+
+// ---------------------------------------------------------------------------
+// String Expression Operators
+// ---------------------------------------------------------------------------
+/** String expression operators, plus the date/string conversions; each entry is tagged META_EXPR_STRING. */
+const stringExpressionOperators: readonly OperatorEntry[] = [
+    {
+        value: '$concat',
+        meta: META_EXPR_STRING,
+        description: 'Concatenates two or more strings and returns the resulting string.',
+        snippet: '{ $concat: ["${1:\\$string1}", "${2:\\$string2}"] }',
+    },
+    {
+        value: '$dateFromString',
+        meta: META_EXPR_STRING,
+        description: 'The $dateFromString operator converts a date/time string to a date object.',
+        snippet: '{ $dateFromString: { dateString: "${1:\\$string}" } }',
+        link: 'https://learn.microsoft.com/en-us/azure/documentdb/operators/date-expression/$datefromstring', // inferred from another category
+    },
+    {
+        value: '$dateToString',
+        meta: META_EXPR_STRING,
+        description: 'The $dateToString operator converts a date object into a formatted string.',
+        snippet: '{ $dateToString: { format: "${1:%Y-%m-%d}", date: "${2:\\$date}" } }',
+        link: 'https://learn.microsoft.com/en-us/azure/documentdb/operators/date-expression/$datetostring', // inferred from another category
+    },
+    {
+        value: '$indexOfBytes',
+        meta: META_EXPR_STRING,
+        description: 'Returns the byte index of the first occurrence of a substring within a string.',
+        snippet: '{ $indexOfBytes: ["${1:\\$string}", "${2:substring}"] }',
+    },
+    {
+        value: '$indexOfCP',
+        meta: META_EXPR_STRING,
+        description: 'Returns the code point index of the first occurrence of a substring within a string.',
+        snippet: '{ $indexOfCP: ["${1:\\$string}", "${2:substring}"] }',
+    },
+    {
+        value: '$ltrim',
+        meta: META_EXPR_STRING,
+        description: 'Removes whitespace or specified characters from the beginning of a string.',
+        snippet: '{ $ltrim: { input: "${1:\\$string}" } }',
+    },
+    {
+        value: '$regexFind',
+        meta: META_EXPR_STRING,
+        description: 'Applies a regular expression to a string and returns the first match.',
+        snippet: '{ $regexFind: { input: "${1:\\$string}", regex: "${2:pattern}" } }',
+    },
+    {
+        value: '$regexFindAll',
+        meta: META_EXPR_STRING,
+        description: 'Applies a regular expression to a string and returns all matches as an array.',
+        snippet: '{ $regexFindAll: { input: "${1:\\$string}", regex: "${2:pattern}" } }',
+    },
+    {
+        value: '$regexMatch',
+        meta: META_EXPR_STRING,
+        description: 'Applies a regular expression to a string and returns a boolean indicating if a match was found.',
+        snippet: '{ $regexMatch: { input: "${1:\\$string}", regex: "${2:pattern}" } }',
+    },
+    {
+        value: '$replaceOne',
+        meta: META_EXPR_STRING,
+        description: 'Replaces the first occurrence of a search string with a replacement string.',
+        snippet: '{ $replaceOne: { input: "${1:\\$string}", find: "${2:find}", replacement: "${3:replacement}" } }',
+    },
+    {
+        value: '$replaceAll',
+        meta: META_EXPR_STRING,
+        description: 'Replaces all occurrences of a search string with a replacement string.',
+        snippet: '{ $replaceAll: { input: "${1:\\$string}", find: "${2:find}", replacement: "${3:replacement}" } }',
+    },
+    {
+        value: '$rtrim',
+        meta: META_EXPR_STRING,
+        description: 'Removes whitespace or specified characters from the end of a string.',
+        snippet: '{ $rtrim: { input: "${1:\\$string}" } }',
+    },
+    {
+        value: '$split',
+        meta: META_EXPR_STRING,
+        description: 'Splits a string by a delimiter and returns an array of substrings.',
+        snippet: '{ $split: ["${1:\\$string}", "${2:delimiter}"] }',
+    },
+    {
+        value: '$strLenBytes',
+        meta: META_EXPR_STRING,
+        description: 'Returns the number of UTF-8 encoded bytes in the specified string.',
+        snippet: '{ $strLenBytes: "${1:\\$string}" }',
+    },
+    {
+        value: '$strLenCP',
+        meta: META_EXPR_STRING,
+        description: 'Returns the number of UTF-8 code points in the specified string.',
+        snippet: '{ $strLenCP: "${1:\\$string}" }',
+    },
+    {
+        value: '$strcasecmp',
+        meta: META_EXPR_STRING,
+        description: 'Performs a case-insensitive comparison of two strings and returns an integer.',
+        snippet: '{ $strcasecmp: ["${1:\\$string1}", "${2:\\$string2}"] }',
+    },
+    {
+        value: '$substr',
+        meta: META_EXPR_STRING,
+        description:
+            'Returns a substring of a string, starting at a specified index for a specified length. Deprecated — use $substrBytes or $substrCP.',
+        snippet: '{ $substr: ["${1:\\$string}", ${2:start}, ${3:length}] }',
+    },
+    {
+        value: '$substrBytes',
+        meta: META_EXPR_STRING,
+        description:
+            'Returns a substring of a string by byte index, starting at a specified index for a specified number of bytes.',
+        snippet: '{ $substrBytes: ["${1:\\$string}", ${2:start}, ${3:length}] }',
+    },
+    {
+        value: '$substrCP',
+        meta: META_EXPR_STRING,
+        description:
+            'Returns a substring of a string by code point index, starting at a specified index for a specified number of code points.',
+        snippet: '{ $substrCP: ["${1:\\$string}", ${2:start}, ${3:length}] }',
+    },
+    {
+        value: '$toLower',
+        meta: META_EXPR_STRING,
+        description: 'Converts a string to lowercase and returns the result.',
+        snippet: '{ $toLower: "${1:\\$string}" }',
+    },
+    {
+        value: '$toString',
+        meta: META_EXPR_STRING,
+        description: 'The $toString operator converts an expression into a String',
+        snippet: '{ $toString: "${1:\\$string}" }',
+        link: 'https://learn.microsoft.com/en-us/azure/documentdb/operators/aggregation/type-expression/$tostring', // inferred from another category
+    },
+    {
+        value: '$trim',
+        meta: META_EXPR_STRING,
+        description: 'Removes whitespace or specified characters from both ends of a string.',
+        snippet: '{ $trim: { input: "${1:\\$string}" } }',
+    },
+    {
+        value: '$toUpper',
+        meta: META_EXPR_STRING,
+        description: 'Converts a string to uppercase and returns the result.',
+        snippet: '{ $toUpper: "${1:\\$string}" }',
+    },
+];
+
+// ---------------------------------------------------------------------------
+// Timestamp Expression Operators
+// ---------------------------------------------------------------------------
+/** Operators that extract one portion of a timestamp value; each entry is tagged META_EXPR_TIMESTAMP. */
+const timestampExpressionOperators: readonly OperatorEntry[] = [
+    {
+        value: '$tsIncrement',
+        meta: META_EXPR_TIMESTAMP,
+        description: 'The $tsIncrement operator extracts the increment portion from a timestamp value.',
+        snippet: '{ $tsIncrement: "${1:\\$timestampField}" }',
+        link: getDocLink('$tsIncrement', META_EXPR_TIMESTAMP),
+    },
+    {
+        value: '$tsSecond',
+        meta: META_EXPR_TIMESTAMP,
+        description: 'The $tsSecond operator extracts the seconds portion from a timestamp value.',
+        snippet: '{ $tsSecond: "${1:\\$timestampField}" }',
+        link: getDocLink('$tsSecond', META_EXPR_TIMESTAMP),
+    },
+];
+
+// ---------------------------------------------------------------------------
+// Trigonometry Expression Operators
+// ---------------------------------------------------------------------------
+/** Trigonometric, hyperbolic, and angle-conversion operators; each entry is tagged META_EXPR_TRIG. */
+const trigonometryExpressionOperators: readonly OperatorEntry[] = [
+    {
+        value: '$sin',
+        meta: META_EXPR_TRIG,
+        description: 'Returns the sine of a value measured in radians.',
+        snippet: '{ $sin: "${1:\\$value}" }',
+    },
+    {
+        value: '$cos',
+        meta: META_EXPR_TRIG,
+        description: 'Returns the cosine of a value measured in radians.',
+        snippet: '{ $cos: "${1:\\$value}" }',
+    },
+    {
+        value: '$tan',
+        meta: META_EXPR_TRIG,
+        description: 'Returns the tangent of a value measured in radians.',
+        snippet: '{ $tan: "${1:\\$value}" }',
+    },
+    {
+        value: '$asin',
+        meta: META_EXPR_TRIG,
+        description: 'Returns the arcsine (inverse sine) of a value in radians.',
+        snippet: '{ $asin: "${1:\\$value}" }',
+    },
+    {
+        value: '$acos',
+        meta: META_EXPR_TRIG,
+        description: 'Returns the arccosine (inverse cosine) of a value in radians.',
+        snippet: '{ $acos: "${1:\\$value}" }',
+    },
+    {
+        value: '$atan',
+        meta: META_EXPR_TRIG,
+        description: 'Returns the arctangent (inverse tangent) of a value in radians.',
+        snippet: '{ $atan: "${1:\\$value}" }',
+    },
+    {
+        value: '$atan2',
+        meta: META_EXPR_TRIG,
+        description: 'Returns the arctangent of the quotient of two values, using the signs to determine the quadrant.',
+        snippet: '{ $atan2: ["${1:\\$y}", "${2:\\$x}"] }',
+    },
+    {
+        value: '$asinh',
+        meta: META_EXPR_TRIG,
+        description: 'Returns the inverse hyperbolic sine of a value.',
+        snippet: '{ $asinh: "${1:\\$value}" }',
+    },
+    {
+        value: '$acosh',
+        meta: META_EXPR_TRIG,
+        description: 'Returns the inverse hyperbolic cosine of a value.',
+        snippet: '{ $acosh: "${1:\\$value}" }',
+    },
+    {
+        value: '$atanh',
+        meta: META_EXPR_TRIG,
+        description: 'Returns the inverse hyperbolic tangent of a value.',
+        snippet: '{ $atanh: "${1:\\$value}" }',
+    },
+    {
+        value: '$sinh',
+        meta: META_EXPR_TRIG,
+        description: 'Returns the hyperbolic sine of a value.',
+        snippet: '{ $sinh: "${1:\\$value}" }',
+    },
+    {
+        value: '$cosh',
+        meta: META_EXPR_TRIG,
+        description: 'Returns the hyperbolic cosine of a value.',
+        snippet: '{ $cosh: "${1:\\$value}" }',
+    },
+    {
+        value: '$tanh',
+        meta: META_EXPR_TRIG,
+        description: 'Returns the hyperbolic tangent of a value.',
+        snippet: '{ $tanh: "${1:\\$value}" }',
+    },
+    {
+        value: '$degreesToRadians',
+        meta: META_EXPR_TRIG,
+        description: 'Converts a value from degrees to radians.',
+        snippet: '{ $degreesToRadians: "${1:\\$angle}" }',
+    },
+    {
+        value: '$radiansToDegrees',
+        meta: META_EXPR_TRIG,
+        description: 'Converts a value from radians to degrees.',
+        snippet: '{ $radiansToDegrees: "${1:\\$angle}" }',
+    },
+];
+
+// ---------------------------------------------------------------------------
+// Type Expression Operators
+// ---------------------------------------------------------------------------
+/** Type-conversion and type-inspection expression operators; each entry is tagged META_EXPR_TYPE. */
+const typeExpressionOperators: readonly OperatorEntry[] = [
+    {
+        value: '$convert',
+        meta: META_EXPR_TYPE,
+        description: 'The $convert operator converts an expression into the specified type',
+        snippet: '{ $convert: { input: "${1:\\$field}", to: "${2:type}" } }',
+        link: getDocLink('$convert', META_EXPR_TYPE),
+    },
+    {
+        value: '$isNumber',
+        meta: META_EXPR_TYPE,
+        description: 'The $isNumber operator checks if a specified expression is a numerical type',
+        snippet: '{ $isNumber: "${1:\\$field}" }',
+        link: getDocLink('$isNumber', META_EXPR_TYPE),
+    },
+    {
+        value: '$toBool',
+        meta: META_EXPR_TYPE,
+        description: 'The $toBool operator converts an expression into a Boolean type',
+        snippet: '{ $toBool: "${1:\\$field}" }',
+        link: getDocLink('$toBool', META_EXPR_TYPE),
+    },
+    {
+        value: '$toDate',
+        meta: META_EXPR_TYPE,
+        description: 'The $toDate operator converts supported types to a proper Date object.',
+        snippet: '{ $toDate: "${1:\\$field}" }',
+        link: getDocLink('$toDate', META_EXPR_TYPE),
+    },
+    {
+        value: '$toDecimal',
+        meta: META_EXPR_TYPE,
+        description: 'The $toDecimal operator converts an expression into a Decimal type',
+        snippet: '{ $toDecimal: "${1:\\$field}" }',
+        link: getDocLink('$toDecimal', META_EXPR_TYPE),
+    },
+    {
+        value: '$toDouble',
+        meta: META_EXPR_TYPE,
+        description: 'The $toDouble operator converts an expression into a Double value',
+        snippet: '{ $toDouble: "${1:\\$field}" }',
+        link: getDocLink('$toDouble', META_EXPR_TYPE),
+    },
+    {
+        value: '$toInt',
+        meta: META_EXPR_TYPE,
+        description: 'The $toInt operator converts an expression into an Integer',
+        snippet: '{ $toInt: "${1:\\$field}" }',
+        link: getDocLink('$toInt', META_EXPR_TYPE),
+    },
+    {
+        value: '$toLong',
+        meta: META_EXPR_TYPE,
+        description: 'The $toLong operator converts an expression into a Long value',
+        snippet: '{ $toLong: "${1:\\$field}" }',
+        link: getDocLink('$toLong', META_EXPR_TYPE),
+    },
+    {
+        value: '$toObjectId',
+        meta: META_EXPR_TYPE,
+        description: 'The $toObjectId operator converts an expression into an ObjectId',
+        snippet: '{ $toObjectId: "${1:\\$field}" }',
+        link: getDocLink('$toObjectId', META_EXPR_TYPE),
+    },
+    {
+        value: '$toString',
+        meta: META_EXPR_TYPE,
+        description: 'The $toString operator converts an expression into a String',
+        snippet: '{ $toString: "${1:\\$field}" }',
+        link: getDocLink('$toString', META_EXPR_TYPE),
+    },
+    {
+        value: '$type',
+        meta: META_EXPR_TYPE,
+        description: 'The $type operator returns a string that specifies the BSON type of the expression.',
+        snippet: '{ $type: "${1:\\$field}" }',
+        link: 'https://learn.microsoft.com/en-us/azure/documentdb/operators/element-query/$type', // inferred from another category
+    },
+];
+
+// ---------------------------------------------------------------------------
+// Variable Expression Operators
+// ---------------------------------------------------------------------------
+/** Operators for defining variables inside an expression; the entry is tagged META_EXPR_VARIABLE. */
+const variableExpressionOperators: readonly OperatorEntry[] = [
+    {
+        value: '$let',
+        meta: META_EXPR_VARIABLE,
+        description:
+            'The $let operator allows defining variables for use in a specified expression, enabling complex calculations and reducing code repetition.',
+        snippet: '{ $let: { vars: { ${1:var}: ${2:expression} }, in: ${3:expression} } }',
+        link: getDocLink('$let', META_EXPR_VARIABLE),
+    },
+];
+
+// ---------------------------------------------------------------------------
+// Conditional Expression Operators
+// ---------------------------------------------------------------------------
+/** Operators that pick a result from a condition or a null check; each entry is tagged META_EXPR_CONDITIONAL. */
+const conditionalExpressionOperators: readonly OperatorEntry[] = [
+    {
+        value: '$cond',
+        meta: META_EXPR_CONDITIONAL,
+        description:
+            'The $cond operator is used to evaluate a condition and return one of two expressions based on the result.',
+        snippet: '{ $cond: { if: { ${1:expression} }, then: ${2:trueValue}, else: ${3:falseValue} } }',
+        link: getDocLink('$cond', META_EXPR_CONDITIONAL),
+    },
+    {
+        value: '$ifNull',
+        meta: META_EXPR_CONDITIONAL,
+        description:
+            'The $ifNull operator is used to evaluate an expression and return a specified value if the expression resolves to null.',
+        snippet: '{ $ifNull: ["${1:\\$field}", ${2:replacement}] }',
+        link: getDocLink('$ifNull', META_EXPR_CONDITIONAL),
+    },
+    {
+        value: '$switch',
+        meta: META_EXPR_CONDITIONAL,
+        description:
+            'The $switch operator is used to evaluate a series of conditions and return a value based on the first condition that evaluates to true.',
+        snippet:
+            '{ $switch: { branches: [{ case: { ${1:expression} }, then: ${2:value} }], default: ${3:defaultValue} } }',
+        link: getDocLink('$switch', META_EXPR_CONDITIONAL),
+    },
+];
+
+// ---------------------------------------------------------------------------
+// Registration
+// ---------------------------------------------------------------------------
+/** Registers every expression-operator category in a single registerOperators call, in the order listed. */
+export function loadExpressionOperators(): void {
+    const everyExpressionOperator = arithmeticExpressionOperators.concat(
+        arrayExpressionOperators,
+        bitwiseOperators,
+        booleanExpressionOperators,
+        comparisonExpressionOperators,
+        dataSizeOperators,
+        dateExpressionOperators,
+        literalExpressionOperator,
+        miscellaneousOperators,
+        objectExpressionOperators,
+        setExpressionOperators,
+        stringExpressionOperators,
+        timestampExpressionOperators,
+        trigonometryExpressionOperators,
+        typeExpressionOperators,
+        variableExpressionOperators,
+        conditionalExpressionOperators,
+    );
+    registerOperators(everyExpressionOperator);
+}
diff --git a/packages/documentdb-constants/src/getFilteredCompletions.test.ts b/packages/documentdb-constants/src/getFilteredCompletions.test.ts
new file mode 100644
index 000000000..02e683e5d
--- /dev/null
+++ b/packages/documentdb-constants/src/getFilteredCompletions.test.ts
@@ -0,0 +1,237 @@
+/*---------------------------------------------------------------------------------------------
+ *  Copyright (c) Microsoft Corporation. All rights reserved.
+ *  Licensed under the MIT License. See License.txt in the project root for license information.
+ *--------------------------------------------------------------------------------------------*/
+
+/**
+ * Unit tests for getFilteredCompletions and completion presets.
+ */
+
+import {
+    EXPRESSION_COMPLETION_META,
+    FILTER_COMPLETION_META,
+    GROUP_EXPRESSION_COMPLETION_META,
+    PROJECTION_COMPLETION_META,
+    STAGE_COMPLETION_META,
+    UPDATE_COMPLETION_META,
+    WINDOW_COMPLETION_META,
+    getAllCompletions,
+    getFilteredCompletions,
+    loadOperators,
+} from './index';
+
+describe('getFilteredCompletions', () => {
+    test('getAllCompletions returns a non-empty list of operators', () => {
+        const all = getAllCompletions();
+        expect(all.length).toBeGreaterThan(0);
+    });
+
+    test('filtering by "query" returns only query operators', () => {
+        const results = getFilteredCompletions({ meta: ['query'] });
+        expect(results.length).toBeGreaterThan(0);
+        for (const r of results) {
+            expect(r.meta).toMatch(/^query/);
+        }
+    });
+
+    test('filtering by "query:comparison" returns only comparison operators', () => {
+        const results = getFilteredCompletions({ meta: ['query:comparison'] });
+        expect(results.length).toBe(8); // $eq, $gt, $gte, $in, $lt, $lte, $ne, $nin
+        for (const r of results) {
+            expect(r.meta).toBe('query:comparison');
+        }
+    });
+
+    test('filtering by "stage" returns aggregation pipeline stages', () => {
+        const results = getFilteredCompletions({ meta: ['stage'] });
+        expect(results.length).toBe(35);
+        for (const r of results) {
+            expect(r.meta).toBe('stage');
+        }
+    });
+
+    test('filtering by "update" returns all update operators', () => {
+        const results = getFilteredCompletions({ meta: ['update'] });
+        expect(results.length).toBe(22);
+        for (const r of results) {
+            expect(r.meta).toMatch(/^update/);
+        }
+    });
+
+    test('filtering by "accumulator" returns accumulator operators', () => {
+        const results = getFilteredCompletions({ meta: ['accumulator'] });
+        expect(results.length).toBe(21);
+        for (const r of results) {
+            expect(r.meta).toBe('accumulator');
+        }
+    });
+
+    test('filtering by "expr" returns all expression operators', () => {
+        const results = getFilteredCompletions({ meta: ['expr'] });
+        expect(results.length).toBeGreaterThan(100);
+        for (const r of results) {
+            expect(r.meta).toMatch(/^expr:/);
+        }
+    });
+
+    test('filtering by "window" returns window operators', () => {
+        const results = getFilteredCompletions({ meta: ['window'] });
+        expect(results.length).toBe(27);
+        for (const r of results) {
+            expect(r.meta).toBe('window');
+        }
+    });
+
+    test('filtering by "bson" returns BSON constructors', () => {
+        const results = getFilteredCompletions({ meta: ['bson'] });
+        expect(results.length).toBe(10);
+        for (const r of results) {
+            expect(r.meta).toBe('bson');
+        }
+    });
+
+    test('filtering by "variable" returns system variables', () => {
+        const results = getFilteredCompletions({ meta: ['variable'] });
+        expect(results.length).toBe(7);
+        for (const r of results) {
+            expect(r.meta).toBe('variable');
+        }
+    });
+
+    test('filtering by multiple meta tags combines results', () => {
+        const queryOnly = getFilteredCompletions({ meta: ['query'] });
+        const stageOnly = getFilteredCompletions({ meta: ['stage'] });
+        const combined = getFilteredCompletions({ meta: ['query', 'stage'] });
+        expect(combined.length).toBe(queryOnly.length + stageOnly.length);
+    });
+
+    test('empty meta array returns no results', () => {
+        const results = getFilteredCompletions({ meta: [] });
+        expect(results.length).toBe(0);
+    });
+
+    test('unknown meta tag returns no results', () => {
+        const results = getFilteredCompletions({ meta: ['nonexistent'] });
+        expect(results.length).toBe(0);
+    });
+
+    describe('BSON type filtering', () => {
+        test('filtering by bsonTypes narrows type-specific operators', () => {
+            const allQuery = getFilteredCompletions({ meta: ['query'] });
+            const stringOnly = getFilteredCompletions({
+                meta: ['query'],
+                bsonTypes: ['string'],
+            });
+            // String-only should have fewer or equal operators (universal + string-specific)
+            expect(stringOnly.length).toBeLessThanOrEqual(allQuery.length);
+            expect(stringOnly.length).toBeGreaterThan(0);
+        });
+
+        test('universal operators (no applicableBsonTypes) always pass BSON filter', () => {
+            const withBsonFilter = getFilteredCompletions({
+                meta: ['query:comparison'],
+                bsonTypes: ['string'],
+            });
+            // All comparison operators are universal
+            expect(withBsonFilter.length).toBe(8);
+        });
+
+        test('type-specific operators are excluded when BSON type does not match', () => {
+            const numberOps = getFilteredCompletions({
+                meta: ['query'],
+                bsonTypes: ['number'],
+            });
+            // $regex should NOT be included for number type (it's string-only)
+            const hasRegex = numberOps.some((op) => op.value === '$regex');
+            expect(hasRegex).toBe(false);
+        });
+
+        test('type-specific operators are included when BSON type matches', () => {
+            const stringOps = getFilteredCompletions({
+                meta: ['query'],
+                bsonTypes: ['string'],
+            });
+            // $regex should be included for string type
+            const hasRegex = stringOps.some((op) => op.value === '$regex');
+            expect(hasRegex).toBe(true);
+        });
+    });
+});
+
+describe('completion context presets', () => {
+    test('FILTER_COMPLETION_META returns query + bson + variable', () => {
+        const completions = getFilteredCompletions({ meta: FILTER_COMPLETION_META });
+        const topLevelTags = new Set(completions.map((entry) => entry.meta.split(':')[0]));
+        expect(topLevelTags).toContain('query');
+        expect(topLevelTags).toContain('bson');
+        expect(topLevelTags).toContain('variable');
+        expect(topLevelTags).not.toContain('stage');
+        expect(topLevelTags).not.toContain('update');
+    });
+
+    test('STAGE_COMPLETION_META returns only stages', () => {
+        const completions = getFilteredCompletions({ meta: STAGE_COMPLETION_META });
+        expect(completions.length).toBe(35);
+        for (const entry of completions) {
+            expect(entry.meta).toBe('stage');
+        }
+    });
+
+    test('UPDATE_COMPLETION_META returns only update operators', () => {
+        const completions = getFilteredCompletions({ meta: UPDATE_COMPLETION_META });
+        expect(completions.length).toBe(22);
+        for (const entry of completions) {
+            expect(entry.meta).toMatch(/^update/);
+        }
+    });
+
+    test('GROUP_EXPRESSION_COMPLETION_META returns expr + accumulator + bson + variable', () => {
+        const completions = getFilteredCompletions({ meta: GROUP_EXPRESSION_COMPLETION_META });
+        const topLevelTags = new Set(completions.map((entry) => entry.meta.split(':')[0]));
+        expect(topLevelTags).toContain('expr');
+        expect(topLevelTags).toContain('accumulator');
+        expect(topLevelTags).toContain('bson');
+        expect(topLevelTags).toContain('variable');
+        expect(topLevelTags).not.toContain('query');
+        expect(topLevelTags).not.toContain('stage');
+    });
+
+    test('EXPRESSION_COMPLETION_META returns expr + bson + variable (no accumulators)', () => {
+        const completions = getFilteredCompletions({ meta: EXPRESSION_COMPLETION_META });
+        const topLevelTags = new Set(completions.map((entry) => entry.meta.split(':')[0]));
+        expect(topLevelTags).toContain('expr');
+        expect(topLevelTags).toContain('bson');
+        expect(topLevelTags).toContain('variable');
+        expect(topLevelTags).not.toContain('accumulator');
+    });
+
+    test('WINDOW_COMPLETION_META returns window + accumulator + expr + bson + variable', () => {
+        const completions = getFilteredCompletions({ meta: WINDOW_COMPLETION_META });
+        const topLevelTags = new Set(completions.map((entry) => entry.meta.split(':')[0]));
+        expect(topLevelTags).toContain('window');
+        expect(topLevelTags).toContain('accumulator');
+        expect(topLevelTags).toContain('expr');
+        expect(topLevelTags).toContain('bson');
+        expect(topLevelTags).toContain('variable');
+    });
+
+    test('PROJECTION_COMPLETION_META returns projection operators + BSON constructors', () => {
+        const completions = getFilteredCompletions({ meta: PROJECTION_COMPLETION_META });
+        // Only statically registered entries can show up here: the projection operators
+        // ($, $elemMatch, $slice) and BSON constructors; field:identifier entries are injected at runtime
+        expect(completions.length).toBeGreaterThan(0);
+        const distinctMetas = Array.from(new Set(completions.map((entry) => entry.meta)));
+        expect(distinctMetas).toContain('query:projection');
+        expect(distinctMetas).toContain('bson');
+    });
+});
+
+describe('registry idempotency', () => {
+    test('calling loadOperators() twice does not duplicate entries', () => {
+        const sizeBeforeReload = getAllCompletions().length;
+        // loadOperators comes from the index re-exports; a repeated call must leave the count unchanged
+        loadOperators();
+        const sizeAfterReload = getAllCompletions().length;
+        expect(sizeAfterReload).toBe(sizeBeforeReload);
+    });
+});
diff --git a/packages/documentdb-constants/src/getFilteredCompletions.ts b/packages/documentdb-constants/src/getFilteredCompletions.ts
new file mode 100644
index 000000000..170353f78
--- /dev/null
+++ b/packages/documentdb-constants/src/getFilteredCompletions.ts
@@ -0,0 +1,99 @@
+/*---------------------------------------------------------------------------------------------
+ *  Copyright (c) Microsoft Corporation. All rights reserved.
+ *  Licensed under the MIT License. See License.txt in the project root for license information.
+ *--------------------------------------------------------------------------------------------*/
+
+/**
+ * Primary consumer API for the documentdb-constants package.
+ *
+ * Provides filtered access to the operator entries based on meta tags
+ * and optional BSON type constraints.
+ */
+
+import { type CompletionFilter, type OperatorEntry } from './types';
+
+/**
+ * Internal registry, filled by the operator module files (queryOperators,
+ * stages, etc.) via {@link registerOperators}: `allOperatorsSet` holds the
+ * `value|meta` keys seen so far, `allOperators` the entries in insertion order.
+ */
+const allOperatorsSet = new Set<string>();
+const allOperators: OperatorEntry[] = [];
+
+/**
+ * Adds operator entries to the shared registry.
+ *
+ * An entry whose `value|meta` key is already present is silently skipped,
+ * so invoking this repeatedly with the same entries is idempotent.
+ *
+ * @param entries - array of OperatorEntry objects to register
+ */
+export function registerOperators(entries: readonly OperatorEntry[]): void {
+    for (const candidate of entries) {
+        const registryKey = `${candidate.value}|${candidate.meta}`;
+        if (allOperatorsSet.has(registryKey)) {
+            continue;
+        }
+        allOperatorsSet.add(registryKey);
+        allOperators.push(candidate);
+    }
+}
+
+/**
+ * Empties the registry: drops every stored entry and every dedup key.
+ * Meant for internal/testing use only.
+ */
+export function clearOperators(): void {
+    allOperatorsSet.clear();
+    allOperators.splice(0, allOperators.length);
+}
+
+/**
+ * Returns the registered operator entries that satisfy the given filter.
+ *
+ * Meta tag matching uses **prefix matching**: a filter meta of 'query'
+ * matches 'query', 'query:comparison', 'query:logical', etc.
+ * A filter meta of 'expr' matches all 'expr:*' entries.
+ *
+ * BSON type filtering is applied as an intersection: if `filter.bsonTypes`
+ * is provided and non-empty, only operators whose `applicableBsonTypes`
+ * includes at least one of the requested types are returned. Operators with
+ * no `applicableBsonTypes` (universal operators) are always included.
+ *
+ * @param filter - the filtering criteria
+ * @returns matching operator entries as a new array — `Array.prototype.filter`
+ * always allocates a fresh array, so callers cannot mutate the internal registry
+ * array through this return value.
+ */
+export function getFilteredCompletions(filter: CompletionFilter): readonly OperatorEntry[] {
+    // Resolve the optional BSON constraint once, outside the per-entry callback.
+    // An absent or empty list means "no type filtering".
+    const requestedTypes = filter.bsonTypes ?? [];
+
+    return allOperators.filter((entry) => {
+        // Meta tag prefix matching
+        const metaMatch = filter.meta.some((prefix) => entry.meta === prefix || entry.meta.startsWith(prefix + ':'));
+        if (!metaMatch) {
+            return false;
+        }
+
+        // Universal operators (no applicableBsonTypes) always pass
+        const entryTypes = entry.applicableBsonTypes ?? [];
+        if (requestedTypes.length === 0 || entryTypes.length === 0) {
+            return true;
+        }
+
+        // Type-specific operators need at least one requested type in common
+        return entryTypes.some((t) => requestedTypes.includes(t));
+    });
+}
+
+/**
+ * Returns every registered operator entry, with no filtering applied.
+ * Handy for validation, testing, and diagnostics.
+ *
+ * The result is a shallow copy, so callers cannot mutate the internal registry array.
+ */
+export function getAllCompletions(): readonly OperatorEntry[] {
+    return allOperators.slice();
+}
diff --git a/packages/documentdb-constants/src/index.ts b/packages/documentdb-constants/src/index.ts
new file mode 100644
index 000000000..d888fcf6b
--- /dev/null
+++ b/packages/documentdb-constants/src/index.ts
@@ -0,0 +1,105 @@
+/*---------------------------------------------------------------------------------------------
+ *  Copyright (c) Microsoft Corporation. All rights reserved.
+ *  Licensed under the MIT License. See License.txt in the project root for license information.
+ *--------------------------------------------------------------------------------------------*/
+
+/**
+ * @vscode-documentdb/documentdb-constants
+ *
+ * Static operator metadata for DocumentDB-supported operators, stages,
+ * accumulators, update operators, BSON constructors, and system variables.
+ */
+
+// -- Core types --
+export type { CompletionFilter, MetaTag, OperatorEntry } from './types';
+
+// -- Meta tag constants and presets --
+export {
+    ALL_META_TAGS,
+    EXPRESSION_COMPLETION_META,
+    // Names are sorted alphabetically: *_COMPLETION_META are completion context presets
+    FILTER_COMPLETION_META,
+    GROUP_EXPRESSION_COMPLETION_META,
+    META_ACCUMULATOR,
+    META_BSON,
+    META_EXPR_ARITH,
+    META_EXPR_ARRAY,
+    META_EXPR_BITWISE,
+    META_EXPR_BOOL,
+    META_EXPR_COMPARISON,
+    META_EXPR_CONDITIONAL,
+    META_EXPR_DATASIZE,
+    META_EXPR_DATE,
+    META_EXPR_LITERAL,
+    META_EXPR_MISC,
+    META_EXPR_OBJECT,
+    META_EXPR_SET,
+    META_EXPR_STRING,
+    META_EXPR_TIMESTAMP,
+    META_EXPR_TRIG,
+    META_EXPR_TYPE,
+    META_EXPR_VARIABLE,
+    META_FIELD_IDENTIFIER,
+    // META_* are individual meta tags; ALL_META_TAGS lists them all
+    META_QUERY,
+    META_QUERY_ARRAY,
+    META_QUERY_BITWISE,
+    META_QUERY_COMPARISON,
+    META_QUERY_ELEMENT,
+    META_QUERY_EVALUATION,
+    META_QUERY_GEOSPATIAL,
+    META_QUERY_LOGICAL,
+    META_QUERY_MISC,
+    META_QUERY_PROJECTION,
+    META_STAGE,
+    META_UPDATE,
+    META_UPDATE_ARRAY,
+    META_UPDATE_BITWISE,
+    META_UPDATE_FIELD,
+    META_VARIABLE,
+    META_WINDOW,
+    PROJECTION_COMPLETION_META,
+    STAGE_COMPLETION_META,
+    UPDATE_COMPLETION_META,
+    WINDOW_COMPLETION_META,
+} from './metaTags';
+
+// -- Consumer API --
+export { getAllCompletions, getFilteredCompletions } from './getFilteredCompletions';
+
+// -- Documentation URL helpers --
+export { getDocBase, getDocLink } from './docLinks';
+
+// -- Operator data modules --
+import { loadAccumulators } from './accumulators';
+import { loadBsonConstructors } from './bsonConstructors';
+import { loadExpressionOperators } from './expressionOperators';
+import { loadQueryOperators } from './queryOperators';
+import { loadStages } from './stages';
+import { loadSystemVariables } from './systemVariables';
+import { loadUpdateOperators } from './updateOperators';
+import { loadWindowOperators } from './windowOperators';
+
+/**
+ * Loads all built-in operator data into the registry.
+ *
+ * Called automatically at module import time so that consumers using
+ * `import { getFilteredCompletions } from '@vscode-documentdb/documentdb-constants'`
+ * get all operators without any additional setup.
+ *
+ * Can also be called explicitly (e.g. in workers or tests). To keep a repeat
+ * call idempotent, pair it with {@link clearOperators} (not exported from this module).
+ */
+export function loadOperators(): void {
+    loadAccumulators();
+    loadBsonConstructors();
+    loadExpressionOperators();
+    loadQueryOperators();
+    loadStages();
+    loadSystemVariables();
+    loadUpdateOperators();
+    loadWindowOperators();
+}
+
+// Auto-load on module import so the public API works out of the box.
+loadOperators();
diff --git a/packages/documentdb-constants/src/metaTags.ts b/packages/documentdb-constants/src/metaTags.ts
new file mode 100644
index 000000000..7a4dd7add
--- /dev/null
+++ b/packages/documentdb-constants/src/metaTags.ts
@@ -0,0 +1,130 @@
+/*---------------------------------------------------------------------------------------------
+ *  Copyright (c) Microsoft Corporation. All rights reserved.
+ *  Licensed under the MIT License. See License.txt in the project root for license information.
+ *--------------------------------------------------------------------------------------------*/
+
+/**
+ * Meta tag constants for categorizing operators in the DocumentDB constants package.
+ *
+ * Tags use a hierarchical scheme where prefix matching is supported:
+ * filtering by 'query' matches 'query', 'query:comparison', 'query:logical', etc.
+ */
+
+// -- Query operators --
+export const META_QUERY = 'query' as const;
+export const META_QUERY_COMPARISON = 'query:comparison' as const;
+export const META_QUERY_LOGICAL = 'query:logical' as const;
+export const META_QUERY_ELEMENT = 'query:element' as const;
+export const META_QUERY_EVALUATION = 'query:evaluation' as const;
+export const META_QUERY_ARRAY = 'query:array' as const;
+export const META_QUERY_BITWISE = 'query:bitwise' as const;
+export const META_QUERY_GEOSPATIAL = 'query:geospatial' as const;
+export const META_QUERY_PROJECTION = 'query:projection' as const;
+export const META_QUERY_MISC = 'query:misc' as const;
+
+// -- Update operators --
+export const META_UPDATE = 'update' as const;
+export const META_UPDATE_FIELD = 'update:field' as const;
+export const META_UPDATE_ARRAY = 'update:array' as const;
+export const META_UPDATE_BITWISE = 'update:bitwise' as const;
+
+// -- Aggregation pipeline --
+export const META_STAGE = 'stage' as const;
+export const META_ACCUMULATOR = 'accumulator' as const;
+
+// -- Expression operators --
+export const META_EXPR_ARITH = 'expr:arith' as const;
+export const META_EXPR_ARRAY = 'expr:array' as const;
+export const META_EXPR_BOOL = 'expr:bool' as const;
+export const META_EXPR_COMPARISON = 'expr:comparison' as const;
+export const META_EXPR_CONDITIONAL = 'expr:conditional' as const;
+export const META_EXPR_DATE = 'expr:date' as const;
+export const META_EXPR_OBJECT = 'expr:object' as const;
+export const META_EXPR_SET = 'expr:set' as const;
+export const META_EXPR_STRING = 'expr:string' as const;
+export const META_EXPR_TRIG = 'expr:trig' as const;
+export const META_EXPR_TYPE = 'expr:type' as const;
+export const META_EXPR_DATASIZE = 'expr:datasize' as const;
+export const META_EXPR_TIMESTAMP = 'expr:timestamp' as const;
+export const META_EXPR_BITWISE = 'expr:bitwise' as const;
+export const META_EXPR_LITERAL = 'expr:literal' as const;
+export const META_EXPR_MISC = 'expr:misc' as const;
+export const META_EXPR_VARIABLE = 'expr:variable' as const;
+
+// -- Window operators --
+export const META_WINDOW = 'window' as const;
+
+// -- BSON constructors --
+export const META_BSON = 'bson' as const;
+
+// -- System variables --
+export const META_VARIABLE = 'variable' as const;
+
+// -- Schema-injected field names (not static — provided at runtime) --
+export const META_FIELD_IDENTIFIER = 'field:identifier' as const;
+
+/**
+ * All known meta tag values for validation purposes. Bare 'expr' is not listed: the presets use it only as a prefix.
+ */
+export const ALL_META_TAGS = [
+    META_QUERY,
+    META_QUERY_COMPARISON,
+    META_QUERY_LOGICAL,
+    META_QUERY_ELEMENT,
+    META_QUERY_EVALUATION,
+    META_QUERY_ARRAY,
+    META_QUERY_BITWISE,
+    META_QUERY_GEOSPATIAL,
+    META_QUERY_PROJECTION,
+    META_QUERY_MISC,
+    META_UPDATE,
+    META_UPDATE_FIELD,
+    META_UPDATE_ARRAY,
+    META_UPDATE_BITWISE,
+    META_STAGE,
+    META_ACCUMULATOR,
+    META_EXPR_ARITH,
+    META_EXPR_ARRAY,
+    META_EXPR_BOOL,
+    META_EXPR_COMPARISON,
+    META_EXPR_CONDITIONAL,
+    META_EXPR_DATE,
+    META_EXPR_OBJECT,
+    META_EXPR_SET,
+    META_EXPR_STRING,
+    META_EXPR_TRIG,
+    META_EXPR_TYPE,
+    META_EXPR_DATASIZE,
+    META_EXPR_TIMESTAMP,
+    META_EXPR_BITWISE,
+    META_EXPR_LITERAL,
+    META_EXPR_MISC,
+    META_EXPR_VARIABLE,
+    META_WINDOW,
+    META_BSON,
+    META_VARIABLE,
+    META_FIELD_IDENTIFIER,
+] as const;
+
+// -- Completion context presets (bare 'query', 'expr', 'update' also match their ':' sub-tags) --
+
+/** Query filter contexts (find filter bar, $match stage body): query operators, BSON constructors, system variables */
+export const FILTER_COMPLETION_META: readonly string[] = ['query', 'bson', 'variable'];
+
+/** Projection/sort contexts: field names, projection operators, BSON constructors */
+export const PROJECTION_COMPLETION_META: readonly string[] = ['field:identifier', 'query:projection', 'bson'];
+
+/** $group/$project/$addFields stage body: expressions, accumulators, BSON constructors, system variables */
+export const GROUP_EXPRESSION_COMPLETION_META: readonly string[] = ['expr', 'accumulator', 'bson', 'variable'];
+
+/** Other stage bodies: expressions (no accumulators), BSON constructors, system variables */
+export const EXPRESSION_COMPLETION_META: readonly string[] = ['expr', 'bson', 'variable'];
+
+/** Update operations: update operators */
+export const UPDATE_COMPLETION_META: readonly string[] = ['update'];
+
+/** Top-level aggregation pipeline: stage names */
+export const STAGE_COMPLETION_META: readonly string[] = ['stage'];
+
+/** Window fields: window operators, accumulators, expressions, BSON constructors, system variables */
+export const WINDOW_COMPLETION_META: readonly string[] = ['window', 'accumulator', 'expr', 'bson', 'variable'];
diff --git a/packages/documentdb-constants/src/operatorReference.test.ts b/packages/documentdb-constants/src/operatorReference.test.ts
new file mode 100644
index 000000000..4d4a8d853
--- /dev/null
+++ b/packages/documentdb-constants/src/operatorReference.test.ts
@@ -0,0 +1,359 @@
+/*---------------------------------------------------------------------------------------------
+ *  Copyright (c) Microsoft Corporation. All rights reserved.
+ *  Licensed under the MIT License. See License.txt in the project root for license information.
+ *--------------------------------------------------------------------------------------------*/
+
+/**
+ * Dump-vs-implementation verification test.
+ *
+ * Ensures the TypeScript operator implementation always matches the
+ * resource dump (resources/scraped/operator-reference.md). This test is the
+ * enforcing contract between "what does DocumentDB support?" (the dump)
+ * and "what does our code provide?" (the implementation).
+ *
+ * See §2.3.3 of docs/plan/03-documentdb-constants.md (local planning doc, not committed) for design rationale.
+ */
+
+import * as fs from 'fs';
+import * as path from 'path';
+import { getAllCompletions } from './index';
+import { parseOperatorReference, type ReferenceOperator } from './parseOperatorReference';
+
+const dumpPath = path.join(__dirname, '..', 'resources', 'scraped', 'operator-reference.md');
+const dumpContent = fs.readFileSync(dumpPath, 'utf-8');
+const parsed = parseOperatorReference(dumpContent);
+const referenceOperators = parsed.operators;
+const notListedOperators = parsed.notListed;
+const implementedOperators = getAllCompletions();
+
+/**
+ * Category-to-meta mapping. Maps dump category names to the meta tags
+ * used in the implementation. Some dump categories map to the same meta
+ * tag (e.g., both accumulator categories map to 'accumulator').
+ */
+const CATEGORY_TO_META: Record<string, string> = {
+    'Comparison Query Operators': 'query:comparison',
+    'Logical Query Operators': 'query:logical',
+    'Element Query Operators': 'query:element',
+    'Evaluation Query Operators': 'query:evaluation',
+    'Geospatial Operators': 'query:geospatial',
+    'Array Query Operators': 'query:array',
+    'Bitwise Query Operators': 'query:bitwise',
+    'Projection Operators': 'query:projection',
+    'Miscellaneous Query Operators': 'query:misc',
+    'Field Update Operators': 'update:field',
+    'Array Update Operators': 'update:array',
+    'Bitwise Update Operators': 'update:bitwise',
+    'Arithmetic Expression Operators': 'expr:arith',
+    'Array Expression Operators': 'expr:array',
+    'Bitwise Operators': 'expr:bitwise',
+    'Boolean Expression Operators': 'expr:bool',
+    'Comparison Expression Operators': 'expr:comparison',
+    'Data Size Operators': 'expr:datasize',
+    'Date Expression Operators': 'expr:date',
+    'Literal Expression Operator': 'expr:literal',
+    'Miscellaneous Operators': 'expr:misc',
+    'Object Expression Operators': 'expr:object',
+    'Set Expression Operators': 'expr:set',
+    'String Expression Operators': 'expr:string',
+    'Timestamp Expression Operators': 'expr:timestamp',
+    'Trigonometry Expression Operators': 'expr:trig',
+    'Type Expression Operators': 'expr:type',
+    'Accumulators ($group, $bucket, $bucketAuto, $setWindowFields)': 'accumulator',
+    'Accumulators (in Other Stages)': 'accumulator',
+    'Variable Expression Operators': 'expr:variable',
+    'Window Operators': 'window',
+    'Conditional Expression Operators': 'expr:conditional',
+    'Aggregation Pipeline Stages': 'stage',
+    'Variables in Aggregation Expressions': 'variable',
+};
+
+describe('operator reference verification', () => {
+    test('dump file exists and is parseable', () => {
+        expect(dumpContent.length).toBeGreaterThan(1000);
+        expect(referenceOperators.length).toBeGreaterThan(250);
+    });
+
+    test('every listed operator in the dump has an implementation entry', () => {
+        const implementedValues = new Set(implementedOperators.map((op) => op.value));
+        const missing: string[] = [];
+
+        for (const ref of referenceOperators) {
+            // Some operators appear in multiple dump categories (e.g., $objectToArray
+            // in both "Array Expression" and "Object Expression"). The implementation
+            // only needs one entry per (value, meta) pair — check by value.
+            if (!implementedValues.has(ref.operator)) {
+                missing.push(`${ref.operator} (${ref.category})`);
+            }
+        }
+
+        expect(missing).toEqual([]);
+    });
+
+    test('no extra operators in implementation beyond the dump (excluding BSON/variables)', () => {
+        // Build a set of all operator values from the dump
+        const dumpValues = new Set(referenceOperators.map((r) => r.operator));
+
+        // Filter implementation entries: exclude BSON constructors and system variables
+        // (these are hand-authored, not from the compatibility page dump)
+        const extras = implementedOperators.filter(
+            (op) => !op.meta.startsWith('bson') && !op.meta.startsWith('variable') && !dumpValues.has(op.value),
+        );
+
+        expect(extras.map((e) => `${e.value} (${e.meta})`)).toEqual([]);
+    });
+
+    test('descriptions match the dump (detect drift)', () => {
+        const mismatches: string[] = [];
+
+        for (const ref of referenceOperators) {
+            if (!ref.description) {
+                continue; // some operators have empty descriptions (missing upstream docs)
+            }
+
+            // Find implementation entry matching this operator + category's meta
+            const expectedMeta = CATEGORY_TO_META[ref.category];
+            if (!expectedMeta) {
+                continue; // unknown category
+            }
+
+            const impl = implementedOperators.find((op) => op.value === ref.operator && op.meta === expectedMeta);
+
+            if (impl && impl.description !== ref.description) {
+                mismatches.push(
+                    `${ref.operator} (${ref.category}): expected "${ref.description}", got "${impl.description}"`,
+                );
+            }
+        }
+
+        expect(mismatches).toEqual([]);
+    });
+
+    test('not-listed operators are NOT in the implementation', () => {
+        const leaked: string[] = [];
+
+        for (const nl of notListedOperators) {
+            // Match on the category's meta tag. NOTE(review): unmapped categories are skipped, not reported.
+            const expectedMeta = CATEGORY_TO_META[nl.category];
+            if (!expectedMeta) {
+                continue;
+            }
+
+            const found = implementedOperators.find((op) => op.value === nl.operator && op.meta === expectedMeta);
+
+            if (found) {
+                leaked.push(`${nl.operator} (${nl.category}) — ${nl.reason}`);
+            }
+        }
+
+        expect(leaked).toEqual([]);
+    });
+
+    test('all dump categories have a known meta mapping', () => {
+        const categories = new Set(referenceOperators.map((r) => r.category));
+        const unmapped = [...categories].filter((c) => !CATEGORY_TO_META[c]);
+        expect(unmapped).toEqual([]);
+    });
+
+    test('reference parser found the expected number of not-listed operators', () => {
+        // The plan lists 16 not-listed operators (§2.1)
+        expect(notListedOperators.length).toBeGreaterThanOrEqual(14);
+        expect(notListedOperators.length).toBeLessThanOrEqual(20);
+    });
+});
+
+// ---------------------------------------------------------------------------
+// Merged dump + overrides verification
+//
+// The generator (scripts/generate-from-reference.ts) merges the scraped dump
+// with manual overrides. These tests verify the implementation matches the
+// MERGED result — catching scenarios where:
+//   - Someone hand-edits a generated .ts file instead of using overrides
+//   - Someone adds an override but forgets to run `npm run generate`
+//   - Someone runs `npm run scrape` but forgets `npm run generate`
+//   - The override file is accidentally truncated
+// ---------------------------------------------------------------------------
+
+const overridesPath = path.join(__dirname, '..', 'resources', 'overrides', 'operator-overrides.md');
+const overridesContent = fs.readFileSync(overridesPath, 'utf-8');
+const parsedOverrides = parseOperatorReference(overridesContent);
+const overrideOperators = parsedOverrides.operators;
+
+/**
+ * Merges dump and override operators. For each (operator, category) pair, a
+ * non-empty override description or docLink wins (`||` treats '' as missing);
+ * otherwise the dump value is kept. This mirrors what the generator does.
+ */
+function getMergedOperators(): readonly ReferenceOperator[] {
+    // Build a lookup: "operator|category" → override entry
+    const overrideLookup = new Map<string, ReferenceOperator>();
+    for (const ov of overrideOperators) {
+        overrideLookup.set(`${ov.operator}|${ov.category}`, ov);
+    }
+
+    return referenceOperators.map((ref) => {
+        const override = overrideLookup.get(`${ref.operator}|${ref.category}`);
+        if (!override) {
+            return ref;
+        }
+        return {
+            operator: ref.operator,
+            category: ref.category,
+            description: override.description || ref.description,
+            docLink: override.docLink || ref.docLink,
+        };
+    });
+}
+
+const mergedOperators = getMergedOperators();
+
+describe('merged dump + overrides verification', () => {
+    test('overrides file exists and has entries', () => {
+        expect(overridesContent.length).toBeGreaterThan(100);
+        expect(overrideOperators.length).toBeGreaterThan(0);
+    });
+
+    test('override count is within expected range (detect truncation)', () => {
+        // Currently 56 overrides. Allow some flex for additions/removals,
+        // but catch catastrophic truncation (e.g., file emptied to <10).
+        expect(overrideOperators.length).toBeGreaterThanOrEqual(40);
+        expect(overrideOperators.length).toBeLessThanOrEqual(80);
+    });
+
+    test('every override targets an operator that exists in the dump', () => {
+        const dumpKeys = new Set(referenceOperators.map((r) => `${r.operator}|${r.category}`));
+        const orphans: string[] = [];
+
+        for (const ov of overrideOperators) {
+            if (!dumpKeys.has(`${ov.operator}|${ov.category}`)) {
+                orphans.push(`${ov.operator} (${ov.category})`);
+            }
+        }
+
+        expect(orphans).toEqual([]);
+    });
+
+    test('descriptions match the merged dump+overrides (detect hand-edits and stale generates)', () => {
+        const mismatches: string[] = [];
+
+        for (const merged of mergedOperators) {
+            if (!merged.description) {
+                continue; // operator with no description in either dump or override
+            }
+
+            const expectedMeta = CATEGORY_TO_META[merged.category];
+            if (!expectedMeta) {
+                continue;
+            }
+
+            const impl = implementedOperators.find((op) => op.value === merged.operator && op.meta === expectedMeta);
+
+            if (impl && impl.description !== merged.description) {
+                mismatches.push(
+                    `${merged.operator} (${merged.category}): ` +
+                        `expected "${merged.description}", got "${impl.description}"`,
+                );
+            }
+        }
+
+        expect(mismatches).toEqual([]);
+    });
+
+    test('doc links from dump match implementation links for single-category operators', () => {
+        // Many operators appear in multiple dump categories (e.g., $eq in both
+        // "Comparison Query" and "Comparison Expression"). The scraper finds the
+        // doc page under whichever category directory it tries first, while the
+        // implementation generates URLs from each operator's meta tag. For
+        // cross-category operators, the dump link and impl link will point to
+        // different (but both valid) doc directories.
+        //
+        // This test only compares links for operators that appear in exactly one
+        // dump category, so there is no cross-category ambiguity.
+
+        // Known scraper mismatches: the scraper's global index fallback found
+        // these operators' doc pages under a different directory than their
+        // category implies. The implementation link is correct; the dump link is
+        // a scraper artifact. Update this set when refreshing the dump.
+        //
+        // NOTE: After fixing META_TO_DOC_DIR in docLinks.ts (expr:bool → logical-query,
+        // expr:comparison → comparison-query) and adding smart link emission in the
+        // generator (hardcoded URLs for cross-category fallbacks), this set should
+        // remain empty unless new scraper mismatches are discovered.
+        const KNOWN_SCRAPER_MISMATCHES = new Set<string>([]);
+
+        // Map each operator to the set of dump categories it appears in
+        const operatorCategories = new Map<string, Set<string>>();
+        for (const ref of referenceOperators) {
+            const cats = operatorCategories.get(ref.operator) ?? new Set();
+            cats.add(ref.category);
+            operatorCategories.set(ref.operator, cats);
+        }
+
+        const mismatches: string[] = [];
+
+        for (const ref of referenceOperators) {
+            if (!ref.docLink) {
+                continue;
+            }
+
+            // Skip cross-category operators — their dump link may come from
+            // a different category than the implementation's meta tag
+            const cats = operatorCategories.get(ref.operator);
+            if (cats && cats.size > 1) {
+                continue;
+            }
+
+            // Skip known scraper mismatches (documented above)
+            if (KNOWN_SCRAPER_MISMATCHES.has(ref.operator)) {
+                continue;
+            }
+
+            const expectedMeta = CATEGORY_TO_META[ref.category];
+            if (!expectedMeta) {
+                continue;
+            }
+
+            const impl = implementedOperators.find((op) => op.value === ref.operator && op.meta === expectedMeta);
+
+            if (!impl || !impl.link) {
+                continue;
+            }
+
+            const dumpLink = ref.docLink.toLowerCase();
+            const implLink = impl.link.toLowerCase();
+
+            if (dumpLink !== implLink) {
+                mismatches.push(`${ref.operator} (${ref.category}): ` + `dump="${ref.docLink}", impl="${impl.link}"`);
+            }
+        }
+
+        expect(mismatches).toEqual([]);
+    });
+
+    test('every override with a description was applied (not silently ignored)', () => {
+        const unapplied: string[] = [];
+
+        for (const ov of overrideOperators) {
+            if (!ov.description) {
+                continue;
+            }
+
+            const expectedMeta = CATEGORY_TO_META[ov.category];
+            if (!expectedMeta) {
+                continue;
+            }
+
+            const impl = implementedOperators.find((op) => op.value === ov.operator && op.meta === expectedMeta);
+
+            if (!impl) {
+                unapplied.push(`${ov.operator} (${ov.category}): no implementation entry found`);
+            } else if (impl.description !== ov.description) {
+                unapplied.push(
+                    `${ov.operator} (${ov.category}): override="${ov.description}", ` + `impl="${impl.description}"`,
+                );
+            }
+        }
+
+        expect(unapplied).toEqual([]);
+    });
+});
diff --git a/packages/documentdb-constants/src/parseOperatorReference.test.ts b/packages/documentdb-constants/src/parseOperatorReference.test.ts
new file mode 100644
index 000000000..4ebf5138a
--- /dev/null
+++ b/packages/documentdb-constants/src/parseOperatorReference.test.ts
@@ -0,0 +1,156 @@
+/*---------------------------------------------------------------------------------------------
+ *  Copyright (c) Microsoft Corporation. All rights reserved.
+ *  Licensed under the MIT License. See License.txt in the project root for license information.
+ *--------------------------------------------------------------------------------------------*/
+
+/**
+ * Unit tests for the parseOperatorReference helper.
+ */
+
+import { parseOperatorReference } from './parseOperatorReference';
+
+describe('parseOperatorReference', () => {
+    test('parses a minimal dump with one category and one operator', () => {
+        const content = `# DocumentDB Operator Reference
+
+## Summary
+
+| Category | Listed | Total |
+| --- | --- | --- |
+| Test Category | 1 | 1 |
+
+## Test Category
+
+### $testOp
+
+- **Description:** A test operator
+- **Doc Link:** https://example.com/test
+
+## Not Listed
+
+- **$excluded** (Test Category) — Not supported
+`;
+        const result = parseOperatorReference(content);
+        expect(result.operators).toHaveLength(1);
+        expect(result.operators[0]).toEqual({
+            operator: '$testOp',
+            category: 'Test Category',
+            description: 'A test operator',
+            docLink: 'https://example.com/test',
+        });
+        expect(result.notListed).toHaveLength(1);
+        expect(result.notListed[0]).toEqual({
+            operator: '$excluded',
+            category: 'Test Category',
+            reason: 'Not supported',
+        });
+    });
+
+    test('handles operators with empty description and doc link', () => {
+        const content = `## Variables
+
+### $$NOW
+
+### $$ROOT
+`;
+        const result = parseOperatorReference(content);
+        expect(result.operators).toHaveLength(2);
+        expect(result.operators[0]).toEqual({
+            operator: '$$NOW',
+            category: 'Variables',
+            description: '',
+            docLink: '',
+        });
+        expect(result.operators[1]).toEqual({
+            operator: '$$ROOT',
+            category: 'Variables',
+            description: '',
+            docLink: '',
+        });
+    });
+
+    test('handles operators with syntax blocks (ignores syntax)', () => {
+        const content = `## Comparison Query Operators
+
+### $eq
+
+- **Description:** Matches values equal to a specified value
+- **Syntax:**
+
+\`\`\`javascript
+{ field: { $eq: value } }
+\`\`\`
+
+- **Doc Link:** https://example.com/$eq
+
+### $gt
+
+- **Description:** Matches values greater than a specified value
+- **Doc Link:** https://example.com/$gt
+`;
+        const result = parseOperatorReference(content);
+        expect(result.operators).toHaveLength(2);
+        expect(result.operators[0].operator).toBe('$eq');
+        expect(result.operators[0].description).toBe('Matches values equal to a specified value');
+        expect(result.operators[1].operator).toBe('$gt');
+    });
+
+    test('skips operators in the Summary section', () => {
+        const content = `## Summary
+
+| Category | Listed | Total |
+| --- | --- | --- |
+| Test | 2 | 3 |
+
+## Test Category
+
+### $realOp
+
+- **Description:** I am real
+`;
+        const result = parseOperatorReference(content);
+        expect(result.operators).toHaveLength(1);
+        expect(result.operators[0].operator).toBe('$realOp');
+    });
+
+    test('multiple not-listed entries are parsed correctly', () => {
+        const content = `## Not Listed
+
+Operators below are not in scope.
+
+- **$where** (Evaluation Query) — Deprecated in Mongo version 8.0
+- **$meta** (Projection) — Not in scope
+- **$accumulator** (Custom Aggregation) — Deprecated in Mongo version 8.0
+`;
+        const result = parseOperatorReference(content);
+        expect(result.notListed).toHaveLength(3);
+        expect(result.notListed[0].operator).toBe('$where');
+        expect(result.notListed[0].reason).toBe('Deprecated in Mongo version 8.0');
+        expect(result.notListed[1].operator).toBe('$meta');
+        expect(result.notListed[2].operator).toBe('$accumulator');
+    });
+
+    test('handles multiple categories', () => {
+        const content = `## Cat A
+
+### $a1
+
+- **Description:** Operator a1
+
+### $a2
+
+- **Description:** Operator a2
+
+## Cat B
+
+### $b1
+
+- **Description:** Operator b1
+`;
+        const result = parseOperatorReference(content);
+        expect(result.operators).toHaveLength(3);
+        expect(result.operators[0].category).toBe('Cat A');
+        expect(result.operators[1].category).toBe('Cat A');
+        expect(result.operators[2].category).toBe('Cat B');
+    });
+});
diff --git a/packages/documentdb-constants/src/parseOperatorReference.ts b/packages/documentdb-constants/src/parseOperatorReference.ts
new file mode 100644
index 000000000..e1179c336
--- /dev/null
+++ b/packages/documentdb-constants/src/parseOperatorReference.ts
@@ -0,0 +1,160 @@
+/*---------------------------------------------------------------------------------------------
+ *  Copyright (c) Microsoft Corporation. All rights reserved.
+ *  Licensed under the MIT License. See License.txt in the project root for license information.
+ *--------------------------------------------------------------------------------------------*/
+
+/**
+ * Parses the scraped/operator-reference.md dump file into structured data
+ * for use in the operatorReference verification test.
+ *
+ * The dump format uses structured headings:
+ *   ## Category Name           — category section
+ *   ### $operatorName          — operator heading
+ *   - **Description:** text    — operator description
+ *   - **Doc Link:** url        — documentation URL
+ *
+ *   ## Not Listed              — excluded operators section
+ *   - **$operator** (Category) — Reason
+ */
+
+/**
+ * Represents a single operator entry parsed from the reference dump.
+ */
+export interface ReferenceOperator {
+    /** Operator name taken from the "### " heading, e.g. "$eq", "$$NOW" */
+    readonly operator: string;
+    /** Text of the enclosing "## " heading, e.g. "Comparison Query Operators" */
+    readonly category: string;
+    /** Text of the entry's Description bullet; empty string when the entry has none */
+    readonly description: string;
+    /** URL from the entry's Doc Link bullet; empty string when missing or given as "none" */
+    readonly docLink: string;
+}
+
+/**
+ * Represents an operator excluded from the package scope (one bullet of the "## Not Listed" section).
+ */
+export interface NotListedOperator {
+    /** Operator name from the bold part of the bullet, e.g. "$where", "$meta" */
+    readonly operator: string;
+    /** Category from the parenthesised part of the bullet */
+    readonly category: string;
+    /** Reason for exclusion: the text after the em dash */
+    readonly reason: string;
+}
+
+/**
+ * Complete parsed result from the reference dump, as returned by parseOperatorReference.
+ */
+export interface ParsedReference {
+    /** All listed (in-scope) operators, in the order they appear in the dump */
+    readonly operators: readonly ReferenceOperator[];
+    /** All not-listed (excluded) operators, in the order they appear in the dump */
+    readonly notListed: readonly NotListedOperator[];
+}
+
+/**
+ * Parses the scraped/operator-reference.md content line by line. "## Summary" is skipped, "## Not Listed"
+ * bullets become notListed entries, and "### " headings under any other "## " section become operators.
+ * @param content - the full Markdown content of the dump file
+ * @returns the listed operators and the not-listed operators, each in document order
+ */
+export function parseOperatorReference(content: string): ParsedReference {
+    const lines = content.split('\n');
+    const operators: ReferenceOperator[] = [];
+    const notListed: NotListedOperator[] = [];
+
+    let currentCategory = '';
+    let inNotListed = false;
+    let inSummary = false;
+
+    // Pending operator entry; flushOperator() emits it (when it has a name and a category) and resets it
+    let currentOperator = '';
+    let currentDescription = '';
+    let currentDocLink = '';
+
+    function flushOperator(): void {
+        if (currentOperator && currentCategory && !inNotListed && !inSummary) {
+            operators.push({
+                operator: currentOperator,
+                category: currentCategory,
+                description: currentDescription,
+                docLink: currentDocLink,
+            });
+        }
+        currentOperator = '';
+        currentDescription = '';
+        currentDocLink = '';
+    }
+
+    for (const line of lines) {
+        const trimmed = line.trim(); // also strips a trailing "\r" left by CRLF line endings
+
+        // "## " heading: emit the pending operator, then switch to Summary, Not Listed, or a new category
+        const h2Match = trimmed.match(/^## (.+)$/);
+        if (h2Match) {
+            flushOperator();
+            const heading = h2Match[1].trim();
+            if (heading === 'Summary') {
+                inSummary = true;
+                inNotListed = false;
+                currentCategory = '';
+            } else if (heading === 'Not Listed') {
+                inNotListed = true;
+                inSummary = false;
+                currentCategory = '';
+            } else {
+                currentCategory = heading;
+                inNotListed = false;
+                inSummary = false;
+            }
+            continue;
+        }
+
+        // Skip everything inside the Summary section
+        if (inSummary) {
+            continue;
+        }
+
+        // Parse "Not Listed" entries: - **$operator** (Category) — Reason  (non-matching lines are ignored)
+        if (inNotListed) {
+            const notListedMatch = trimmed.match(/^- \*\*(.+?)\*\* \((.+?)\) — (.+)$/);
+            if (notListedMatch) {
+                notListed.push({
+                    operator: notListedMatch[1],
+                    category: notListedMatch[2],
+                    reason: notListedMatch[3],
+                });
+            }
+            continue;
+        }
+
+        // "### " heading: emit the pending operator and start collecting a new one
+        const h3Match = trimmed.match(/^### (.+)$/);
+        if (h3Match) {
+            flushOperator();
+            currentOperator = h3Match[1].trim();
+            continue;
+        }
+
+        // Parse description: - **Description:** text  (only while an operator is being collected)
+        const descMatch = trimmed.match(/^- \*\*Description:\*\* (.+)$/);
+        if (descMatch && currentOperator) {
+            currentDescription = descMatch[1].trim();
+            continue;
+        }
+
+        // Parse doc link: - **Doc Link:** url  ('none' means no page at expected location; stored as '')
+        const linkMatch = trimmed.match(/^- \*\*Doc Link:\*\* (.+)$/);
+        if (linkMatch && currentOperator) {
+            const rawLink = linkMatch[1].trim();
+            currentDocLink = rawLink === 'none' ? '' : rawLink;
+            continue;
+        }
+    }
+
+    // Emit the operator still pending at end of input
+    flushOperator();
+
+    return { operators, notListed };
+}
diff --git a/packages/documentdb-constants/src/queryOperators.ts b/packages/documentdb-constants/src/queryOperators.ts
new file mode 100644
index 000000000..8390356a6
--- /dev/null
+++ b/packages/documentdb-constants/src/queryOperators.ts
@@ -0,0 +1,458 @@
+/*---------------------------------------------------------------------------------------------
+ *  Copyright (c) Microsoft Corporation. All rights reserved.
+ *  Licensed under the MIT License. See License.txt in the project root for license information.
+ *--------------------------------------------------------------------------------------------*/
+
+// AUTO-GENERATED — DO NOT EDIT BY HAND
+//
+// Generated by: npm run generate  (scripts/generate-from-reference.ts)
+// Sources:      resources/scraped/operator-reference.md
+//               resources/overrides/operator-overrides.md
+//               resources/overrides/operator-snippets.md
+//
+// To change operator data, edit the overrides/snippets files and re-run the generator.
+
+import { getDocLink } from './docLinks';
+import { registerOperators } from './getFilteredCompletions';
+import {
+    META_QUERY_ARRAY,
+    META_QUERY_BITWISE,
+    META_QUERY_COMPARISON,
+    META_QUERY_ELEMENT,
+    META_QUERY_EVALUATION,
+    META_QUERY_GEOSPATIAL,
+    META_QUERY_LOGICAL,
+    META_QUERY_MISC,
+    META_QUERY_PROJECTION,
+} from './metaTags';
+import { type OperatorEntry } from './types';
+
+// ---------------------------------------------------------------------------
+// Comparison Query Operators ($eq, $gt, $gte, $in, $lt, $lte, $ne, $nin); every link comes from getDocLink
+// ---------------------------------------------------------------------------
+
+const comparisonQueryOperators: readonly OperatorEntry[] = [
+    {
+        value: '$eq',
+        meta: META_QUERY_COMPARISON,
+        description: 'The $eq query operator compares the value of a field to a specified value',
+        snippet: '{ $eq: ${1:value} }',
+        link: getDocLink('$eq', META_QUERY_COMPARISON),
+    },
+    {
+        value: '$gt',
+        meta: META_QUERY_COMPARISON,
+        description:
+            'The $gt query operator retrieves documents where the value of a field is greater than a specified value',
+        snippet: '{ $gt: ${1:value} }',
+        link: getDocLink('$gt', META_QUERY_COMPARISON),
+    },
+    {
+        value: '$gte',
+        meta: META_QUERY_COMPARISON,
+        description:
+            'The $gte operator retrieves documents where the value of a field is greater than or equal to a specified value',
+        snippet: '{ $gte: ${1:value} }',
+        link: getDocLink('$gte', META_QUERY_COMPARISON),
+    },
+    {
+        value: '$in',
+        meta: META_QUERY_COMPARISON,
+        description: 'The $in operator matches value of a field against an array of specified values',
+        snippet: '{ $in: [${1:value}] }',
+        link: getDocLink('$in', META_QUERY_COMPARISON),
+    },
+    {
+        value: '$lt',
+        meta: META_QUERY_COMPARISON,
+        description: 'The $lt operator retrieves documents where the value of field is less than a specified value',
+        snippet: '{ $lt: ${1:value} }',
+        link: getDocLink('$lt', META_QUERY_COMPARISON),
+    },
+    {
+        value: '$lte',
+        meta: META_QUERY_COMPARISON,
+        description:
+            'The $lte operator retrieves documents where the value of a field is less than or equal to a specified value',
+        snippet: '{ $lte: ${1:value} }',
+        link: getDocLink('$lte', META_QUERY_COMPARISON),
+    },
+    {
+        value: '$ne',
+        meta: META_QUERY_COMPARISON,
+        description: "The $ne operator retrieves documents where the value of a field doesn't equal a specified value",
+        snippet: '{ $ne: ${1:value} }',
+        link: getDocLink('$ne', META_QUERY_COMPARISON),
+    },
+    {
+        value: '$nin',
+        meta: META_QUERY_COMPARISON,
+        description: "The $nin operator retrieves documents where the value of a field doesn't match a list of values",
+        snippet: '{ $nin: [${1:value}] }',
+        link: getDocLink('$nin', META_QUERY_COMPARISON),
+    },
+];
+
+// ---------------------------------------------------------------------------
+// Logical Query Operators ($and, $not, $nor, $or); every link comes from getDocLink
+// ---------------------------------------------------------------------------
+
+const logicalQueryOperators: readonly OperatorEntry[] = [
+    {
+        value: '$and',
+        meta: META_QUERY_LOGICAL,
+        description:
+            'The $and operator joins multiple query clauses and returns documents that match all specified conditions.',
+        snippet: '{ $and: [{ ${1:expression} }] }',
+        link: getDocLink('$and', META_QUERY_LOGICAL),
+    },
+    {
+        value: '$not',
+        meta: META_QUERY_LOGICAL,
+        description:
+            "The $not operator performs a logical NOT operation on a specified expression, selecting documents that don't match the expression.",
+        snippet: '{ $not: { ${1:expression} } }',
+        link: getDocLink('$not', META_QUERY_LOGICAL),
+    },
+    {
+        value: '$nor',
+        meta: META_QUERY_LOGICAL,
+        description:
+            'The $nor operator performs a logical NOR on an array of expressions and retrieves documents that fail all the conditions.',
+        snippet: '{ $nor: [{ ${1:expression} }] }',
+        link: getDocLink('$nor', META_QUERY_LOGICAL),
+    },
+    {
+        value: '$or',
+        meta: META_QUERY_LOGICAL,
+        description:
+            'The $or operator joins query clauses with a logical OR and returns documents that match at least one of the specified conditions.',
+        snippet: '{ $or: [{ ${1:expression} }] }',
+        link: getDocLink('$or', META_QUERY_LOGICAL),
+    },
+];
+
+// ---------------------------------------------------------------------------
+// Element Query Operators ($exists, $type); every link comes from getDocLink
+// ---------------------------------------------------------------------------
+
+const elementQueryOperators: readonly OperatorEntry[] = [
+    {
+        value: '$exists',
+        meta: META_QUERY_ELEMENT,
+        description:
+            'The $exists operator retrieves documents that contain the specified field in their document structure.',
+        snippet: '{ $exists: ${1:true} }',
+        link: getDocLink('$exists', META_QUERY_ELEMENT),
+    },
+    {
+        value: '$type',
+        meta: META_QUERY_ELEMENT,
+        description: 'The $type operator retrieves documents if the chosen field is of the specified type.',
+        snippet: '{ $type: "${1:type}" }',
+        link: getDocLink('$type', META_QUERY_ELEMENT),
+    },
+];
+
+// ---------------------------------------------------------------------------
+// Evaluation Query Operators ($expr, $jsonSchema, $mod, $regex, $text); $regex and $text set applicableBsonTypes
+// ---------------------------------------------------------------------------
+
+const evaluationQueryOperators: readonly OperatorEntry[] = [
+    {
+        value: '$expr',
+        meta: META_QUERY_EVALUATION,
+        description:
+            'The $expr operator allows the use of aggregation expressions within the query language, enabling complex field comparisons and calculations.',
+        snippet: '{ $expr: { ${1:expression} } }',
+        link: getDocLink('$expr', META_QUERY_EVALUATION),
+    },
+    {
+        value: '$jsonSchema',
+        meta: META_QUERY_EVALUATION,
+        description:
+            'The $jsonSchema operator validates documents against a JSON Schema definition for data validation and structure enforcement. Discover supported features and limitations.',
+        snippet: '{ $jsonSchema: { bsonType: "${1:object}" } }',
+        link: getDocLink('$jsonSchema', META_QUERY_EVALUATION),
+    },
+    {
+        value: '$mod',
+        meta: META_QUERY_EVALUATION,
+        description:
+            'The $mod operator performs a modulo operation on the value of a field and selects documents with a specified result.',
+        snippet: '{ $mod: [${1:divisor}, ${2:remainder}] }',
+        link: getDocLink('$mod', META_QUERY_EVALUATION),
+    },
+    {
+        value: '$regex',
+        meta: META_QUERY_EVALUATION,
+        description:
+            'The $regex operator provides regular expression capabilities for pattern matching in queries, allowing flexible string matching and searching.',
+        snippet: '{ $regex: /${1:pattern}/ }',
+        link: getDocLink('$regex', META_QUERY_EVALUATION),
+        applicableBsonTypes: ['string'],
+    },
+    {
+        value: '$text',
+        meta: META_QUERY_EVALUATION,
+        description:
+            'The $text operator performs text search on the content of indexed string fields, enabling full-text search capabilities.',
+        snippet: '{ $text: { \\$search: "${1:text}" } }',
+        link: getDocLink('$text', META_QUERY_EVALUATION),
+        applicableBsonTypes: ['string'],
+    },
+];
+
+// ---------------------------------------------------------------------------
+// Geospatial Operators — entries with standalone: false carry value-only snippets (no "$operator" key)
+// ---------------------------------------------------------------------------
+
+const geospatialOperators: readonly OperatorEntry[] = [
+    {
+        value: '$geoIntersects',
+        meta: META_QUERY_GEOSPATIAL,
+        description:
+            'The $geoIntersects operator selects documents whose location field intersects with a specified GeoJSON object.',
+        snippet: '{ $geoIntersects: { \\$geometry: { type: "${1:GeoJSON type}", coordinates: ${2:coordinates} } } }',
+        link: getDocLink('$geoIntersects', META_QUERY_GEOSPATIAL),
+    },
+    {
+        value: '$geoWithin',
+        meta: META_QUERY_GEOSPATIAL,
+        description:
+            'The $geoWithin operator selects documents whose location field is completely within a specified geometry.',
+        snippet: '{ $geoWithin: { \\$geometry: { type: "${1:GeoJSON type}", coordinates: ${2:coordinates} } } }',
+        link: getDocLink('$geoWithin', META_QUERY_GEOSPATIAL),
+    },
+    {
+        value: '$box',
+        meta: META_QUERY_GEOSPATIAL,
+        description: 'The $box operator defines a rectangular area for geospatial queries using coordinate pairs.',
+        snippet: '[[${1:bottomLeftX}, ${2:bottomLeftY}], [${3:upperRightX}, ${4:upperRightY}]]',
+        link: getDocLink('$box', META_QUERY_GEOSPATIAL),
+        standalone: false,
+    },
+    {
+        value: '$center',
+        meta: META_QUERY_GEOSPATIAL,
+        description: 'The $center operator specifies a circle using legacy coordinate pairs for $geoWithin queries.',
+        snippet: '[[${1:x}, ${2:y}], ${3:radius}]',
+        link: getDocLink('$center', META_QUERY_GEOSPATIAL),
+        standalone: false,
+    },
+    {
+        value: '$centerSphere',
+        meta: META_QUERY_GEOSPATIAL,
+        description: 'The $centerSphere operator specifies a circle using spherical geometry for $geoWithin queries.',
+        snippet: '[[${1:x}, ${2:y}], ${3:radiusInRadians}]',
+        link: getDocLink('$centerSphere', META_QUERY_GEOSPATIAL),
+        standalone: false,
+    },
+    {
+        value: '$geometry',
+        meta: META_QUERY_GEOSPATIAL,
+        description: 'The $geometry operator specifies a GeoJSON geometry for geospatial queries.',
+        snippet: '{ type: "${1:Point}", coordinates: [${2:coordinates}] }',
+        link: getDocLink('$geometry', META_QUERY_GEOSPATIAL),
+        standalone: false,
+    },
+    {
+        value: '$maxDistance',
+        meta: META_QUERY_GEOSPATIAL,
+        description:
+            'The $maxDistance operator specifies the maximum distance that can exist between two points in a geospatial query.',
+        snippet: '${1:distance}',
+        link: getDocLink('$maxDistance', META_QUERY_GEOSPATIAL),
+        standalone: false,
+    },
+    {
+        value: '$minDistance',
+        meta: META_QUERY_GEOSPATIAL,
+        description:
+            'The $minDistance operator specifies the minimum distance that must exist between two points in a geospatial query.',
+        snippet: '${1:distance}',
+        link: getDocLink('$minDistance', META_QUERY_GEOSPATIAL),
+        standalone: false,
+    },
+    {
+        value: '$polygon',
+        meta: META_QUERY_GEOSPATIAL,
+        description:
+            'The $polygon operator defines a polygon for geospatial queries, allowing you to find locations within an irregular shape.',
+        snippet: '[[${1:x1}, ${2:y1}], [${3:x2}, ${4:y2}], [${5:x3}, ${6:y3}]]',
+        link: getDocLink('$polygon', META_QUERY_GEOSPATIAL),
+        standalone: false,
+    },
+    {
+        value: '$near',
+        meta: META_QUERY_GEOSPATIAL,
+        description:
+            'The $near operator returns documents with location fields that are near a specified point, sorted by distance.',
+        snippet:
+            '{ $near: { \\$geometry: { type: "Point", coordinates: [${1:lng}, ${2:lat}] }, \\$maxDistance: ${3:distance} } }',
+        link: getDocLink('$near', META_QUERY_GEOSPATIAL),
+    },
+    {
+        value: '$nearSphere',
+        meta: META_QUERY_GEOSPATIAL,
+        description:
+            'The $nearSphere operator returns documents whose location fields are near a specified point on a sphere, sorted by distance on a spherical surface.',
+        snippet:
+            '{ $nearSphere: { \\$geometry: { type: "Point", coordinates: [${1:lng}, ${2:lat}] }, \\$maxDistance: ${3:distance} } }',
+        link: getDocLink('$nearSphere', META_QUERY_GEOSPATIAL),
+    },
+];
+
+// ---------------------------------------------------------------------------
+// Array Query Operators ($all, $elemMatch, $size); every entry sets applicableBsonTypes to ['array']
+// ---------------------------------------------------------------------------
+
+const arrayQueryOperators: readonly OperatorEntry[] = [
+    {
+        value: '$all',
+        meta: META_QUERY_ARRAY,
+        description: 'The $all operator helps finding array documents matching all the elements.',
+        snippet: '{ $all: [${1:value}] }',
+        link: getDocLink('$all', META_QUERY_ARRAY),
+        applicableBsonTypes: ['array'],
+    },
+    {
+        value: '$elemMatch',
+        meta: META_QUERY_ARRAY,
+        description:
+            'The $elemmatch operator returns complete array, qualifying criteria with at least one matching array element.',
+        snippet: '{ $elemMatch: { ${1:query} } }',
+        link: getDocLink('$elemMatch', META_QUERY_ARRAY),
+        applicableBsonTypes: ['array'],
+    },
+    {
+        value: '$size',
+        meta: META_QUERY_ARRAY,
+        description:
+            'The $size operator is used to query documents where an array field has a specified number of elements.',
+        snippet: '{ $size: ${1:number} }',
+        link: getDocLink('$size', META_QUERY_ARRAY),
+        applicableBsonTypes: ['array'],
+    },
+];
+
+// ---------------------------------------------------------------------------
+// Bitwise Query Operators; every entry sets applicableBsonTypes to ['int32', 'long']
+// ---------------------------------------------------------------------------
+
+const bitwiseQueryOperators: readonly OperatorEntry[] = [
+    {
+        value: '$bitsAllClear',
+        meta: META_QUERY_BITWISE,
+        description:
+            'The $bitsAllClear operator is used to match documents where all the bit positions specified in a bitmask are clear.',
+        snippet: '{ $bitsAllClear: ${1:bitmask} }',
+        link: getDocLink('$bitsAllClear', META_QUERY_BITWISE),
+        applicableBsonTypes: ['int32', 'long'],
+    },
+    {
+        value: '$bitsAllSet',
+        meta: META_QUERY_BITWISE,
+        description: 'The bitsAllSet command is used to match documents where all the specified bit positions are set.',
+        snippet: '{ $bitsAllSet: ${1:bitmask} }',
+        link: getDocLink('$bitsAllSet', META_QUERY_BITWISE),
+        applicableBsonTypes: ['int32', 'long'],
+    },
+    {
+        value: '$bitsAnyClear',
+        meta: META_QUERY_BITWISE,
+        description:
+            'The $bitsAnyClear operator matches documents where any of the specified bit positions in a bitmask are clear.',
+        snippet: '{ $bitsAnyClear: ${1:bitmask} }',
+        link: getDocLink('$bitsAnyClear', META_QUERY_BITWISE),
+        applicableBsonTypes: ['int32', 'long'],
+    },
+    {
+        value: '$bitsAnySet',
+        meta: META_QUERY_BITWISE,
+        description:
+            'The $bitsAnySet operator returns documents where any of the specified bit positions are set to 1.',
+        snippet: '{ $bitsAnySet: ${1:bitmask} }',
+        link: getDocLink('$bitsAnySet', META_QUERY_BITWISE),
+        applicableBsonTypes: ['int32', 'long'],
+    },
+];
+
+// ---------------------------------------------------------------------------
+// Projection Operators — links are literal URLs inferred from other categories, not getDocLink calls
+// ---------------------------------------------------------------------------
+
+const projectionOperators: readonly OperatorEntry[] = [
+    {
+        value: '$',
+        meta: META_QUERY_PROJECTION,
+        description:
+            'The $ positional operator identifies an element in an array to update without explicitly specifying the position of the element in the array.',
+        link: 'https://learn.microsoft.com/en-us/azure/documentdb/operators/array-update/$', // inferred from another category
+        standalone: false,
+    },
+    {
+        value: '$elemMatch',
+        meta: META_QUERY_PROJECTION,
+        description:
+            'The $elemmatch operator returns complete array, qualifying criteria with at least one matching array element.',
+        snippet: '{ $elemMatch: { ${1:query} } }',
+        link: 'https://learn.microsoft.com/en-us/azure/documentdb/operators/array-query/$elemmatch', // inferred from another category
+    },
+    {
+        value: '$slice',
+        meta: META_QUERY_PROJECTION,
+        description: 'The $slice operator returns a subset of an array from any element onwards in the array.',
+        snippet: '{ $slice: ${1:number} }',
+        link: 'https://learn.microsoft.com/en-us/azure/documentdb/operators/array-expression/$slice', // inferred from another category
+    },
+];
+
+// ---------------------------------------------------------------------------
+// Miscellaneous Query Operators ($comment, $rand, $natural); $natural is marked standalone: false
+// ---------------------------------------------------------------------------
+
+const miscellaneousQueryOperators: readonly OperatorEntry[] = [
+    {
+        value: '$comment',
+        meta: META_QUERY_MISC,
+        description:
+            'The $comment operator adds a comment to a query to help identify the query in logs and profiler output.',
+        snippet: '{ $comment: "${1:comment}" }',
+        link: getDocLink('$comment', META_QUERY_MISC),
+    },
+    {
+        value: '$rand',
+        meta: META_QUERY_MISC,
+        description: 'The $rand operator generates a random float value between 0 and 1.',
+        snippet: '{ $rand: {} }',
+        link: getDocLink('$rand', META_QUERY_MISC),
+    },
+    {
+        value: '$natural',
+        meta: META_QUERY_MISC,
+        description:
+            'The $natural operator forces the query to use the natural order of documents in a collection, providing control over document ordering and retrieval.',
+        snippet: '{ $natural: ${1:1} }',
+        link: getDocLink('$natural', META_QUERY_MISC),
+        standalone: false,
+    },
+];
+
+// ---------------------------------------------------------------------------
+// Registration — loadQueryOperators() hands all nine operator groups above to registerOperators in one call
+// ---------------------------------------------------------------------------
+
+export function loadQueryOperators(): void {
+    registerOperators([
+        ...comparisonQueryOperators,
+        ...logicalQueryOperators,
+        ...elementQueryOperators,
+        ...evaluationQueryOperators,
+        ...geospatialOperators,
+        ...arrayQueryOperators,
+        ...bitwiseQueryOperators,
+        ...projectionOperators,
+        ...miscellaneousQueryOperators,
+    ]);
+}
diff --git a/packages/documentdb-constants/src/stages.ts b/packages/documentdb-constants/src/stages.ts
new file mode 100644
index 000000000..0752d7734
--- /dev/null
+++ b/packages/documentdb-constants/src/stages.ts
@@ -0,0 +1,291 @@
+/*---------------------------------------------------------------------------------------------
+ *  Copyright (c) Microsoft Corporation. All rights reserved.
+ *  Licensed under the MIT License. See License.txt in the project root for license information.
+ *--------------------------------------------------------------------------------------------*/
+
+// AUTO-GENERATED — DO NOT EDIT BY HAND
+//
+// Generated by: npm run generate  (scripts/generate-from-reference.ts)
+// Sources:      resources/scraped/operator-reference.md
+//               resources/overrides/operator-overrides.md
+//               resources/overrides/operator-snippets.md
+//
+// To change operator data, edit the overrides/snippets files and re-run the generator.
+
+import { getDocLink } from './docLinks';
+import { registerOperators } from './getFilteredCompletions';
+import { META_STAGE } from './metaTags';
+import { type OperatorEntry } from './types';
+
+// ---------------------------------------------------------------------------
+// Aggregation Pipeline Stages
+// ---------------------------------------------------------------------------
+
+const aggregationPipelineStages: readonly OperatorEntry[] = [
+    {
+        value: '$addFields',
+        meta: META_STAGE,
+        description: 'The $addFields stage in the aggregation pipeline is used to add new fields to documents.',
+        snippet: '{ $addFields: { ${1:newField}: ${2:expression} } }',
+        link: getDocLink('$addFields', META_STAGE),
+    },
+    {
+        value: '$bucket',
+        meta: META_STAGE,
+        description: 'Groups input documents into buckets based on specified boundaries.',
+        snippet: '{ $bucket: { groupBy: "${1:\\$field}", boundaries: [${2:values}], default: "${3:Other}" } }',
+        link: getDocLink('$bucket', META_STAGE),
+    },
+    {
+        value: '$bucketAuto',
+        meta: META_STAGE,
+        description:
+            'Categorizes documents into a specified number of groups based on a given expression, automatically determining bucket boundaries.',
+        snippet: '{ $bucketAuto: { groupBy: "${1:\\$field}", buckets: ${2:number} } }',
+    },
+    {
+        value: '$changeStream',
+        meta: META_STAGE,
+        description: 'The $changeStream stage opens a change stream cursor to track data changes in real-time.',
+        snippet: '{ $changeStream: {} }',
+        link: getDocLink('$changeStream', META_STAGE),
+    },
+    {
+        value: '$collStats',
+        meta: META_STAGE,
+        description:
+            'The $collStats stage in the aggregation pipeline is used to return statistics about a collection.',
+        snippet: '{ $collStats: { storageStats: {} } }',
+        link: getDocLink('$collStats', META_STAGE),
+    },
+    {
+        value: '$count',
+        meta: META_STAGE,
+        description:
+            'The `$count` operator is used to count the number of documents that match a query filtering criteria.',
+        snippet: '{ $count: "${1:countField}" }',
+        link: 'https://learn.microsoft.com/en-us/azure/documentdb/operators/accumulators/$count', // inferred from another category
+    },
+    {
+        value: '$densify',
+        meta: META_STAGE,
+        description: 'Adds missing data points in a sequence of values within an array or collection.',
+        snippet: '{ $densify: { field: "${1:field}", range: { step: ${2:1}, bounds: "full" } } }',
+        link: getDocLink('$densify', META_STAGE),
+    },
+    {
+        value: '$documents',
+        meta: META_STAGE,
+        description: 'The $documents stage creates a pipeline from a set of provided documents.',
+        snippet: '{ $documents: [${1:documents}] }',
+        link: getDocLink('$documents', META_STAGE),
+    },
+    {
+        value: '$facet',
+        meta: META_STAGE,
+        description:
+            'The $facet allows for multiple parallel aggregations to be executed within a single pipeline stage.',
+        snippet: '{ $facet: { ${1:outputField}: [{ ${2:stage} }] } }',
+        link: getDocLink('$facet', META_STAGE),
+    },
+    {
+        value: '$fill',
+        meta: META_STAGE,
+        description:
+            'The $fill stage allows filling missing values in documents based on specified methods and criteria.',
+        snippet: '{ $fill: { output: { ${1:field}: { method: "${2:linear}" } } } }',
+        link: getDocLink('$fill', META_STAGE),
+    },
+    {
+        value: '$geoNear',
+        meta: META_STAGE,
+        description:
+            'The $geoNear operator finds and sorts documents by their proximity to a geospatial point, returning distance information for each document.',
+        snippet:
+            '{ $geoNear: { near: { type: "Point", coordinates: [${1:lng}, ${2:lat}] }, distanceField: "${3:distance}" } }',
+        link: getDocLink('$geoNear', META_STAGE),
+    },
+    {
+        value: '$graphLookup',
+        meta: META_STAGE,
+        description:
+            'Performs a recursive search on a collection to return documents connected by a specified field relationship.',
+        snippet:
+            '{ $graphLookup: { from: "${1:collection}", startWith: "${2:\\$field}", connectFromField: "${3:field}", connectToField: "${4:field}", as: "${5:result}" } }',
+    },
+    {
+        value: '$group',
+        meta: META_STAGE,
+        description:
+            'The $group stage groups documents by specified identifier expressions and applies accumulator expressions.',
+        snippet: '{ $group: { _id: "${1:\\$field}", ${2:accumulator}: { ${3:\\$sum}: 1 } } }',
+        link: getDocLink('$group', META_STAGE),
+    },
+    {
+        value: '$indexStats',
+        meta: META_STAGE,
+        description: 'The $indexStats stage returns usage statistics for each index in the collection.',
+        snippet: '{ $indexStats: {} }',
+        link: getDocLink('$indexStats', META_STAGE),
+    },
+    {
+        value: '$limit',
+        meta: META_STAGE,
+        description: 'Restricts the number of documents passed to the next stage in the pipeline.',
+        snippet: '{ $limit: ${1:number} }',
+    },
+    {
+        value: '$lookup',
+        meta: META_STAGE,
+        description:
+            'The $lookup stage in the Aggregation Framework is used to perform left outer joins with other collections.',
+        snippet:
+            '{ $lookup: { from: "${1:collection}", localField: "${2:field}", foreignField: "${3:field}", as: "${4:result}" } }',
+        link: getDocLink('$lookup', META_STAGE),
+    },
+    {
+        value: '$match',
+        meta: META_STAGE,
+        description:
+            'The $match stage in the aggregation pipeline is used to filter documents that match a specified condition.',
+        snippet: '{ $match: { ${1:query} } }',
+        link: getDocLink('$match', META_STAGE),
+    },
+    {
+        value: '$merge',
+        meta: META_STAGE,
+        description:
+            'The $merge stage in an aggregation pipeline writes the results of the aggregation to a specified collection.',
+        snippet: '{ $merge: { into: "${1:collection}" } }',
+        link: getDocLink('$merge', META_STAGE),
+    },
+    {
+        value: '$out',
+        meta: META_STAGE,
+        description:
+            'The `$out` stage in an aggregation pipeline writes the resulting documents to a specified collection.',
+        snippet: '{ $out: "${1:collection}" }',
+        link: getDocLink('$out', META_STAGE),
+    },
+    {
+        value: '$project',
+        meta: META_STAGE,
+        description: 'Reshapes documents by including, excluding, or computing new fields.',
+        snippet: '{ $project: { ${1:field}: 1 } }',
+    },
+    {
+        value: '$redact',
+        meta: META_STAGE,
+        description: 'Filters the content of the documents based on access rights.',
+        snippet:
+            '{ $redact: { \\$cond: { if: { ${1:expression} }, then: "${2:\\$\\$DESCEND}", else: "${3:\\$\\$PRUNE}" } } }',
+        link: getDocLink('$redact', META_STAGE),
+    },
+    {
+        value: '$replaceRoot',
+        meta: META_STAGE,
+        description: 'Replaces the input document with a specified embedded document, promoting it to the top level.',
+        snippet: '{ $replaceRoot: { newRoot: "${1:\\$field}" } }',
+    },
+    {
+        value: '$replaceWith',
+        meta: META_STAGE,
+        description:
+            'The $replaceWith operator in Azure DocumentDB returns a document after replacing a document with the specified document',
+        snippet: '{ $replaceWith: "${1:\\$field}" }',
+        link: getDocLink('$replaceWith', META_STAGE),
+    },
+    {
+        value: '$sample',
+        meta: META_STAGE,
+        description: 'The $sample operator in Azure DocumentDB returns a randomly selected number of documents',
+        snippet: '{ $sample: { size: ${1:number} } }',
+        link: getDocLink('$sample', META_STAGE),
+    },
+    {
+        value: '$search',
+        meta: META_STAGE,
+        description: 'Performs full-text search on string fields using Atlas Search or compatible search indexes.',
+        snippet: '{ $search: { ${1} } }',
+    },
+    {
+        value: '$searchMeta',
+        meta: META_STAGE,
+        description: 'Returns metadata about an Atlas Search query without returning the matching documents.',
+        snippet: '{ $searchMeta: { ${1} } }',
+    },
+    {
+        value: '$set',
+        meta: META_STAGE,
+        description: 'The $set operator in Azure DocumentDB updates or creates a new field with a specified value',
+        snippet: '{ $set: { ${1:field}: ${2:expression} } }',
+        link: getDocLink('$set', META_STAGE),
+    },
+    {
+        value: '$setWindowFields',
+        meta: META_STAGE,
+        description:
+            'Adds computed fields to documents using window functions over a specified partition and sort order.',
+        snippet:
+            '{ $setWindowFields: { partitionBy: "${1:\\$field}", sortBy: { ${2:field}: ${3:1} }, output: { ${4:newField}: { ${5:windowFunc} } } } }',
+    },
+    {
+        value: '$skip',
+        meta: META_STAGE,
+        description:
+            'The $skip stage in the aggregation pipeline is used to skip a specified number of documents from the input and pass the remaining documents to the next stage in the pipeline.',
+        snippet: '{ $skip: ${1:number} }',
+        link: getDocLink('$skip', META_STAGE),
+    },
+    {
+        value: '$sort',
+        meta: META_STAGE,
+        description:
+            'The $sort stage in the aggregation pipeline is used to order the documents in the pipeline by a specified field or fields.',
+        snippet: '{ $sort: { ${1:field}: ${2:1} } }',
+        link: getDocLink('$sort', META_STAGE),
+    },
+    {
+        value: '$sortByCount',
+        meta: META_STAGE,
+        description:
+            'The $sortByCount stage in the aggregation pipeline is used to group documents by a specified expression and then sort the count of documents in each group in descending order.',
+        snippet: '{ $sortByCount: "${1:\\$field}" }',
+        link: getDocLink('$sortByCount', META_STAGE),
+    },
+    {
+        value: '$unionWith',
+        meta: META_STAGE,
+        description: 'Combines the results of two collections into a single result set, similar to SQL UNION ALL.',
+        snippet: '{ $unionWith: { coll: "${1:collection}", pipeline: [${2}] } }',
+    },
+    {
+        value: '$unset',
+        meta: META_STAGE,
+        description: 'The $unset stage in the aggregation pipeline is used to remove specified fields from documents.',
+        snippet: '{ $unset: "${1:field}" }',
+        link: getDocLink('$unset', META_STAGE),
+    },
+    {
+        value: '$unwind',
+        meta: META_STAGE,
+        description:
+            'The $unwind stage in the aggregation framework is used to deconstruct an array field from the input documents to output a document for each element.',
+        snippet: '{ $unwind: "${1:\\$arrayField}" }',
+        link: getDocLink('$unwind', META_STAGE),
+    },
+    {
+        value: '$currentOp',
+        meta: META_STAGE,
+        description: 'Returns information on active and queued operations for the database instance.',
+        snippet: '{ $currentOp: { allUsers: true } }',
+    },
+];
+
+// ---------------------------------------------------------------------------
+// Registration
+// ---------------------------------------------------------------------------
+
+export function loadStages(): void {
+    registerOperators(aggregationPipelineStages.slice()); // register a shallow copy, not the module-level array
+}
diff --git a/packages/documentdb-constants/src/structuralInvariants.test.ts b/packages/documentdb-constants/src/structuralInvariants.test.ts
new file mode 100644
index 000000000..953fc7831
--- /dev/null
+++ b/packages/documentdb-constants/src/structuralInvariants.test.ts
@@ -0,0 +1,242 @@
+/*---------------------------------------------------------------------------------------------
+ *  Copyright (c) Microsoft Corporation. All rights reserved.
+ *  Licensed under the MIT License. See License.txt in the project root for license information.
+ *--------------------------------------------------------------------------------------------*/
+
+/**
+ * Structural invariant tests for all operator entries.
+ *
+ * Validates that every entry in getAllCompletions() has the correct shape,
+ * consistent meta tags, and reasonable values.
+ */
+
+import { ALL_META_TAGS, getAllCompletions, type OperatorEntry } from './index';
+
+const allOperators = getAllCompletions();
+
+describe('structural invariants', () => {
+    test('total operator count is in the expected range', () => {
+        const total = allOperators.length; // currently 308: 298 from the dump + 10 BSON constructors
+        expect(total).toBeGreaterThanOrEqual(290);
+        expect(total).toBeLessThanOrEqual(320);
+    });
+
+    test('every entry has required fields', () => {
+        const invalid: string[] = []; // collected so one failing run reports every malformed entry
+        for (const { value, meta, description } of allOperators) {
+            if (!value) {
+                invalid.push('entry missing value');
+            }
+            if (!meta) {
+                invalid.push(`${value} missing meta`);
+            }
+            if (!description) {
+                invalid.push(`${value} missing description`);
+            }
+        }
+        expect(invalid).toEqual([]);
+    });
+
+    test('operator values start with $ or $$ (except BSON constructors)', () => {
+        const invalid: string[] = [];
+        for (const op of allOperators) {
+            if (op.meta === 'bson') {
+                // BSON constructors (ObjectId, ISODate, etc.): no $ prefix; collected like every other violation
+                if (!/^[A-Z]/.test(op.value)) {
+                    invalid.push(`${op.value} (bson) should start with an uppercase letter`);
+                }
+            } else if (op.meta === 'variable') {
+                // System variables start with $$
+                if (!op.value.startsWith('$$')) {
+                    invalid.push(`${op.value} (variable) should start with $$`);
+                }
+            } else if (!op.value.startsWith('$')) {
+                // All other operators start with $
+                invalid.push(`${op.value} (${op.meta}) should start with $`);
+            }
+        }
+        expect(invalid).toEqual([]);
+    });
+
+    test('every entry has a valid meta tag', () => {
+        // MetaTag is typed as the known tags plus any other string, so a typo in an
+        // entry's meta still compiles; membership in ALL_META_TAGS therefore has to be
+        // asserted at runtime.
+        const knownTags = new Set<string>(ALL_META_TAGS);
+        const strays = allOperators.filter(({ meta }) => !knownTags.has(meta));
+        const invalid = strays.map(({ value, meta }) => `${value} has unknown meta: ${meta}`);
+
+        expect(invalid).toEqual([]);
+    });
+
+    test('descriptions are non-empty strings', () => {
+        const empty: string[] = []; // whitespace-only descriptions count as empty too
+        for (const { value, meta, description } of allOperators) {
+            if (!(typeof description === 'string' && description.trim().length > 0)) {
+                empty.push(`${value} (${meta}) has empty description`);
+            }
+        }
+        expect(empty).toEqual([]);
+    });
+
+    test('snippets are strings when present', () => {
+        const invalid: string[] = []; // a missing snippet is fine; a defined non-string one is not
+        for (const { value, meta, snippet } of allOperators) {
+            if (!(snippet === undefined || typeof snippet === 'string')) {
+                invalid.push(`${value} (${meta}) has non-string snippet`);
+            }
+        }
+        expect(invalid).toEqual([]);
+    });
+
+    test('links are valid URLs when present', () => {
+        const invalid: string[] = [];
+        for (const { value, meta, link } of allOperators) {
+            // link is optional; when set, only the https:// scheme prefix is verified
+            const malformed = typeof link !== 'string' || !link.startsWith('https://');
+            if (link !== undefined && malformed) {
+                invalid.push(`${value} (${meta}) has invalid link: ${link}`);
+            }
+        }
+        expect(invalid).toEqual([]);
+    });
+
+    test('applicableBsonTypes is a string array when present', () => {
+        const invalid: string[] = []; // one message per violation, asserted once after the loop
+        for (const op of allOperators) {
+            if (op.applicableBsonTypes !== undefined) { // optional field: entries without it are skipped
+                if (!Array.isArray(op.applicableBsonTypes)) {
+                    invalid.push(`${op.value} (${op.meta}) applicableBsonTypes is not an array`);
+                } else {
+                    for (const t of op.applicableBsonTypes) {
+                        if (typeof t !== 'string' || t.trim().length === 0) { // non-string and blank items share one message
+                            invalid.push(`${op.value} (${op.meta}) has empty BSON type`);
+                        }
+                    }
+                }
+            }
+        }
+        expect(invalid).toEqual([]);
+    });
+
+    test('no duplicate (value, meta) pairs', () => {
+        // The same value may recur under different meta tags; only an identical pair is an error.
+        const keys = allOperators.map(({ value, meta }) => `${value}|${meta}`);
+        const seen = new Set<string>();
+        const duplicates = keys.filter((key) => {
+            const repeated = seen.has(key);
+            seen.add(key);
+            return repeated;
+        });
+
+        expect(duplicates).toEqual([]);
+    });
+
+    test('BSON constructors have expected entries', () => {
+        // Presence check only: arrayContaining tolerates additional constructors.
+        const expectedConstructors = [
+            'BinData',
+            'ISODate',
+            'MaxKey',
+            'MinKey',
+            'NumberDecimal',
+            'NumberInt',
+            'NumberLong',
+            'ObjectId',
+            'Timestamp',
+            'UUID',
+        ];
+        const bsonEntries = allOperators.filter((entry) => entry.meta === 'bson');
+        const bsonValues = bsonEntries.map((entry) => entry.value).sort();
+        expect(bsonValues).toEqual(expect.arrayContaining(expectedConstructors));
+    });
+
+    test('system variables have expected entries', () => {
+        // Presence check only: arrayContaining tolerates additional variables.
+        const expectedVariables = ['$$CURRENT', '$$DESCEND', '$$KEEP', '$$NOW', '$$PRUNE', '$$REMOVE', '$$ROOT'];
+        const variableEntries = allOperators.filter((entry) => entry.meta === 'variable');
+        const varValues = variableEntries.map((entry) => entry.value).sort();
+        expect(varValues).toEqual(expect.arrayContaining(expectedVariables));
+    });
+
+    test('key operators are present', () => {
+        const registered = new Set(allOperators.map((entry) => entry.value));
+
+        // Query operators (matched by value only; the meta tag is not checked here)
+        expect(registered).toContain('$eq');
+        expect(registered).toContain('$gt');
+        expect(registered).toContain('$and');
+        expect(registered).toContain('$regex');
+        expect(registered).toContain('$exists');
+
+        // Aggregation pipeline stages
+        expect(registered).toContain('$match');
+        expect(registered).toContain('$group');
+        expect(registered).toContain('$lookup');
+        expect(registered).toContain('$project');
+        expect(registered).toContain('$sort');
+
+        // Update operators
+        expect(registered).toContain('$set');
+        expect(registered).toContain('$unset');
+        expect(registered).toContain('$inc');
+
+        // Accumulators
+        expect(registered).toContain('$sum');
+        expect(registered).toContain('$avg');
+
+        // Expression operators
+        expect(registered).toContain('$add');
+        expect(registered).toContain('$concat');
+        expect(registered).toContain('$cond');
+    });
+
+    test('excluded operators are NOT present with unsupported meta tags', () => {
+        // Index entries by "value|meta" so that a specific (operator, category)
+        // pairing can be probed; deprecated or unsupported pairings must be absent.
+        const opsByValueMeta = new Map<string, OperatorEntry>(
+            allOperators.map((entry): [string, OperatorEntry] => [`${entry.value}|${entry.meta}`, entry]),
+        );
+
+        // $where is deprecated and should not be present as evaluation query
+        expect(opsByValueMeta.has('$where|query:evaluation')).toBe(false);
+    });
+});
+
+describe('meta tag coverage', () => {
+    test('every meta tag in ALL_META_TAGS has at least one operator (except parent-only and runtime tags)', () => {
+        // Exempt tags: operators use subcategories (query:comparison, update:field),
+        // not the bare 'query' or 'update' tags. 'field:identifier' is runtime-injected.
+        // Every other tag must be carried by at least one registered entry.
+        const exemptTags = new Set(['query', 'update', 'field:identifier']);
+        const tagsInUse = new Set(allOperators.map((entry) => entry.meta));
+        const missing: string[] = [];
+        for (const tag of ALL_META_TAGS) {
+            const exempt = exemptTags.has(tag);
+            if (!exempt && !tagsInUse.has(tag)) {
+                missing.push(tag);
+            }
+        }
+
+        expect(missing).toEqual([]);
+    });
+
+    test('top-level meta categories have expected operator counts', () => {
+        // Tally entries by the part of the meta tag before the first ':' ('query:comparison' counts as 'query').
+        const countByPrefix: Record<string, number> = {};
+        for (const { meta } of allOperators) {
+            const [prefix] = meta.split(':');
+            countByPrefix[prefix] = (countByPrefix[prefix] || 0) + 1;
+        }
+        expect(countByPrefix['query']).toBe(43);
+        expect(countByPrefix['update']).toBe(22);
+        expect(countByPrefix['stage']).toBe(35);
+        expect(countByPrefix['accumulator']).toBe(21);
+        expect(countByPrefix['window']).toBe(27);
+        expect(countByPrefix['bson']).toBe(10);
+        expect(countByPrefix['variable']).toBe(7);
+        // Expression operators: ~143-144
+        expect(countByPrefix['expr']).toBeGreaterThanOrEqual(140);
+        expect(countByPrefix['expr']).toBeLessThanOrEqual(150);
+    });
+});
diff --git a/packages/documentdb-constants/src/systemVariables.ts b/packages/documentdb-constants/src/systemVariables.ts
new file mode 100644
index 000000000..219d04eb0
--- /dev/null
+++ b/packages/documentdb-constants/src/systemVariables.ts
@@ -0,0 +1,74 @@
+/*---------------------------------------------------------------------------------------------
+ *  Copyright (c) Microsoft Corporation. All rights reserved.
+ *  Licensed under the MIT License. See License.txt in the project root for license information.
+ *--------------------------------------------------------------------------------------------*/
+
+// AUTO-GENERATED — DO NOT EDIT BY HAND
+//
+// Generated by: npm run generate  (scripts/generate-from-reference.ts)
+// Sources:      resources/scraped/operator-reference.md
+//               resources/overrides/operator-overrides.md
+//               resources/overrides/operator-snippets.md
+//
+// To change operator data, edit the overrides/snippets files and re-run the generator.
+
+import { registerOperators } from './getFilteredCompletions';
+import { META_VARIABLE } from './metaTags';
+import { type OperatorEntry } from './types';
+
+// ---------------------------------------------------------------------------
+// Variables in Aggregation Expressions
+// ---------------------------------------------------------------------------
+
+const systemVariables: readonly OperatorEntry[] = [ // $$-prefixed variables, all tagged META_VARIABLE; none defines a snippet or link
+    {
+        value: '$$NOW',
+        meta: META_VARIABLE,
+        description:
+            'Returns the current datetime as a Date object. Constant throughout a single aggregation pipeline.',
+    },
+    {
+        value: '$$ROOT',
+        meta: META_VARIABLE,
+        description:
+            'References the root document — the top-level document currently being processed in the pipeline stage.',
+    },
+    {
+        value: '$$REMOVE',
+        meta: META_VARIABLE,
+        description:
+            'Removes a field from the output document. Used with $project or $addFields to conditionally exclude fields.',
+    },
+    {
+        value: '$$CURRENT',
+        meta: META_VARIABLE,
+        description:
+            'References the current document in the pipeline stage. Equivalent to $$ROOT at the start of the pipeline.',
+    },
+    {
+        value: '$$DESCEND',
+        meta: META_VARIABLE,
+        description:
+            'Used with $redact. Returns the document fields at the current level and continues descending into subdocuments.',
+    },
+    {
+        value: '$$PRUNE',
+        meta: META_VARIABLE,
+        description:
+            'Used with $redact. Excludes all fields at the current document level and stops descending into subdocuments.',
+    },
+    {
+        value: '$$KEEP',
+        meta: META_VARIABLE,
+        description:
+            'Used with $redact. Keeps all fields at the current document level without further descending into subdocuments.',
+    },
+];
+
+// ---------------------------------------------------------------------------
+// Registration
+// ---------------------------------------------------------------------------
+
+export function loadSystemVariables(): void {
+    registerOperators(systemVariables.slice()); // register a shallow copy of the readonly list
+}
diff --git a/packages/documentdb-constants/src/types.ts b/packages/documentdb-constants/src/types.ts
new file mode 100644
index 000000000..d08cac711
--- /dev/null
+++ b/packages/documentdb-constants/src/types.ts
@@ -0,0 +1,154 @@
+/*---------------------------------------------------------------------------------------------
+ *  Copyright (c) Microsoft Corporation. All rights reserved.
+ *  Licensed under the MIT License. See License.txt in the project root for license information.
+ *--------------------------------------------------------------------------------------------*/
+
+import { type ALL_META_TAGS } from './metaTags';
+
+/**
+ * Represents a single operator, stage, accumulator, system variable, or BSON
+ * constructor for use in autocomplete, hover docs, and diagnostics.
+ */
+export interface OperatorEntry {
+    /** The operator string, e.g. "$gt", "$match", "$$NOW", "ObjectId" */
+    readonly value: string;
+
+    /**
+     * Category tag for filtering. Determines which contexts this entry
+     * appears in. See {@link MetaTag} for the full set.
+     *
+     * Examples: "query", "query:comparison", "stage", "accumulator",
+     * "expr:arith", "expr:date", "bson", "field:identifier"
+     */
+    readonly meta: MetaTag;
+
+    /** Human-readable one-line description. */
+    readonly description: string;
+
+    /**
+     * Monaco snippet with tab stops for insertion. If absent, `value` is inserted as-is.
+     * Example: '{ \\$match: { ${1:field}: ${2:value} } }'
+     * NOTE(review): generated entries leave the leading operator `$` unescaped — confirm which form is intended.
+     */
+    readonly snippet?: string;
+
+    /**
+     * URL to the DocumentDB documentation page for this operator.
+     * Usually built by `getDocLink` (`docLinks.ts`); some generated entries carry a literal URL instead.
+     */
+    readonly link?: string;
+
+    /**
+     * Applicable BSON types for type-aware filtering.
+     * If set, this operator only appears when the field's bsonType
+     * matches one of these values. If absent, the operator is universal.
+     *
+     * Example: $regex → ['string'], $size → ['array']
+     */
+    readonly applicableBsonTypes?: readonly string[];
+
+    /**
+     * Whether this operator is valid as a standalone completion at top-level
+     * positions (key, value, operator). Defaults to `true` when absent.
+     *
+     * Set to `false` for operators that are only valid inside another operator's
+     * value object — e.g., geospatial shape specifiers (`$box`, `$geometry`)
+     * which are only valid inside `$geoWithin`/`$near`, or sort-only modifiers
+     * like `$natural`.
+     *
+     * Completion providers should filter out `standalone === false` entries
+     * from standard completion lists. These entries remain in the registry
+     * for hover documentation and future context-aware nested completions.
+     */
+    readonly standalone?: boolean;
+
+    /**
+     * @experimental Not yet populated by the generator; reserved for a future
+     * contextual-snippet feature.
+     *
+     * When populated, this field carries a hint about the type of value an operator
+     * produces or expects, enabling the CompletionItemProvider to tailor snippets
+     * and insert sensible placeholder values based on context.
+     *
+     * Planned values and their meanings:
+     *   - `"number"`   — operator always produces a number
+     *                    (e.g. `$size` on an array field → insert a numeric comparand)
+     *   - `"boolean"`  — operator produces true/false
+     *                    (e.g. `$and`, `$or` in expression context)
+     *   - `"string"`   — operator produces a string
+     *                    (e.g. `$concat`, `$toLower`)
+     *   - `"array"`    — operator produces an array
+     *                    (e.g. `$push` accumulator, `$concatArrays`)
+     *   - `"date"`     — operator produces a date
+     *                    (e.g. `$dateAdd`, `$toDate`)
+     *   - `"same"`     — operator produces the same type as its input
+     *                    (e.g. `$min`, `$max`, comparison operators like `$gt`)
+     *   - `"object"`   — operator produces a document/object
+     *                    (e.g. `$mergeObjects`)
+     *   - `"any"`      — return type is undetermined or context-dependent
+     *
+     * This field is intentionally absent from all current entries. The generator
+     * (`scripts/generate-from-reference.ts`) does not yet emit it. It will be
+     * populated in a follow-up pass once the `CompletionItemProvider` is ready
+     * to consume it.
+     */
+    readonly returnType?: string;
+}
+
+/**
+ * Filter configuration for {@link getFilteredCompletions}: which categories to return, optionally per BSON type.
+ */
+export interface CompletionFilter {
+    /**
+     * Meta tag prefixes to include. Supports prefix matching:
+     * 'query' matches 'query', 'query:comparison', 'query:logical', etc.
+     * 'expr' matches all 'expr:*' entries.
+     */
+    readonly meta: readonly string[];
+
+    /** Optional: only return operators applicable to these BSON types (cf. `OperatorEntry.applicableBsonTypes`). */
+    readonly bsonTypes?: readonly string[];
+}
+
+/**
+ * Meta tag type: a known tag from ALL_META_TAGS, or any other string. Tags use a hierarchical scheme:
+ *
+ * - 'query' — top-level query operators (in find filter, $match)
+ * - 'query:comparison' — comparison subset ($eq, $gt, etc.)
+ * - 'query:logical' — logical ($and, $or, $not, $nor)
+ * - 'query:element' — element ($exists, $type)
+ * - 'query:evaluation' — evaluation ($expr, $regex, $mod, $text)
+ * - 'query:array' — array ($all, $elemMatch, $size)
+ * - 'query:bitwise' — bitwise ($bitsAllSet, etc.)
+ * - 'query:geospatial' — geospatial ($geoWithin, $near, etc.)
+ * - 'query:projection' — projection ($, $elemMatch, $slice)
+ * - 'query:misc' — miscellaneous ($comment, $rand, $natural)
+ * - 'update' — update operators ($set, $unset, $inc, etc.)
+ * - 'update:field' — field update subset
+ * - 'update:array' — array update subset ($push, $pull, etc.)
+ * - 'update:bitwise' — bitwise update ($bit)
+ * - 'stage' — aggregation pipeline stages ($match, $group, etc.)
+ * - 'accumulator' — accumulators ($sum, $avg, $first, etc.)
+ * - 'expr:arith' — arithmetic expressions ($add, $subtract, etc.)
+ * - 'expr:array' — array expressions ($arrayElemAt, $filter, etc.)
+ * - 'expr:bool' — boolean expressions ($and, $or, $not)
+ * - 'expr:comparison' — comparison expressions ($cmp, $eq, etc.)
+ * - 'expr:conditional' — conditional ($cond, $ifNull, $switch)
+ * - 'expr:date' — date expressions ($dateAdd, $year, etc.)
+ * - 'expr:object' — object expressions ($mergeObjects, etc.)
+ * - 'expr:set' — set expressions ($setUnion, etc.)
+ * - 'expr:string' — string expressions ($concat, $substr, etc.)
+ * - 'expr:trig' — trigonometry ($sin, $cos, etc.)
+ * - 'expr:type' — type conversion ($convert, $toInt, etc.)
+ * - 'expr:datasize' — data size ($bsonSize, $binarySize)
+ * - 'expr:timestamp' — timestamp ($tsIncrement, $tsSecond)
+ * - 'expr:bitwise' — bitwise expressions ($bitAnd, $bitOr, etc.)
+ * - 'expr:literal' — $literal
+ * - 'expr:misc' — miscellaneous expressions ($getField, $rand, etc.)
+ * - 'expr:variable' — variable expressions ($let)
+ * - 'window' — window operators ($rank, $denseRank, etc.)
+ * - 'bson' — BSON constructor functions (ObjectId, ISODate, etc.)
+ * - 'variable' — system variables ($$NOW, $$ROOT, etc.)
+ * - 'field:identifier' — injected field names from schema (not static)
+ */
+export type MetaTag = (typeof ALL_META_TAGS)[number] | (string & {}); // `string & {}`: any string is allowed, known tags still autocomplete
diff --git a/packages/documentdb-constants/src/updateOperators.ts b/packages/documentdb-constants/src/updateOperators.ts
new file mode 100644
index 000000000..a90f62fcd
--- /dev/null
+++ b/packages/documentdb-constants/src/updateOperators.ts
@@ -0,0 +1,203 @@
+/*---------------------------------------------------------------------------------------------
+ *  Copyright (c) Microsoft Corporation. All rights reserved.
+ *  Licensed under the MIT License. See License.txt in the project root for license information.
+ *--------------------------------------------------------------------------------------------*/
+
+// AUTO-GENERATED — DO NOT EDIT BY HAND
+//
+// Generated by: npm run generate  (scripts/generate-from-reference.ts)
+// Sources:      resources/scraped/operator-reference.md
+//               resources/overrides/operator-overrides.md
+//               resources/overrides/operator-snippets.md
+//
+// To change operator data, edit the overrides/snippets files and re-run the generator.
+
+import { getDocLink } from './docLinks';
+import { registerOperators } from './getFilteredCompletions';
+import { META_UPDATE_ARRAY, META_UPDATE_BITWISE, META_UPDATE_FIELD } from './metaTags';
+import { type OperatorEntry } from './types';
+
+// ---------------------------------------------------------------------------
+// Field Update Operators
+// ---------------------------------------------------------------------------
+
+const fieldUpdateOperators: readonly OperatorEntry[] = [ // update operators tagged META_UPDATE_FIELD
+    {
+        value: '$currentDate',
+        meta: META_UPDATE_FIELD,
+        description:
+            'The $currentDate operator sets the value of a field to the current date, either as a Date or a timestamp.',
+        snippet: '{ $currentDate: { "${1:field}": true } }',
+        link: getDocLink('$currentDate', META_UPDATE_FIELD),
+    },
+    {
+        value: '$inc',
+        meta: META_UPDATE_FIELD,
+        description: 'The $inc operator increments the value of a field by a specified amount.',
+        snippet: '{ $inc: { "${1:field}": ${2:value} } }',
+        link: getDocLink('$inc', META_UPDATE_FIELD),
+    },
+    {
+        value: '$min',
+        meta: META_UPDATE_FIELD,
+        description: 'Retrieves the minimum value for a specified field', // NOTE(review): accumulator wording (see link) — confirm for the update operator
+        snippet: '{ $min: { "${1:field}": ${2:value} } }',
+        link: 'https://learn.microsoft.com/en-us/azure/documentdb/operators/accumulators/$min', // inferred from another category
+    },
+    {
+        value: '$max',
+        meta: META_UPDATE_FIELD,
+        description: 'The $max operator returns the maximum value from a set of input values.', // NOTE(review): accumulator wording (see link) — confirm for the update operator
+        snippet: '{ $max: { "${1:field}": ${2:value} } }',
+        link: 'https://learn.microsoft.com/en-us/azure/documentdb/operators/accumulators/$max', // inferred from another category
+    },
+    {
+        value: '$mul',
+        meta: META_UPDATE_FIELD,
+        description: 'The $mul operator multiplies the value of a field by a specified number.',
+        snippet: '{ $mul: { "${1:field}": ${2:value} } }',
+        link: getDocLink('$mul', META_UPDATE_FIELD),
+    },
+    {
+        value: '$rename',
+        meta: META_UPDATE_FIELD,
+        description: 'The $rename operator allows renaming fields in documents during update operations.',
+        snippet: '{ $rename: { "${1:oldField}": "${2:newField}" } }',
+        link: getDocLink('$rename', META_UPDATE_FIELD),
+    },
+    {
+        value: '$set',
+        meta: META_UPDATE_FIELD,
+        description: 'The $set operator in Azure DocumentDB updates or creates a new field with a specified value',
+        snippet: '{ $set: { "${1:field}": ${2:value} } }',
+        link: 'https://learn.microsoft.com/en-us/azure/documentdb/operators/aggregation/$set', // inferred from another category
+    },
+    {
+        value: '$setOnInsert',
+        meta: META_UPDATE_FIELD,
+        description:
+            'The $setOnInsert operator sets field values only when an upsert operation results in an insert of a new document.',
+        snippet: '{ $setOnInsert: { "${1:field}": ${2:value} } }',
+        link: getDocLink('$setOnInsert', META_UPDATE_FIELD),
+    },
+    {
+        value: '$unset',
+        meta: META_UPDATE_FIELD,
+        description: 'The $unset stage in the aggregation pipeline is used to remove specified fields from documents.', // NOTE(review): describes the aggregation stage, not the update operator — correct via the overrides file
+        snippet: '{ $unset: { "${1:field}": ${2:value} } }',
+        link: 'https://learn.microsoft.com/en-us/azure/documentdb/operators/aggregation/$unset', // inferred from another category
+    },
+];
+
+// ---------------------------------------------------------------------------
+// Array Update Operators
+// ---------------------------------------------------------------------------
+
+const arrayUpdateOperators: readonly OperatorEntry[] = [ // update operators tagged META_UPDATE_ARRAY
+    {
+        value: '$',
+        meta: META_UPDATE_ARRAY,
+        description:
+            'The $ positional operator identifies an element in an array to update without explicitly specifying the position of the element in the array.',
+        link: getDocLink('$', META_UPDATE_ARRAY),
+    },
+    {
+        value: '$[]',
+        meta: META_UPDATE_ARRAY,
+        description: 'Positional all operator. Acts as a placeholder to update all elements in an array field.',
+    },
+    {
+        value: '$[identifier]',
+        meta: META_UPDATE_ARRAY,
+        description:
+            'Filtered positional operator. Acts as a placeholder to update elements that match an arrayFilters condition.',
+    },
+    {
+        value: '$addToSet',
+        meta: META_UPDATE_ARRAY,
+        description:
+            "The addToSet operator adds elements to an array if they don't already exist, while ensuring uniqueness of elements within the set.", // NOTE(review): "addToSet" is missing its $ prefix in this text
+        snippet: '{ $addToSet: { "${1:field}": ${2:value} } }',
+        link: getDocLink('$addToSet', META_UPDATE_ARRAY),
+    },
+    {
+        value: '$pop',
+        meta: META_UPDATE_ARRAY,
+        description: 'Removes the first or last element of an array.',
+        snippet: '{ $pop: { "${1:field}": ${2:1} } }',
+        link: getDocLink('$pop', META_UPDATE_ARRAY),
+    },
+    {
+        value: '$pull',
+        meta: META_UPDATE_ARRAY,
+        description: 'Removes all instances of a value from an array.',
+        snippet: '{ $pull: { "${1:field}": ${2:condition} } }',
+        link: getDocLink('$pull', META_UPDATE_ARRAY),
+    },
+    {
+        value: '$push',
+        meta: META_UPDATE_ARRAY,
+        description: 'The $push operator adds a specified value to an array within a document.',
+        snippet: '{ $push: { "${1:field}": ${2:value} } }',
+        link: getDocLink('$push', META_UPDATE_ARRAY),
+    },
+    {
+        value: '$pullAll',
+        meta: META_UPDATE_ARRAY,
+        description: 'The $pullAll operator is used to remove all instances of the specified values from an array.',
+        snippet: '{ $pullAll: { "${1:field}": [${2:values}] } }',
+        link: getDocLink('$pullAll', META_UPDATE_ARRAY),
+    },
+    {
+        value: '$each',
+        meta: META_UPDATE_ARRAY,
+        description:
+            'The $each operator is used within an `$addToSet`or`$push` operation to add multiple elements to an array field in a single update operation.', // NOTE(review): spaces are missing around "or" between the two code spans
+        snippet: '{ $each: [${1:values}] }',
+        link: getDocLink('$each', META_UPDATE_ARRAY),
+    },
+    {
+        value: '$position',
+        meta: META_UPDATE_ARRAY,
+        description:
+            'Specifies the position in the array at which the $push operator inserts elements. Used with $each.',
+        snippet: '{ $position: ${1:index} }',
+    },
+    {
+        value: '$slice',
+        meta: META_UPDATE_ARRAY,
+        description: 'The $slice operator returns a subset of an array from any element onwards in the array.', // NOTE(review): array-expression wording (see link) — confirm for the update operator
+        snippet: '{ $slice: ${1:number} }',
+        link: 'https://learn.microsoft.com/en-us/azure/documentdb/operators/array-expression/$slice', // inferred from another category
+    },
+    {
+        value: '$sort',
+        meta: META_UPDATE_ARRAY,
+        description:
+            'The $sort stage in the aggregation pipeline is used to order the documents in the pipeline by a specified field or fields.', // NOTE(review): describes the aggregation stage, not the update operator — correct via the overrides file
+        snippet: '{ $sort: { "${1:field}": ${2:1} } }',
+        link: 'https://learn.microsoft.com/en-us/azure/documentdb/operators/aggregation/$sort', // inferred from another category
+    },
+];
+
+// ---------------------------------------------------------------------------
+// Bitwise Update Operators
+// ---------------------------------------------------------------------------
+
+const bitwiseUpdateOperators: readonly OperatorEntry[] = [ // update operators tagged META_UPDATE_BITWISE
+    {
+        value: '$bit',
+        meta: META_UPDATE_BITWISE,
+        description: 'The `$bit` operator is used to perform bitwise operations on integer values.',
+        snippet: '{ $bit: { "${1:field}": { "${2:and|or|xor}": ${3:value} } } }', // NOTE(review): "and|or|xor" is plain placeholder text, not a choice list — confirm intent
+        link: getDocLink('$bit', META_UPDATE_BITWISE),
+    },
+];
+
+// ---------------------------------------------------------------------------
+// Registration
+// ---------------------------------------------------------------------------
+
+export function loadUpdateOperators(): void {
+    registerOperators(fieldUpdateOperators.concat(arrayUpdateOperators, bitwiseUpdateOperators)); // field, array, bitwise
+}
diff --git a/packages/documentdb-constants/src/windowOperators.ts b/packages/documentdb-constants/src/windowOperators.ts
new file mode 100644
index 000000000..f15b412e1
--- /dev/null
+++ b/packages/documentdb-constants/src/windowOperators.ts
@@ -0,0 +1,233 @@
+/*---------------------------------------------------------------------------------------------
+ *  Copyright (c) Microsoft Corporation. All rights reserved.
+ *  Licensed under the MIT License. See License.txt in the project root for license information.
+ *--------------------------------------------------------------------------------------------*/
+
+// AUTO-GENERATED — DO NOT EDIT BY HAND
+//
+// Generated by: npm run generate  (scripts/generate-from-reference.ts)
+// Sources:      resources/scraped/operator-reference.md
+//               resources/overrides/operator-overrides.md
+//               resources/overrides/operator-snippets.md
+//
+// To change operator data, edit the overrides/snippets files and re-run the generator.
+
+import { getDocLink } from './docLinks';
+import { registerOperators } from './getFilteredCompletions';
+import { META_WINDOW } from './metaTags';
+import { type OperatorEntry } from './types';
+
+// ---------------------------------------------------------------------------
+// Window Operators
+// ---------------------------------------------------------------------------
+
+// Completion entries tagged `META_WINDOW`. Entries whose `link` is a literal URL
+// reuse the documentation page of the same operator from another category (see the
+// trailing comments); the remaining links are resolved through `getDocLink`.
+//
+// Snippets must expand to syntactically valid operator invocations:
+//   - `$count` takes no argument, so its operand is an empty document.
+//   - `$top` / `$bottom` / `$topN` / `$bottomN` take `{ sortBy, output[, n] }`.
+//   - `$minN` takes `{ input, n }`.
+//   - `$covariancePop` / `$covarianceSamp` take an array of two expressions.
+// NOTE(review): this file is generated — mirror these snippet fixes in
+// resources/overrides/operator-snippets.md so the generator does not revert them.
+const windowOperators: readonly OperatorEntry[] = [
+    {
+        value: '$sum',
+        meta: META_WINDOW,
+        description: 'The $sum operator calculates the sum of the values of a field based on a filtering criteria',
+        snippet: '{ $sum: "${1:\\$field}" }',
+        link: 'https://learn.microsoft.com/en-us/azure/documentdb/operators/accumulators/$sum', // inferred from another category
+    },
+    {
+        value: '$push',
+        meta: META_WINDOW,
+        description: 'The $push operator adds a specified value to an array within a document.',
+        snippet: '{ $push: "${1:\\$field}" }',
+        link: 'https://learn.microsoft.com/en-us/azure/documentdb/operators/array-update/$push', // inferred from another category
+    },
+    {
+        value: '$addToSet',
+        meta: META_WINDOW,
+        description:
+            "The addToSet operator adds elements to an array if they don't already exist, while ensuring uniqueness of elements within the set.",
+        snippet: '{ $addToSet: "${1:\\$field}" }',
+        link: 'https://learn.microsoft.com/en-us/azure/documentdb/operators/array-update/$addtoset', // inferred from another category
+    },
+    {
+        value: '$count',
+        meta: META_WINDOW,
+        description:
+            'The `$count` operator is used to count the number of documents that match a query filtering criteria.',
+        // $count accepts no argument; the operand must be an empty document.
+        snippet: '{ $count: {} }',
+        link: 'https://learn.microsoft.com/en-us/azure/documentdb/operators/accumulators/$count', // inferred from another category
+    },
+    {
+        value: '$max',
+        meta: META_WINDOW,
+        description: 'The $max operator returns the maximum value from a set of input values.',
+        snippet: '{ $max: "${1:\\$field}" }',
+        link: 'https://learn.microsoft.com/en-us/azure/documentdb/operators/accumulators/$max', // inferred from another category
+    },
+    {
+        value: '$min',
+        meta: META_WINDOW,
+        description: 'Retrieves the minimum value for a specified field',
+        snippet: '{ $min: "${1:\\$field}" }',
+        link: 'https://learn.microsoft.com/en-us/azure/documentdb/operators/accumulators/$min', // inferred from another category
+    },
+    {
+        value: '$avg',
+        meta: META_WINDOW,
+        description: 'Computes the average of numeric values for documents in a group, bucket, or window.',
+        snippet: '{ $avg: "${1:\\$field}" }',
+        link: 'https://learn.microsoft.com/en-us/azure/documentdb/operators/accumulators/$avg', // inferred from another category
+    },
+    {
+        value: '$stdDevPop',
+        meta: META_WINDOW,
+        description: 'The $stddevpop operator calculates the standard deviation of the specified values',
+        snippet: '{ $stdDevPop: "${1:\\$field}" }',
+        link: 'https://learn.microsoft.com/en-us/azure/documentdb/operators/accumulators/$stddevpop', // inferred from another category
+    },
+    {
+        value: '$bottom',
+        meta: META_WINDOW,
+        description:
+            "The $bottom operator returns the last document from the query's result set sorted by one or more fields",
+        snippet: '{ $bottom: { sortBy: { "${1:field}": ${2:1} }, output: "${3:\\$field}" } }',
+        link: 'https://learn.microsoft.com/en-us/azure/documentdb/operators/accumulators/$bottom', // inferred from another category
+    },
+    {
+        value: '$bottomN',
+        meta: META_WINDOW,
+        description: 'The $bottomN operator returns the last N documents from the result sorted by one or more fields',
+        snippet: '{ $bottomN: { n: ${1:number}, sortBy: { "${2:field}": ${3:1} }, output: "${4:\\$field}" } }',
+        link: 'https://learn.microsoft.com/en-us/azure/documentdb/operators/accumulators/$bottomn', // inferred from another category
+    },
+    {
+        value: '$covariancePop',
+        meta: META_WINDOW,
+        description: 'The $covariancePop operator returns the covariance of two numerical expressions',
+        snippet: '{ $covariancePop: ["${1:\\$field1}", "${2:\\$field2}"] }',
+        link: getDocLink('$covariancePop', META_WINDOW),
+    },
+    {
+        value: '$covarianceSamp',
+        meta: META_WINDOW,
+        description: 'The $covarianceSamp operator returns the covariance of a sample of two numerical expressions',
+        snippet: '{ $covarianceSamp: ["${1:\\$field1}", "${2:\\$field2}"] }',
+        link: getDocLink('$covarianceSamp', META_WINDOW),
+    },
+    {
+        value: '$denseRank',
+        meta: META_WINDOW,
+        description:
+            'The $denseRank operator assigns and returns a positional ranking for each document within a partition based on a specified sort order',
+        snippet: '{ $denseRank: {} }',
+        link: getDocLink('$denseRank', META_WINDOW),
+    },
+    {
+        value: '$derivative',
+        meta: META_WINDOW,
+        description:
+            'The $derivative operator calculates the average rate of change of the value of a field within a specified window.',
+        snippet: '{ $derivative: { input: "${1:\\$field}", unit: "${2:hour}" } }',
+        link: getDocLink('$derivative', META_WINDOW),
+    },
+    {
+        value: '$documentNumber',
+        meta: META_WINDOW,
+        description:
+            'The $documentNumber operator assigns and returns a position for each document within a partition based on a specified sort order',
+        snippet: '{ $documentNumber: {} }',
+        link: getDocLink('$documentNumber', META_WINDOW),
+    },
+    {
+        value: '$expMovingAvg',
+        meta: META_WINDOW,
+        description:
+            'The $expMovingAvg operator calculates the moving average of a field based on the specified number of documents to hold the highest weight',
+        snippet: '{ $expMovingAvg: { input: "${1:\\$field}", N: ${2:number} } }',
+        link: getDocLink('$expMovingAvg', META_WINDOW),
+    },
+    {
+        value: '$first',
+        meta: META_WINDOW,
+        description: "The $first operator returns the first value in a group according to the group's sorting order.",
+        snippet: '{ $first: "${1:\\$field}" }',
+        link: 'https://learn.microsoft.com/en-us/azure/documentdb/operators/accumulators/$first', // inferred from another category
+    },
+    {
+        value: '$integral',
+        meta: META_WINDOW,
+        description:
+            'The $integral operator calculates the area under a curve with the specified range of documents forming the adjacent documents for the calculation.',
+        snippet: '{ $integral: { input: "${1:\\$field}", unit: "${2:hour}" } }',
+        link: getDocLink('$integral', META_WINDOW),
+    },
+    {
+        value: '$last',
+        meta: META_WINDOW,
+        description: 'The $last operator returns the last document from the result sorted by one or more fields',
+        snippet: '{ $last: "${1:\\$field}" }',
+        link: 'https://learn.microsoft.com/en-us/azure/documentdb/operators/accumulators/$last', // inferred from another category
+    },
+    {
+        value: '$linearFill',
+        meta: META_WINDOW,
+        description:
+            'The $linearFill operator interpolates missing values in a sequence of documents using linear interpolation.',
+        snippet: '{ $linearFill: "${1:\\$field}" }',
+        link: getDocLink('$linearFill', META_WINDOW),
+    },
+    {
+        value: '$locf',
+        meta: META_WINDOW,
+        description:
+            'The $locf operator propagates the last observed non-null value forward within a partition in a windowed query.',
+        snippet: '{ $locf: "${1:\\$field}" }',
+        link: getDocLink('$locf', META_WINDOW),
+    },
+    {
+        value: '$minN',
+        meta: META_WINDOW,
+        description: 'Retrieves the bottom N values based on a specified filtering criteria',
+        snippet: '{ $minN: { input: "${1:\\$field}", n: ${2:number} } }',
+        link: 'https://learn.microsoft.com/en-us/azure/documentdb/operators/accumulators/$minn',
+    },
+    {
+        value: '$rank',
+        meta: META_WINDOW,
+        description: 'The $rank operator ranks documents within a partition based on a specified sort order.',
+        snippet: '{ $rank: {} }',
+        link: getDocLink('$rank', META_WINDOW),
+    },
+    {
+        value: '$shift',
+        meta: META_WINDOW,
+        description: 'A window operator that shifts values within a partition and returns the shifted value.',
+        snippet: '{ $shift: { output: "${1:\\$field}", by: ${2:1}, default: ${3:null} } }',
+        link: getDocLink('$shift', META_WINDOW),
+    },
+    {
+        value: '$stdDevSamp',
+        meta: META_WINDOW,
+        description:
+            'The $stddevsamp operator calculates the standard deviation of a specified sample of values and not the entire population',
+        snippet: '{ $stdDevSamp: "${1:\\$field}" }',
+        link: 'https://learn.microsoft.com/en-us/azure/documentdb/operators/accumulators/$stddevsamp', // inferred from another category
+    },
+    {
+        value: '$top',
+        meta: META_WINDOW,
+        description: 'The $top operator returns the first document from the result set sorted by one or more fields',
+        snippet: '{ $top: { sortBy: { "${1:field}": ${2:1} }, output: "${3:\\$field}" } }',
+        link: 'https://learn.microsoft.com/en-us/azure/documentdb/operators/accumulators/$top', // inferred from another category
+    },
+    {
+        value: '$topN',
+        meta: META_WINDOW,
+        description: 'The $topN operator returns the first N documents from the result sorted by one or more fields',
+        snippet: '{ $topN: { n: ${1:number}, sortBy: { "${2:field}": ${3:1} }, output: "${4:\\$field}" } }',
+        link: 'https://learn.microsoft.com/en-us/azure/documentdb/operators/accumulators/$topn', // inferred from another category
+    },
+];
+
+// ---------------------------------------------------------------------------
+// Registration
+// ---------------------------------------------------------------------------
+
+/**
+ * Hands a shallow copy of the window operator entries to `registerOperators`,
+ * leaving the module-level readonly array itself untouched.
+ */
+export function loadWindowOperators(): void {
+    const entries = windowOperators.slice();
+    registerOperators(entries);
+}
diff --git a/packages/documentdb-constants/tsconfig.json b/packages/documentdb-constants/tsconfig.json
new file mode 100644
index 000000000..8688f97ff
--- /dev/null
+++ b/packages/documentdb-constants/tsconfig.json
@@ -0,0 +1,20 @@
+{
+  "compilerOptions": {
+    "composite": true,
+    "declaration": true,
+    "declarationMap": true,
+    "module": "commonjs",
+    "target": "ES2023",
+    "lib": ["ES2023"],
+    "rootDir": "./src",
+    "outDir": "./dist",
+    "strict": true,
+    "esModuleInterop": true,
+    "allowSyntheticDefaultImports": true,
+    "skipLibCheck": true,
+    "resolveJsonModule": true,
+    "sourceMap": true
+  },
+  "include": ["src/**/*"],
+  "exclude": ["node_modules", "dist"]
+}
diff --git a/packages/documentdb-constants/tsconfig.scripts.json b/packages/documentdb-constants/tsconfig.scripts.json
new file mode 100644
index 000000000..841c83b0a
--- /dev/null
+++ b/packages/documentdb-constants/tsconfig.scripts.json
@@ -0,0 +1,13 @@
+{
+  "extends": "./tsconfig.json",
+  "compilerOptions": {
+    "composite": false,
+    "declaration": false,
+    "declarationMap": false,
+    "noEmit": true,
+    "rootDir": ".",
+    "types": ["node"]
+  },
+  "include": ["scripts/**/*", "src/**/*"],
+  "exclude": ["node_modules", "dist", "src/**/*.test.ts", "src/**/*.spec.ts"]
+}
diff --git a/packages/schema-analyzer/README.md b/packages/schema-analyzer/README.md
new file mode 100644
index 000000000..1efa58f49
--- /dev/null
+++ b/packages/schema-analyzer/README.md
@@ -0,0 +1,52 @@
+# @vscode-documentdb/schema-analyzer
+
+Incremental JSON Schema analyzer for DocumentDB API and MongoDB API documents. Processes documents one at a time (or in batches) and produces an extended JSON Schema with statistical metadata — field occurrence counts, BSON type distributions, min/max values, and array length stats.
+
+> **Monorepo package** — this package is part of the `vscode-documentdb` workspace.
+> Dev dependencies (Jest, ts-jest, Prettier, TypeScript, etc.) are provided by the
+> root `package.json`. Always install from the repository root:
+>
+> ```bash
+> cd <repo-root>
+> npm install
+> ```
+>
+> **Note:** This package is not yet published to npm. We plan to publish it once the API stabilizes. For now, it is consumed internally via npm workspaces within the [vscode-documentdb](https://github.com/microsoft/vscode-documentdb) repository.
+
+## Overview
+
+The `SchemaAnalyzer` incrementally builds a JSON Schema by inspecting DocumentDB API / MongoDB API documents. It is designed for scenarios where documents arrive over time (streaming, pagination) and the schema needs to evolve as new documents are observed.
+
+Key capabilities:
+
+- **Incremental analysis** — add documents one at a time or in batches; the schema updates in place.
+- **BSON type awareness** — recognizes BSON types defined by the MongoDB API (`ObjectId`, `Decimal128`, `Binary`, `UUID`, etc.) and annotates them with `x-bsonType`.
+- **Statistical extensions** — tracks field occurrence (`x-occurrence`), type frequency (`x-typeOccurrence`), min/max values, string lengths, array sizes, and document counts (`x-documentsInspected`).
+- **Known fields extraction** — derives a flat list of known field paths with their types and occurrence probabilities, useful for autocomplete and UI rendering.
+- **Version tracking & caching** — a monotonic version counter enables efficient cache invalidation for derived data like `getKnownFields()`.
+
+## Usage
+
+```typescript
+import { SchemaAnalyzer } from '@vscode-documentdb/schema-analyzer';
+
+// Create an analyzer and feed it documents
+const analyzer = new SchemaAnalyzer();
+analyzer.addDocument(doc1);
+analyzer.addDocuments([doc2, doc3, doc4]);
+
+// Get the JSON Schema with statistical extensions
+const schema = analyzer.getSchema();
+
+// Get a flat list of known fields (cached, version-aware)
+const fields = analyzer.getKnownFields();
+```
+
+## Requirements
+
+- **Node.js** ≥ 18
+- **mongodb** driver ≥ 6.0.0 (peer dependency)
+
+## License
+
+[MIT](../../LICENSE.md)
diff --git a/packages/schema-analyzer/jest.config.js b/packages/schema-analyzer/jest.config.js
new file mode 100644
index 000000000..6aecf39aa
--- /dev/null
+++ b/packages/schema-analyzer/jest.config.js
@@ -0,0 +1,11 @@
+/**
+ * Jest configuration for this package: runs the TypeScript tests under `test/`
+ * in a Node environment, compiling them on the fly with ts-jest.
+ *
+ * @type {import('ts-jest').JestConfigWithTsJest}
+ */
+module.exports = {
+    // Limit workers to avoid OOM kills on machines with many cores.
+    // Each ts-jest worker loads the TypeScript compiler and consumes ~500MB+.
+    maxWorkers: '50%',
+    testEnvironment: 'node',
+    // Only files ending in `.test.ts` below <rootDir>/test are collected.
+    testMatch: ['<rootDir>/test/**/*.test.ts'],
+    transform: {
+        // Route every .ts/.tsx file through ts-jest with its default options.
+        '^.+\\.tsx?$': ['ts-jest', {}],
+    },
+};
diff --git a/packages/schema-analyzer/package.json b/packages/schema-analyzer/package.json
new file mode 100644
index 000000000..3751cdba2
--- /dev/null
+++ b/packages/schema-analyzer/package.json
@@ -0,0 +1,27 @@
+{
+  "name": "@vscode-documentdb/schema-analyzer",
+  "version": "1.0.0",
+  "description": "Incremental JSON Schema analyzer for DocumentDB API / MongoDB API documents with statistical extensions",
+  "main": "dist/index.js",
+  "types": "dist/index.d.ts",
+  "files": [
+    "dist"
+  ],
+  "scripts": {
+    "build": "tsc -p .",
+    "clean": "rimraf dist tsconfig.tsbuildinfo",
+    "test": "jest --config jest.config.js"
+  },
+  "repository": {
+    "type": "git",
+    "url": "https://github.com/microsoft/vscode-documentdb",
+    "directory": "packages/schema-analyzer"
+  },
+  "license": "MIT",
+  "peerDependencies": {
+    "mongodb": ">=6.0.0"
+  },
+  "dependencies": {
+    "denque": "~2.1.0"
+  }
+}
diff --git a/packages/schema-analyzer/src/BSONTypes.ts b/packages/schema-analyzer/src/BSONTypes.ts
new file mode 100644
index 000000000..b8fb92f16
--- /dev/null
+++ b/packages/schema-analyzer/src/BSONTypes.ts
@@ -0,0 +1,199 @@
+/*---------------------------------------------------------------------------------------------
+ *  Copyright (c) Microsoft Corporation. All rights reserved.
+ *  Licensed under the MIT License. See License.txt in the project root for license information.
+ *--------------------------------------------------------------------------------------------*/
+
+import {
+    Binary,
+    BSONSymbol,
+    Code,
+    DBRef,
+    Decimal128,
+    Double,
+    Int32,
+    Long,
+    MaxKey,
+    MinKey,
+    ObjectId,
+    Timestamp,
+    UUID,
+} from 'mongodb';
+
+/**
+ * Represents the different data types that can be stored in a DocumentDB API / MongoDB API document.
+ * The string representation is case-sensitive and should match the MongoDB API documentation.
+ * https://www.mongodb.com/docs/manual/reference/bson-types/
+ *
+ * This enum is merged with the `BSONTypes` namespace declared right after it, which
+ * contributes the helper functions (`toDisplayString`, `toString`, `toJSONType`, `inferType`).
+ * Every member must also have an entry in that namespace's `displayStringMap`, because
+ * the map is typed as `Record<BSONTypes, string>`.
+ */
+export enum BSONTypes {
+    String = 'string',
+    Number = 'number', // generic number; `inferType` reports plain JavaScript numbers as Double instead
+    Int32 = 'int32',
+    Double = 'double',
+    Decimal128 = 'decimal128',
+    Long = 'long',
+    Boolean = 'boolean',
+    Object = 'object',
+    Array = 'array',
+    Null = 'null',
+    Undefined = 'undefined',
+    Date = 'date',
+    RegExp = 'regexp',
+    Binary = 'binary',
+    ObjectId = 'objectid',
+    Symbol = 'symbol',
+    Timestamp = 'timestamp',
+    UUID = 'uuid',
+    UUID_LEGACY = 'uuid-legacy', // old UUID subtype, used in some legacy data
+    MinKey = 'minkey',
+    MaxKey = 'maxkey',
+    DBRef = 'dbref',
+    Code = 'code',
+    CodeWithScope = 'codewithscope',
+    Map = 'map',
+    // Add any deprecated types if necessary
+    _UNKNOWN_ = '_unknown_', // Catch-all for unknown types
+}
+
+/**
+ * Helper functions merged onto the `BSONTypes` enum: display names, JSON Schema
+ * type mapping, and runtime type inference for document values.
+ */
+export namespace BSONTypes {
+    // Human-readable label for every enum member. Typed as a full Record so that
+    // adding an enum member without a label is a compile error.
+    const displayStringMap: Record<BSONTypes, string> = {
+        [BSONTypes.String]: 'String',
+        [BSONTypes.Number]: 'Number',
+        [BSONTypes.Int32]: 'Int32',
+        [BSONTypes.Double]: 'Double',
+        [BSONTypes.Decimal128]: 'Decimal128',
+        [BSONTypes.Long]: 'Long',
+        [BSONTypes.Boolean]: 'Boolean',
+        [BSONTypes.Object]: 'Object',
+        [BSONTypes.Array]: 'Array',
+        [BSONTypes.Null]: 'Null',
+        [BSONTypes.Undefined]: 'Undefined',
+        [BSONTypes.Date]: 'Date',
+        [BSONTypes.RegExp]: 'RegExp',
+        [BSONTypes.Binary]: 'Binary',
+        [BSONTypes.ObjectId]: 'ObjectId',
+        [BSONTypes.Symbol]: 'Symbol',
+        [BSONTypes.Timestamp]: 'Timestamp',
+        [BSONTypes.MinKey]: 'MinKey',
+        [BSONTypes.MaxKey]: 'MaxKey',
+        [BSONTypes.DBRef]: 'DBRef',
+        [BSONTypes.Code]: 'Code',
+        [BSONTypes.CodeWithScope]: 'CodeWithScope',
+        [BSONTypes.Map]: 'Map',
+        [BSONTypes._UNKNOWN_]: 'Unknown',
+        [BSONTypes.UUID]: 'UUID',
+        [BSONTypes.UUID_LEGACY]: 'UUID (Legacy)',
+    };
+
+    /**
+     * Returns the human-readable label for a type.
+     * @param type The BSON type to label
+     * @returns The label from `displayStringMap`, or 'Unknown' when the value has no entry
+     *          (possible only if a non-enum string is passed in at runtime)
+     */
+    export function toDisplayString(type: BSONTypes): string {
+        return displayStringMap[type] || 'Unknown';
+    }
+
+    /**
+     * Returns the enum member's underlying string value unchanged.
+     * @param type The BSON type
+     * @returns The lowercase identifier stored in the enum
+     */
+    export function toString(type: BSONTypes): string {
+        return type;
+    }
+
+    /**
+     * Converts a MongoDB API data type to a case-sensitive JSON data type
+     * @param type The MongoDB API data type
+     * @returns A corresponding JSON data type (please note: it's case sensitive)
+     */
+    export function toJSONType(type: BSONTypes): string {
+        switch (type) {
+            case BSONTypes.String:
+            case BSONTypes.Symbol:
+            case BSONTypes.Date:
+            case BSONTypes.Timestamp:
+            case BSONTypes.ObjectId:
+            case BSONTypes.RegExp:
+            case BSONTypes.Binary:
+            case BSONTypes.Code:
+            case BSONTypes.UUID:
+            case BSONTypes.UUID_LEGACY:
+                return 'string';
+
+            case BSONTypes.Boolean:
+                return 'boolean';
+
+            case BSONTypes.Int32:
+            case BSONTypes.Long:
+            case BSONTypes.Double:
+            case BSONTypes.Decimal128:
+                return 'number';
+
+            case BSONTypes.Object:
+            case BSONTypes.Map:
+            case BSONTypes.DBRef:
+            case BSONTypes.CodeWithScope:
+                return 'object';
+
+            case BSONTypes.Array:
+                return 'array';
+
+            case BSONTypes.Null:
+            case BSONTypes.Undefined:
+            case BSONTypes.MinKey:
+            case BSONTypes.MaxKey:
+                return 'null';
+
+            default:
+                // Reached for BSONTypes.Number and BSONTypes._UNKNOWN_, which have no case above.
+                return 'string'; // Default to string for unknown types
+        }
+    }
+
+    /**
+     * Accepts a value from a MongoDB API `Document` object and returns the inferred type.
+     *
+     * Plain JavaScript numbers are reported as `Double`. Objects are matched against the
+     * driver's BSON wrapper classes; anything that matches none of them is reported as
+     * `Object`. Values whose `typeof` is not string/number/boolean/object (for example
+     * bigint or function) yield `_UNKNOWN_`.
+     *
+     * @param value The value of a field in a MongoDB API `Document` object
+     * @returns The inferred BSON type
+     */
+    export function inferType(value: unknown): BSONTypes {
+        if (value === null) return BSONTypes.Null;
+        if (value === undefined) return BSONTypes.Undefined;
+
+        switch (typeof value) {
+            case 'string':
+                return BSONTypes.String;
+            case 'number':
+                return BSONTypes.Double; // JavaScript numbers are doubles
+            case 'boolean':
+                return BSONTypes.Boolean;
+            case 'object':
+                if (Array.isArray(value)) {
+                    return BSONTypes.Array;
+                }
+
+                // Check for common BSON types first
+                if (value instanceof ObjectId) return BSONTypes.ObjectId;
+                if (value instanceof Int32) return BSONTypes.Int32;
+                if (value instanceof Double) return BSONTypes.Double;
+                if (value instanceof Date) return BSONTypes.Date;
+                if (value instanceof Timestamp) return BSONTypes.Timestamp;
+
+                // Less common types
+                if (value instanceof Decimal128) return BSONTypes.Decimal128;
+                if (value instanceof Long) return BSONTypes.Long;
+                if (value instanceof MinKey) return BSONTypes.MinKey;
+                if (value instanceof MaxKey) return BSONTypes.MaxKey;
+                if (value instanceof BSONSymbol) return BSONTypes.Symbol;
+                if (value instanceof DBRef) return BSONTypes.DBRef;
+                if (value instanceof Map) return BSONTypes.Map;
+
+                // UUID extends Binary, so both UUID checks must run before the generic Binary check.
+                if (value instanceof UUID && value.sub_type === Binary.SUBTYPE_UUID) return BSONTypes.UUID;
+                // Legacy UUIDs (binary subtype 3) are detected on Binary rather than UUID: the driver's
+                // UUID class is constructed with subtype 4, so a subtype-3 value is a plain Binary and
+                // an `instanceof UUID` guard here would never match.
+                // NOTE(review): consumers of UUID_LEGACY should treat the value as a Binary — confirm.
+                if (value instanceof Binary && value.sub_type === Binary.SUBTYPE_UUID_OLD) {
+                    return BSONTypes.UUID_LEGACY;
+                }
+                if (value instanceof Buffer || value instanceof Binary) return BSONTypes.Binary;
+                if (value instanceof RegExp) return BSONTypes.RegExp;
+                if (value instanceof Code) {
+                    // A Code value carrying a scope document is a distinct BSON type.
+                    return value.scope ? BSONTypes.CodeWithScope : BSONTypes.Code;
+                }
+
+                // Default to Object if none of the above match
+                return BSONTypes.Object;
+            default:
+                // This should never happen, but if it does, we'll catch it here
+                // TODO: add telemetry somewhere to know when it happens (not here, this could get hit too often)
+                return BSONTypes._UNKNOWN_;
+        }
+    }
+}
diff --git a/src/utils/json/JSONSchema.ts b/packages/schema-analyzer/src/JSONSchema.ts
similarity index 80%
rename from src/utils/json/JSONSchema.ts
rename to packages/schema-analyzer/src/JSONSchema.ts
index 467669ed5..3127932d6 100644
--- a/src/utils/json/JSONSchema.ts
+++ b/packages/schema-analyzer/src/JSONSchema.ts
@@ -24,16 +24,14 @@ export interface JSONSchema {
     $id?: string;
     $schema?: string;
     type?: string | string[];
-    'x-documentsInspected'?: number;
-    'x-occurrence'?: number;
-    'x-typeOccurrence'?: number;
-    'x-bsonType'?: string; // Explicitly declare the key with a dash using quotes
     title?: string;
+    description?: string;
     definitions?: {
         [name: string]: JSONSchema;
     };
-    description?: string;
-    properties?: JSONSchema; // changed from: JSONSchemaMap;
+
+    // Structure
+    properties?: JSONSchemaMap;
     patternProperties?: JSONSchemaMap;
     additionalProperties?: JSONSchemaRef;
     minProperties?: number;
@@ -44,7 +42,6 @@ export interface JSONSchema {
               [prop: string]: string[];
           };
     items?: JSONSchemaRef | JSONSchemaRef[];
-
     required?: string[];
     $ref?: string;
     anyOf?: JSONSchemaRef[];
@@ -58,14 +55,35 @@ export interface JSONSchema {
     propertyNames?: JSONSchemaRef;
     examples?: undefined[];
     $comment?: string;
-
     $defs?: {
         [name: string]: JSONSchema;
     };
+
+    // Monaco extensions
     markdownEnumDescriptions?: string[];
     markdownDescription?: string;
     doNotSuggest?: boolean;
     suggestSortText?: string;
+
+    // SchemaAnalyzer extensions — document/field level
+    'x-documentsInspected'?: number;
+    'x-occurrence'?: number;
+
+    // SchemaAnalyzer extensions — type entry level (on entries in anyOf)
+    'x-bsonType'?: string;
+    'x-typeOccurrence'?: number;
+    'x-minValue'?: number;
+    'x-maxValue'?: number;
+    'x-minLength'?: number;
+    'x-maxLength'?: number;
+    'x-minDate'?: number;
+    'x-maxDate'?: number;
+    'x-trueCount'?: number;
+    'x-falseCount'?: number;
+    'x-minItems'?: number;
+    'x-maxItems'?: number;
+    'x-minProperties'?: number;
+    'x-maxProperties'?: number;
 }
 export interface JSONSchemaMap {
     [name: string]: JSONSchemaRef;
diff --git a/src/utils/json/mongo/SchemaAnalyzer.ts b/packages/schema-analyzer/src/SchemaAnalyzer.ts
similarity index 56%
rename from src/utils/json/mongo/SchemaAnalyzer.ts
rename to packages/schema-analyzer/src/SchemaAnalyzer.ts
index 278f51fc4..8f24d532a 100644
--- a/src/utils/json/mongo/SchemaAnalyzer.ts
+++ b/packages/schema-analyzer/src/SchemaAnalyzer.ts
@@ -3,66 +3,125 @@
  *  Licensed under the MIT License. See License.txt in the project root for license information.
  *--------------------------------------------------------------------------------------------*/
 
+import Denque from 'denque';
+import { type Document, type WithId } from 'mongodb';
+import assert from 'node:assert/strict';
+import { BSONTypes } from './BSONTypes';
+import { type JSONSchema, type JSONSchemaRef } from './JSONSchema';
+import { type FieldEntry, getKnownFields as getKnownFieldsFromSchema } from './getKnownFields';
+
 /**
- * This is an example of a JSON Schema document that will be generated from MongoDB documents.
- * It's optimized for the use-case of generating a schema for a table view, the monaco editor, and schema statistics.
- *
- * This is a 'work in progress' and will be updated as we progress with the project.
- *
- * Curent focus is:
- *  - discovery of the document structure
- *  - basic pre for future statistics work
+ * Incremental schema analyzer for documents from the MongoDB API / DocumentDB API.
  *
- * Future tasks:
- *  - statistics aggregation
- *  - meaningful 'description' and 'markdownDescription'
- *  - add more properties to the schema, incl. properties like '$id', '$schema', and enable schema sharing/download
+ * Analyzes documents one at a time (or in batches) and builds a cumulative
+ * JSON Schema with statistical extensions (x-occurrence, x-bsonType, etc.).
  *
+ * The output schema follows JSON Schema draft-07 with custom x- extensions.
+ */
+export class SchemaAnalyzer {
+    private _schema: JSONSchema = {};
+    private _version: number = 0;
+    private _knownFieldsCache: FieldEntry[] | null = null;
+    private _knownFieldsCacheVersion: number = -1;
 
-{
-  "$schema": "http://json-schema.org/draft-07/schema#",
-  "$id": "https://example.com/sample.schema.json",
-  "title": "Sample Document Schema",
-  "type": "object",
-  "properties": {
-    "a-propert-root-level": {
-      "description": "a description as text",
-      "anyOf": [ // anyOf is used to indicate that the value can be of any of the types listed
-        {
-          "type": "string"
-        },
-        {
-          "type": "string"
+    /**
+     * A monotonically increasing version counter. Incremented on every mutation
+     * (addDocument, addDocuments, reset). Adapters can store this value alongside
+     * their cached derived data and recompute only when it changes.
+     *
+     * The counter is per instance: it starts at 0 and is not copied by clone().
+     *
+     * @returns The current version number
+     */
+    get version(): number {
+        return this._version;
+    }
+
+    /**
+     * Folds one document into the accumulated schema and bumps the version counter.
+     * This is the per-document entry point of the incremental API.
+     *
+     * @param document The document whose fields are merged into the schema
+     */
+    addDocument(document: WithId<Document>): void {
+        const accumulated = this._schema;
+        updateSchemaWithDocumentInternal(accumulated, document);
+        this._version += 1;
+    }
+
+    /**
+     * Adds multiple documents to the accumulated schema.
+     * Convenience method equivalent to calling addDocument() for each.
+     * Increments version once for the entire batch — not per document.
+     *
+     * Note that the version is incremented even when `documents` is empty.
+     *
+     * @param documents The documents to merge into the schema, processed in order
+     */
+    addDocuments(documents: ReadonlyArray<WithId<Document>>): void {
+        for (const doc of documents) {
+            updateSchemaWithDocumentInternal(this._schema, doc);
         }
-      ]
-    },
-    "isOpen": {
-      "description": "Indicates if the item is open",
-      "anyOf": [
-        {
-          "type": "boolean"
-        },
-        {
-          "type": "number"
+        this._version++;
+    }
+
+    /**
+     * Returns the current accumulated JSON Schema.
+     * The returned object is a live reference (not a copy) — do not mutate externally.
+     * External mutations would not bump the version counter, so version-keyed caches
+     * such as getKnownFields() would not notice them.
+     *
+     * @returns The internal schema object
+     */
+    getSchema(): JSONSchema {
+        return this._schema;
+    }
+
+    /**
+     * Reports how many documents have been folded into the schema so far.
+     *
+     * @returns The schema's `x-documentsInspected` value, or 0 when it is not set
+     */
+    getDocumentCount(): number {
+        const inspected = this._schema['x-documentsInspected'];
+        return inspected ?? 0;
+    }
+
+    /**
+     * Discards everything accumulated so far and starts over with an empty schema.
+     * The version counter keeps counting upward, so data cached against an earlier
+     * version is treated as stale.
+     */
+    reset(): void {
+        this._version += 1;
+        this._schema = {};
+    }
+
+    /**
+     * Produces an independent analyzer holding a deep copy of the accumulated schema.
+     * Intended for branching (e.g. per aggregation stage) where each branch evolves
+     * its own schema. Only the schema is copied: the new instance has a fresh
+     * version counter (0) and an empty known-fields cache.
+     *
+     * @returns A new analyzer that shares no schema state with this one
+     */
+    clone(): SchemaAnalyzer {
+        const duplicate = new SchemaAnalyzer();
+        const schemaSnapshot = structuredClone(this._schema);
+        duplicate._schema = schemaSnapshot;
+        return duplicate;
+    }
+
+    /**
+     * Returns the cached list of known fields (all nesting levels, sorted).
+     * Recomputed only when the schema version has changed since the last call.
+     *
+     * The returned array is the cached instance itself, so repeated calls at the same
+     * version return the same object — callers should treat it as read-only.
+     *
+     * @returns The known-field entries derived from the current schema
+     */
+    getKnownFields(): FieldEntry[] {
+        // Rebuild on first use (cache is null) or when a mutation has bumped the version.
+        if (this._knownFieldsCacheVersion !== this._version || this._knownFieldsCache === null) {
+            this._knownFieldsCache = getKnownFieldsFromSchema(this._schema);
+            this._knownFieldsCacheVersion = this._version;
         }
-      ]
+        return this._knownFieldsCache;
     }
-  },
-  "required": ["isOpen"]
-}
 
- *
- *
- */
+    /**
+     * Factory shortcut: builds a new analyzer and feeds it exactly one document.
+     *
+     * @param document The document used to seed the schema
+     * @returns An analyzer whose schema reflects that single document
+     */
+    static fromDocument(document: WithId<Document>): SchemaAnalyzer {
+        const seeded = new SchemaAnalyzer();
+        seeded.addDocument(document);
+        return seeded;
+    }
 
-import * as l10n from '@vscode/l10n';
-import { assert } from 'console';
-import Denque from 'denque';
-import { type Document, type WithId } from 'mongodb';
-import { type JSONSchema } from '../JSONSchema';
-import { MongoBSONTypes } from './MongoBSONTypes';
+    /**
+     * Factory shortcut: builds a new analyzer and feeds it a batch of documents
+     * through a single addDocuments() call.
+     *
+     * @param documents The documents used to seed the schema
+     * @returns An analyzer whose schema reflects the whole batch
+     */
+    static fromDocuments(documents: ReadonlyArray<WithId<Document>>): SchemaAnalyzer {
+        const seeded = new SchemaAnalyzer();
+        seeded.addDocuments(documents);
+        return seeded;
+    }
+}
 
-export function updateSchemaWithDocument(schema: JSONSchema, document: WithId<Document>): void {
+function updateSchemaWithDocumentInternal(schema: JSONSchema, document: WithId<Document>): void {
     // Initialize schema if it's empty
     if (!schema.properties) {
         schema.properties = {};
@@ -74,7 +133,7 @@ export function updateSchemaWithDocument(schema: JSONSchema, document: WithId<Do
     // Define the structure of work items to be processed
     type WorkItem = {
         fieldName: string;
-        fieldMongoType: MongoBSONTypes; // The inferred BSON type
+        fieldMongoType: BSONTypes; // The inferred BSON type
         propertySchema: JSONSchema; // Reference to the schema entry within 'properties'
         fieldValue: unknown;
         pathSoFar: string; // Used for debugging and tracing
@@ -87,7 +146,7 @@ export function updateSchemaWithDocument(schema: JSONSchema, document: WithId<Do
      * Start by pushing all root-level elements of the document into the queue
      */
     for (const [name, value] of Object.entries(document)) {
-        const mongoDatatype = MongoBSONTypes.inferType(value);
+        const mongoDatatype = BSONTypes.inferType(value);
 
         // Ensure the field exists in the schema
         if (!schema.properties[name]) {
@@ -110,7 +169,7 @@ export function updateSchemaWithDocument(schema: JSONSchema, document: WithId<Do
         if (!typeEntry) {
             // Create a new type entry
             typeEntry = {
-                type: MongoBSONTypes.toJSONType(mongoDatatype),
+                type: BSONTypes.toJSONType(mongoDatatype),
                 'x-bsonType': mongoDatatype,
                 'x-typeOccurrence': 0,
             };
@@ -144,13 +203,24 @@ export function updateSchemaWithDocument(schema: JSONSchema, document: WithId<Do
         }
 
         switch (item.fieldMongoType) {
-            case MongoBSONTypes.Object: {
+            case BSONTypes.Object: {
                 const objValue = item.fieldValue as Record<string, unknown>;
                 const objKeysCount = Object.keys(objValue).length;
 
                 // Update min and max property counts
                 updateMinMaxStats(item.propertySchema, 'x-minProperties', 'x-maxProperties', objKeysCount);
 
+                // Track how many object instances contributed to this sub-schema.
+                // This enables uniform probability computation at every nesting level:
+                //   probability = property.x-occurrence / parentObject.x-documentsInspected
+                //
+                // Without this, array-embedded objects have no denominator for probability.
+                // Example: doc1.a=[], doc2.a=[{b:1},...,{b:100}]
+                //   b.x-occurrence = 100, root.x-documentsInspected = 2
+                //   Naive: 100/2 = 50, i.e. 5000% — wrong!
+                //   With fix: objectEntry.x-documentsInspected = 100, so 100/100 = 100%
+                item.propertySchema['x-documentsInspected'] = (item.propertySchema['x-documentsInspected'] ?? 0) + 1;
+
                 // Ensure 'properties' exists
                 if (!item.propertySchema.properties) {
                     item.propertySchema.properties = {};
@@ -158,7 +228,7 @@ export function updateSchemaWithDocument(schema: JSONSchema, document: WithId<Do
 
                 // Iterate over the object's properties
                 for (const [name, value] of Object.entries(objValue)) {
-                    const mongoDatatype = MongoBSONTypes.inferType(value);
+                    const mongoDatatype = BSONTypes.inferType(value);
 
                     // Ensure the field exists in the schema
                     if (!item.propertySchema.properties[name]) {
@@ -181,7 +251,7 @@ export function updateSchemaWithDocument(schema: JSONSchema, document: WithId<Do
                     if (!typeEntry) {
                         // Create a new type entry
                         typeEntry = {
-                            type: MongoBSONTypes.toJSONType(mongoDatatype),
+                            type: BSONTypes.toJSONType(mongoDatatype),
                             'x-bsonType': mongoDatatype,
                             'x-typeOccurrence': 0,
                         };
@@ -206,7 +276,7 @@ export function updateSchemaWithDocument(schema: JSONSchema, document: WithId<Do
                 break;
             }
 
-            case MongoBSONTypes.Array: {
+            case BSONTypes.Array: {
                 const arrayValue = item.fieldValue as unknown[];
                 const arrayLength = arrayValue.length;
 
@@ -223,20 +293,18 @@ export function updateSchemaWithDocument(schema: JSONSchema, document: WithId<Do
                 const itemsSchema: JSONSchema = item.propertySchema.items as JSONSchema;
                 assert(itemsSchema !== undefined, 'itemsSchema should not be undefined');
 
-                // Map to track types within the array
-                const encounteredMongoTypes: Map<MongoBSONTypes, JSONSchema> = new Map();
-
                 // Iterate over the array elements
                 for (const element of arrayValue) {
-                    const elementMongoType = MongoBSONTypes.inferType(element);
+                    const elementMongoType = BSONTypes.inferType(element);
 
                     // Find or create the type entry in 'items.anyOf'
                     let itemEntry = findTypeEntry(itemsSchema.anyOf as JSONSchema[], elementMongoType);
+                    const isNewTypeEntry = !itemEntry;
 
                     if (!itemEntry) {
                         // Create a new type entry
                         itemEntry = {
-                            type: MongoBSONTypes.toJSONType(elementMongoType),
+                            type: BSONTypes.toJSONType(elementMongoType),
                             'x-bsonType': elementMongoType,
                             'x-typeOccurrence': 0,
                         };
@@ -249,18 +317,19 @@ export function updateSchemaWithDocument(schema: JSONSchema, document: WithId<Do
                     // Increment the type occurrence count
                     itemEntry['x-typeOccurrence'] = (itemEntry['x-typeOccurrence'] ?? 0) + 1;
 
-                    // Update stats for the element
-                    if (!encounteredMongoTypes.has(elementMongoType)) {
-                        // First occurrence, initialize stats
+                    // Update stats for the element.
+                    // Use initializeStatsForValue only when the type entry is brand new
+                    // (first element of this type ever seen). For subsequent elements —
+                    // whether in the same array or across documents — always aggregate
+                    // to avoid overwriting previously accumulated min/max stats.
+                    if (isNewTypeEntry) {
                         initializeStatsForValue(element, elementMongoType, itemEntry);
-                        encounteredMongoTypes.set(elementMongoType, itemEntry);
                     } else {
-                        // Subsequent occurrences, aggregate stats
                         aggregateStatsForValue(element, elementMongoType, itemEntry);
                     }
 
                     // If the element is an object or array, queue it for further processing
-                    if (elementMongoType === MongoBSONTypes.Object || elementMongoType === MongoBSONTypes.Array) {
+                    if (elementMongoType === BSONTypes.Object || elementMongoType === BSONTypes.Array) {
                         fifoQueue.push({
                             fieldName: '[]', // Array items don't have a specific field name
                             fieldMongoType: elementMongoType,
@@ -291,7 +360,7 @@ export function updateSchemaWithDocument(schema: JSONSchema, document: WithId<Do
 /**
  * Helper function to find a type entry in 'anyOf' array based on 'x-bsonType'
  */
-function findTypeEntry(anyOfArray: JSONSchema[], bsonType: MongoBSONTypes): JSONSchema | undefined {
+function findTypeEntry(anyOfArray: JSONSchema[], bsonType: BSONTypes): JSONSchema | undefined {
     return anyOfArray.find((entry) => entry['x-bsonType'] === bsonType);
 }
 
@@ -299,221 +368,69 @@ function findTypeEntry(anyOfArray: JSONSchema[], bsonType: MongoBSONTypes): JSON
  * Helper function to update min and max stats
  */
 function updateMinMaxStats(schema: JSONSchema, minKey: string, maxKey: string, value: number): void {
-    if (schema[minKey] === undefined || value < schema[minKey]) {
-        schema[minKey] = value;
+    const record = schema as Record<string, unknown>;
+    if (record[minKey] === undefined || value < (record[minKey] as number)) {
+        record[minKey] = value;
     }
-    if (schema[maxKey] === undefined || value > schema[maxKey]) {
-        schema[maxKey] = value;
+    if (record[maxKey] === undefined || value > (record[maxKey] as number)) {
+        record[maxKey] = value;
     }
 }
 
-export function getSchemaFromDocument(document: WithId<Document>): JSONSchema {
-    const schema: JSONSchema = {};
-    schema['x-documentsInspected'] = 1; // we're inspecting one document, this will make sense when we start aggregating stats
-    schema.properties = {};
-
-    type WorkItem = {
-        fieldName: string;
-        fieldMongoType: MongoBSONTypes; // the inferred BSON type
-        propertyTypeEntry: JSONSchema; // points to the entry within the 'anyOf' property of the schema
-        fieldValue: unknown;
-        pathSoFar: string; // used for debugging
-    };
-
-    // having some import/require issues with Denque atm
-    // prototype with an array
-    //const fifoQueue = new Denque();
-    const fifoQueue: WorkItem[] = [];
-
-    /**
-     * Push all elements from the root of the document into the queue
-     */
-    for (const [name, value] of Object.entries(document)) {
-        const mongoDatatype = MongoBSONTypes.inferType(value);
-
-        const typeEntry = {
-            type: MongoBSONTypes.toJSONType(mongoDatatype),
-            'x-bsonType': mongoDatatype,
-            'x-typeOccurrence': 1,
-        };
-
-        // please note (1/2): we're adding the type entry to the schema here
-        schema.properties[name] = { anyOf: [typeEntry], 'x-occurrence': 1 };
-
-        fifoQueue.push({
-            fieldName: name,
-            fieldMongoType: mongoDatatype,
-            propertyTypeEntry: typeEntry, // please note (2/2): and we're keeping a reference to it here for further updates
-            fieldValue: value,
-            pathSoFar: name,
-        });
-    }
-
-    /**
-     * Work through the queue, adding elements to the schema as we go.
-     * This is a breadth-first search of the document, do note special
-     * handling on objects/arrays
-     */
-    while (fifoQueue.length > 0) {
-        const item = fifoQueue.shift(); // todo, replace with a proper queue
-        if (item === undefined) {
-            // unexpected, but let's try to continue
-            continue;
-        }
-
-        switch (item.fieldMongoType) {
-            case MongoBSONTypes.Object: {
-                const objKeys = Object.keys(item.fieldValue as object).length;
-                item.propertyTypeEntry['x-maxLength'] = objKeys;
-                item.propertyTypeEntry['x-minLength'] = objKeys;
-
-                // prepare an entry for the object properties
-                item.propertyTypeEntry.properties = {};
-
-                for (const [name, value] of Object.entries(item.fieldValue as object)) {
-                    const mongoDatatype = MongoBSONTypes.inferType(value);
-
-                    const typeEntry = {
-                        type: MongoBSONTypes.toJSONType(mongoDatatype),
-                        'x-bsonType': mongoDatatype,
-                        'x-typeOccurrence': 1,
-                    };
-
-                    // please note (1/2): we're adding the entry to the main schema here
-                    item.propertyTypeEntry.properties[name] = { anyOf: [typeEntry], 'x-occurrence': 1 };
-
-                    fifoQueue.push({
-                        fieldName: name,
-                        fieldMongoType: mongoDatatype,
-                        propertyTypeEntry: typeEntry, // please note (2/2): and we're keeping a reference to it here for further updates to the schema
-                        fieldValue: value,
-                        pathSoFar: `${item.pathSoFar}.${item.fieldName}`,
-                    });
-                }
-                break;
-            }
-            case MongoBSONTypes.Array: {
-                const arrayLength = (item.fieldValue as unknown[]).length;
-                item.propertyTypeEntry['x-maxLength'] = arrayLength;
-                item.propertyTypeEntry['x-minLength'] = arrayLength;
-
-                // preapare the array items entry (in two lines for ts not to compalin about the missing type later on)
-                item.propertyTypeEntry.items = {};
-                item.propertyTypeEntry.items.anyOf = [];
-
-                const encounteredMongoTypes: Map<MongoBSONTypes, JSONSchema> = new Map();
-
-                // iterate over the array and infer the type of each element
-                for (const element of item.fieldValue as unknown[]) {
-                    const elementMongoType = MongoBSONTypes.inferType(element);
-
-                    let itemEntry: JSONSchema;
-
-                    if (!encounteredMongoTypes.has(elementMongoType)) {
-                        itemEntry = {
-                            type: MongoBSONTypes.toJSONType(elementMongoType),
-                            'x-bsonType': elementMongoType,
-                            'x-typeOccurrence': 1, // Initialize type occurrence counter
-                        };
-                        item.propertyTypeEntry.items.anyOf.push(itemEntry);
-                        encounteredMongoTypes.set(elementMongoType, itemEntry);
-
-                        initializeStatsForValue(element, elementMongoType, itemEntry);
-                    } else {
-                        // if we've already encountered this type, we'll just add the type to the existing entry
-                        itemEntry = encounteredMongoTypes.get(elementMongoType) as JSONSchema;
-
-                        if (itemEntry === undefined) continue; // unexpected, but let's try to continue
-
-                        if (itemEntry['x-typeOccurrence'] !== undefined) {
-                            itemEntry['x-typeOccurrence'] += 1;
-                        }
-
-                        // Aggregate stats with the new value
-                        aggregateStatsForValue(element, elementMongoType, itemEntry);
-                    }
-
-                    // an imporant exception for arrays as we have to start adding them already now to the schema
-                    // (if we want to avoid more iterations over the data)
-                    if (elementMongoType === MongoBSONTypes.Object || elementMongoType === MongoBSONTypes.Array) {
-                        fifoQueue.push({
-                            fieldName: '[]', // Array items don't have a field name
-                            fieldMongoType: elementMongoType,
-                            propertyTypeEntry: itemEntry,
-                            fieldValue: element,
-                            pathSoFar: `${item.pathSoFar}.${item.fieldName}.items`,
-                        });
-                    }
-                }
-
-                break;
-            }
-
-            default: {
-                // For all other types, update stats for the value
-                initializeStatsForValue(item.fieldValue, item.fieldMongoType, item.propertyTypeEntry);
-                break;
-            }
-        }
-    }
-
-    return schema;
-}
-
 /**
  * Helper function to compute stats for a value based on its MongoDB data type
  * Updates the provided propertyTypeEntry with the computed stats
  */
-function initializeStatsForValue(value: unknown, mongoType: MongoBSONTypes, propertyTypeEntry: JSONSchema): void {
+function initializeStatsForValue(value: unknown, mongoType: BSONTypes, propertyTypeEntry: JSONSchema): void {
     switch (mongoType) {
-        case MongoBSONTypes.String: {
+        case BSONTypes.String: {
             const currentLength = (value as string).length;
             propertyTypeEntry['x-maxLength'] = currentLength;
             propertyTypeEntry['x-minLength'] = currentLength;
             break;
         }
 
-        case MongoBSONTypes.Number:
-        case MongoBSONTypes.Int32:
-        case MongoBSONTypes.Long:
-        case MongoBSONTypes.Double:
-        case MongoBSONTypes.Decimal128: {
+        case BSONTypes.Number:
+        case BSONTypes.Int32:
+        case BSONTypes.Long:
+        case BSONTypes.Double:
+        case BSONTypes.Decimal128: {
             const numericValue = Number(value);
             propertyTypeEntry['x-maxValue'] = numericValue;
             propertyTypeEntry['x-minValue'] = numericValue;
             break;
         }
 
-        case MongoBSONTypes.Boolean: {
+        case BSONTypes.Boolean: {
             const boolValue = value as boolean;
             propertyTypeEntry['x-trueCount'] = boolValue ? 1 : 0;
             propertyTypeEntry['x-falseCount'] = boolValue ? 0 : 1;
             break;
         }
 
-        case MongoBSONTypes.Date: {
+        case BSONTypes.Date: {
             const dateValue = (value as Date).getTime();
             propertyTypeEntry['x-maxDate'] = dateValue;
             propertyTypeEntry['x-minDate'] = dateValue;
             break;
         }
 
-        case MongoBSONTypes.Binary: {
+        case BSONTypes.Binary: {
             const binaryLength = (value as Buffer).length;
             propertyTypeEntry['x-maxLength'] = binaryLength;
             propertyTypeEntry['x-minLength'] = binaryLength;
             break;
         }
 
-        case MongoBSONTypes.Null:
-        case MongoBSONTypes.RegExp:
-        case MongoBSONTypes.ObjectId:
-        case MongoBSONTypes.MinKey:
-        case MongoBSONTypes.MaxKey:
-        case MongoBSONTypes.Symbol:
-        case MongoBSONTypes.Timestamp:
-        case MongoBSONTypes.DBRef:
-        case MongoBSONTypes.Map:
+        case BSONTypes.Null:
+        case BSONTypes.RegExp:
+        case BSONTypes.ObjectId:
+        case BSONTypes.MinKey:
+        case BSONTypes.MaxKey:
+        case BSONTypes.Symbol:
+        case BSONTypes.Timestamp:
+        case BSONTypes.DBRef:
+        case BSONTypes.Map:
             // No stats computation for other types
             break;
 
@@ -527,9 +444,9 @@ function initializeStatsForValue(value: unknown, mongoType: MongoBSONTypes, prop
  * Helper function to aggregate stats for a value based on its MongoDB data type
  * Used when processing multiple values (e.g., elements in arrays)
  */
-function aggregateStatsForValue(value: unknown, mongoType: MongoBSONTypes, propertyTypeEntry: JSONSchema): void {
+function aggregateStatsForValue(value: unknown, mongoType: BSONTypes, propertyTypeEntry: JSONSchema): void {
     switch (mongoType) {
-        case MongoBSONTypes.String: {
+        case BSONTypes.String: {
             const currentLength = (value as string).length;
 
             // Update minLength
@@ -544,11 +461,11 @@ function aggregateStatsForValue(value: unknown, mongoType: MongoBSONTypes, prope
             break;
         }
 
-        case MongoBSONTypes.Number:
-        case MongoBSONTypes.Int32:
-        case MongoBSONTypes.Long:
-        case MongoBSONTypes.Double:
-        case MongoBSONTypes.Decimal128: {
+        case BSONTypes.Number:
+        case BSONTypes.Int32:
+        case BSONTypes.Long:
+        case BSONTypes.Double:
+        case BSONTypes.Decimal128: {
             const numericValue = Number(value);
 
             // Update minValue
@@ -563,7 +480,7 @@ function aggregateStatsForValue(value: unknown, mongoType: MongoBSONTypes, prope
             break;
         }
 
-        case MongoBSONTypes.Boolean: {
+        case BSONTypes.Boolean: {
             const boolValue = value as boolean;
 
             // Update trueCount and falseCount
@@ -581,7 +498,7 @@ function aggregateStatsForValue(value: unknown, mongoType: MongoBSONTypes, prope
             break;
         }
 
-        case MongoBSONTypes.Date: {
+        case BSONTypes.Date: {
             const dateValue = (value as Date).getTime();
 
             // Update minDate
@@ -596,7 +513,7 @@ function aggregateStatsForValue(value: unknown, mongoType: MongoBSONTypes, prope
             break;
         }
 
-        case MongoBSONTypes.Binary: {
+        case BSONTypes.Binary: {
             const binaryLength = (value as Buffer).length;
 
             // Update minLength
@@ -617,17 +534,12 @@ function aggregateStatsForValue(value: unknown, mongoType: MongoBSONTypes, prope
     }
 }
 
-function getSchemaAtPath(schema: JSONSchema, path: string[]): JSONSchema {
-    let currentNode = schema;
+function getSchemaAtPath(schema: JSONSchema, path: string[]): JSONSchema | undefined {
+    let currentNode: JSONSchema | undefined = schema;
 
     for (let i = 0; i < path.length; i++) {
         const key = path[i];
 
-        // If the current node is an array, we should move to its `items`
-        // if (currentNode.type === 'array' && currentNode.items) {
-        //     currentNode = currentNode.items;
-        // }
-
         // Move to the next property in the schema
         if (currentNode && currentNode.properties && currentNode.properties[key]) {
             const nextNode: JSONSchema = currentNode.properties[key] as JSONSchema;
@@ -636,13 +548,15 @@ function getSchemaAtPath(schema: JSONSchema, path: string[]): JSONSchema {
              * We're looking at the "Object"-one, because these have the properties we're interested in.
              */
             if (nextNode.anyOf && nextNode.anyOf.length > 0) {
-                currentNode = nextNode.anyOf.find((entry: JSONSchema) => entry.type === 'object') as JSONSchema;
+                currentNode = nextNode.anyOf.find(
+                    (entry: JSONSchemaRef): entry is JSONSchema => typeof entry === 'object' && entry.type === 'object',
+                );
             } else {
                 // we can't continue, as we're missing the next node, we abort at the last node we managed to extract
                 return currentNode;
             }
         } else {
-            throw new Error(l10n.t('No properties found in the schema at path "{0}"', path.slice(0, i + 1).join('/')));
+            throw new Error(`No properties found in the schema at path "${path.slice(0, i + 1).join('/')}"`);
         }
     }
 
@@ -653,7 +567,7 @@ export function getPropertyNamesAtLevel(jsonSchema: JSONSchema, path: string[]):
     const headers = new Set<string>();
 
     // Explore the schema and apply the callback to collect headers at the specified path
-    const selectedSchema: JSONSchema = getSchemaAtPath(jsonSchema, path);
+    const selectedSchema = getSchemaAtPath(jsonSchema, path);
 
     if (selectedSchema && selectedSchema.properties) {
         Object.keys(selectedSchema.properties).forEach((key) => {
diff --git a/src/utils/json/mongo/MongoValueFormatters.ts b/packages/schema-analyzer/src/ValueFormatters.ts
similarity index 56%
rename from src/utils/json/mongo/MongoValueFormatters.ts
rename to packages/schema-analyzer/src/ValueFormatters.ts
index 243ce2631..7f9e8e5fa 100644
--- a/src/utils/json/mongo/MongoValueFormatters.ts
+++ b/packages/schema-analyzer/src/ValueFormatters.ts
@@ -4,16 +4,16 @@
  *--------------------------------------------------------------------------------------------*/
 
 import { type Binary, type BSONRegExp, type ObjectId } from 'mongodb';
-import { MongoBSONTypes } from './MongoBSONTypes';
+import { BSONTypes } from './BSONTypes';
 
 /**
- * Converts a MongoDB value to its display string representation based on its type.
+ * Converts a MongoDB API value to its display string representation based on its type.
  *
  * @param value - The value to be converted to a display string.
- * @param type - The MongoDB data type of the value.
+ * @param type - The MongoDB API data type of the value.
  * @returns The string representation of the value.
  *
- * The function handles various MongoDB data types including:
+ * The function handles various MongoDB API data types including:
  * - String
  * - Number, Int32, Double, Decimal128, Long
  * - Boolean
@@ -24,60 +24,60 @@ import { MongoBSONTypes } from './MongoBSONTypes';
  *
  * For unsupported or unknown types, the function defaults to JSON stringification.
  */
-export function valueToDisplayString(value: unknown, type: MongoBSONTypes): string {
+export function valueToDisplayString(value: unknown, type: BSONTypes): string {
     switch (type) {
-        case MongoBSONTypes.String: {
+        case BSONTypes.String: {
             return value as string;
         }
-        case MongoBSONTypes.Number:
-        case MongoBSONTypes.Int32:
-        case MongoBSONTypes.Double:
-        case MongoBSONTypes.Decimal128:
-        case MongoBSONTypes.Long: {
+        case BSONTypes.Number:
+        case BSONTypes.Int32:
+        case BSONTypes.Double:
+        case BSONTypes.Decimal128:
+        case BSONTypes.Long: {
             return (value as number).toString();
         }
-        case MongoBSONTypes.Boolean: {
+        case BSONTypes.Boolean: {
             return (value as boolean).toString();
         }
-        case MongoBSONTypes.Date: {
+        case BSONTypes.Date: {
             return (value as Date).toISOString();
         }
-        case MongoBSONTypes.ObjectId: {
+        case BSONTypes.ObjectId: {
             return (value as ObjectId).toHexString();
         }
-        case MongoBSONTypes.Null: {
+        case BSONTypes.Null: {
             return 'null';
         }
-        case MongoBSONTypes.RegExp: {
+        case BSONTypes.RegExp: {
             const v = value as BSONRegExp;
             return `${v.pattern} ${v.options}`;
         }
-        case MongoBSONTypes.Binary: {
+        case BSONTypes.Binary: {
             return `Binary[${(value as Binary).length()}]`;
         }
-        case MongoBSONTypes.Symbol: {
+        case BSONTypes.Symbol: {
             return (value as symbol).toString();
         }
-        case MongoBSONTypes.Timestamp: {
+        case BSONTypes.Timestamp: {
             return (value as { toString: () => string }).toString();
         }
-        case MongoBSONTypes.MinKey: {
+        case BSONTypes.MinKey: {
             return 'MinKey';
         }
-        case MongoBSONTypes.MaxKey: {
+        case BSONTypes.MaxKey: {
             return 'MaxKey';
         }
-        case MongoBSONTypes.Code:
-        case MongoBSONTypes.CodeWithScope: {
+        case BSONTypes.Code:
+        case BSONTypes.CodeWithScope: {
             return JSON.stringify(value);
         }
 
-        case MongoBSONTypes.Array:
-        case MongoBSONTypes.Object:
-        case MongoBSONTypes.Map:
-        case MongoBSONTypes.DBRef:
-        case MongoBSONTypes.Undefined:
-        case MongoBSONTypes._UNKNOWN_:
+        case BSONTypes.Array:
+        case BSONTypes.Object:
+        case BSONTypes.Map:
+        case BSONTypes.DBRef:
+        case BSONTypes.Undefined:
+        case BSONTypes._UNKNOWN_:
         default: {
             return JSON.stringify(value);
         }
diff --git a/packages/schema-analyzer/src/getKnownFields.ts b/packages/schema-analyzer/src/getKnownFields.ts
new file mode 100644
index 000000000..f5da314b6
--- /dev/null
+++ b/packages/schema-analyzer/src/getKnownFields.ts
@@ -0,0 +1,219 @@
+/*---------------------------------------------------------------------------------------------
+ *  Copyright (c) Microsoft Corporation. All rights reserved.
+ *  Licensed under the MIT License. See License.txt in the project root for license information.
+ *--------------------------------------------------------------------------------------------*/
+
+import Denque from 'denque';
+import { type JSONSchema } from './JSONSchema';
+
+export interface FieldEntry {
+    /** Dot-joined path (e.g., "user.profile.name"); literal dots inside field names are not escaped */
+    path: string;
+    /** JSON type of the dominant type entry ("string", "number", "object", "array", etc.) */
+    type: string;
+    /** BSON type of the dominant type entry: its x-bsonType ("date", "objectid", ...), else its JSON type */
+    bsonType: string;
+    /** All distinct BSON types observed for this field, sorted; set only when there are two or more */
+    bsonTypes?: string[];
+    /**
+     * True if this field was not present in every inspected document
+     * (x-occurrence < parent x-documentsInspected); left unset otherwise.
+     *
+     * This is a statistical observation, not a schema constraint — in the MongoDB API / DocumentDB API,
+     * all fields are implicitly optional.
+     */
+    isSparse?: boolean;
+    /** If the field is an array, the dominant element BSON type (highest x-typeOccurrence in items.anyOf) */
+    arrayItemBsonType?: string;
+}
+
+/**
+ * Traverses a JSON Schema object and collects every leaf property path together with
+ * type information taken from its most common type entry.
+ *
+ * This information is needed for auto-completion support.
+ *
+ * The approach is as follows:
+ * - Initialize a queue with the root properties of the schema to perform a breadth-first traversal.
+ * - While the queue is not empty:
+ *   - Dequeue the next item: the schema node, its path, and the parent's 'x-documentsInspected'.
+ *   - Pick the node's most common type entry (highest 'x-typeOccurrence' in 'anyOf', else the node).
+ *   - If that entry is an object with properties:
+ *     - Enqueue its child properties under dot-joined paths; the object itself is not emitted.
+ *   - Otherwise (primitives, arrays, objects without properties):
+ *     - Emit a FieldEntry with type, bsonType and, when applicable, bsonTypes/isSparse/arrayItemBsonType.
+ *   - Nodes with neither 'anyOf' entries nor a 'type' are skipped.
+ * - Return the entries sorted with '_id' first, then by path using localeCompare.
+ */
+export function getKnownFields(schema: JSONSchema): FieldEntry[] {
+    const result: FieldEntry[] = [];
+
+    type QueueItem = {
+        path: string;
+        schemaNode: JSONSchema;
+        parentDocumentsInspected: number;
+    };
+
+    const rootDocumentsInspected = (schema['x-documentsInspected'] as number) ?? 0;
+    const queue: Denque<QueueItem> = new Denque();
+
+    // Initialize the queue with root properties
+    //
+    // Note: JSON Schema allows boolean values as schema references (true = accept all,
+    // false = reject all), but our SchemaAnalyzer never produces boolean refs — it always
+    // emits full schema objects. The cast to JSONSchema below is therefore safe for our
+    // use case. If this function were ever reused with externally-sourced schemas, a
+    // `typeof propSchema === 'boolean'` guard should be added here and in the nested
+    // property loop below.
+    if (schema.properties) {
+        for (const propName of Object.keys(schema.properties)) {
+            const propSchema = schema.properties[propName] as JSONSchema;
+            queue.push({
+                path: propName,
+                schemaNode: propSchema,
+                parentDocumentsInspected: rootDocumentsInspected,
+            });
+        }
+    }
+
+    while (queue.length > 0) {
+        const item = queue.shift();
+        if (!item) continue;
+
+        const { path, schemaNode, parentDocumentsInspected } = item;
+        const mostCommonTypeEntry = getMostCommonTypeEntry(schemaNode);
+
+        if (mostCommonTypeEntry) {
+            if (mostCommonTypeEntry.type === 'object' && mostCommonTypeEntry.properties) {
+                // Not a leaf node, enqueue its properties
+                const objectDocumentsInspected = (mostCommonTypeEntry['x-documentsInspected'] as number) ?? 0;
+                for (const childName of Object.keys(mostCommonTypeEntry.properties)) {
+                    const childSchema = mostCommonTypeEntry.properties[childName] as JSONSchema;
+                    // TODO: Dot-delimited path concatenation is ambiguous when a field name
+                    // itself contains a literal dot. For example, a root-level field named
+                    // "a.b" produces path "a.b", indistinguishable from a nested field
+                    // { a: { b: ... } }. Fields with literal dots in their names were
+                    // prohibited before MongoDB API 3.6 and remain rare in practice.
+                    //
+                    // Future improvement: change `path` from `string` to `string[]`
+                    // (segment array) to preserve the distinction between nesting and
+                    // literal dots, pushing escaping/formatting decisions to consumers
+                    // (TS definitions, completion items, aggregation references, etc.).
+                    queue.push({
+                        path: `${path}.${childName}`,
+                        schemaNode: childSchema,
+                        parentDocumentsInspected: objectDocumentsInspected,
+                    });
+                }
+            } else {
+                // Leaf node, build the FieldEntry
+                const bsonType = (mostCommonTypeEntry['x-bsonType'] as string) ?? (mostCommonTypeEntry.type as string);
+
+                const entry: FieldEntry = {
+                    path,
+                    type: mostCommonTypeEntry.type as string,
+                    bsonType,
+                };
+
+                // bsonTypes: distinct x-bsonType values from anyOf entries, reported only when there are 2+
+                const allBsonTypes = collectBsonTypes(schemaNode);
+                if (allBsonTypes.length >= 2) {
+                    entry.bsonTypes = allBsonTypes;
+                }
+
+                // isSparse: occurrence is below the parent's inspected count (never set when that count is 0)
+                const occurrence = (schemaNode['x-occurrence'] as number) ?? 0;
+                if (parentDocumentsInspected > 0 && occurrence < parentDocumentsInspected) {
+                    entry.isSparse = true;
+                }
+
+                // arrayItemBsonType: for array fields, find the dominant element type
+                if (mostCommonTypeEntry.type === 'array') {
+                    const itemBsonType = getDominantArrayItemBsonType(mostCommonTypeEntry);
+                    if (itemBsonType) {
+                        entry.arrayItemBsonType = itemBsonType;
+                    }
+                }
+
+                result.push(entry);
+            }
+        }
+    }
+
+    // Sort: _id first, then alphabetical by path
+    result.sort((a, b) => {
+        if (a.path === '_id') return -1;
+        if (b.path === '_id') return 1;
+        return a.path.localeCompare(b.path);
+    });
+
+    return result;
+}
+
+/**
+ * Returns the 'anyOf' entry with the highest 'x-typeOccurrence' (first one wins on ties), the node
+ * itself when it has no 'anyOf' entries but declares a 'type', or null when it has neither.
+ */
+function getMostCommonTypeEntry(schemaNode: JSONSchema): JSONSchema | null {
+    if (schemaNode.anyOf && schemaNode.anyOf.length > 0) {
+        let maxOccurrence = -1;
+        let mostCommonTypeEntry: JSONSchema | null = null;
+
+        for (const typeEntry of schemaNode.anyOf as JSONSchema[]) {
+            const occurrence = typeEntry['x-typeOccurrence'] || 0;
+            if (occurrence > maxOccurrence) {
+                maxOccurrence = occurrence;
+                mostCommonTypeEntry = typeEntry;
+            }
+        }
+        return mostCommonTypeEntry;
+    } else if (schemaNode.type) {
+        // 'anyOf' is missing or empty: treat the node itself as the type entry
+        return schemaNode;
+    }
+    return null;
+}
+
+/**
+ * Collects the distinct, non-empty x-bsonType values from a schema node's anyOf entries.
+ * Returns them in default Array.sort() order for determinism; [] when anyOf is missing or empty.
+ */
+function collectBsonTypes(schemaNode: JSONSchema): string[] {
+    if (!schemaNode.anyOf || schemaNode.anyOf.length === 0) {
+        return [];
+    }
+
+    const bsonTypes = new Set<string>();
+    for (const entry of schemaNode.anyOf as JSONSchema[]) {
+        const bsonType = entry['x-bsonType'] as string | undefined;
+        if (bsonType) {
+            bsonTypes.add(bsonType);
+        }
+    }
+
+    return Array.from(bsonTypes).sort();
+}
+
+/**
+ * For an array type entry, returns the x-bsonType (possibly undefined) of the items.anyOf entry with
+ * the highest x-typeOccurrence, first wins on ties; undefined when items.anyOf is missing or empty.
+ */
+function getDominantArrayItemBsonType(arrayTypeEntry: JSONSchema): string | undefined {
+    const itemsSchema = arrayTypeEntry.items as JSONSchema | undefined;
+    if (!itemsSchema?.anyOf || itemsSchema.anyOf.length === 0) {
+        return undefined;
+    }
+
+    let maxOccurrence = -1;
+    let dominantBsonType: string | undefined;
+
+    for (const entry of itemsSchema.anyOf as JSONSchema[]) {
+        const occurrence = (entry['x-typeOccurrence'] as number) ?? 0;
+        if (occurrence > maxOccurrence) {
+            maxOccurrence = occurrence;
+            dominantBsonType = entry['x-bsonType'] as string | undefined;
+        }
+    }
+
+    return dominantBsonType;
+}
diff --git a/packages/schema-analyzer/src/index.ts b/packages/schema-analyzer/src/index.ts
new file mode 100644
index 000000000..871fd61f8
--- /dev/null
+++ b/packages/schema-analyzer/src/index.ts
@@ -0,0 +1,10 @@
+/*---------------------------------------------------------------------------------------------
+ *  Copyright (c) Microsoft Corporation. All rights reserved.
+ *  Licensed under the MIT License. See License.txt in the project root for license information.
+ *--------------------------------------------------------------------------------------------*/
+
+export { BSONTypes } from './BSONTypes';
+export { getKnownFields, type FieldEntry } from './getKnownFields';
+export { type JSONSchema, type JSONSchemaMap, type JSONSchemaRef } from './JSONSchema';
+export { SchemaAnalyzer, buildFullPaths, getPropertyNamesAtLevel } from './SchemaAnalyzer';
+export { valueToDisplayString } from './ValueFormatters';
diff --git a/packages/schema-analyzer/test/SchemaAnalyzer.arrayStats.test.ts b/packages/schema-analyzer/test/SchemaAnalyzer.arrayStats.test.ts
new file mode 100644
index 000000000..2669d5214
--- /dev/null
+++ b/packages/schema-analyzer/test/SchemaAnalyzer.arrayStats.test.ts
@@ -0,0 +1,464 @@
+/*---------------------------------------------------------------------------------------------
+ *  Copyright (c) Microsoft Corporation. All rights reserved.
+ *  Licensed under the MIT License. See License.txt in the project root for license information.
+ *--------------------------------------------------------------------------------------------*/
+
+import { ObjectId, type Document, type WithId } from 'mongodb';
+import { type JSONSchema } from '../src/JSONSchema';
+import { SchemaAnalyzer } from '../src/SchemaAnalyzer';
+
+/**
+ * This test file investigates the array element occurrence/stats problem.
+ *
+ * The core issue: When an array contains mixed types (e.g., strings AND objects),
+ * `x-typeOccurrence` on the items' type entries counts individual elements across
+ * ALL documents, not occurrences-per-document. This makes "field presence probability"
+ * for nested object properties inside arrays hard to interpret.
+ *
+ * Example scenario:
+ *   doc1.data = ["a", "b", "c", {"value": 23}]           → 3 strings, 1 object
+ *   doc2.data = ["x", "y", {"value": 42, "flag": true}]  → 2 strings, 1 object
+ *   doc3.data = ["z"]                                     → 1 string, 0 objects
+ *
+ * After processing 3 docs:
+ *   - items.anyOf[string].x-typeOccurrence = 6 (total string elements across all docs)
+ *   - items.anyOf[object].x-typeOccurrence = 2 (total object elements across all docs)
+ *   - items.anyOf[object].properties.value.x-occurrence = 2 (from 2 object elements)
+ *   - items.anyOf[object].properties.flag.x-occurrence = 1 (from 1 object element)
+ *
+ * The problem: what is items.anyOf[object].properties.value's "probability"?
+ *   - 2/2? (present in every object element → makes sense)
+ *   - 2/3? (present in 2 of 3 documents → misleading, doc3 has no objects at all)
+ *   - 2/6? (present in 2 of 6 total elements → nonsensical, mixes types)
+ *
+ * The analyzer anchors the count by setting x-documentsInspected on object type
+ * entries; the 'Array probability denominator problem' suite asserts this.
+ */
+describe('Array element occurrence analysis', () => {
+    it('counts element types across multiple documents', () => {
+        const analyzer = new SchemaAnalyzer();
+
+        const doc1: WithId<Document> = {
+            _id: new ObjectId(),
+            data: ['a', 'b', 'c', { value: 23 }],
+        };
+        const doc2: WithId<Document> = {
+            _id: new ObjectId(),
+            data: ['x', 'y', { value: 42, flag: true }],
+        };
+        const doc3: WithId<Document> = {
+            _id: new ObjectId(),
+            data: ['z'],
+        };
+
+        analyzer.addDocument(doc1);
+        analyzer.addDocument(doc2);
+        analyzer.addDocument(doc3);
+        const schema = analyzer.getSchema();
+
+        // data field: array seen in 3 docs
+        const dataField = schema.properties?.['data'] as JSONSchema;
+        expect(dataField['x-occurrence']).toBe(3);
+
+        // The array type entry
+        const arrayTypeEntry = dataField.anyOf?.find((e) => (e as JSONSchema)['x-bsonType'] === 'array') as JSONSchema;
+        expect(arrayTypeEntry).toBeDefined();
+        expect(arrayTypeEntry['x-typeOccurrence']).toBe(3);
+
+        // Array items
+        const itemsSchema = arrayTypeEntry.items as JSONSchema;
+        const stringEntry = itemsSchema.anyOf?.find((e) => (e as JSONSchema)['x-bsonType'] === 'string') as JSONSchema;
+        const objectEntry = itemsSchema.anyOf?.find((e) => (e as JSONSchema)['x-bsonType'] === 'object') as JSONSchema;
+
+        // String elements: "a","b","c","x","y","z" = 6 total
+        expect(stringEntry['x-typeOccurrence']).toBe(6);
+
+        // Object elements: {value:23}, {value:42,flag:true} = 2 total
+        expect(objectEntry['x-typeOccurrence']).toBe(2);
+
+        // Properties inside the object elements
+        const valueField = objectEntry.properties?.['value'] as JSONSchema;
+        const flagField = objectEntry.properties?.['flag'] as JSONSchema;
+
+        // "value" appeared in both objects → x-occurrence = 2
+        expect(valueField['x-occurrence']).toBe(2);
+
+        // "flag" appeared in 1 object → x-occurrence = 1
+        expect(flagField['x-occurrence']).toBe(1);
+
+        // THE CORE QUESTION: What is the denominator for probability?
+        //
+        // We know objectEntry['x-typeOccurrence'] = 2 (2 objects total across all arrays).
+        // So valueField probability = 2/2 = 100% (correct: every object had "value")
+        // And flagField probability = 1/2 = 50% (correct: half of objects had "flag")
+        //
+        // This test only pins the raw counts. The denominator itself is covered by the
+        // 'Array probability denominator problem' suite in this file, which expects the
+        // object type entry to carry x-documentsInspected for exactly this purpose.
+        //
+        // Either way the semantics are:
+        //   "of the N objects observed inside this array, M had this property"
+        //
+        // and never "of the N documents inspected at the root".
+    });
+
+    it('tracks min/max array lengths across documents', () => {
+        const analyzer = new SchemaAnalyzer();
+
+        const doc1: WithId<Document> = {
+            _id: new ObjectId(),
+            tags: ['a', 'b', 'c'],
+        };
+        const doc2: WithId<Document> = {
+            _id: new ObjectId(),
+            tags: ['x'],
+        };
+        const doc3: WithId<Document> = {
+            _id: new ObjectId(),
+            tags: ['p', 'q', 'r', 's', 't'],
+        };
+
+        analyzer.addDocument(doc1);
+        analyzer.addDocument(doc2);
+        analyzer.addDocument(doc3);
+        const schema = analyzer.getSchema();
+
+        const tagsField = schema.properties?.['tags'] as JSONSchema;
+        const arrayEntry = tagsField.anyOf?.find((e) => (e as JSONSchema)['x-bsonType'] === 'array') as JSONSchema;
+
+        expect(arrayEntry['x-minItems']).toBe(1);
+        expect(arrayEntry['x-maxItems']).toBe(5);
+    });
+
+    it('accumulates nested object properties from objects inside arrays across documents', () => {
+        const analyzer = new SchemaAnalyzer();
+
+        // doc1 has two objects with different properties in the items array
+        const doc1: WithId<Document> = {
+            _id: new ObjectId(),
+            items: [
+                { name: 'Laptop', price: 999 },
+                { name: 'Mouse', price: 29, discount: true },
+            ],
+        };
+
+        // doc2 has one object with yet another property
+        const doc2: WithId<Document> = {
+            _id: new ObjectId(),
+            items: [{ name: 'Desk', weight: 50 }],
+        };
+
+        analyzer.addDocument(doc1);
+        analyzer.addDocument(doc2);
+        const schema = analyzer.getSchema();
+
+        const itemsField = schema.properties?.['items'] as JSONSchema;
+        const arrayEntry = itemsField.anyOf?.find((e) => (e as JSONSchema)['x-bsonType'] === 'array') as JSONSchema;
+        const objEntry = (arrayEntry.items as JSONSchema).anyOf?.find(
+            (e) => (e as JSONSchema)['x-bsonType'] === 'object',
+        ) as JSONSchema;
+
+        const props = objEntry.properties as Record<string, JSONSchema>;
+
+        // "name" appeared in all 3 object elements
+        expect(props['name']['x-occurrence']).toBe(3);
+
+        // "price" appeared in 2 of 3 object elements
+        expect(props['price']['x-occurrence']).toBe(2);
+
+        // "discount" appeared in 1 of 3 object elements
+        expect(props['discount']['x-occurrence']).toBe(1);
+
+        // "weight" appeared in 1 of 3 object elements
+        expect(props['weight']['x-occurrence']).toBe(1);
+
+        // Total object elements = 3 (2 from doc1 + 1 from doc2)
+        expect(objEntry['x-typeOccurrence']).toBe(3);
+
+        // So probability interpretations:
+        //   name: 3/3 = 100%
+        //   price: 2/3 = 67%
+        //   discount: 1/3 = 33%
+        //   weight: 1/3 = 33%
+        //
+        // This is correct! x-typeOccurrence serves as the denominator.
+    });
+
+    it('handles arrays that ONLY contain primitives (no occurrence complexity)', () => {
+        const analyzer = new SchemaAnalyzer();
+
+        const doc1: WithId<Document> = {
+            _id: new ObjectId(),
+            scores: [90, 85, 78],
+        };
+        const doc2: WithId<Document> = {
+            _id: new ObjectId(),
+            scores: [100, 55],
+        };
+
+        analyzer.addDocument(doc1);
+        analyzer.addDocument(doc2);
+        const schema = analyzer.getSchema();
+
+        const scoresField = schema.properties?.['scores'] as JSONSchema;
+        const arrayEntry = scoresField.anyOf?.find((e) => (e as JSONSchema)['x-bsonType'] === 'array') as JSONSchema;
+
+        const numEntry = (arrayEntry.items as JSONSchema).anyOf?.find(
+            (e) => (e as JSONSchema)['x-bsonType'] === 'double',
+        ) as JSONSchema;
+
+        // 5 total numeric elements
+        expect(numEntry['x-typeOccurrence']).toBe(5);
+
+        // Stats across all elements
+        expect(numEntry['x-minValue']).toBe(55);
+        expect(numEntry['x-maxValue']).toBe(100);
+
+        // Array length stats
+        expect(arrayEntry['x-minItems']).toBe(2);
+        expect(arrayEntry['x-maxItems']).toBe(3);
+    });
+
+    it('verifies that encounteredMongoTypes map is per-document', () => {
+        // Regression guard: value stats for array elements must aggregate across documents.
+        //
+        // NOTE(review): the mechanism below is taken from the original author's notes, not from
+        // code visible in this file — confirm against SchemaAnalyzer.ts. The Array case handler is
+        // understood to create a fresh encounteredMongoTypes map for every array it processes and to
+        // use it to choose between initializeStatsForValue (first element of a type in this array)
+        // and aggregateStatsForValue (later elements). The type entry that stores the stats is
+        // shared by all documents, so a naive "initialize" on doc2 could wipe doc1's stats:
+        //
+        // doc1.scores = [10, 20, 30]
+        //   - after doc1: x-minValue=10, x-maxValue=30
+        //
+        // doc2.scores = [5, 15]
+        //   - element 5: initializeStatsForValue would reset x-minValue=5, x-maxValue=5
+        //   - element 15: aggregateStatsForValue would raise x-maxValue to 15
+        //   - final: x-minValue=5, x-maxValue=15, i.e. the 30 from doc1 would be lost
+        //
+        // x-minValue is 5 with or without that failure, so only the x-maxValue assertion at the
+        // end of this test can detect it.
+        //
+        // The assertions require the global values over all five elements.
+
+        const analyzer = new SchemaAnalyzer();
+
+        const doc1: WithId<Document> = {
+            _id: new ObjectId(),
+            scores: [10, 20, 30],
+        };
+        const doc2: WithId<Document> = {
+            _id: new ObjectId(),
+            scores: [5, 15],
+        };
+
+        analyzer.addDocument(doc1);
+        analyzer.addDocument(doc2);
+        const schema = analyzer.getSchema();
+
+        const scoresField = schema.properties?.['scores'] as JSONSchema;
+        const arrayEntry = scoresField.anyOf?.find((e) => (e as JSONSchema)['x-bsonType'] === 'array') as JSONSchema;
+
+        const numEntry = (arrayEntry.items as JSONSchema).anyOf?.find(
+            (e) => (e as JSONSchema)['x-bsonType'] === 'double',
+        ) as JSONSchema;
+
+        // Expected correct values:
+        // All 5 elements: 10, 20, 30, 5, 15
+        // Global min = 5, global max = 30
+
+        // If stats were re-initialized for doc2:
+        //   after doc1: min=10, max=30
+        //   doc2 first element (5): initializeStatsForValue → sets min=5, max=5
+        //   doc2 second element (15): aggregateStatsForValue → max becomes 15
+        //   final: min=5, max=15 ← WRONG (lost 30 from doc1)
+
+        // Passes with or without the re-initialization failure (5 is the minimum either way):
+        expect(numEntry['x-minValue']).toBe(5);
+        // Would be 15 instead of 30 if doc2 re-initialized the stats:
+        expect(numEntry['x-maxValue']).toBe(30); // global max comes from doc1
+    });
+});
+
+describe('Array probability denominator problem', () => {
+    it('reproduces the >100% probability bug: empty array + large array', () => {
+        // User scenario:
+        //   doc1: a = []                             → 0 objects
+        //   doc2: a = [{b:1}, {b:2}, ..., {b:100}]   → 100 objects
+        //
+        // Naively computing probability as:
+        //   occurrence_of_b / root.x-documentsInspected = 100 / 2 = 5000%
+        //
+        // The correct probability should be:
+        //   occurrence_of_b / objectEntry.x-typeOccurrence = 100 / 100 = 100%
+        //
+        // FIX: Set x-documentsInspected on the object type entry so the uniform
+        //      formula `x-occurrence / parent.x-documentsInspected` works at every
+        //      nesting level.
+
+        const analyzer = new SchemaAnalyzer();
+
+        const doc1: WithId<Document> = {
+            _id: new ObjectId(),
+            a: [], // empty array
+        };
+
+        // doc2: 100 objects, each with property "b"
+        const objectElements: Record<string, unknown>[] = [];
+        for (let i = 1; i <= 100; i++) {
+            objectElements.push({ b: i });
+        }
+        const doc2: WithId<Document> = {
+            _id: new ObjectId(),
+            a: objectElements,
+        };
+
+        analyzer.addDocument(doc1);
+        analyzer.addDocument(doc2);
+        const schema = analyzer.getSchema();
+
+        // Root level
+        expect(schema['x-documentsInspected']).toBe(2);
+
+        // Navigate to the object type entry inside the array
+        const aField = schema.properties?.['a'] as JSONSchema;
+        expect(aField['x-occurrence']).toBe(2); // both docs have 'a'
+
+        const arrayEntry = aField.anyOf?.find((e) => (e as JSONSchema)['x-bsonType'] === 'array') as JSONSchema;
+        const objectEntry = (arrayEntry.items as JSONSchema).anyOf?.find(
+            (e) => (e as JSONSchema)['x-bsonType'] === 'object',
+        ) as JSONSchema;
+
+        // 100 object elements total
+        expect(objectEntry['x-typeOccurrence']).toBe(100);
+
+        // Property "b" appears in all 100 objects
+        const bField = objectEntry.properties?.['b'] as JSONSchema;
+        expect(bField['x-occurrence']).toBe(100);
+
+        // THE FIX: objectEntry should have x-documentsInspected = 100
+        // so that the uniform formula works:
+        //   probability = b.x-occurrence / objectEntry.x-documentsInspected
+        //              = 100 / 100 = 100%
+        expect(objectEntry['x-documentsInspected']).toBe(100);
+    });
+
+    it('correctly computes probability for sparse properties in array objects', () => {
+        // doc1: items = [{name:"A", price:10}, {name:"B"}]  → 2 objects, name in both, price in 1
+        // doc2: items = [{name:"C", discount:true}]          → 1 object
+        //
+        // Total objects = 3
+        // name: 3/3 = 100%
+        // price: 1/3 = 33%
+        // discount: 1/3 = 33%
+
+        const analyzer = new SchemaAnalyzer();
+
+        const doc1: WithId<Document> = {
+            _id: new ObjectId(),
+            items: [{ name: 'A', price: 10 }, { name: 'B' }],
+        };
+        const doc2: WithId<Document> = {
+            _id: new ObjectId(),
+            items: [{ name: 'C', discount: true }],
+        };
+
+        analyzer.addDocument(doc1);
+        analyzer.addDocument(doc2);
+        const schema = analyzer.getSchema();
+
+        const itemsField = schema.properties?.['items'] as JSONSchema;
+        const arrayEntry = itemsField.anyOf?.find((e) => (e as JSONSchema)['x-bsonType'] === 'array') as JSONSchema;
+        const objectEntry = (arrayEntry.items as JSONSchema).anyOf?.find(
+            (e) => (e as JSONSchema)['x-bsonType'] === 'object',
+        ) as JSONSchema;
+
+        // The object type entry should have x-documentsInspected = 3
+        expect(objectEntry['x-documentsInspected']).toBe(3);
+
+        const props = objectEntry.properties as Record<string, JSONSchema>;
+
+        // Probability = x-occurrence / x-documentsInspected (uniform formula)
+        expect(props['name']['x-occurrence']).toBe(3); // 3/3 = 100%
+        expect(props['price']['x-occurrence']).toBe(1); // 1/3 = 33%
+        expect(props['discount']['x-occurrence']).toBe(1); // 1/3 = 33%
+    });
+
+    it('sets x-documentsInspected on nested objects at all levels', () => {
+        // items: [{address: {city: "NY", zip: "10001"}}, {address: {city: "LA"}}]
+        //
+        // At items.anyOf[object] level: x-documentsInspected = 2
+        // At address.anyOf[object] level: x-documentsInspected = 2
+        //   city: 2/2 = 100%, zip: 1/2 = 50%
+
+        const analyzer = new SchemaAnalyzer();
+
+        const doc: WithId<Document> = {
+            _id: new ObjectId(),
+            items: [{ address: { city: 'NY', zip: '10001' } }, { address: { city: 'LA' } }],
+        };
+
+        analyzer.addDocument(doc);
+        const schema = analyzer.getSchema();
+
+        const itemsField = schema.properties?.['items'] as JSONSchema;
+        const arrayEntry = itemsField.anyOf?.find((e) => (e as JSONSchema)['x-bsonType'] === 'array') as JSONSchema;
+        const objectEntry = (arrayEntry.items as JSONSchema).anyOf?.find(
+            (e) => (e as JSONSchema)['x-bsonType'] === 'object',
+        ) as JSONSchema;
+
+        // 2 objects in the array
+        expect(objectEntry['x-documentsInspected']).toBe(2);
+
+        // address.anyOf[object] — the nested object type
+        const addressProp = objectEntry.properties?.['address'] as JSONSchema;
+        const addressObjEntry = addressProp.anyOf?.find(
+            (e) => (e as JSONSchema)['x-bsonType'] === 'object',
+        ) as JSONSchema;
+
+        // Both objects had address, and both addresses were objects
+        expect(addressObjEntry['x-documentsInspected']).toBe(2);
+
+        const addrProps = addressObjEntry.properties as Record<string, JSONSchema>;
+        expect(addrProps['city']['x-occurrence']).toBe(2); // 2/2 = 100%
+        expect(addrProps['zip']['x-occurrence']).toBe(1); // 1/2 = 50%
+    });
+
+    it('does NOT change x-documentsInspected at root level (root keeps document count)', () => {
+        const analyzer = new SchemaAnalyzer();
+
+        const doc1: WithId<Document> = {
+            _id: new ObjectId(),
+            name: 'Alice',
+            address: { city: 'NY' },
+        };
+        const doc2: WithId<Document> = {
+            _id: new ObjectId(),
+            name: 'Bob',
+            address: { city: 'LA', zip: '90001' },
+        };
+
+        analyzer.addDocument(doc1);
+        analyzer.addDocument(doc2);
+        const schema = analyzer.getSchema();
+
+        // Root x-documentsInspected is document count, not affected by the fix
+        expect(schema['x-documentsInspected']).toBe(2);
+
+        // Root-level probability still works: name.occurrence(2) / documentsInspected(2) = 100%
+        const nameField = schema.properties?.['name'] as JSONSchema;
+        expect(nameField['x-occurrence']).toBe(2);
+
+        // Nested object: address.anyOf[object] should have x-documentsInspected = 2
+        const addressField = schema.properties?.['address'] as JSONSchema;
+        const addressObjEntry = addressField.anyOf?.find(
+            (e) => (e as JSONSchema)['x-bsonType'] === 'object',
+        ) as JSONSchema;
+        expect(addressObjEntry['x-documentsInspected']).toBe(2);
+
+        const addrProps = addressObjEntry.properties as Record<string, JSONSchema>;
+        expect(addrProps['city']['x-occurrence']).toBe(2); // 2/2 = 100%
+        expect(addrProps['zip']['x-occurrence']).toBe(1); // 1/2 = 50%
+    });
+});
diff --git a/packages/schema-analyzer/test/SchemaAnalyzer.test.ts b/packages/schema-analyzer/test/SchemaAnalyzer.test.ts
new file mode 100644
index 000000000..f23a97bdf
--- /dev/null
+++ b/packages/schema-analyzer/test/SchemaAnalyzer.test.ts
@@ -0,0 +1,349 @@
+/*---------------------------------------------------------------------------------------------
+ *  Copyright (c) Microsoft Corporation. All rights reserved.
+ *  Licensed under the MIT License. See License.txt in the project root for license information.
+ *--------------------------------------------------------------------------------------------*/
+
+import { type JSONSchema, type JSONSchemaMap, type JSONSchemaRef } from '../src/JSONSchema';
+import { getPropertyNamesAtLevel, SchemaAnalyzer } from '../src/SchemaAnalyzer';
+import {
+    arraysWithDifferentDataTypes,
+    complexDocument,
+    complexDocumentsArray,
+    complexDocumentWithOddTypes,
+    embeddedDocumentOnly,
+    flatDocument,
+    sparseDocumentsArray,
+} from './mongoTestDocuments';
+
+describe('DocumentDB Schema Analyzer', () => {
+    it('prints out schema for testing', () => {
+        // Smoke test: despite the title nothing is printed; we only check that analysis yields a schema.
+        const smokeSchema = SchemaAnalyzer.fromDocument(embeddedDocumentOnly).getSchema();
+        expect(smokeSchema).toBeDefined();
+    });
+
+    it('supports many documents', () => {
+        const totalDocs = sparseDocumentsArray.length;
+        const schema = SchemaAnalyzer.fromDocuments(sparseDocumentsArray).getSchema();
+        expect(schema).toBeDefined();
+
+        // 'x-documentsInspected' must match the number of documents that were analyzed
+        expect(schema['x-documentsInspected']).toBe(totalDocs);
+
+        // Root properties that must be present (subset match, extra properties are tolerated)
+        const requiredRootProperties = new Set(['_id', 'name', 'age', 'email', 'isActive', 'score', 'description']);
+
+        expect(Object.keys(schema.properties || {})).toEqual(
+            expect.arrayContaining(Array.from(requiredRootProperties)),
+        );
+
+        // 'name' must be known and must have been seen at least once
+        const nameSchema = schema.properties?.['name'] as JSONSchema;
+        expect(nameSchema).toBeDefined();
+        expect(nameSchema?.['x-occurrence']).toBeGreaterThan(0);
+
+        // The JSON types are stored on the 'anyOf' entries of the field
+        const nameJsonTypes = nameSchema.anyOf?.map((variant) => (variant as JSONSchema)['type']);
+        expect(nameJsonTypes).toContain('string');
+
+        // 'age' must be recorded as a number
+        const ageSchema = schema.properties?.['age'] as JSONSchema;
+        expect(ageSchema).toBeDefined();
+        const ageJsonTypes = ageSchema.anyOf?.map((variant) => (variant as JSONSchema)['type']);
+        expect(ageJsonTypes).toContain('number');
+
+        // 'isActive' must be recorded as a boolean
+        const isActiveSchema = schema.properties?.['isActive'] as JSONSchema;
+        expect(isActiveSchema).toBeDefined();
+        const isActiveJsonTypes = isActiveSchema.anyOf?.map((variant) => (variant as JSONSchema)['type']);
+        expect(isActiveJsonTypes).toContain('boolean');
+
+        // 'description' is sparse: it must occur in fewer documents than were analyzed
+        const descriptionSchema = schema.properties?.['description'] as JSONSchema | undefined;
+        expect(descriptionSchema).toBeDefined();
+        expect(descriptionSchema?.['x-occurrence']).toBeLessThan(totalDocs);
+    });
+
+    it('detects all BSON types from flatDocument', () => {
+        const schema = SchemaAnalyzer.fromDocument(flatDocument).getSchema();
+        const rootProperties = schema.properties;
+
+        // Every top-level key of the fixture must show up in the schema
+        const fixtureKeys = Object.keys(flatDocument);
+        expect(Object.keys(rootProperties || {})).toEqual(expect.arrayContaining(fixtureKeys));
+
+        // Reads 'x-bsonType' from the first 'anyOf' entry of a root field, if there is one
+        const getBsonType = (fieldName: string): string | undefined => {
+            const fieldSchema = rootProperties?.[fieldName] as JSONSchema | undefined;
+            const firstTypeEntry = fieldSchema?.anyOf?.[0] as JSONSchema | undefined;
+            return firstTypeEntry?.['x-bsonType'];
+        };
+
+        // Expected BSON type per fixture field
+        expect(getBsonType('int32Field')).toBe('int32');
+        expect(getBsonType('doubleField')).toBe('double');
+        expect(getBsonType('decimalField')).toBe('decimal128');
+        expect(getBsonType('dateField')).toBe('date');
+        expect(getBsonType('objectIdField')).toBe('objectid');
+        expect(getBsonType('codeField')).toBe('code');
+        expect(getBsonType('uuidField')).toBe('uuid');
+        expect(getBsonType('uuidLegacyField')).toBe('uuid-legacy');
+    });
+
+    it('detects embedded objects correctly', () => {
+        const schema = SchemaAnalyzer.fromDocument(embeddedDocumentOnly).getSchema();
+        const rootProperties = schema.properties;
+
+        // Both embedded documents must be visible at the root
+        expect(rootProperties).toHaveProperty('personalInfo');
+        expect(rootProperties).toHaveProperty('jobInfo');
+
+        // Follow the first type entry of 'personalInfo' down to its properties
+        const personalInfoSchema = rootProperties?.['personalInfo'] as JSONSchema | undefined;
+        const personalInfoObject = personalInfoSchema?.anyOf?.[0] as JSONSchema | undefined;
+        const personalInfoProperties = personalInfoObject?.properties;
+        expect(personalInfoProperties).toBeDefined();
+        expect(personalInfoProperties).toHaveProperty('name');
+        expect(personalInfoProperties).toHaveProperty('age');
+        expect(personalInfoProperties).toHaveProperty('married');
+        expect(personalInfoProperties).toHaveProperty('address');
+
+        // Same again for 'personalInfo.address'
+        const addressSchema = (personalInfoProperties as JSONSchemaMap)['address'] as JSONSchema;
+        const addressProperties = (addressSchema.anyOf?.[0] as JSONSchema | undefined)?.properties;
+        expect(addressProperties).toBeDefined();
+        expect(addressProperties).toHaveProperty('street');
+        expect(addressProperties).toHaveProperty('city');
+        expect(addressProperties).toHaveProperty('zip');
+    });
+
+    it('detects arrays and their element types correctly', () => {
+        const schema = SchemaAnalyzer.fromDocument(arraysWithDifferentDataTypes).getSchema();
+        const rootProperties = schema.properties;
+
+        // Every array field of the fixture must be present at the root
+        expect(rootProperties).toHaveProperty('integersArray');
+        expect(rootProperties).toHaveProperty('stringsArray');
+        expect(rootProperties).toHaveProperty('booleansArray');
+        expect(rootProperties).toHaveProperty('mixedArray');
+        expect(rootProperties).toHaveProperty('datesArray');
+
+        // Lists the JSON 'type' of every element type recorded under the first type entry of a root field
+        const getArrayItemTypes = (fieldName: string): string[] | undefined => {
+            const fieldSchema = rootProperties?.[fieldName] as JSONSchema | undefined;
+            const arrayEntry = fieldSchema?.anyOf?.[0] as JSONSchema | undefined;
+            const itemsSchema = arrayEntry?.items as JSONSchema | undefined;
+            const itemTypeEntries: JSONSchemaRef[] | undefined = itemsSchema?.anyOf;
+            // The cast to string is unchecked: an entry without 'type' would come back as undefined
+            return itemTypeEntries?.map((itemEntry) => (itemEntry as JSONSchema)['type'] as string);
+        };
+
+        // 'integersArray' holds numbers
+        const integerTypes = getArrayItemTypes('integersArray');
+        expect(integerTypes).toContain('number');
+
+        // 'stringsArray' holds strings
+        const stringTypes = getArrayItemTypes('stringsArray');
+        expect(stringTypes).toContain('string');
+
+        // 'mixedArray' holds several types at once
+        const mixedTypes = getArrayItemTypes('mixedArray');
+        expect(mixedTypes).toEqual(expect.arrayContaining(['number', 'string', 'boolean', 'object', 'null']));
+    });
+
+    it('handles arrays within objects and objects within arrays', () => {
+        const schema = SchemaAnalyzer.fromDocument(complexDocument).getSchema();
+        const rootProperties = schema.properties;
+
+        // Walk down to 'user.profile', following the first type entry at every level
+        const userSchema = rootProperties?.['user'] as JSONSchema | undefined;
+        const userObject = userSchema?.anyOf?.[0] as JSONSchema | undefined;
+        const profileSchema = userObject?.properties?.['profile'] as JSONSchema | undefined;
+        const profileObject = profileSchema?.anyOf?.[0] as JSONSchema | undefined;
+
+        // 'user.profile.hobbies' must be an array whose element types include 'string'
+        const hobbiesSchema = profileObject?.properties?.['hobbies'] as JSONSchema | undefined;
+        const hobbiesItems = (hobbiesSchema?.anyOf?.[0] as JSONSchema | undefined)?.items as JSONSchema | undefined;
+        const hobbiesItemTypes = hobbiesItems?.anyOf?.map((itemEntry) => (itemEntry as JSONSchema).type);
+        expect(hobbiesItemTypes).toContain('string');
+
+        // 'user.profile.addresses' must be an array whose element types include 'object';
+        // the lookup again follows the first type entry of the field
+        const addressesSchema = profileObject?.properties?.['addresses'] as JSONSchema | undefined;
+        const addressesArray = addressesSchema?.anyOf?.[0] as JSONSchema | undefined;
+        const addressesItems = addressesArray?.items as JSONSchema | undefined;
+        const addressItemTypes = addressesItems?.anyOf?.map((itemEntry) => (itemEntry as JSONSchema).type);
+        expect(addressItemTypes).toContain('object');
+
+        // 'orders' itself must be an array
+        const ordersSchema = rootProperties?.['orders'] as JSONSchema | undefined;
+        expect(ordersSchema).toBeDefined();
+        const ordersArray = ordersSchema?.anyOf?.[0] as JSONSchema | undefined;
+        expect(ordersArray?.type).toBe('array');
+
+        // The first element type of 'orders' has an 'items' field that must be an array as well
+        const orderElements = ordersArray?.items as JSONSchema | undefined;
+        const orderObject = orderElements?.anyOf?.[0] as JSONSchema | undefined;
+        const orderItemsSchema = orderObject?.properties?.['items'] as JSONSchema | undefined;
+        const orderItemsArray = orderItemsSchema?.anyOf?.[0] as JSONSchema | undefined;
+        const orderItemsType = orderItemsArray?.type;
+        expect(orderItemsType).toBe('array');
+    });
+
+    it('updates schema correctly when processing multiple documents', () => {
+        const schema = SchemaAnalyzer.fromDocuments(complexDocumentsArray).getSchema();
+        const rootProperties = schema.properties;
+
+        // Every document of the batch must be counted
+        expect(schema['x-documentsInspected']).toBe(complexDocumentsArray.length);
+
+        // The merged schema must expose root fields contributed by different documents
+        expect(rootProperties).toHaveProperty('stringField');
+        expect(rootProperties).toHaveProperty('personalInfo');
+        expect(rootProperties).toHaveProperty('integersArray');
+        expect(rootProperties).toHaveProperty('user');
+
+        // First element type of 'integersArray': the recorded value range must be 1..5
+        const intSchema = rootProperties?.['integersArray'] as JSONSchema | undefined;
+        const intItems = (intSchema?.anyOf?.[0] as JSONSchema | undefined)?.items as JSONSchema | undefined;
+        const intType = intItems?.anyOf?.[0] as JSONSchema | undefined;
+        expect(intType?.['x-minValue']).toBe(1);
+        expect(intType?.['x-maxValue']).toBe(5);
+
+        // 'orders.items.price' must be recorded as decimal128 (first type entry at every level)
+        const ordersSchema = rootProperties?.['orders'] as JSONSchema | undefined;
+        const orderElements = (ordersSchema?.anyOf?.[0] as JSONSchema | undefined)?.items as JSONSchema | undefined;
+        const orderObject = orderElements?.anyOf?.[0] as JSONSchema | undefined;
+        const lineItemsSchema = orderObject?.properties?.['items'] as JSONSchema | undefined;
+        const lineItemsArray = lineItemsSchema?.anyOf?.[0] as JSONSchema | undefined;
+        const lineItemElements = lineItemsArray?.items as JSONSchema | undefined;
+        const lineItemObject = lineItemElements?.anyOf?.[0] as JSONSchema | undefined;
+        const priceSchema = lineItemObject?.properties?.['price'] as JSONSchema | undefined;
+        const priceType = priceSchema?.anyOf?.[0] as JSONSchema | undefined;
+        expect(priceType?.['x-bsonType']).toBe('decimal128');
+    });
+
+    describe('traverses schema', () => {
+        it('with valid paths', () => {
+            // Expected for complexDocument: 4 names at the root, 3 under 'user', 4 under 'user.profile'
+            const schema = SchemaAnalyzer.fromDocument(complexDocument).getSchema();
+
+            const rootLevel = getPropertyNamesAtLevel(schema, []);
+            expect(rootLevel).toHaveLength(4);
+
+            const userLevel = getPropertyNamesAtLevel(schema, ['user']);
+            expect(userLevel).toHaveLength(3);
+
+            const profileLevel = getPropertyNamesAtLevel(schema, ['user', 'profile']);
+            expect(profileLevel).toHaveLength(4);
+        });
+
+        it('with broken paths', () => {
+            const schema = SchemaAnalyzer.fromDocument(complexDocument).getSchema();
+
+            // Sanity check: the root level itself resolves
+            expect(getPropertyNamesAtLevel(schema, [])).toHaveLength(4);
+
+            // An unknown segment must throw at the root ...
+            expect(() => getPropertyNamesAtLevel(schema, ['no-entry'])).toThrow();
+            // ... and below an existing segment
+            expect(() => getPropertyNamesAtLevel(schema, ['user', 'no-entry'])).toThrow();
+        });
+
+        it('with sparse docs and mixed types', () => {
+            const merged = new SchemaAnalyzer();
+            merged.addDocument(complexDocument);
+            merged.addDocument(complexDocumentWithOddTypes);
+            const mergedSchema = merged.getSchema();
+            // toEqual() is order-sensitive, so the expected name lists below also pin the returned order
+            const rootNames = getPropertyNamesAtLevel(mergedSchema, []);
+            expect(rootNames).toHaveLength(4);
+
+            const userNames = getPropertyNamesAtLevel(mergedSchema, ['user']);
+            expect(userNames).toHaveLength(3);
+            expect(userNames).toEqual(['email', 'profile', 'username']);
+
+            const profileNames = getPropertyNamesAtLevel(mergedSchema, ['user', 'profile']);
+            expect(profileNames).toHaveLength(4);
+            expect(profileNames).toEqual(['addresses', 'firstName', 'hobbies', 'lastName']);
+
+            const historyNames = getPropertyNamesAtLevel(mergedSchema, ['history']);
+            expect(historyNames).toHaveLength(6);
+        });
+    });
+
+    describe('SchemaAnalyzer class methods', () => {
+        it('clone() creates an independent deep copy', () => {
+            // embeddedDocumentOnly holds plain JS types only, avoiding structuredClone issues with BSON types
+            const source = SchemaAnalyzer.fromDocument(embeddedDocumentOnly);
+            const copy = source.clone();
+            const keysOf = (a: SchemaAnalyzer): string[] => Object.keys(a.getSchema().properties || {});
+
+            // Document count and root properties are carried over to the clone
+            expect(copy.getDocumentCount()).toBe(1);
+            const sourceKeys = keysOf(source);
+            expect(keysOf(copy)).toEqual(sourceKeys);
+
+            // Only the source receives a second document
+            source.addDocument(arraysWithDifferentDataTypes);
+            expect(source.getDocumentCount()).toBe(2);
+            expect(copy.getDocumentCount()).toBe(1);
+
+            // The new array fields show up on the source ...
+            const sourceKeysAfter = keysOf(source);
+            expect(sourceKeysAfter).toContain('integersArray');
+            expect(sourceKeysAfter).toContain('stringsArray');
+
+            // ... while the clone's schema is unaffected
+            const copyKeysAfter = keysOf(copy);
+            expect(copyKeysAfter).not.toContain('integersArray');
+            expect(copyKeysAfter).not.toContain('stringsArray');
+        });
+
+        it('reset() clears all accumulated state', () => {
+            const populated = SchemaAnalyzer.fromDocument(flatDocument);
+            expect(populated.getDocumentCount()).toBeGreaterThan(0);
+            expect(Object.keys(populated.getSchema().properties || {})).not.toHaveLength(0);
+
+            populated.reset();
+            // After the reset the counter reads 0 and the schema carries neither properties nor a count
+            expect(populated.getDocumentCount()).toBe(0);
+            const emptiedSchema = populated.getSchema();
+            expect(emptiedSchema.properties).toBeUndefined();
+            expect(emptiedSchema['x-documentsInspected']).toBeUndefined();
+        });
+
+        it('fromDocument() creates analyzer with single document', () => {
+            const single = SchemaAnalyzer.fromDocument(flatDocument);
+            expect(single.getDocumentCount()).toBe(1);
+
+            // All top-level keys of the source document must be part of the schema
+            const schemaKeys = Object.keys(single.getSchema().properties || {});
+            expect(schemaKeys).toEqual(expect.arrayContaining(Object.keys(flatDocument)));
+        });
+
+        it('fromDocuments() creates analyzer with multiple documents', () => {
+            const viaFactory = SchemaAnalyzer.fromDocuments(sparseDocumentsArray);
+            expect(viaFactory.getDocumentCount()).toBe(sparseDocumentsArray.length);
+
+            // Reference: an empty analyzer that receives the same batch through addDocuments()
+            const viaBatch = new SchemaAnalyzer();
+            viaBatch.addDocuments(sparseDocumentsArray);
+
+            expect(JSON.stringify(viaFactory.getSchema())).toBe(JSON.stringify(viaBatch.getSchema()));
+        });
+
+        it('addDocuments() is equivalent to multiple addDocument() calls', () => {
+            // One analyzer receives the whole array in a single call ...
+            const allAtOnce = new SchemaAnalyzer();
+            allAtOnce.addDocuments(complexDocumentsArray);
+            // ... the other one is fed document by document
+            const oneByOne = new SchemaAnalyzer();
+            complexDocumentsArray.forEach((doc) => oneByOne.addDocument(doc));
+
+            // Both must agree on the document count and on the serialized schema
+            expect(allAtOnce.getDocumentCount()).toBe(oneByOne.getDocumentCount());
+            expect(JSON.stringify(allAtOnce.getSchema())).toBe(JSON.stringify(oneByOne.getSchema()));
+        });
+    });
+});
diff --git a/packages/schema-analyzer/test/SchemaAnalyzer.versioning.test.ts b/packages/schema-analyzer/test/SchemaAnalyzer.versioning.test.ts
new file mode 100644
index 000000000..38ef144a6
--- /dev/null
+++ b/packages/schema-analyzer/test/SchemaAnalyzer.versioning.test.ts
@@ -0,0 +1,663 @@
+/*---------------------------------------------------------------------------------------------
+ *  Copyright (c) Microsoft Corporation. All rights reserved.
+ *  Licensed under the MIT License. See License.txt in the project root for license information.
+ *--------------------------------------------------------------------------------------------*/
+
+import { ObjectId, type Document, type WithId } from 'mongodb';
+import { type JSONSchema } from '../src/JSONSchema';
+import { SchemaAnalyzer } from '../src/SchemaAnalyzer';
+
+// ------------------------------------------------------------------
+// Test fixtures: makeDoc() builds a document with a fresh ObjectId `_id` plus the given fields
+// ------------------------------------------------------------------
+
+function makeDoc(fields: Record<string, unknown> = {}): WithId<Document> {
+    return { _id: new ObjectId(), ...fields }; // spread last: a supplied `_id` replaces the generated one
+}
+
+// ------------------------------------------------------------------
+// Version counter
+// ------------------------------------------------------------------
+describe('SchemaAnalyzer version counter', () => {
+    it('starts at 0 for a new analyzer', () => {
+        // Nothing has been added or reset yet, so the counter must still read 0
+        expect(new SchemaAnalyzer().version).toBe(0);
+    });
+
+    it('increments on addDocument()', () => {
+        const counted = new SchemaAnalyzer();
+        counted.addDocument(makeDoc({ a: 1 }));
+        expect(counted.version).toBe(1);
+        // A second insert advances the counter by one more
+        counted.addDocument(makeDoc({ b: 2 }));
+        expect(counted.version).toBe(2);
+    });
+
+    it('increments only once for addDocuments() (batch)', () => {
+        const batched = new SchemaAnalyzer();
+        const threeDocs = [makeDoc({ a: 1 }), makeDoc({ b: 2 }), makeDoc({ c: 3 })];
+        // Three documents, one call: the version must read 1, not 3
+        batched.addDocuments(threeDocs);
+        expect(batched.version).toBe(1);
+    });
+
+    it('increments on reset()', () => {
+        const seeded = SchemaAnalyzer.fromDocument(makeDoc({ x: 1 }));
+        expect(seeded.version).toBe(1);
+        // reset() must be counted as a change of its own
+        seeded.reset();
+        expect(seeded.version).toBe(2);
+    });
+
+    it('cloned analyzer starts with version 0 (independent from original)', () => {
+        const source = new SchemaAnalyzer();
+        source.addDocument(makeDoc({ a: 1 }));
+        source.addDocument(makeDoc({ b: 2 }));
+        expect(source.version).toBe(2);
+
+        // The clone must not inherit the source's version
+        const copy = source.clone();
+        expect(copy.version).toBe(0);
+        // Changing the clone must move only the clone's counter
+        copy.addDocument(makeDoc({ c: 3 }));
+        expect(copy.version).toBe(1);
+        expect(source.version).toBe(2);
+    });
+
+    it('accumulates across mixed operations', () => {
+        const mixed = new SchemaAnalyzer();
+        // single insert: 0 -> 1
+        mixed.addDocument(makeDoc());
+        expect(mixed.version).toBe(1);
+
+        // batch insert of two documents: 1 -> 2
+        mixed.addDocuments([makeDoc(), makeDoc()]);
+        expect(mixed.version).toBe(2);
+
+        // reset: 2 -> 3
+        mixed.reset();
+        expect(mixed.version).toBe(3);
+
+        // insert after the reset: 3 -> 4 (the version keeps counting, it is not zeroed)
+        mixed.addDocument(makeDoc());
+        expect(mixed.version).toBe(4);
+    });
+
+    it('fromDocument() factory yields version 1', () => {
+        const { version } = SchemaAnalyzer.fromDocument(makeDoc({ a: 1 }));
+        expect(version).toBe(1);
+    });
+
+    it('fromDocuments() factory yields version 1', () => {
+        const { version } = SchemaAnalyzer.fromDocuments([makeDoc(), makeDoc(), makeDoc()]);
+        expect(version).toBe(1);
+    });
+});
+
+// ------------------------------------------------------------------
+// Version-based caching (getKnownFields cache)
+// ------------------------------------------------------------------
+describe('SchemaAnalyzer getKnownFields cache', () => {
+    it('is populated on first call to getKnownFields()', () => {
+        const analyzer = SchemaAnalyzer.fromDocument(makeDoc({ name: 'Alice', age: 30 }));
+        const knownFields = analyzer.getKnownFields();
+        expect(knownFields.length).toBeGreaterThan(0);
+
+        // The generated _id plus both supplied fields must be listed
+        const knownPaths = knownFields.map((field) => field.path);
+        for (const expectedPath of ['_id', 'name', 'age']) {
+            expect(knownPaths).toContain(expectedPath);
+        }
+    });
+
+    it('is reused when version has not changed (same reference)', () => {
+        const analyzer = SchemaAnalyzer.fromDocument(makeDoc({ name: 'Alice' }));
+        const firstCall = analyzer.getKnownFields();
+        const secondCall = analyzer.getKnownFields();
+
+        // toBe() checks identity: both calls must hand out the very same array
+        expect(secondCall).toBe(firstCall);
+    });
+
+    it('is invalidated when addDocument() is called', () => {
+        const analyzer = SchemaAnalyzer.fromDocument(makeDoc({ name: 'Alice' }));
+        const cachedFields = analyzer.getKnownFields();
+
+        // The next document introduces 'email', which the previously returned list cannot contain
+        analyzer.addDocument(makeDoc({ name: 'Bob', email: 'bob@test.com' }));
+        const refreshedFields = analyzer.getKnownFields();
+
+        // A different array instance must be handed out, and it must list the new field
+        expect(refreshedFields).not.toBe(cachedFields);
+        expect(refreshedFields.map((field) => field.path)).toContain('email');
+    });
+
+    it('is invalidated when addDocuments() is called', () => {
+        const analyzer = SchemaAnalyzer.fromDocument(makeDoc({ name: 'Alice' }));
+        const cachedFields = analyzer.getKnownFields();
+
+        analyzer.addDocuments([makeDoc({ score: 42 }), makeDoc({ level: 7 })]);
+        const refreshedFields = analyzer.getKnownFields();
+        // A different array must be handed out, listing the fields of both batch documents
+        expect(refreshedFields).not.toBe(cachedFields);
+        const refreshedPaths = refreshedFields.map((field) => field.path);
+        expect(refreshedPaths).toContain('score');
+        expect(refreshedPaths).toContain('level');
+    });
+
+    it('is invalidated when reset() is called', () => {
+        const analyzer = SchemaAnalyzer.fromDocument(makeDoc({ name: 'Alice' }));
+        const cachedFields = analyzer.getKnownFields();
+        expect(cachedFields.length).toBeGreaterThan(0);
+
+        analyzer.reset();
+        const fieldsAfterReset = analyzer.getKnownFields();
+
+        // A different array must be returned, and it must be empty
+        expect(fieldsAfterReset).not.toBe(cachedFields);
+        expect(fieldsAfterReset).toHaveLength(0);
+    });
+
+    it('returns updated results after cache invalidation', () => {
+        const analyzer = new SchemaAnalyzer();
+        // Nothing analyzed yet, hence nothing known
+        expect(analyzer.getKnownFields()).toHaveLength(0);
+
+        // First document introduces 'x' (next to the generated '_id')
+        analyzer.addDocument(makeDoc({ x: 1 }));
+        const afterFirstDoc = analyzer.getKnownFields();
+        expect(afterFirstDoc.map((field) => field.path)).toEqual(expect.arrayContaining(['_id', 'x']));
+
+        // Second document introduces 'y': a new list containing it must be returned
+        analyzer.addDocument(makeDoc({ x: 2, y: 'hello' }));
+        const afterSecondDoc = analyzer.getKnownFields();
+        expect(afterSecondDoc).not.toBe(afterFirstDoc);
+        expect(afterSecondDoc.map((field) => field.path)).toContain('y');
+    });
+
+    it('clone gets its own independent cache', () => {
+        const source = SchemaAnalyzer.fromDocument(makeDoc({ name: 'Alice' }));
+        const sourceFields = source.getKnownFields();
+
+        const copy = source.clone();
+        const copyFields = copy.getKnownFields();
+
+        // Same paths, but not the same array instance
+        expect(copyFields).not.toBe(sourceFields);
+        expect(copyFields.map((field) => field.path)).toEqual(sourceFields.map((field) => field.path));
+
+        // A field added through the clone must stay invisible to the source
+        copy.addDocument(makeDoc({ extra: true }));
+        const copyFieldsAfter = copy.getKnownFields();
+        expect(copyFieldsAfter.map((field) => field.path)).toContain('extra');
+        expect(source.getKnownFields().map((field) => field.path)).not.toContain('extra');
+    });
+});
+
+// ------------------------------------------------------------------
+// Instances and types counting
+// ------------------------------------------------------------------
+describe('SchemaAnalyzer instances and types counting', () => {
+    describe('x-occurrence (field instance counting)', () => {
+        it('counts 1 for a field present in a single document', () => {
+            // One document containing `name`: the field must have been counted exactly once
+            const schema = SchemaAnalyzer.fromDocument(makeDoc({ name: 'Alice' })).getSchema();
+            const nameSchema = schema.properties?.['name'] as JSONSchema;
+            expect(nameSchema['x-occurrence']).toBe(1);
+        });
+
+        it('counts correctly across multiple documents', () => {
+            const people = new SchemaAnalyzer();
+            people.addDocument(makeDoc({ name: 'Alice', age: 30 }));
+            people.addDocument(makeDoc({ name: 'Bob', age: 25 }));
+            people.addDocument(makeDoc({ name: 'Carol' })); // the only document without `age`
+            // `name` is in all three documents, `age` in two of them
+            const rootProperties = people.getSchema().properties;
+            expect((rootProperties?.['name'] as JSONSchema)['x-occurrence']).toBe(3);
+            expect((rootProperties?.['age'] as JSONSchema)['x-occurrence']).toBe(2);
+        });
+
+        it('counts sparse fields correctly (field missing in some documents)', () => {
+            const sparse = new SchemaAnalyzer();
+            sparse.addDocument(makeDoc({ a: 1, b: 2, c: 3 })); // a, b, c
+            sparse.addDocument(makeDoc({ a: 10 })); // a
+            sparse.addDocument(makeDoc({ a: 100, c: 300 })); // a, c
+            // Expected tallies: a -> 3, b -> 1, c -> 2
+            const rootProperties = sparse.getSchema().properties;
+            expect((rootProperties?.['a'] as JSONSchema)['x-occurrence']).toBe(3);
+            expect((rootProperties?.['b'] as JSONSchema)['x-occurrence']).toBe(1);
+            expect((rootProperties?.['c'] as JSONSchema)['x-occurrence']).toBe(2);
+        });
+
+        it('counts occurrences for nested object properties', () => {
+            const analyzer = new SchemaAnalyzer();
+            analyzer.addDocument(makeDoc({ user: { name: 'Alice', age: 30 } }));
+            analyzer.addDocument(makeDoc({ user: { name: 'Bob' } })); // `user.age` is missing here
+
+            // Locate the object-typed entry of `user` and read the tallies of its properties
+            const userSchema = analyzer.getSchema().properties?.['user'] as JSONSchema;
+            const userObject = userSchema.anyOf?.find((alt) => (alt as JSONSchema).type === 'object') as JSONSchema;
+            const nestedProperties = userObject.properties;
+            expect((nestedProperties?.['name'] as JSONSchema)['x-occurrence']).toBe(2);
+            expect((nestedProperties?.['age'] as JSONSchema)['x-occurrence']).toBe(1);
+        });
+    });
+
+    describe('x-typeOccurrence (type counting)', () => {
+        it('counts type occurrences for a single-type field', () => {
+            const analyzer = new SchemaAnalyzer();
+            for (const name of ['Alice', 'Bob', 'Carol']) {
+                analyzer.addDocument(makeDoc({ name }));
+            }
+
+            // `name` is a string in all three documents
+            const nameSchema = analyzer.getSchema().properties?.['name'] as JSONSchema;
+            const stringType = nameSchema.anyOf?.find(
+                (entry) => (entry as JSONSchema)['x-bsonType'] === 'string',
+            ) as JSONSchema;
+
+            expect(stringType['x-typeOccurrence']).toBe(3);
+        });
+
+        it('counts type occurrences for polymorphic fields', () => {
+            const analyzer = new SchemaAnalyzer();
+            analyzer.addDocument(makeDoc({ value: 'hello' }));
+            analyzer.addDocument(makeDoc({ value: 42 }));
+            analyzer.addDocument(makeDoc({ value: 'world' }));
+            analyzer.addDocument(makeDoc({ value: true }));
+
+            const schema = analyzer.getSchema();
+            const valueField = schema.properties?.['value'] as JSONSchema;
+
+            const stringEntry = valueField.anyOf?.find(
+                (e) => (e as JSONSchema)['x-bsonType'] === 'string',
+            ) as JSONSchema;
+            const booleanEntry = valueField.anyOf?.find(
+                (e) => (e as JSONSchema)['x-bsonType'] === 'boolean',
+            ) as JSONSchema;
+
+            // `value` was a string twice, a number once and a boolean once
+            expect(stringEntry['x-typeOccurrence']).toBe(2);
+            expect(booleanEntry['x-typeOccurrence']).toBe(1);
+
+            // Field-level x-occurrence must equal the sum over all type entries (a missing tally adds 0)
+            const totalTypeOccurrence = (valueField.anyOf as JSONSchema[]).reduce(
+                (sum, entry) => sum + ((entry['x-typeOccurrence'] as number | undefined) ?? 0),
+                0,
+            );
+            expect(valueField['x-occurrence']).toBe(totalTypeOccurrence);
+        });
+
+        it('counts array element types across documents', () => {
+            const analyzer = new SchemaAnalyzer();
+            analyzer.addDocument(makeDoc({ tags: ['a', 'b'] })); // string, string
+            analyzer.addDocument(makeDoc({ tags: ['c', 42] })); // string, number
+            analyzer.addDocument(makeDoc({ tags: [true] })); // boolean
+
+            const schema = analyzer.getSchema();
+            const tagsField = schema.properties?.['tags'] as JSONSchema;
+            const arrayType = tagsField.anyOf?.find((t) => (t as JSONSchema)['x-bsonType'] === 'array') as JSONSchema;
+            const elementSchema = arrayType.items as JSONSchema;
+
+            const stringElements = elementSchema.anyOf?.find(
+                (entry) => (entry as JSONSchema)['x-bsonType'] === 'string',
+            ) as JSONSchema;
+            const booleanElements = elementSchema.anyOf?.find(
+                (entry) => (entry as JSONSchema)['x-bsonType'] === 'boolean',
+            ) as JSONSchema;
+
+            // String elements across all three documents: 'a', 'b' and 'c'
+            expect(stringElements['x-typeOccurrence']).toBe(3);
+
+            // The single `true` is the only boolean element
+            expect(booleanElements['x-typeOccurrence']).toBe(1);
+        });
+
+        it('type occurrence count equals field occurrence for a single-type field', () => {
+            const analyzer = new SchemaAnalyzer();
+            for (let step = 0; step < 5; step++) {
+                analyzer.addDocument(makeDoc({ score: step * 10 }));
+            }
+
+            const scoreSchema = analyzer.getSchema().properties?.['score'] as JSONSchema;
+            const scoreTypes = scoreSchema.anyOf as JSONSchema[];
+
+            // With a single observed type, that type's tally and the field's tally must coincide
+            expect(scoreTypes).toHaveLength(1);
+            const [onlyType] = scoreTypes;
+            expect(onlyType['x-typeOccurrence']).toBe(scoreSchema['x-occurrence']);
+        });
+    });
+
+    describe('x-documentsInspected counting', () => {
+        it('tracks document count at root level', () => {
+            const analyzer = new SchemaAnalyzer();
+            analyzer.addDocument(makeDoc({ a: 1 })); // three documents, each with a different field
+            analyzer.addDocument(makeDoc({ b: 2 }));
+            analyzer.addDocument(makeDoc({ c: 3 }));
+
+            expect(analyzer.getSchema()['x-documentsInspected']).toBe(3); // counter stored on the schema root
+            expect(analyzer.getDocumentCount()).toBe(3); // same count through the accessor
+        });
+
+        it('tracks object instances for nested objects', () => {
+            const analyzer = new SchemaAnalyzer();
+            analyzer.addDocument(makeDoc({ info: { x: 1 } }));
+            analyzer.addDocument(makeDoc({ info: { x: 2, y: 3 } }));
+
+            const schema = analyzer.getSchema();
+            const infoField = schema.properties?.['info'] as JSONSchema;
+            const objectEntry = infoField.anyOf?.find(
+                (e) => (e as JSONSchema)['x-bsonType'] === 'object',
+            ) as JSONSchema;
+
+            expect(objectEntry['x-documentsInspected']).toBe(2); // one "info" object per document
+        });
+
+        it('tracks object instances inside arrays accurately', () => {
+            const analyzer = new SchemaAnalyzer();
+            // doc1: array with 2 objects
+            analyzer.addDocument(makeDoc({ items: [{ a: 1 }, { a: 2 }] }));
+            // doc2: array with 1 object
+            analyzer.addDocument(makeDoc({ items: [{ a: 3, b: 4 }] }));
+
+            const schema = analyzer.getSchema();
+            const itemsField = schema.properties?.['items'] as JSONSchema;
+            const arrayEntry = itemsField.anyOf?.find((e) => (e as JSONSchema)['x-bsonType'] === 'array') as JSONSchema;
+            const objectEntry = (arrayEntry.items as JSONSchema).anyOf?.find(
+                (e) => (e as JSONSchema)['x-bsonType'] === 'object',
+            ) as JSONSchema;
+
+            // 3 objects total (2 from doc1, 1 from doc2) — counted per object, not per document
+            expect(objectEntry['x-documentsInspected']).toBe(3);
+            // "a" appears in all 3 objects
+            expect((objectEntry.properties?.['a'] as JSONSchema)['x-occurrence']).toBe(3);
+            // "b" appears in 1 of 3 objects
+            expect((objectEntry.properties?.['b'] as JSONSchema)['x-occurrence']).toBe(1);
+        });
+
+        it('resets to 0 after reset()', () => {
+            const analyzer = SchemaAnalyzer.fromDocuments([makeDoc({ a: 1 }), makeDoc({ b: 2 })]);
+            expect(analyzer.getDocumentCount()).toBe(2); // both seeded documents were counted
+
+            analyzer.reset();
+            expect(analyzer.getDocumentCount()).toBe(0); // reset() clears the document counter
+        });
+    });
+
+    describe('probability correctness (occurrence / documentsInspected)', () => {
+        it('yields 100% for fields present in every document', () => {
+            const analyzer = new SchemaAnalyzer();
+            for (let i = 0; i < 10; i++) {
+                analyzer.addDocument(makeDoc({ name: `user-${i}` })); // "name" is set on all 10 documents
+            }
+
+            const schema = analyzer.getSchema();
+            const occurrence = (schema.properties?.['name'] as JSONSchema)['x-occurrence'] as number;
+            const total = schema['x-documentsInspected'] as number;
+            expect(occurrence / total).toBe(1); // 10/10
+        });
+
+        it('yields correct fraction for sparse fields', () => {
+            const analyzer = new SchemaAnalyzer();
+            // 'a' appears in all 3 docs, 'b' only in the first one
+            analyzer.addDocument(makeDoc({ a: 1, b: 10 }));
+            analyzer.addDocument(makeDoc({ a: 2 }));
+            analyzer.addDocument(makeDoc({ a: 3 }));
+
+            const schema = analyzer.getSchema();
+            const total = schema['x-documentsInspected'] as number;
+            const aOccurrence = (schema.properties?.['a'] as JSONSchema)['x-occurrence'] as number;
+            const bOccurrence = (schema.properties?.['b'] as JSONSchema)['x-occurrence'] as number;
+
+            expect(aOccurrence / total).toBe(1); // 3/3
+            expect(bOccurrence / total).toBeCloseTo(1 / 3); // 1/3
+        });
+
+        it('yields correct fraction for nested objects inside arrays', () => {
+            const analyzer = new SchemaAnalyzer();
+            analyzer.addDocument(
+                makeDoc({
+                    items: [
+                        { name: 'A', price: 10 },
+                        { name: 'B' }, // no price
+                    ],
+                }),
+            );
+            analyzer.addDocument(makeDoc({ items: [{ name: 'C', price: 20 }] })); // 1 priced item
+
+            const schema = analyzer.getSchema();
+            const itemsField = schema.properties?.['items'] as JSONSchema;
+            const arrayEntry = itemsField.anyOf?.find((e) => (e as JSONSchema)['x-bsonType'] === 'array') as JSONSchema;
+            const objectEntry = (arrayEntry.items as JSONSchema).anyOf?.find(
+                (e) => (e as JSONSchema)['x-bsonType'] === 'object',
+            ) as JSONSchema;
+
+            const denominator = objectEntry['x-documentsInspected'] as number; // array item objects, not documents
+            const nameOccurrence = (objectEntry.properties?.['name'] as JSONSchema)['x-occurrence'] as number;
+            const priceOccurrence = (objectEntry.properties?.['price'] as JSONSchema)['x-occurrence'] as number;
+
+            expect(denominator).toBe(3); // 3 objects total
+            expect(nameOccurrence / denominator).toBe(1); // 3/3
+            expect(priceOccurrence / denominator).toBeCloseTo(2 / 3); // 2/3
+        });
+    });
+
+    describe('array and nested array counting', () => {
+        it('counts x-typeOccurrence for the array type entry across documents', () => {
+            const analyzer = new SchemaAnalyzer();
+            analyzer.addDocument(makeDoc({ tags: ['a'] })); // array, length 1
+            analyzer.addDocument(makeDoc({ tags: ['b', 'c'] })); // array, length 2
+            analyzer.addDocument(makeDoc({ tags: 42 })); // not an array
+
+            const schema = analyzer.getSchema();
+            const tagsField = schema.properties?.['tags'] as JSONSchema;
+
+            // Field seen 3 times total
+            expect(tagsField['x-occurrence']).toBe(3);
+
+            const arrayEntry = tagsField.anyOf?.find((e) => (e as JSONSchema)['x-bsonType'] === 'array') as JSONSchema;
+
+            // Array type seen 2 out of 3 times
+            expect(arrayEntry['x-typeOccurrence']).toBe(2);
+
+            // x-minItems / x-maxItems tracked across the two array instances only
+            expect(arrayEntry['x-minItems']).toBe(1);
+            expect(arrayEntry['x-maxItems']).toBe(2);
+        });
+
+        it('counts x-minItems / x-maxItems for arrays across documents', () => {
+            const analyzer = new SchemaAnalyzer();
+            analyzer.addDocument(makeDoc({ nums: [1, 2, 3] })); // length 3
+            analyzer.addDocument(makeDoc({ nums: [10] })); // length 1
+            analyzer.addDocument(makeDoc({ nums: [4, 5, 6, 7, 8] })); // length 5
+
+            const schema = analyzer.getSchema();
+            const numsField = schema.properties?.['nums'] as JSONSchema;
+            const arrayEntry = numsField.anyOf?.find((e) => (e as JSONSchema)['x-bsonType'] === 'array') as JSONSchema;
+
+            expect(arrayEntry['x-minItems']).toBe(1); // shortest array: [10]
+            expect(arrayEntry['x-maxItems']).toBe(5); // longest array: [4, 5, 6, 7, 8]
+            expect(arrayEntry['x-typeOccurrence']).toBe(3); // all 3 documents hold an array
+        });
+
+        it('counts nested arrays (arrays within arrays)', () => {
+            const analyzer = new SchemaAnalyzer();
+            // matrix is an array of arrays of numbers
+            analyzer.addDocument(
+                makeDoc({
+                    matrix: [
+                        [1, 2],
+                        [3, 4, 5],
+                    ],
+                }),
+            );
+            analyzer.addDocument(makeDoc({ matrix: [[10]] })); // one inner array of length 1
+
+            const schema = analyzer.getSchema();
+            const matrixField = schema.properties?.['matrix'] as JSONSchema;
+            const outerArrayEntry = matrixField.anyOf?.find(
+                (e) => (e as JSONSchema)['x-bsonType'] === 'array',
+            ) as JSONSchema;
+
+            // Outer array seen in 2 documents
+            expect(outerArrayEntry['x-typeOccurrence']).toBe(2);
+            // doc1 has 2 inner arrays, doc2 has 1
+            expect(outerArrayEntry['x-minItems']).toBe(1);
+            expect(outerArrayEntry['x-maxItems']).toBe(2);
+
+            // Inner arrays: the outer array's items schema should contain an 'array' entry
+            const innerArrayEntry = (outerArrayEntry.items as JSONSchema).anyOf?.find(
+                (e) => (e as JSONSchema)['x-bsonType'] === 'array',
+            ) as JSONSchema;
+            expect(innerArrayEntry).toBeDefined();
+            // 3 inner arrays total: [1,2], [3,4,5], [10]
+            expect(innerArrayEntry['x-typeOccurrence']).toBe(3);
+            // inner array lengths: 2, 3, 1
+            expect(innerArrayEntry['x-minItems']).toBe(1);
+            expect(innerArrayEntry['x-maxItems']).toBe(3);
+
+            // Elements inside inner arrays are numbers (matched on JSON Schema `type`, not on x-bsonType)
+            const numberEntry = (innerArrayEntry.items as JSONSchema).anyOf?.find(
+                (e) => (e as JSONSchema).type === 'number',
+            ) as JSONSchema;
+            expect(numberEntry).toBeDefined();
+            // 6 numbers total: 1,2,3,4,5,10
+            expect(numberEntry['x-typeOccurrence']).toBe(6);
+        });
+
+        it('counts objects within arrays within objects (deep nesting)', () => {
+            const analyzer = new SchemaAnalyzer();
+            analyzer.addDocument(
+                makeDoc({
+                    company: {
+                        departments: [
+                            { name: 'Eng', employees: [{ role: 'Dev' }, { role: 'QA', level: 3 }] },
+                            { name: 'Sales' },
+                        ],
+                    },
+                }),
+            );
+            analyzer.addDocument(
+                makeDoc({
+                    company: {
+                        departments: [{ name: 'HR', employees: [{ role: 'Recruiter' }] }],
+                    },
+                }),
+            );
+
+            const schema = analyzer.getSchema();
+
+            // company is an object — one instance per document
+            const companyField = schema.properties?.['company'] as JSONSchema;
+            const companyObj = companyField.anyOf?.find(
+                (e) => (e as JSONSchema)['x-bsonType'] === 'object',
+            ) as JSONSchema;
+            expect(companyObj['x-documentsInspected']).toBe(2);
+
+            // departments is an array inside company — one array per company object
+            const deptField = companyObj.properties?.['departments'] as JSONSchema;
+            const deptArrayEntry = deptField.anyOf?.find(
+                (e) => (e as JSONSchema)['x-bsonType'] === 'array',
+            ) as JSONSchema;
+            expect(deptArrayEntry['x-typeOccurrence']).toBe(2);
+
+            // department objects: 2 from doc1 + 1 from doc2 = 3
+            const deptObjEntry = (deptArrayEntry.items as JSONSchema).anyOf?.find(
+                (e) => (e as JSONSchema)['x-bsonType'] === 'object',
+            ) as JSONSchema;
+            expect(deptObjEntry['x-documentsInspected']).toBe(3);
+            expect(deptObjEntry['x-typeOccurrence']).toBe(3);
+
+            // "name" in all 3 department objects, "employees" in 2 of 3
+            expect((deptObjEntry.properties?.['name'] as JSONSchema)['x-occurrence']).toBe(3);
+            expect((deptObjEntry.properties?.['employees'] as JSONSchema)['x-occurrence']).toBe(2);
+
+            // employees is an array inside department objects (present on Eng and HR, absent on Sales)
+            const empField = deptObjEntry.properties?.['employees'] as JSONSchema;
+            const empArrayEntry = empField.anyOf?.find(
+                (e) => (e as JSONSchema)['x-bsonType'] === 'array',
+            ) as JSONSchema;
+            expect(empArrayEntry['x-typeOccurrence']).toBe(2);
+
+            // employee objects: 2 from first dept + 1 from HR = 3
+            const empObjEntry = (empArrayEntry.items as JSONSchema).anyOf?.find(
+                (e) => (e as JSONSchema)['x-bsonType'] === 'object',
+            ) as JSONSchema;
+            expect(empObjEntry['x-documentsInspected']).toBe(3);
+
+            // "role" in all 3 employee objects, "level" in 1
+            expect((empObjEntry.properties?.['role'] as JSONSchema)['x-occurrence']).toBe(3);
+            expect((empObjEntry.properties?.['level'] as JSONSchema)['x-occurrence']).toBe(1);
+        });
+
+        it('tracks mixed types inside arrays (objects + primitives)', () => {
+            const analyzer = new SchemaAnalyzer();
+            analyzer.addDocument(makeDoc({ data: ['hello', { key: 'val' }, 42] }));
+            analyzer.addDocument(makeDoc({ data: [{ key: 'v2', extra: true }] }));
+
+            const schema = analyzer.getSchema();
+            const dataField = schema.properties?.['data'] as JSONSchema;
+            const arrayEntry = dataField.anyOf?.find((e) => (e as JSONSchema)['x-bsonType'] === 'array') as JSONSchema;
+            const itemsSchema = arrayEntry.items as JSONSchema;
+
+            // element counts — string: 1, object: 2, number: 1 (the number entry is not asserted below)
+            const stringEntry = itemsSchema.anyOf?.find(
+                (e) => (e as JSONSchema)['x-bsonType'] === 'string',
+            ) as JSONSchema;
+            const objectEntry = itemsSchema.anyOf?.find(
+                (e) => (e as JSONSchema)['x-bsonType'] === 'object',
+            ) as JSONSchema;
+
+            expect(stringEntry['x-typeOccurrence']).toBe(1);
+            expect(objectEntry['x-typeOccurrence']).toBe(2);
+            expect(objectEntry['x-documentsInspected']).toBe(2); // one object element in each document's array
+
+            // "key" in both objects, "extra" in 1
+            expect((objectEntry.properties?.['key'] as JSONSchema)['x-occurrence']).toBe(2);
+            expect((objectEntry.properties?.['extra'] as JSONSchema)['x-occurrence']).toBe(1);
+        });
+    });
+
+    describe('addDocuments vs sequential addDocument equivalence', () => {
+        it('produces identical occurrence counts', () => {
+            const docs = [makeDoc({ a: 1, b: 'x' }), makeDoc({ a: 2 }), makeDoc({ a: 3, c: true })];
+
+            const batch = new SchemaAnalyzer();
+            batch.addDocuments(docs);
+            const sequential = new SchemaAnalyzer();
+            for (const doc of docs) {
+                sequential.addDocument(doc);
+            }
+
+            const batchSchema = batch.getSchema();
+            const seqSchema = sequential.getSchema();
+
+            // Root counts match
+            expect(batchSchema['x-documentsInspected']).toBe(seqSchema['x-documentsInspected']);
+
+            // Field sets match (a field seen only sequentially would otherwise go unchecked), then per-field counts
+            expect(Object.keys(seqSchema.properties ?? {})).toEqual(Object.keys(batchSchema.properties ?? {}));
+            for (const key of Object.keys(batchSchema.properties ?? {})) {
+                const batchField = batchSchema.properties?.[key] as JSONSchema;
+                const seqField = seqSchema.properties?.[key] as JSONSchema;
+                expect(batchField['x-occurrence']).toBe(seqField['x-occurrence']);
+            }
+        });
+
+        it('produces identical type occurrence counts', () => {
+            const docs = [makeDoc({ value: 'hello' }), makeDoc({ value: 42 }), makeDoc({ value: 'world' })];
+
+            const batch = new SchemaAnalyzer();
+            batch.addDocuments(docs);
+
+            const sequential = new SchemaAnalyzer();
+            for (const doc of docs) {
+                sequential.addDocument(doc);
+            }
+
+            // Stringify the schemas to compare their full type entry structures (key order included)
+            expect(JSON.stringify(batch.getSchema())).toBe(JSON.stringify(sequential.getSchema()));
+        });
+    });
+});
diff --git a/src/utils/json/mongo/mongoTestDocuments.ts b/packages/schema-analyzer/test/mongoTestDocuments.ts
similarity index 100%
rename from src/utils/json/mongo/mongoTestDocuments.ts
rename to packages/schema-analyzer/test/mongoTestDocuments.ts
diff --git a/packages/schema-analyzer/tsconfig.json b/packages/schema-analyzer/tsconfig.json
new file mode 100644
index 000000000..8688f97ff
--- /dev/null
+++ b/packages/schema-analyzer/tsconfig.json
@@ -0,0 +1,20 @@
+{
+  "compilerOptions": {
+    "composite": true,
+    "declaration": true,
+    "declarationMap": true,
+    "module": "commonjs",
+    "target": "ES2023",
+    "lib": ["ES2023"],
+    "rootDir": "./src",
+    "outDir": "./dist",
+    "strict": true,
+    "esModuleInterop": true,
+    "allowSyntheticDefaultImports": true,
+    "skipLibCheck": true,
+    "resolveJsonModule": true,
+    "sourceMap": true
+  },
+  "include": ["src/**/*"],
+  "exclude": ["node_modules", "dist"]
+}
diff --git a/src/documentdb/ClusterSession.ts b/src/documentdb/ClusterSession.ts
index da81218fe..da3b5107c 100644
--- a/src/documentdb/ClusterSession.ts
+++ b/src/documentdb/ClusterSession.ts
@@ -3,11 +3,17 @@
  *  Licensed under the MIT License. See License.txt in the project root for license information.
  *--------------------------------------------------------------------------------------------*/
 
+import { ParseMode, parse as parseShellBSON } from '@mongodb-js/shell-bson-parser';
+import {
+    SchemaAnalyzer,
+    getPropertyNamesAtLevel,
+    type FieldEntry,
+    type JSONSchema,
+} from '@vscode-documentdb/schema-analyzer';
 import * as l10n from '@vscode/l10n';
 import { EJSON } from 'bson';
 import { ObjectId, type Document, type Filter, type WithId } from 'mongodb';
-import { type JSONSchema } from '../utils/json/JSONSchema';
-import { getPropertyNamesAtLevel, updateSchemaWithDocument } from '../utils/json/mongo/SchemaAnalyzer';
+import { ext } from '../extensionVariables';
 import { getDataAtPath } from '../utils/slickgrid/mongo/toSlickGridTable';
 import { toSlickGridTree, type TreeData } from '../utils/slickgrid/mongo/toSlickGridTree';
 import { ClustersClient, type FindQueryParams } from './ClustersClient';
@@ -78,7 +84,7 @@ export class ClusterSession {
      * Updates progressively as users navigate through different pages.
-     * Reset when the query or page size changes.
+     * Reset when the page size changes; intentionally preserved when the query changes.
      */
-    private _accumulatedJsonSchema: JSONSchema = {};
+    private _schemaAnalyzer: SchemaAnalyzer = new SchemaAnalyzer();
 
     /**
      * Tracks the highest page number that has been accumulated into the schema.
@@ -161,8 +167,17 @@ export class ClusterSession {
             }
         }
 
-        // The user's query has changed, invalidate all caches
-        this._accumulatedJsonSchema = {};
+        // The user's query has changed: invalidate the per-query state below.
+        //
+        // NOTE: We intentionally do NOT reset the SchemaAnalyzer here.
+        // When a new query returns 0 results, preserving field knowledge from
+        // previous queries is more valuable for autocompletion than having an
+        // empty field list. Across query changes the SchemaAnalyzer accumulates
+        // field data monotonically — new fields are added, existing field type
+        // statistics are enriched with each query (only a page-size change resets
+        // it). This means type statistics represent aggregated observations across
+        // queries, not a single query snapshot. Consumers should treat type frequency
+        // data as approximate/relative (e.g., "mostly String") rather than absolute percentages.
         this._highestPageAccumulated = 0;
         this._currentPageSize = null;
         this._currentRawDocuments = [];
@@ -185,7 +200,8 @@ export class ClusterSession {
     private resetAccumulationIfPageSizeChanged(newPageSize: number): void {
         if (this._currentPageSize !== null && this._currentPageSize !== newPageSize) {
             // Page size changed, reset accumulation tracking
-            this._accumulatedJsonSchema = {};
+            this._schemaAnalyzer.reset();
+            ext.outputChannel.trace('[SchemaAnalyzer] Reset — page size changed');
             this._highestPageAccumulated = 0;
         }
         this._currentPageSize = newPageSize;
@@ -298,8 +314,12 @@ export class ClusterSession {
         // Since navigation is sequential and starts at page 1, we only need to track
         // the highest page number accumulated
         if (pageNumber > this._highestPageAccumulated) {
-            this._currentRawDocuments.map((doc) => updateSchemaWithDocument(this._accumulatedJsonSchema, doc));
+            this._schemaAnalyzer.addDocuments(this._currentRawDocuments);
             this._highestPageAccumulated = pageNumber;
+
+            ext.outputChannel.trace(
+                `[SchemaAnalyzer] Analyzed ${String(this._schemaAnalyzer.getDocumentCount())} documents, ${String(this._schemaAnalyzer.getKnownFields().length)} known fields`,
+            );
         }
 
         return documents.length;
@@ -355,7 +375,7 @@ export class ClusterSession {
     public getCurrentPageAsTable(path: string[]): TableData {
         const responsePack: TableData = {
             path: path,
-            headers: getPropertyNamesAtLevel(this._accumulatedJsonSchema, path),
+            headers: getPropertyNamesAtLevel(this._schemaAnalyzer.getSchema(), path),
             data: getDataAtPath(this._currentRawDocuments, path),
         };
 
@@ -363,7 +383,15 @@ export class ClusterSession {
     }
 
     public getCurrentSchema(): JSONSchema {
-        return this._accumulatedJsonSchema;
+        return this._schemaAnalyzer.getSchema();
+    }
+
+    /**
+     * Returns the cached list of known fields from the accumulated schema.
+     * Uses SchemaAnalyzer's version-based caching — only recomputed when the schema changes.
+     */
+    public getKnownFields(): FieldEntry[] {
+        return this._schemaAnalyzer.getKnownFields();
     }
 
     // ============================================================================
@@ -521,7 +549,7 @@ export class ClusterSession {
      * @remarks
      * This method uses the same BSON parsing logic as ClustersClient.runFindQuery():
      * - filter is parsed with toFilterQueryObj() which handles UUID(), Date(), MinKey(), MaxKey() constructors
-     * - projection and sort are parsed with EJSON.parse()
+     * - projection and sort are parsed with parseShellBSON() in Loose mode
      *
      * Use this method when you need the actual MongoDB Document objects for query execution.
      * Use getCurrentFindQueryParams() when you only need the string representations.
@@ -536,7 +564,9 @@ export class ClusterSession {
         let projectionObj: Document | undefined;
         if (stringParams.project && stringParams.project.trim() !== '{}') {
             try {
-                projectionObj = EJSON.parse(stringParams.project) as Document;
+                projectionObj = parseShellBSON(stringParams.project, {
+                    mode: ParseMode.Loose,
+                }) as Document;
             } catch (error) {
                 throw new Error(
                     l10n.t('Invalid projection syntax: {0}', error instanceof Error ? error.message : String(error)),
@@ -548,7 +578,9 @@ export class ClusterSession {
         let sortObj: Document | undefined;
         if (stringParams.sort && stringParams.sort.trim() !== '{}') {
             try {
-                sortObj = EJSON.parse(stringParams.sort) as Document;
+                sortObj = parseShellBSON(stringParams.sort, {
+                    mode: ParseMode.Loose,
+                }) as Document;
             } catch (error) {
                 throw new Error(
                     l10n.t('Invalid sort syntax: {0}', error instanceof Error ? error.message : String(error)),
diff --git a/src/documentdb/ClustersClient.ts b/src/documentdb/ClustersClient.ts
index bc28cff61..2adb398f6 100644
--- a/src/documentdb/ClustersClient.ts
+++ b/src/documentdb/ClustersClient.ts
@@ -10,6 +10,7 @@
  */
 
 import { appendExtensionUserAgent, callWithTelemetryAndErrorHandling, parseError } from '@microsoft/vscode-azext-utils';
+import { ParseMode, parse as parseShellBSON } from '@mongodb-js/shell-bson-parser';
 import * as l10n from '@vscode/l10n';
 import { EJSON } from 'bson';
 import {
@@ -513,13 +514,15 @@ export class ClustersClient {
         // Parse and add projection if provided
         if (queryParams.project && queryParams.project.trim() !== '{}') {
             try {
-                options.projection = EJSON.parse(queryParams.project) as Document;
+                options.projection = parseShellBSON(queryParams.project, {
+                    mode: ParseMode.Loose,
+                }) as Document;
             } catch (error) {
                 const cause = error instanceof Error ? error : new Error(String(error));
                 throw new QueryError(
                     'INVALID_PROJECTION',
                     l10n.t(
-                        'Invalid projection syntax: {0}. Please use valid JSON, for example: { "fieldName": 1 }',
+                        'Invalid projection syntax: {0}. Please use valid JSON or a DocumentDB API expression, for example: { fieldName: 1 }',
                         cause.message,
                     ),
                     cause,
@@ -530,13 +533,15 @@ export class ClustersClient {
         // Parse and add sort if provided
         if (queryParams.sort && queryParams.sort.trim() !== '{}') {
             try {
-                options.sort = EJSON.parse(queryParams.sort) as Document;
+                options.sort = parseShellBSON(queryParams.sort, {
+                    mode: ParseMode.Loose,
+                }) as Document;
             } catch (error) {
                 const cause = error instanceof Error ? error : new Error(String(error));
                 throw new QueryError(
                     'INVALID_SORT',
                     l10n.t(
-                        'Invalid sort syntax: {0}. Please use valid JSON, for example: { "fieldName": 1 }',
+                        'Invalid sort syntax: {0}. Please use valid JSON or a DocumentDB API expression, for example: { fieldName: 1 }',
                         cause.message,
                     ),
                     cause,
@@ -662,13 +667,15 @@ export class ClustersClient {
         // Parse and add projection if provided
         if (queryParams.project && queryParams.project.trim() !== '{}') {
             try {
-                options.projection = EJSON.parse(queryParams.project) as Document;
+                options.projection = parseShellBSON(queryParams.project, {
+                    mode: ParseMode.Loose,
+                }) as Document;
             } catch (error) {
                 const cause = error instanceof Error ? error : new Error(String(error));
                 throw new QueryError(
                     'INVALID_PROJECTION',
                     l10n.t(
-                        'Invalid projection syntax: {0}. Please use valid JSON, for example: { "fieldName": 1 }',
+                        'Invalid projection syntax: {0}. Please use valid JSON or a DocumentDB API expression, for example: { fieldName: 1 }',
                         cause.message,
                     ),
                     cause,
@@ -679,13 +686,15 @@ export class ClustersClient {
         // Parse and add sort if provided
         if (queryParams.sort && queryParams.sort.trim() !== '{}') {
             try {
-                options.sort = EJSON.parse(queryParams.sort) as Document;
+                options.sort = parseShellBSON(queryParams.sort, {
+                    mode: ParseMode.Loose,
+                }) as Document;
             } catch (error) {
                 const cause = error instanceof Error ? error : new Error(String(error));
                 throw new QueryError(
                     'INVALID_SORT',
                     l10n.t(
-                        'Invalid sort syntax: {0}. Please use valid JSON, for example: { "fieldName": 1 }',
+                        'Invalid sort syntax: {0}. Please use valid JSON or a DocumentDB API expression, for example: { fieldName: 1 }',
                         cause.message,
                     ),
                     cause,
diff --git a/src/documentdb/utils/toFilterQuery.test.ts b/src/documentdb/utils/toFilterQuery.test.ts
index ca8ff0352..a19caa7ef 100644
--- a/src/documentdb/utils/toFilterQuery.test.ts
+++ b/src/documentdb/utils/toFilterQuery.test.ts
@@ -3,7 +3,8 @@
  *  Licensed under the MIT License. See License.txt in the project root for license information.
  *--------------------------------------------------------------------------------------------*/
 
-import { MaxKey, MinKey, UUID } from 'mongodb';
+import { Binary, Decimal128, Int32, Long, ObjectId, Timestamp } from 'bson';
+import { MaxKey, MinKey } from 'mongodb';
 import { QueryError } from '../errors/QueryError';
 import { toFilterQueryObj } from './toFilterQuery';
 
@@ -29,175 +30,164 @@ jest.mock('../../extensionVariables', () => ({
     },
 }));
 
-// Basic query examples
-const basicQueries = [
-    { input: '{ }', expected: {} },
-    { input: '{ "name": "John" }', expected: { name: 'John' } },
-    { input: '{ "name": "John", "age": { "$gt": 30 } }', expected: { name: 'John', age: { $gt: 30 } } },
-];
-
-// BSON function examples with different variations
-const bsonFunctionTestCases = [
-    // UUID cases
-    {
-        type: 'UUID',
-        input: '{ "id": UUID("123e4567-e89b-12d3-a456-426614174000") }',
-        property: 'id',
-        expectedClass: UUID,
-        expectedValue: '123e4567-e89b-12d3-a456-426614174000',
-    },
-    {
-        type: 'UUID with new',
-        input: '{ "userId": new UUID("550e8400-e29b-41d4-a716-446655440000") }',
-        property: 'userId',
-        expectedClass: UUID,
-        expectedValue: '550e8400-e29b-41d4-a716-446655440000',
-    },
-    {
-        type: 'UUID with single quotes',
-        input: '{ "id": UUID(\'123e4567-e89b-12d3-a456-426614174000\') }',
-        property: 'id',
-        expectedClass: UUID,
-        expectedValue: '123e4567-e89b-12d3-a456-426614174000',
-    },
-    // MinKey cases
-    {
-        type: 'MinKey',
-        input: '{ "start": MinKey() }',
-        property: 'start',
-        expectedClass: MinKey,
-    },
-    {
-        type: 'MinKey with new',
-        input: '{ "min": new MinKey() }',
-        property: 'min',
-        expectedClass: MinKey,
-    },
-    // MaxKey cases
-    {
-        type: 'MaxKey',
-        input: '{ "end": MaxKey() }',
-        property: 'end',
-        expectedClass: MaxKey,
-    },
-    {
-        type: 'MaxKey with new',
-        input: '{ "max": new MaxKey() }',
-        property: 'max',
-        expectedClass: MaxKey,
-    },
-    // Date cases
-    {
-        type: 'Date',
-        input: '{ "created": new Date("2023-01-01") }',
-        property: 'created',
-        expectedClass: Date,
-        expectedValue: '2023-01-01T00:00:00.000Z',
-    },
-    {
-        type: 'Date without new',
-        input: '{ "updated": Date("2023-12-31T23:59:59.999Z") }',
-        property: 'updated',
-        expectedClass: Date,
-        expectedValue: '2023-12-31T23:59:59.999Z',
-    },
-];
+describe('toFilterQuery', () => {
+    describe('basic queries', () => {
+        test('empty string returns empty object', () => {
+            expect(toFilterQueryObj('')).toEqual({});
+        });
 
-// Examples of mixed BSON types
-const mixedQuery =
-    '{ "id": UUID("123e4567-e89b-12d3-a456-426614174000"), "start": MinKey(), "end": MaxKey(), "created": new Date("2023-01-01") }';
+        test('whitespace-only returns empty object', () => {
+            expect(toFilterQueryObj('   ')).toEqual({});
+        });
 
-// Complex nested query
-const complexQuery =
-    '{ "range": { "start": MinKey(), "end": MaxKey() }, "timestamp": new Date("2023-01-01"), "ids": [UUID("123e4567-e89b-12d3-a456-426614174000")] }';
+        test('empty object returns empty object', () => {
+            expect(toFilterQueryObj('{ }')).toEqual({});
+        });
 
-// String that contains BSON function syntax but should be treated as plain text
-const textWithFunctionSyntax = '{ "userName": "A user with UUID()name and Date() format", "status": "active" }';
+        test('simple string filter', () => {
+            expect(toFilterQueryObj('{ "name": "John" }')).toEqual({ name: 'John' });
+        });
 
-// Error test cases
-const errorTestCases = [
-    { description: 'invalid JSON', input: '{ invalid json }' },
-    { description: 'invalid UUID', input: '{ "id": UUID("invalid-uuid") }' },
-    { description: 'invalid Date', input: '{ "date": new Date("invalid-date") }' },
-    { description: 'missing parameter', input: '{ "key": UUID() }' },
-];
+        test('filter with query operator', () => {
+            expect(toFilterQueryObj('{ "age": { "$gt": 30 } }')).toEqual({ age: { $gt: 30 } });
+        });
 
-describe('toFilterQuery', () => {
-    it('converts basic query strings to objects', () => {
-        basicQueries.forEach((testCase) => {
-            expect(toFilterQueryObj(testCase.input)).toEqual(testCase.expected);
+        test('combined filter', () => {
+            expect(toFilterQueryObj('{ "name": "John", "age": { "$gt": 30 } }')).toEqual({
+                name: 'John',
+                age: { $gt: 30 },
+            });
         });
     });
 
-    describe('BSON function support', () => {
-        test.each(bsonFunctionTestCases)('converts $type', ({ input, property, expectedClass, expectedValue }) => {
-            const result = toFilterQueryObj(input);
-
-            expect(result).toHaveProperty(property);
-            expect(result[property]).toBeInstanceOf(expectedClass);
-
-            if (expectedValue) {
-                if (result[property] instanceof UUID) {
-                    // eslint-disable-next-line jest/no-conditional-expect
-                    expect(result[property].toString()).toBe(expectedValue);
-                } else if (result[property] instanceof Date) {
-                    // eslint-disable-next-line jest/no-conditional-expect
-                    expect(result[property].toISOString()).toBe(expectedValue);
-                }
-            }
+    describe('relaxed syntax (new with shell-bson-parser)', () => {
+        test('unquoted keys', () => {
+            expect(toFilterQueryObj('{ count: 42 }')).toEqual({ count: 42 });
         });
-    });
 
-    it('handles mixed BSON types in the same query', () => {
-        const result = toFilterQueryObj(mixedQuery);
+        test('single-quoted strings', () => {
+            expect(toFilterQueryObj("{ name: 'Alice' }")).toEqual({ name: 'Alice' });
+        });
+
+        test('Math.min expression', () => {
+            const result = toFilterQueryObj('{ rating: Math.min(1.7, 2) }');
+            expect(result).toEqual({ rating: 1.7 });
+        });
 
-        expect(result.id).toBeInstanceOf(UUID);
-        expect(result.start).toBeInstanceOf(MinKey);
-        expect(result.end).toBeInstanceOf(MaxKey);
-        expect(result.created).toBeInstanceOf(Date);
+        test('unquoted keys with nested operators', () => {
+            expect(toFilterQueryObj('{ age: { $gt: 25 } }')).toEqual({ age: { $gt: 25 } });
+        });
 
-        expect((result.id as UUID).toString()).toBe('123e4567-e89b-12d3-a456-426614174000');
-        expect((result.created as Date).toISOString()).toBe('2023-01-01T00:00:00.000Z');
+        test('mixed quoted and unquoted keys', () => {
+            expect(toFilterQueryObj('{ name: "Alice", "age": 30 }')).toEqual({ name: 'Alice', age: 30 });
+        });
     });
 
-    it('handles complex nested queries with multiple BSON types', () => {
-        const result = toFilterQueryObj(complexQuery);
+    describe('BSON constructor support', () => {
+        test('UUID constructor', () => {
+            const result = toFilterQueryObj('{ id: UUID("123e4567-e89b-12d3-a456-426614174000") }');
+            expect(result).toHaveProperty('id');
+            // shell-bson-parser returns Binary subtype 4 for UUID
+            expect(result.id).toBeInstanceOf(Binary);
+            expect((result.id as Binary).sub_type).toBe(Binary.SUBTYPE_UUID);
+        });
 
-        expect(result.range.start).toBeInstanceOf(MinKey);
-        expect(result.range.end).toBeInstanceOf(MaxKey);
-        expect(result.timestamp).toBeInstanceOf(Date);
-        expect(result.ids[0]).toBeInstanceOf(UUID);
-    });
+        test('UUID with new keyword', () => {
+            const result = toFilterQueryObj('{ userId: new UUID("550e8400-e29b-41d4-a716-446655440000") }');
+            expect(result).toHaveProperty('userId');
+            expect(result.userId).toBeInstanceOf(Binary);
+            expect((result.userId as Binary).sub_type).toBe(Binary.SUBTYPE_UUID);
+        });
+
+        test('MinKey constructor', () => {
+            const result = toFilterQueryObj('{ start: MinKey() }');
+            expect(result).toHaveProperty('start');
+            expect(result.start).toBeInstanceOf(MinKey);
+        });
 
-    it('does not process BSON function calls within string values', () => {
-        const result = toFilterQueryObj(textWithFunctionSyntax);
-        expect(result).toEqual({
-            userName: 'A user with UUID()name and Date() format',
-            status: 'active',
+        test('MaxKey constructor', () => {
+            const result = toFilterQueryObj('{ end: MaxKey() }');
+            expect(result).toHaveProperty('end');
+            expect(result.end).toBeInstanceOf(MaxKey);
+        });
+
+        test('Date constructor', () => {
+            const result = toFilterQueryObj('{ created: new Date("2023-01-01") }');
+            expect(result).toHaveProperty('created');
+            expect(result.created).toBeInstanceOf(Date);
+            expect((result.created as Date).toISOString()).toBe('2023-01-01T00:00:00.000Z');
+        });
+
+        test('ObjectId constructor', () => {
+            const result = toFilterQueryObj('{ _id: ObjectId("507f1f77bcf86cd799439011") }');
+            expect(result).toHaveProperty('_id');
+            expect(result._id).toBeInstanceOf(ObjectId);
+        });
+
+        test('ISODate constructor', () => {
+            const result = toFilterQueryObj('{ ts: ISODate("2024-01-01") }');
+            expect(result).toHaveProperty('ts');
+            expect(result.ts).toBeInstanceOf(Date);
+        });
+
+        test('Decimal128 constructor', () => {
+            const result = toFilterQueryObj('{ val: Decimal128("1.23") }');
+            expect(result).toHaveProperty('val');
+            expect(result.val).toBeInstanceOf(Decimal128);
+        });
+
+        test('NumberInt constructor', () => {
+            const result = toFilterQueryObj('{ n: NumberInt(42) }');
+            expect(result).toHaveProperty('n');
+            expect(result.n).toBeInstanceOf(Int32);
+        });
+
+        test('NumberLong constructor', () => {
+            const result = toFilterQueryObj('{ n: NumberLong(42) }');
+            expect(result).toHaveProperty('n');
+            expect(result.n).toBeInstanceOf(Long);
+        });
+
+        test('Timestamp constructor', () => {
+            const result = toFilterQueryObj('{ ts: Timestamp(1, 1) }');
+            expect(result).toHaveProperty('ts');
+            expect(result.ts).toBeInstanceOf(Timestamp);
         });
     });
 
-    describe('error handling', () => {
-        test.each(errorTestCases)('throws QueryError for $description', ({ input }) => {
-            expect(() => toFilterQueryObj(input)).toThrow(QueryError);
+    describe('mixed BSON types', () => {
+        test('multiple BSON constructors in one query', () => {
+            const result = toFilterQueryObj(
+                '{ id: UUID("123e4567-e89b-12d3-a456-426614174000"), start: MinKey(), end: MaxKey(), created: new Date("2023-01-01") }',
+            );
+
+            expect(result.id).toBeInstanceOf(Binary);
+            expect((result.id as Binary).sub_type).toBe(Binary.SUBTYPE_UUID);
+            expect(result.start).toBeInstanceOf(MinKey);
+            expect(result.end).toBeInstanceOf(MaxKey);
+            expect(result.created).toBeInstanceOf(Date);
         });
 
-        it('throws QueryError with INVALID_FILTER code for invalid JSON', () => {
-            let thrownError: QueryError | undefined;
-            try {
-                toFilterQueryObj('{ invalid json }');
-            } catch (error) {
-                thrownError = error as QueryError;
-            }
-            expect(thrownError).toBeDefined();
-            expect(thrownError?.name).toBe('QueryError');
-            expect(thrownError?.code).toBe('INVALID_FILTER');
+        test('nested BSON constructors', () => {
+            const result = toFilterQueryObj(
+                '{ range: { start: MinKey(), end: MaxKey() }, timestamp: new Date("2023-01-01") }',
+            );
+
+            expect(result.range.start).toBeInstanceOf(MinKey);
+            expect(result.range.end).toBeInstanceOf(MaxKey);
+            expect(result.timestamp).toBeInstanceOf(Date);
+        });
+    });
+
+    describe('error handling', () => {
+        test('throws QueryError for invalid syntax', () => {
+            expect(() => toFilterQueryObj('{ invalid json }')).toThrow(QueryError);
         });
 
-        it('throws QueryError with INVALID_FILTER code for invalid UUID', () => {
+        test('throws QueryError with INVALID_FILTER code', () => {
             let thrownError: QueryError | undefined;
             try {
-                toFilterQueryObj('{ "id": UUID("invalid-uuid") }');
+                toFilterQueryObj('not valid at all');
             } catch (error) {
                 thrownError = error as QueryError;
             }
@@ -206,10 +196,10 @@ describe('toFilterQuery', () => {
             expect(thrownError?.code).toBe('INVALID_FILTER');
         });
 
-        it('includes original error message in QueryError message', () => {
+        test('error message contains "Invalid filter syntax"', () => {
             let thrownError: QueryError | undefined;
             try {
-                toFilterQueryObj('{ invalid json }');
+                toFilterQueryObj('not valid');
             } catch (error) {
                 thrownError = error as QueryError;
             }
@@ -217,16 +207,15 @@ describe('toFilterQuery', () => {
             expect(thrownError?.message).toContain('Invalid filter syntax');
         });
 
-        it('includes helpful JSON example in error message', () => {
+        test('error message contains helpful example', () => {
             let thrownError: QueryError | undefined;
             try {
-                toFilterQueryObj('{ invalid json }');
+                toFilterQueryObj('not valid');
             } catch (error) {
                 thrownError = error as QueryError;
             }
             expect(thrownError).toBeDefined();
-            expect(thrownError?.message).toContain('Please use valid JSON');
-            expect(thrownError?.message).toContain('"name": "value"');
+            expect(thrownError?.message).toContain('name: "value"');
         });
     });
 });
diff --git a/src/documentdb/utils/toFilterQuery.ts b/src/documentdb/utils/toFilterQuery.ts
index 807f18858..1cbb67a15 100644
--- a/src/documentdb/utils/toFilterQuery.ts
+++ b/src/documentdb/utils/toFilterQuery.ts
@@ -3,227 +3,38 @@
  *  Licensed under the MIT License. See License.txt in the project root for license information.
  *--------------------------------------------------------------------------------------------*/
 
-import { EJSON } from 'bson';
-import { UUID, type Document, type Filter } from 'mongodb';
+import { ParseMode, parse as parseShellBSON } from '@mongodb-js/shell-bson-parser';
+import { type Document, type Filter } from 'mongodb';
 import * as vscode from 'vscode';
 import { QueryError } from '../errors/QueryError';
 
+/**
+ * Parses a user-provided filter query string into a DocumentDB filter object.
+ *
+ * Uses `@mongodb-js/shell-bson-parser` in Loose mode, which supports:
+ * - Unquoted keys: `{ name: 1 }`
+ * - Single-quoted strings: `{ name: 'Alice' }`
+ * - BSON constructors: `ObjectId("...")`, `UUID("...")`, `ISODate("...")`, etc.
+ * - JS expressions: `Math.min(1.7, 2)`, `Date.now()`, arithmetic
+ * - MongoDB Extended JSON: `{ "$oid": "..." }`
+ *
+ * Replaces the previous hand-rolled regex-based converter + EJSON.parse pipeline.
+ */
 export function toFilterQueryObj(queryString: string): Filter<Document> {
     try {
-        // Convert pseudo-JavaScript style BSON constructor calls into Extended JSON that EJSON can parse.
-        // Example:  { "id": UUID("...") }  ->  { "id": {"$uuid":"..."} }
-        const extendedJsonQuery = convertToExtendedJson(queryString);
-        // EJSON.parse will turn Extended JSON into native BSON/JS types (UUID, Date, etc.).
-        return EJSON.parse(extendedJsonQuery) as Filter<Document>;
-    } catch (error) {
         if (queryString.trim().length === 0) {
             return {} as Filter<Document>;
         }
-
+        return parseShellBSON(queryString, { mode: ParseMode.Loose }) as Filter<Document>;
+    } catch (error) {
         const cause = error instanceof Error ? error : new Error(String(error));
         throw new QueryError(
             'INVALID_FILTER',
             vscode.l10n.t(
-                'Invalid filter syntax: {0}. Please use valid JSON, for example: { "name": "value" }',
+                'Invalid filter syntax: {0}. Please use valid JSON or a DocumentDB API expression, for example: { name: "value" }',
                 cause.message,
             ),
             cause,
         );
     }
 }
-
-/**
- * Walks the raw query text and rewrites BSON-like constructor calls (UUID, MinKey, MaxKey, Date)
- * into MongoDB Extended JSON fragments while deliberately skipping anything that appears inside
- * string literals (so user text containing e.g. "UUID(" is not transformed).
- *
- * This is intentionally lightweight and avoids a full JS / JSON parser to keep latency low inside
- * the query input UX. Future improvements may replace this with a tokenizer / parser for richer
- * validation and diagnostics.
- */
-function convertToExtendedJson(query: string): string {
-    // Phase 1: Precompute which character positions are inside a (single or double quoted) string.
-    // This lets the replacement pass stay simple and branchless for non‑string regions.
-    const isInString = markStringLiterals(query);
-
-    // Phase 2: Scan + rewrite BSON-like calls only when not inside a string literal.
-    let result = '';
-    let i = 0;
-    while (i < query.length) {
-        if (isInString[i]) {
-            // Inside a user string literal – copy verbatim.
-            result += query[i];
-            i += 1;
-            continue;
-        }
-
-        const remaining = query.slice(i);
-
-        // UUID(...)
-        const uuidMatch = matchUUID(remaining);
-        if (uuidMatch) {
-            const { raw, uuidString } = uuidMatch;
-            try {
-                // Validate early so we fail fast instead of producing malformed Extended JSON.
-                // (Instantiation is enough to validate format.)
-                new UUID(uuidString);
-            } catch {
-                throw new Error(`Invalid UUID: ${uuidString}`);
-            }
-            result += `{"$uuid":"${uuidString}"}`;
-            i += raw.length;
-            continue;
-        }
-
-        // MinKey()
-        const minKeyMatch = matchMinKey(remaining);
-        if (minKeyMatch) {
-            result += '{"$minKey":1}';
-            i += minKeyMatch.raw.length;
-            continue;
-        }
-
-        // MaxKey()
-        const maxKeyMatch = matchMaxKey(remaining);
-        if (maxKeyMatch) {
-            result += '{"$maxKey":1}';
-            i += maxKeyMatch.raw.length;
-            continue;
-        }
-
-        // Date("...")
-        const dateMatch = matchDate(remaining);
-        if (dateMatch) {
-            const { raw, dateString } = dateMatch;
-            const date = new Date(dateString);
-            if (Number.isNaN(date.getTime())) {
-                throw new Error(`Invalid date: ${dateString}`);
-            }
-            result += `{"$date":"${dateString}"}`;
-            i += raw.length;
-            continue;
-        }
-
-        // Fallback: copy one character.
-        result += query[i];
-        i += 1;
-    }
-
-    return result;
-}
-
-/**
- * markStringLiterals
- *
- * Lightweight pass to flag which character indices are inside a quoted string.
- *
- * Supported:
- *   - Single quotes '...'
- *   - Double quotes "..."
- *   - Escapes inside those strings via backslash (\" or \')
- *
- * Not a full JSON validator:
- *   - Does not detect malformed / unclosed strings (those will just mark to end)
- *   - Does not handle template literals (not valid JSON anyway)
- *
- * Rationale:
- *   This is intentionally simple and fast. It exists to prevent accidental rewriting of text
- *   inside user-provided string values (e.g. "note: call UUID('x') later") while we still accept
- *   a relaxed JSON-ish syntax for convenience. If the query authoring experience is expanded
- *   (linting, richer autocomplete, tolerant recovery) we can replace this with a proper tokenizer.
- */
-function markStringLiterals(input: string): boolean[] {
-    const isInString: boolean[] = new Array(input.length).fill(false) as boolean[];
-    let inString = false;
-    let currentQuote: '"' | "'" | null = null;
-    let escapeNext = false;
-
-    for (let i = 0; i < input.length; i++) {
-        const ch = input[i];
-
-        if (escapeNext) {
-            // Current char is escaped; treat it as plain content inside the string.
-            isInString[i] = inString;
-            escapeNext = false;
-            continue;
-        }
-
-        if (inString) {
-            // Inside a string: mark and handle escapes / termination.
-            isInString[i] = true;
-            if (ch === '\\') {
-                escapeNext = true;
-            } else if (ch === currentQuote) {
-                inString = false;
-                currentQuote = null;
-            }
-            continue;
-        }
-
-        // Not currently in a string – only a quote can start one.
-        if (ch === '"' || ch === "'") {
-            inString = true;
-            currentQuote = ch as '"' | "'";
-            isInString[i] = true;
-            continue;
-        }
-
-        // Outside of strings.
-        isInString[i] = false;
-    }
-
-    return isInString;
-}
-
-// --- Regex constants for BSON-like constructor calls ---
-
-/**
- * Matches UUID constructor calls, e.g. UUID("...") or new UUID('...'), case-insensitive.
- * Captures the quoted UUID string.
- * Pattern details:
- *   - Optional "new" prefix with whitespace: (?:new\s+)?
- *   - "uuid" keyword, case-insensitive
- *   - Optional whitespace before and inside parentheses
- *   - Quoted string (single or double quotes) as argument, captured in group 1
- */
-const UUID_REGEX = /^(?:new\s+)?uuid\s*\(\s*["']([^"']+)["']\s*\)/i;
-
-/**
- * Matches MinKey constructor calls, e.g. MinKey() or new MinKey(), case-insensitive.
- * No arguments.
- */
-const MIN_KEY_REGEX = /^(?:new\s+)?minkey\s*\(\s*\)/i;
-
-/**
- * Matches MaxKey constructor calls, e.g. MaxKey() or new MaxKey(), case-insensitive.
- * No arguments.
- */
-const MAX_KEY_REGEX = /^(?:new\s+)?maxkey\s*\(\s*\)/i;
-
-/**
- * Matches Date constructor calls, e.g. Date("...") or new Date('...'), case-insensitive.
- * Captures the quoted date string.
- * Pattern details:
- *   - Optional "new" prefix with whitespace: (?:new\s+)?
- *   - "date" keyword, case-insensitive
- *   - Optional whitespace before and inside parentheses
- *   - Quoted string (single or double quotes) as argument, captured in group 1
- */
-const DATE_REGEX = /^(?:new\s+)?date\s*\(\s*["']([^"']+)["']\s*\)/i;
-
-function matchUUID(src: string): { raw: string; uuidString: string } | undefined {
-    const m = UUID_REGEX.exec(src);
-    return m ? { raw: m[0], uuidString: m[1] } : undefined;
-}
-function matchMinKey(src: string): { raw: string } | undefined {
-    const m = MIN_KEY_REGEX.exec(src);
-    return m ? { raw: m[0] } : undefined;
-}
-function matchMaxKey(src: string): { raw: string } | undefined {
-    const m = MAX_KEY_REGEX.exec(src);
-    return m ? { raw: m[0] } : undefined;
-}
-function matchDate(src: string): { raw: string; dateString: string } | undefined {
-    const m = DATE_REGEX.exec(src);
-    return m ? { raw: m[0], dateString: m[1] } : undefined;
-}
diff --git a/src/utils/json/data-api/autocomplete/future-work.md b/src/utils/json/data-api/autocomplete/future-work.md
new file mode 100644
index 000000000..660113c7d
--- /dev/null
+++ b/src/utils/json/data-api/autocomplete/future-work.md
@@ -0,0 +1,161 @@
+# Autocomplete — Future Work
+
+Outstanding TODOs flagged in code during the schema transformer implementation (PR #506).
+These must be resolved before the completion providers ship to users.
+
+---
+
+## ~~1. `SPECIAL_CHARS_PATTERN` is incomplete + `insertText` quoting doesn't escape~~ ✅ RESOLVED
+
+**Resolved in:** PR #506 (commit addressing a Copilot review comment)
+
+Replaced `SPECIAL_CHARS_PATTERN` with `JS_IDENTIFIER_PATTERN` — a proper identifier validity check.
+Added `\` → `\\` and `"` → `\"` escaping when quoting `insertText`.
+Tests cover dashes, brackets, digits, embedded quotes, and backslashes.
+
+---
+
+## 2. `referenceText` is invalid MQL for special field names
+
+**Severity:** Medium — will generate broken aggregation expressions
+**File:** `toFieldCompletionItems.ts` — `referenceText` construction
+**When to fix:** Before the aggregation completion provider is wired up
+
+### Problem
+
+`referenceText` is always `$${entry.path}` (e.g., `$address.city`). In MQL, the `$field.path` syntax only works when every segment is a valid identifier without dots, spaces, or `$`. For field names like `order-items`, `a.b`, or `my field`, the `$` prefix syntax produces invalid references.
+
+### Examples
+
+| Field name          | Current `referenceText` | Valid?         | Correct MQL                          |
+| ------------------- | ----------------------- | -------------- | ------------------------------------ |
+| `age`               | `$age`                  | ✅             | `$age`                               |
+| `address.city`      | `$address.city`         | ✅ (nested)    | `$address.city`                      |
+| `order-items`       | `$order-items`          | ❌             | `{ $getField: "order-items" }`       |
+| `a.b` (literal dot) | `$a.b`                  | ❌ (ambiguous) | `{ $getField: { $literal: "a.b" } }` |
+| `my field`          | `$my field`             | ❌             | `{ $getField: "my field" }`          |
+
+### Proposed approaches
+
+**Option A — Make `referenceText` optional:** Return `undefined` for fields that can't use `$`-prefix syntax. The completion provider would omit the reference suggestion for those fields.
+
+**Option B — Use `$getField` for special names:**
+
+```typescript
+referenceText: needsQuoting
+    ? `{ $getField: "${escaped}" }`
+    : `$${entry.path}`,
+```
+
+**Option C — Provide both forms:** Add a `referenceTextRaw` (always `$path`) and `referenceTextSafe` (uses `$getField` when needed). Let the completion provider choose based on context.
+
+**Recommendation:** Option B is pragmatic. Option C is more flexible if we later need to support both forms in different contexts (e.g., `$match` vs `$project`).
+
+---
+
+## 3. `FieldEntry.path` dot-concatenation is ambiguous for literal dots
+
+**Severity:** Low (rare in practice) — fields with literal dots were prohibited before MongoDB API 3.6
+**File:** `getKnownFields.ts` — path concatenation at `` path: `${path}.${childName}` `` **When to fix:** When we encounter real-world schemas with literal dots, or during the next `FieldEntry` interface revision
+
+### Problem
+
+Paths are built by concatenating segments with `.` as separator. A root-level field named `"a.b"` produces `path: "a.b"`, which is indistinguishable from a nested field `{ a: { b: ... } }`.
+
+This ambiguity flows downstream to all consumers: `toTypeScriptDefinition`, `toFieldCompletionItems`, `generateDescriptions`, and any future completion provider.
+
+### Examples
+
+| Document shape        | Resulting `path` | Ambiguous?                    |
+| --------------------- | ---------------- | ----------------------------- |
+| `{ a: { b: 1 } }`     | `"a.b"`          | —                             |
+| `{ "a.b": 1 }`        | `"a.b"`          | ✅ Same as above              |
+| `{ x: { "y.z": 1 } }` | `"x.y.z"`        | ✅ Looks like 3-level nesting |
+
+### Proposed fix
+
+Change `FieldEntry.path` from `string` to `string[]` (segment array):
+
+```typescript
+// Before
+interface FieldEntry {
+    path: string;        // "address.city"
+    ...
+}
+
+// After
+interface FieldEntry {
+    path: string[];      // ["address", "city"]
+    ...
+}
+```
+
+Each consumer then formats the path for its own context:
+
+- **TypeScript definitions:** Already use schema `properties` keys directly (no change needed there)
+- **Completion items:** `entry.path.join('.')` for display, bracket notation for special segments
+- **Aggregation references:** `$` + segments joined with `.`, or `$getField` chains for special segments
+
+### Impact
+
+This is a **breaking change** to the `FieldEntry` interface. Affected consumers:
+
+- `toFieldCompletionItems.ts`
+- `toTypeScriptDefinition.ts` (indirect — uses schema, not FieldEntry paths)
+- `generateDescriptions.ts` (uses schema, not FieldEntry paths)
+- `collectionViewRouter.ts` (imports `FieldEntry` type)
+- `ClusterSession.ts` (imports `FieldEntry` type)
+- `generateMongoFindJsonSchema.ts` (imports `FieldEntry` type)
+- `SchemaAnalyzer.ts` (returns `FieldEntry[]` via `getKnownFields`)
+
+**Recommendation:** Defer until the completion provider is built. The ambiguity only matters for fields with literal dots, which are uncommon. When fixing, do it as a single atomic change across all consumers.
+
+---
+
+## 4. TypeScript definition output references undeclared BSON type names
+
+**Severity:** Low — the TS definition is for display/hover only, not compiled or type-checked
+**File:** `toTypeScriptDefinition.ts` — `bsonToTypeScriptMap`
+**When to fix:** Before the TS definition is used in a context where type correctness matters (e.g., Monaco intellisense with an actual TS language service)
+
+### Problem
+
+The BSON-to-TypeScript type mapping emits non-built-in type names such as `ObjectId`, `Binary`, `Timestamp`, `MinKey`, `MaxKey`, `Code`, `DBRef`, and `UUID`. These are MongoDB API BSON driver types, but the generated definition string doesn't include `import` statements or `declare` stubs for them.
+
+If the output is ever fed to a TypeScript compiler or language service (e.g., Monaco with full TS checking), it will report "Cannot find name 'ObjectId'" etc.
+
+### Current state
+
+The generated output is used for documentation/hover display only — it's rendered as syntax-highlighted text, not compiled. So this is purely cosmetic today.
+
+### Proposed fix (when needed)
+
+**Option A — Emit `import type`:**
+
+```typescript
+import type { ObjectId, Binary, Timestamp, MinKey, MaxKey, Code, DBRef, UUID } from 'mongodb';
+```
+
+Only include types that actually appear in the schema.
+
+**Option B — Emit `declare type` stubs:**
+
+```typescript
+declare type ObjectId = { toString(): string };
+declare type Binary = { length(): number };
+// ... etc.
+```
+
+Lightweight, no dependency on the `mongodb` package.
+
+**Option C — Map everything to primitive types:**
+
+```typescript
+ObjectId → string  // (its string representation)
+Binary → Uint8Array
+Timestamp → { t: number; i: number }
+```
+
+Loses semantic precision but avoids the undeclared-type problem entirely.
+
+**Recommendation:** Option A is the most correct approach. Collect the set of non-built-in types actually used in the schema, then prepend a single `import type` line. Defer until the output is consumed by a real TS language service.
diff --git a/src/utils/json/data-api/autocomplete/generateDescriptions.test.ts b/src/utils/json/data-api/autocomplete/generateDescriptions.test.ts
new file mode 100644
index 000000000..32a103431
--- /dev/null
+++ b/src/utils/json/data-api/autocomplete/generateDescriptions.test.ts
@@ -0,0 +1,210 @@
+/*---------------------------------------------------------------------------------------------
+ *  Copyright (c) Microsoft Corporation. All rights reserved.
+ *  Licensed under the MIT License. See License.txt in the project root for license information.
+ *--------------------------------------------------------------------------------------------*/
+
+import { type JSONSchema } from '@vscode-documentdb/schema-analyzer';
+import { generateDescriptions } from './generateDescriptions';
+
+describe('generateDescriptions', () => {
+    it('adds descriptions with type and percentage for simple document', () => {
+        const schema: JSONSchema = {
+            'x-documentsInspected': 100,
+            properties: {
+                name: {
+                    'x-occurrence': 100,
+                    anyOf: [
+                        {
+                            type: 'string',
+                            'x-bsonType': 'string',
+                            'x-typeOccurrence': 100,
+                        },
+                    ],
+                },
+            },
+        };
+
+        generateDescriptions(schema);
+
+        const nameSchema = schema.properties?.name as JSONSchema;
+        expect(nameSchema.description).toBe('String · 100%');
+    });
+
+    it('includes min/max stats for numeric fields', () => {
+        const schema: JSONSchema = {
+            'x-documentsInspected': 100,
+            properties: {
+                age: {
+                    'x-occurrence': 95,
+                    anyOf: [
+                        {
+                            type: 'number',
+                            'x-bsonType': 'int32',
+                            'x-typeOccurrence': 95,
+                            'x-minValue': 18,
+                            'x-maxValue': 95,
+                        },
+                    ],
+                },
+            },
+        };
+
+        generateDescriptions(schema);
+
+        const ageSchema = schema.properties?.age as JSONSchema;
+        expect(ageSchema.description).toBe('Int32 · 95% · range: 18–95');
+    });
+
+    it('includes length stats for string fields', () => {
+        const schema: JSONSchema = {
+            'x-documentsInspected': 100,
+            properties: {
+                name: {
+                    'x-occurrence': 100,
+                    anyOf: [
+                        {
+                            type: 'string',
+                            'x-bsonType': 'string',
+                            'x-typeOccurrence': 100,
+                            'x-minLength': 3,
+                            'x-maxLength': 50,
+                        },
+                    ],
+                },
+            },
+        };
+
+        generateDescriptions(schema);
+
+        const nameSchema = schema.properties?.name as JSONSchema;
+        expect(nameSchema.description).toBe('String · 100% · length: 3–50');
+    });
+
+    it('includes date range stats for date fields', () => {
+        const minDate = new Date('2020-01-01T00:00:00.000Z').getTime();
+        const maxDate = new Date('2024-12-31T00:00:00.000Z').getTime();
+
+        const schema: JSONSchema = {
+            'x-documentsInspected': 100,
+            properties: {
+                createdAt: {
+                    'x-occurrence': 80,
+                    anyOf: [
+                        {
+                            type: 'string',
+                            'x-bsonType': 'date',
+                            'x-typeOccurrence': 80,
+                            'x-minDate': minDate,
+                            'x-maxDate': maxDate,
+                        },
+                    ],
+                },
+            },
+        };
+
+        generateDescriptions(schema);
+
+        const createdAtSchema = schema.properties?.createdAt as JSONSchema;
+        expect(createdAtSchema.description).toBe('Date · 80% · range: 2020-01-01 – 2024-12-31');
+    });
+
+    it('includes true/false counts for boolean fields', () => {
+        const schema: JSONSchema = {
+            'x-documentsInspected': 100,
+            properties: {
+                active: {
+                    'x-occurrence': 100,
+                    anyOf: [
+                        {
+                            type: 'boolean',
+                            'x-bsonType': 'boolean',
+                            'x-typeOccurrence': 100,
+                            'x-trueCount': 80,
+                            'x-falseCount': 20,
+                        },
+                    ],
+                },
+            },
+        };
+
+        generateDescriptions(schema);
+
+        const activeSchema = schema.properties?.active as JSONSchema;
+        expect(activeSchema.description).toBe('Boolean · 100% · true: 80, false: 20');
+    });
+
+    it('handles nested object fields (descriptions at nested level)', () => {
+        const schema: JSONSchema = {
+            'x-documentsInspected': 100,
+            properties: {
+                address: {
+                    'x-occurrence': 100,
+                    anyOf: [
+                        {
+                            type: 'object',
+                            'x-bsonType': 'object',
+                            'x-typeOccurrence': 100,
+                            'x-documentsInspected': 100,
+                            properties: {
+                                city: {
+                                    'x-occurrence': 100,
+                                    anyOf: [
+                                        {
+                                            type: 'string',
+                                            'x-bsonType': 'string',
+                                            'x-typeOccurrence': 100,
+                                            'x-minLength': 2,
+                                            'x-maxLength': 30,
+                                        },
+                                    ],
+                                },
+                            },
+                        },
+                    ],
+                },
+            },
+        };
+
+        generateDescriptions(schema);
+
+        // The parent (address) should also get a description
+        const addressSchema = schema.properties?.address as JSONSchema;
+        expect(addressSchema.description).toBe('Object · 100%');
+
+        // The nested city should get its own description
+        const addressTypeEntry = (addressSchema.anyOf as JSONSchema[])[0];
+        const citySchema = addressTypeEntry.properties?.city as JSONSchema;
+        expect(citySchema.description).toBe('String · 100% · length: 2–30');
+    });
+
+    it('handles polymorphic fields (shows multiple types)', () => {
+        const schema: JSONSchema = {
+            'x-documentsInspected': 100,
+            properties: {
+                value: {
+                    'x-occurrence': 95,
+                    anyOf: [
+                        {
+                            type: 'number',
+                            'x-bsonType': 'int32',
+                            'x-typeOccurrence': 60,
+                            'x-minValue': 1,
+                            'x-maxValue': 100,
+                        },
+                        {
+                            type: 'string',
+                            'x-bsonType': 'string',
+                            'x-typeOccurrence': 35,
+                        },
+                    ],
+                },
+            },
+        };
+
+        generateDescriptions(schema);
+
+        const valueSchema = schema.properties?.value as JSONSchema;
+        // Dominant type first, then secondary
+        expect(valueSchema.description).toBe('Int32 | String · 95% · range: 1–100');
+    });
+});
diff --git a/src/utils/json/data-api/autocomplete/generateDescriptions.ts b/src/utils/json/data-api/autocomplete/generateDescriptions.ts
new file mode 100644
index 000000000..2f4f28867
--- /dev/null
+++ b/src/utils/json/data-api/autocomplete/generateDescriptions.ts
@@ -0,0 +1,218 @@
+/*---------------------------------------------------------------------------------------------
+ *  Copyright (c) Microsoft Corporation. All rights reserved.
+ *  Licensed under the MIT License. See License.txt in the project root for license information.
+ *--------------------------------------------------------------------------------------------*/
+
+import { BSONTypes, type JSONSchema } from '@vscode-documentdb/schema-analyzer';
+import Denque from 'denque';
+
+/**
+ * Work item for BFS traversal of the schema tree: one property node awaiting a description.
+ */
+interface WorkItem {
+    schemaNode: JSONSchema; // property schema that receives the generated `description`
+    parentDocumentsInspected: number; // denominator for the occurrence percentage; 0 suppresses it
+}
+
+/**
+ * Post-processor that mutates the schema in-place, adding a human-readable
+ * `description` string to each property node. The parts are joined with ' · ':
+ * - Type name(s): every observed type, most frequent first, joined with ' | '
+ * - Occurrence percentage (`x-occurrence / parentDocumentsInspected`), omitted when
+ *   the parent's `x-documentsInspected` is missing or 0
+ * - Type-specific stats of the dominant type (length, range, true/false counts, etc.)
+ *
+ * Uses BFS to traverse all property levels. Properties of every object-typed `anyOf`
+ * entry are visited, so sub-fields are described even when the object variant of a
+ * polymorphic field is not the dominant type. Array items are not traversed.
+ *
+ * @param schema - Root schema to annotate; it is modified in place.
+ */
+export function generateDescriptions(schema: JSONSchema): void {
+    const queue = new Denque<WorkItem>();
+
+    // Queues every non-boolean property of `owner`. `documentsInspected` becomes the
+    // denominator of the occurrence percentage computed for those properties.
+    const enqueueProperties = (owner: JSONSchema, documentsInspected: number): void => {
+        if (!owner.properties) {
+            return;
+        }
+
+        for (const propName of Object.keys(owner.properties)) {
+            const propSchema = owner.properties[propName] as JSONSchema;
+            if (typeof propSchema === 'boolean') continue;
+
+            queue.push({
+                schemaNode: propSchema,
+                parentDocumentsInspected: documentsInspected,
+            });
+        }
+    };
+
+    // Seed the queue with root-level properties
+    enqueueProperties(schema, (schema['x-documentsInspected'] as number) ?? 0);
+
+    while (queue.length > 0) {
+        const item = queue.shift();
+        if (!item) continue;
+
+        const { schemaNode, parentDocumentsInspected } = item;
+
+        // Build description parts
+        const parts: string[] = [];
+
+        // Part 1: Type info (all anyOf entries, dominant type first)
+        const typeNames = collectTypeDisplayNames(schemaNode);
+        if (typeNames.length > 0) {
+            parts.push(typeNames.join(' | '));
+        }
+
+        // Part 2: Occurrence percentage
+        if (parentDocumentsInspected > 0) {
+            const occurrence = (schemaNode['x-occurrence'] as number) ?? 0;
+            const percentage = ((occurrence / parentDocumentsInspected) * 100).toFixed(0);
+            parts.push(`${percentage}%`);
+        }
+
+        // Part 3: Stats from the dominant type entry
+        const dominantEntry = getDominantTypeEntry(schemaNode);
+        const statString = dominantEntry ? getStatString(dominantEntry) : undefined;
+        if (statString) {
+            parts.push(statString);
+        }
+
+        // Descend into every object-typed variant, not only the dominant one;
+        // otherwise sub-fields of a minority object variant never get a description.
+        for (const entry of schemaNode.anyOf ?? []) {
+            if (typeof entry === 'boolean' || entry.type !== 'object') continue;
+            enqueueProperties(entry, (entry['x-documentsInspected'] as number) ?? 0);
+        }
+
+        // Set the description
+        if (parts.length > 0) {
+            schemaNode.description = parts.join(' · ');
+        }
+    }
+}
+
+/**
+ * Produces the display names of every type listed in a schema node's `anyOf`, most
+ * frequent first (descending `x-typeOccurrence`). Yields `[]` when `anyOf` is missing or empty.
+ */
+function collectTypeDisplayNames(schemaNode: JSONSchema): string[] {
+    const variants = schemaNode.anyOf;
+    if (!variants || variants.length === 0) {
+        return [];
+    }
+
+    const ranked: Array<{ name: string; occurrence: number }> = [];
+    variants.forEach((variant) => {
+        if (typeof variant === 'boolean') return;
+        const rawBsonType = (variant['x-bsonType'] as string) ?? '';
+        // Prefer the BSON display name; otherwise the JSON `type`, and finally 'Unknown'
+        const name = rawBsonType
+            ? BSONTypes.toDisplayString(rawBsonType as BSONTypes)
+            : ((variant.type as string) ?? 'Unknown');
+        ranked.push({ name, occurrence: (variant['x-typeOccurrence'] as number) ?? 0 });
+    });
+
+    // Highest occurrence first, so the dominant type leads the list
+    return ranked.sort((lhs, rhs) => rhs.occurrence - lhs.occurrence).map((item) => item.name);
+}
+
+/**
+ * Picks the `anyOf` entry whose `x-typeOccurrence` is highest; on a tie the entry
+ * listed first wins. Boolean entries are ignored and a missing `x-typeOccurrence`
+ * counts as 0.
+ * Returns `null` when `anyOf` is missing or empty, or when no entry qualifies.
+ */
+function getDominantTypeEntry(schemaNode: JSONSchema): JSONSchema | null {
+    type Leader = { entry: JSONSchema | null; count: number };
+    const nobody: Leader = { entry: null, count: -1 };
+
+    const leader = (schemaNode.anyOf ?? []).reduce<Leader>((best, candidate) => {
+        if (typeof candidate === 'boolean') {
+            return best;
+        }
+
+        const count = (candidate['x-typeOccurrence'] as number) ?? 0;
+        // Strict comparison keeps the earliest entry when counts are equal
+        return count > best.count ? { entry: candidate, count } : best;
+    }, nobody);
+
+    return leader.entry;
+}
+
+/**
+ * Returns a type-specific stats string for the given type entry, or undefined if
+ * no relevant stats are available (including date bounds that are not valid timestamps).
+ */
+function getStatString(typeEntry: JSONSchema): string | undefined {
+    const bsonType = (typeEntry['x-bsonType'] as string) ?? '';
+
+    switch (bsonType) {
+        case 'string':
+        case 'binary': {
+            const minLen = typeEntry['x-minLength'] as number | undefined;
+            const maxLen = typeEntry['x-maxLength'] as number | undefined;
+            if (minLen !== undefined && maxLen !== undefined) {
+                return `length: ${String(minLen)}–${String(maxLen)}`;
+            }
+            return undefined;
+        }
+
+        case 'int32':
+        case 'double':
+        case 'long':
+        case 'decimal128':
+        case 'number': {
+            const minVal = typeEntry['x-minValue'] as number | undefined;
+            const maxVal = typeEntry['x-maxValue'] as number | undefined;
+            if (minVal !== undefined && maxVal !== undefined) {
+                return `range: ${String(minVal)}–${String(maxVal)}`;
+            }
+            return undefined;
+        }
+
+        case 'date': {
+            const minDate = typeEntry['x-minDate'] as number | undefined;
+            const maxDate = typeEntry['x-maxDate'] as number | undefined;
+            if (minDate === undefined || maxDate === undefined) return undefined;
+
+            // toISOString() throws a RangeError on an invalid Date (NaN or out-of-range timestamp)
+            const [from, to] = [new Date(minDate), new Date(maxDate)];
+            if (Number.isNaN(from.getTime()) || Number.isNaN(to.getTime())) return undefined;
+            return `range: ${from.toISOString().split('T')[0]} – ${to.toISOString().split('T')[0]}`;
+        }
+
+        case 'boolean': {
+            const trueCount = typeEntry['x-trueCount'] as number | undefined;
+            const falseCount = typeEntry['x-falseCount'] as number | undefined;
+            if (trueCount !== undefined && falseCount !== undefined) {
+                return `true: ${String(trueCount)}, false: ${String(falseCount)}`;
+            }
+            return undefined;
+        }
+
+        case 'array': {
+            const minItems = typeEntry['x-minItems'] as number | undefined;
+            const maxItems = typeEntry['x-maxItems'] as number | undefined;
+            if (minItems !== undefined && maxItems !== undefined) {
+                return `items: ${String(minItems)}–${String(maxItems)}`;
+            }
+            return undefined;
+        }
+
+        case 'object': {
+            const minProps = typeEntry['x-minProperties'] as number | undefined;
+            const maxProps = typeEntry['x-maxProperties'] as number | undefined;
+            if (minProps !== undefined && maxProps !== undefined) {
+                return `properties: ${String(minProps)}–${String(maxProps)}`;
+            }
+            return undefined;
+        }
+
+        default:
+            return undefined;
+    }
+}
diff --git a/src/utils/json/data-api/autocomplete/getKnownFields.test.ts b/src/utils/json/data-api/autocomplete/getKnownFields.test.ts
new file mode 100644
index 000000000..d0680e2f3
--- /dev/null
+++ b/src/utils/json/data-api/autocomplete/getKnownFields.test.ts
@@ -0,0 +1,128 @@
+/*---------------------------------------------------------------------------------------------
+ *  Copyright (c) Microsoft Corporation. All rights reserved.
+ *  Licensed under the MIT License. See License.txt in the project root for license information.
+ *--------------------------------------------------------------------------------------------*/
+
+import { type FieldEntry, getKnownFields, SchemaAnalyzer } from '@vscode-documentdb/schema-analyzer';
+import { ObjectId } from 'bson';
+
+describe('getKnownFields', () => {
+    it('returns bsonType for primitive fields', () => {
+        const analyzer = SchemaAnalyzer.fromDocument({
+            _id: new ObjectId(),
+            name: 'Alice',
+            age: 42,
+            score: 3.14,
+            active: true,
+        });
+        const fields = getKnownFields(analyzer.getSchema());
+
+        const nameField = fields.find((f: FieldEntry) => f.path === 'name');
+        expect(nameField?.type).toBe('string');
+        expect(nameField?.bsonType).toBe('string');
+
+        const ageField = fields.find((f: FieldEntry) => f.path === 'age');
+        expect(ageField?.type).toBe('number');
+        // bsonType could be 'double' or 'int32' depending on JS runtime
+        expect(['double', 'int32']).toContain(ageField?.bsonType);
+
+        const activeField = fields.find((f: FieldEntry) => f.path === 'active');
+        expect(activeField?.type).toBe('boolean');
+        expect(activeField?.bsonType).toBe('boolean');
+    });
+
+    it('returns _id first and sorts alphabetically', () => {
+        const analyzer = SchemaAnalyzer.fromDocument({
+            _id: new ObjectId(),
+            zebra: 1,
+            apple: 2,
+            mango: 3,
+        });
+        const fields = getKnownFields(analyzer.getSchema());
+        const paths = fields.map((f: FieldEntry) => f.path);
+
+        expect(paths[0]).toBe('_id');
+        // Remaining should be alphabetical
+        expect(paths.slice(1)).toEqual(['apple', 'mango', 'zebra']);
+    });
+
+    it('detects optional fields', () => {
+        const analyzer = new SchemaAnalyzer();
+        analyzer.addDocument({ _id: new ObjectId(), name: 'Alice', age: 30 });
+        analyzer.addDocument({ _id: new ObjectId(), name: 'Bob' }); // no 'age'
+
+        const fields = getKnownFields(analyzer.getSchema());
+
+        const nameField = fields.find((f: FieldEntry) => f.path === 'name');
+        expect(nameField?.isSparse).toBeUndefined(); // present in all docs
+
+        const ageField = fields.find((f: FieldEntry) => f.path === 'age');
+        expect(ageField?.isSparse).toBe(true); // missing in doc2
+    });
+
+    it('returns bsonTypes for polymorphic fields', () => {
+        const analyzer = new SchemaAnalyzer();
+        analyzer.addDocument({ _id: new ObjectId(), value: 'hello' });
+        analyzer.addDocument({ _id: new ObjectId(), value: 42 });
+
+        const fields = getKnownFields(analyzer.getSchema());
+        const valueField = fields.find((f: FieldEntry) => f.path === 'value');
+
+        expect(valueField?.bsonTypes).toBeDefined();
+        expect(valueField?.bsonTypes).toHaveLength(2);
+        expect(valueField?.bsonTypes).toContain('string');
+        // Could be 'double' or 'int32'
+        expect(valueField?.bsonTypes?.some((t: string) => ['double', 'int32'].includes(t))).toBe(true);
+    });
+
+    it('returns arrayItemBsonType for array fields', () => {
+        const analyzer = SchemaAnalyzer.fromDocument({
+            _id: new ObjectId(),
+            tags: ['a', 'b', 'c'],
+            scores: [10, 20, 30],
+        });
+        const fields = getKnownFields(analyzer.getSchema());
+
+        const tagsField = fields.find((f: FieldEntry) => f.path === 'tags');
+        expect(tagsField?.type).toBe('array');
+        expect(tagsField?.bsonType).toBe('array');
+        expect(tagsField?.arrayItemBsonType).toBe('string');
+
+        const scoresField = fields.find((f: FieldEntry) => f.path === 'scores');
+        expect(scoresField?.type).toBe('array');
+        expect(scoresField?.arrayItemBsonType).toBeDefined();
+    });
+
+    it('handles nested object fields', () => {
+        const analyzer = SchemaAnalyzer.fromDocument({
+            _id: new ObjectId(),
+            user: {
+                name: 'Alice',
+                profile: {
+                    bio: 'hello',
+                },
+            },
+        });
+        const fields = getKnownFields(analyzer.getSchema());
+        const paths = fields.map((f: FieldEntry) => f.path);
+
+        // Objects are expanded, not leaf nodes
+        expect(paths).not.toContain('user');
+        expect(paths).toContain('user.name');
+        expect(paths).toContain('user.profile.bio');
+    });
+
+    it('detects optional nested fields', () => {
+        const analyzer = new SchemaAnalyzer();
+        analyzer.addDocument({ _id: new ObjectId(), user: { name: 'Alice', age: 30 } });
+        analyzer.addDocument({ _id: new ObjectId(), user: { name: 'Bob' } }); // no age in nested obj
+
+        const fields = getKnownFields(analyzer.getSchema());
+
+        const nameField = fields.find((f: FieldEntry) => f.path === 'user.name');
+        expect(nameField?.isSparse).toBeUndefined(); // present in both objects
+
+        const ageField = fields.find((f: FieldEntry) => f.path === 'user.age');
+        expect(ageField?.isSparse).toBe(true); // missing in doc2's user object
+    });
+});
diff --git a/src/utils/json/data-api/autocomplete/toFieldCompletionItems.test.ts b/src/utils/json/data-api/autocomplete/toFieldCompletionItems.test.ts
new file mode 100644
index 000000000..37a7ecc4e
--- /dev/null
+++ b/src/utils/json/data-api/autocomplete/toFieldCompletionItems.test.ts
@@ -0,0 +1,129 @@
+/*---------------------------------------------------------------------------------------------
+ *  Copyright (c) Microsoft Corporation. All rights reserved.
+ *  Licensed under the MIT License. See License.txt in the project root for license information.
+ *--------------------------------------------------------------------------------------------*/
+
+import { type FieldEntry } from '@vscode-documentdb/schema-analyzer';
+import { toFieldCompletionItems } from './toFieldCompletionItems';
+
+describe('toFieldCompletionItems', () => {
+    it('converts simple fields', () => {
+        const fields: FieldEntry[] = [
+            { path: 'name', type: 'string', bsonType: 'string' },
+            { path: 'age', type: 'number', bsonType: 'int32' },
+        ];
+
+        const result = toFieldCompletionItems(fields);
+
+        expect(result).toHaveLength(2);
+        expect(result[0].fieldName).toBe('name');
+        expect(result[0].displayType).toBe('String');
+        expect(result[0].bsonType).toBe('string');
+        expect(result[0].insertText).toBe('name');
+
+        expect(result[1].fieldName).toBe('age');
+        expect(result[1].displayType).toBe('Int32');
+        expect(result[1].bsonType).toBe('int32');
+        expect(result[1].insertText).toBe('age');
+    });
+
+    it('escapes dotted paths in insertText', () => {
+        const fields: FieldEntry[] = [
+            { path: 'address.city', type: 'string', bsonType: 'string' },
+            { path: 'user.profile.bio', type: 'string', bsonType: 'string' },
+        ];
+
+        const result = toFieldCompletionItems(fields);
+
+        expect(result[0].insertText).toBe('"address.city"');
+        expect(result[1].insertText).toBe('"user.profile.bio"');
+    });
+
+    it('quotes field names with dashes', () => {
+        const fields: FieldEntry[] = [{ path: 'order-items', type: 'string', bsonType: 'string' }];
+        const result = toFieldCompletionItems(fields);
+        expect(result[0].insertText).toBe('"order-items"');
+        expect(result[0].fieldName).toBe('order-items'); // display stays unescaped
+    });
+
+    it('quotes field names with brackets', () => {
+        const fields: FieldEntry[] = [{ path: 'items[0]', type: 'string', bsonType: 'string' }];
+        const result = toFieldCompletionItems(fields);
+        expect(result[0].insertText).toBe('"items[0]"');
+    });
+
+    it('quotes field names starting with a digit', () => {
+        const fields: FieldEntry[] = [{ path: '123abc', type: 'string', bsonType: 'string' }];
+        const result = toFieldCompletionItems(fields);
+        expect(result[0].insertText).toBe('"123abc"');
+    });
+
+    it('escapes embedded double quotes in insertText', () => {
+        const fields: FieldEntry[] = [{ path: 'say"hi"', type: 'string', bsonType: 'string' }];
+        const result = toFieldCompletionItems(fields);
+        expect(result[0].insertText).toBe('"say\\"hi\\""');
+        expect(result[0].fieldName).toBe('say"hi"'); // display stays unescaped
+    });
+
+    it('escapes backslashes in insertText', () => {
+        const fields: FieldEntry[] = [{ path: 'back\\slash', type: 'string', bsonType: 'string' }];
+        const result = toFieldCompletionItems(fields);
+        expect(result[0].insertText).toBe('"back\\\\slash"');
+    });
+
+    it('does not quote valid identifiers', () => {
+        const fields: FieldEntry[] = [
+            { path: 'name', type: 'string', bsonType: 'string' },
+            { path: '_id', type: 'string', bsonType: 'objectid' },
+            { path: '$type', type: 'string', bsonType: 'string' },
+        ];
+        const result = toFieldCompletionItems(fields);
+        expect(result[0].insertText).toBe('name');
+        expect(result[1].insertText).toBe('_id');
+        expect(result[2].insertText).toBe('$type');
+    });
+
+    it('adds $ prefix to referenceText', () => {
+        const fields: FieldEntry[] = [
+            { path: 'age', type: 'number', bsonType: 'int32' },
+            { path: 'address.city', type: 'string', bsonType: 'string' },
+        ];
+
+        const result = toFieldCompletionItems(fields);
+
+        expect(result[0].referenceText).toBe('$age');
+        expect(result[1].referenceText).toBe('$address.city');
+    });
+
+    it('preserves isSparse', () => {
+        const fields: FieldEntry[] = [
+            { path: 'name', type: 'string', bsonType: 'string', isSparse: false },
+            { path: 'nickname', type: 'string', bsonType: 'string', isSparse: true },
+            { path: 'email', type: 'string', bsonType: 'string' }, // undefined → false
+        ];
+
+        const result = toFieldCompletionItems(fields);
+
+        expect(result[0].isSparse).toBe(false);
+        expect(result[1].isSparse).toBe(true);
+        expect(result[2].isSparse).toBe(false);
+    });
+
+    it('uses correct displayType', () => {
+        const fields: FieldEntry[] = [
+            { path: '_id', type: 'string', bsonType: 'objectid' },
+            { path: 'createdAt', type: 'string', bsonType: 'date' },
+            { path: 'active', type: 'boolean', bsonType: 'boolean' },
+            { path: 'score', type: 'number', bsonType: 'double' },
+            { path: 'tags', type: 'array', bsonType: 'array' },
+        ];
+
+        const result = toFieldCompletionItems(fields);
+
+        expect(result[0].displayType).toBe('ObjectId');
+        expect(result[1].displayType).toBe('Date');
+        expect(result[2].displayType).toBe('Boolean');
+        expect(result[3].displayType).toBe('Double');
+        expect(result[4].displayType).toBe('Array');
+    });
+});
diff --git a/src/utils/json/data-api/autocomplete/toFieldCompletionItems.ts b/src/utils/json/data-api/autocomplete/toFieldCompletionItems.ts
new file mode 100644
index 000000000..60e299590
--- /dev/null
+++ b/src/utils/json/data-api/autocomplete/toFieldCompletionItems.ts
@@ -0,0 +1,86 @@
+/*---------------------------------------------------------------------------------------------
+ *  Copyright (c) Microsoft Corporation. All rights reserved.
+ *  Licensed under the MIT License. See License.txt in the project root for license information.
+ *--------------------------------------------------------------------------------------------*/
+
+import { BSONTypes, type FieldEntry } from '@vscode-documentdb/schema-analyzer';
+
+/**
+ * Completion-ready data for a single field entry.
+ *
+ * Design intent:
+ * - `fieldName` is the human-readable, unescaped field path shown in the completion list.
+ *   Users see clean names like "address.city" or "order-items" without quotes or escaping.
+ * - `insertText` is the escaped/quoted form that gets inserted when the user selects a
+ *   completion item. For plain identifiers (letters, digits, `_`, `$`; no leading digit) it
+ *   matches `fieldName`; any other name (dots, dashes, spaces, etc.) is wrapped in double quotes.
+ * - `referenceText` is the `$`-prefixed aggregation field reference (e.g., "$age").
+ */
+export interface FieldCompletionData {
+    /** The full dot-notated field name, e.g., "address.city" — kept unescaped for display */
+    fieldName: string;
+    /** Human-readable type display, e.g., "String", "Date", "ObjectId" */
+    displayType: string;
+    /** Raw BSON type from FieldEntry */
+    bsonType: string;
+    /** All observed BSON types for polymorphic fields (e.g., ["string", "int32"]) */
+    bsonTypes?: string[];
+    /** Human-readable display strings for all observed types (e.g., ["String", "Int32"]) */
+    displayTypes?: string[];
+    /** Whether the field was not present in every inspected document (statistical observation, not a constraint) */
+    isSparse: boolean;
+    /** Text to insert when the user selects this completion — quoted/escaped if the field name contains special chars */
+    insertText: string;
+    /**
+     * Field reference for aggregation expressions, e.g., "$age", "$address.city".
+     *
+     * TODO: The simple `$field.path` syntax is invalid MQL for field names containing dots,
+     * spaces, or `$` characters. For such fields, the correct MQL syntax is
+     * `{ $getField: "fieldName" }`. This should be addressed when the aggregation
+     * completion provider is wired up — either by using `$getField` for special names
+     * or by making `referenceText` optional for fields that cannot use the `$` prefix syntax.
+     */
+    referenceText: string;
+}
+
+/**
+ * Matches ASCII-only JavaScript/TypeScript identifiers.
+ * Such an identifier starts with an ASCII letter, underscore, or dollar sign,
+ * followed by zero or more ASCII letters, digits, underscores, or dollar signs.
+ *
+ * Field names that do NOT match this pattern (including names with non-ASCII
+ * letters) must be quoted and escaped in `insertText` to produce valid query expressions.
+ */
+const JS_IDENTIFIER_PATTERN = /^[a-zA-Z_$][a-zA-Z0-9_$]*$/;
+
+/**
+ * Converts an array of FieldEntry objects into completion-ready FieldCompletionData items.
+ *
+ * Paths that are not plain identifiers are emitted in `insertText` as a double-quoted string
+ * literal with quotes, backslashes, and control characters escaped; `fieldName` stays unescaped.
+ *
+ * @param fields - Array of FieldEntry objects from getKnownFields
+ * @returns Array of FieldCompletionData ready for use in editor completions
+ */
+export function toFieldCompletionItems(fields: FieldEntry[]): FieldCompletionData[] {
+    return fields.map((entry) => {
+        const displayType = BSONTypes.toDisplayString(entry.bsonType as BSONTypes);
+        const needsQuoting = !JS_IDENTIFIER_PATTERN.test(entry.path);
+
+        // JSON.stringify yields the double-quoted literal and escapes `"`, `\`, and control
+        // characters (newline, tab, ...). Escaping only `"` and `\` by hand would leave a
+        // raw line break inside the quotes, i.e. a broken string literal in the query text.
+        const insertText = needsQuoting ? JSON.stringify(entry.path) : entry.path;
+
+        return {
+            fieldName: entry.path,
+            displayType,
+            bsonType: entry.bsonType,
+            bsonTypes: entry.bsonTypes,
+            displayTypes: entry.bsonTypes?.map((t) => BSONTypes.toDisplayString(t as BSONTypes)),
+            isSparse: entry.isSparse ?? false,
+            insertText,
+            referenceText: `$${entry.path}`,
+        };
+    });
+}
diff --git a/src/utils/json/data-api/autocomplete/toTypeScriptDefinition.test.ts b/src/utils/json/data-api/autocomplete/toTypeScriptDefinition.test.ts
new file mode 100644
index 000000000..d003b9ded
--- /dev/null
+++ b/src/utils/json/data-api/autocomplete/toTypeScriptDefinition.test.ts
@@ -0,0 +1,318 @@
+/*---------------------------------------------------------------------------------------------
+ *  Copyright (c) Microsoft Corporation. All rights reserved.
+ *  Licensed under the MIT License. See License.txt in the project root for license information.
+ *--------------------------------------------------------------------------------------------*/
+
+import { type JSONSchema } from '@vscode-documentdb/schema-analyzer';
+import { toTypeScriptDefinition } from './toTypeScriptDefinition';
+
+describe('toTypeScriptDefinition', () => {
+    it('generates basic interface with primitive types', () => {
+        const schema: JSONSchema = {
+            'x-documentsInspected': 100,
+            properties: {
+                _id: {
+                    'x-occurrence': 100,
+                    anyOf: [
+                        {
+                            type: 'string',
+                            'x-bsonType': 'objectid',
+                            'x-typeOccurrence': 100,
+                        },
+                    ],
+                },
+                name: {
+                    'x-occurrence': 100,
+                    anyOf: [
+                        {
+                            type: 'string',
+                            'x-bsonType': 'string',
+                            'x-typeOccurrence': 100,
+                        },
+                    ],
+                },
+                age: {
+                    'x-occurrence': 100,
+                    anyOf: [
+                        {
+                            type: 'number',
+                            'x-bsonType': 'int32',
+                            'x-typeOccurrence': 100,
+                        },
+                    ],
+                },
+            },
+        };
+
+        const result = toTypeScriptDefinition(schema, 'users');
+
+        expect(result).toContain('interface UsersDocument {');
+        expect(result).toContain('    _id: ObjectId;');
+        expect(result).toContain('    name: string;');
+        expect(result).toContain('    age: number;');
+        expect(result).toContain('}');
+    });
+
+    it('marks optional fields with ?', () => {
+        const schema: JSONSchema = {
+            'x-documentsInspected': 100,
+            properties: {
+                name: {
+                    'x-occurrence': 100,
+                    anyOf: [
+                        {
+                            type: 'string',
+                            'x-bsonType': 'string',
+                            'x-typeOccurrence': 100,
+                        },
+                    ],
+                },
+                nickname: {
+                    'x-occurrence': 50,
+                    anyOf: [
+                        {
+                            type: 'string',
+                            'x-bsonType': 'string',
+                            'x-typeOccurrence': 50,
+                        },
+                    ],
+                },
+            },
+        };
+
+        const result = toTypeScriptDefinition(schema, 'users');
+
+        expect(result).toContain('    name: string;');
+        expect(result).toContain('    nickname?: string;');
+    });
+
+    it('handles nested objects as inline blocks', () => {
+        const schema: JSONSchema = {
+            'x-documentsInspected': 100,
+            properties: {
+                address: {
+                    'x-occurrence': 100,
+                    anyOf: [
+                        {
+                            type: 'object',
+                            'x-bsonType': 'object',
+                            'x-typeOccurrence': 100,
+                            'x-documentsInspected': 100,
+                            properties: {
+                                city: {
+                                    'x-occurrence': 100,
+                                    anyOf: [
+                                        {
+                                            type: 'string',
+                                            'x-bsonType': 'string',
+                                            'x-typeOccurrence': 100,
+                                        },
+                                    ],
+                                },
+                                zip: {
+                                    'x-occurrence': 100,
+                                    anyOf: [
+                                        {
+                                            type: 'string',
+                                            'x-bsonType': 'string',
+                                            'x-typeOccurrence': 100,
+                                        },
+                                    ],
+                                },
+                            },
+                        },
+                    ],
+                },
+            },
+        };
+
+        const result = toTypeScriptDefinition(schema, 'users');
+
+        expect(result).toContain('    address: {');
+        expect(result).toContain('        city: string;');
+        expect(result).toContain('        zip: string;');
+        expect(result).toContain('    };');
+    });
+
+    it('handles arrays with element types', () => {
+        const schema: JSONSchema = {
+            'x-documentsInspected': 100,
+            properties: {
+                tags: {
+                    'x-occurrence': 100,
+                    anyOf: [
+                        {
+                            type: 'array',
+                            'x-bsonType': 'array',
+                            'x-typeOccurrence': 100,
+                            items: {
+                                anyOf: [
+                                    {
+                                        type: 'string',
+                                        'x-bsonType': 'string',
+                                        'x-typeOccurrence': 100,
+                                    },
+                                ],
+                            },
+                        },
+                    ],
+                },
+            },
+        };
+
+        const result = toTypeScriptDefinition(schema, 'posts');
+
+        expect(result).toContain('    tags: string[];');
+    });
+
+    it('handles polymorphic fields as unions', () => {
+        const schema: JSONSchema = {
+            'x-documentsInspected': 100,
+            properties: {
+                metadata: {
+                    'x-occurrence': 80,
+                    anyOf: [
+                        {
+                            type: 'string',
+                            'x-bsonType': 'string',
+                            'x-typeOccurrence': 50,
+                        },
+                        {
+                            type: 'number',
+                            'x-bsonType': 'int32',
+                            'x-typeOccurrence': 20,
+                        },
+                        {
+                            type: 'null',
+                            'x-bsonType': 'null',
+                            'x-typeOccurrence': 10,
+                        },
+                    ],
+                },
+            },
+        };
+
+        const result = toTypeScriptDefinition(schema, 'items');
+
+        expect(result).toContain('    metadata?: string | number | null;');
+    });
+
+    it('PascalCase conversion for collection name', () => {
+        expect(toTypeScriptDefinition({ 'x-documentsInspected': 0 }, 'users')).toContain('interface UsersDocument');
+        expect(toTypeScriptDefinition({ 'x-documentsInspected': 0 }, 'order_items')).toContain(
+            'interface OrderItemsDocument',
+        );
+        expect(toTypeScriptDefinition({ 'x-documentsInspected': 0 }, 'my-awesome-collection')).toContain(
+            'interface MyAwesomeCollectionDocument',
+        );
+    });
+
+    it('prefixes with _ when collection name starts with a digit', () => {
+        expect(toTypeScriptDefinition({ 'x-documentsInspected': 0 }, '123abc')).toContain('interface _123abcDocument');
+        expect(toTypeScriptDefinition({ 'x-documentsInspected': 0 }, '99_bottles')).toContain(
+            'interface _99BottlesDocument',
+        );
+    });
+
+    it('falls back to CollectionDocument when name is only separators', () => {
+        expect(toTypeScriptDefinition({ 'x-documentsInspected': 0 }, '---')).toContain('interface CollectionDocument');
+        expect(toTypeScriptDefinition({ 'x-documentsInspected': 0 }, '_ _ _')).toContain(
+            'interface CollectionDocument',
+        );
+    });
+
+    it('falls back to CollectionDocument for empty string', () => {
+        expect(toTypeScriptDefinition({ 'x-documentsInspected': 0 }, '')).toContain('interface CollectionDocument');
+    });
+
+    describe('special character field names', () => {
+        function makeSchemaWithField(fieldName: string): JSONSchema {
+            return {
+                'x-documentsInspected': 100,
+                properties: {
+                    [fieldName]: {
+                        'x-occurrence': 100,
+                        anyOf: [
+                            {
+                                type: 'string',
+                                'x-bsonType': 'string',
+                                'x-typeOccurrence': 100,
+                            },
+                        ],
+                    },
+                },
+            };
+        }
+
+        it('leaves valid identifiers unquoted', () => {
+            const result = toTypeScriptDefinition(makeSchemaWithField('age'), 'test');
+            expect(result).toContain('    age: string;');
+        });
+
+        it('leaves underscore-prefixed identifiers unquoted', () => {
+            const result = toTypeScriptDefinition(makeSchemaWithField('_id'), 'test');
+            expect(result).toContain('    _id: string;');
+        });
+
+        it('leaves dollar-prefixed identifiers unquoted', () => {
+            const result = toTypeScriptDefinition(makeSchemaWithField('$type'), 'test');
+            expect(result).toContain('    $type: string;');
+        });
+
+        it('quotes field names with dashes', () => {
+            const result = toTypeScriptDefinition(makeSchemaWithField('order-items'), 'test');
+            expect(result).toContain('    "order-items": string;');
+        });
+
+        it('quotes field names with dots', () => {
+            const result = toTypeScriptDefinition(makeSchemaWithField('a.b'), 'test');
+            expect(result).toContain('    "a.b": string;');
+        });
+
+        it('quotes field names with spaces', () => {
+            const result = toTypeScriptDefinition(makeSchemaWithField('my field'), 'test');
+            expect(result).toContain('    "my field": string;');
+        });
+
+        it('quotes field names with brackets', () => {
+            const result = toTypeScriptDefinition(makeSchemaWithField('items[0]'), 'test');
+            expect(result).toContain('    "items[0]": string;');
+        });
+
+        it('escapes embedded double quotes in field names', () => {
+            const result = toTypeScriptDefinition(makeSchemaWithField('say"hi"'), 'test');
+            expect(result).toContain('    "say\\"hi\\"": string;');
+        });
+
+        it('escapes backslashes in field names', () => {
+            const result = toTypeScriptDefinition(makeSchemaWithField('back\\slash'), 'test');
+            expect(result).toContain('    "back\\\\slash": string;');
+        });
+
+        it('quotes field names that start with a digit', () => {
+            const result = toTypeScriptDefinition(makeSchemaWithField('123abc'), 'test');
+            expect(result).toContain('    "123abc": string;');
+        });
+
+        it('preserves optionality with quoted field names', () => {
+            const schema: JSONSchema = {
+                'x-documentsInspected': 100,
+                properties: {
+                    'order-items': {
+                        'x-occurrence': 50,
+                        anyOf: [
+                            {
+                                type: 'string',
+                                'x-bsonType': 'string',
+                                'x-typeOccurrence': 50,
+                            },
+                        ],
+                    },
+                },
+            };
+
+            const result = toTypeScriptDefinition(schema, 'test');
+            expect(result).toContain('    "order-items"?: string;');
+        });
+    });
+});
diff --git a/src/utils/json/data-api/autocomplete/toTypeScriptDefinition.ts b/src/utils/json/data-api/autocomplete/toTypeScriptDefinition.ts
new file mode 100644
index 000000000..17328dfeb
--- /dev/null
+++ b/src/utils/json/data-api/autocomplete/toTypeScriptDefinition.ts
@@ -0,0 +1,272 @@
+/*---------------------------------------------------------------------------------------------
+ *  Copyright (c) Microsoft Corporation. All rights reserved.
+ *  Licensed under the MIT License. See License.txt in the project root for license information.
+ *--------------------------------------------------------------------------------------------*/
+
+import { BSONTypes, type JSONSchema } from '@vscode-documentdb/schema-analyzer';
+
+/**
+ * BSON type name → TypeScript type text. Every numeric BSON type maps to `number`; both UUID kinds map to `UUID`.
+ */
+const bsonToTypeScriptMap: Record<string, string> = {
+    [BSONTypes.String]: 'string',
+    [BSONTypes.Int32]: 'number',
+    [BSONTypes.Double]: 'number',
+    [BSONTypes.Long]: 'number',
+    [BSONTypes.Decimal128]: 'number',
+    [BSONTypes.Number]: 'number',
+    [BSONTypes.Boolean]: 'boolean',
+    [BSONTypes.Date]: 'Date',
+    [BSONTypes.ObjectId]: 'ObjectId',
+    [BSONTypes.Null]: 'null',
+    [BSONTypes.Undefined]: 'undefined',
+    [BSONTypes.Binary]: 'Binary',
+    [BSONTypes.RegExp]: 'RegExp',
+    [BSONTypes.UUID]: 'UUID',
+    [BSONTypes.UUID_LEGACY]: 'UUID',
+    [BSONTypes.Timestamp]: 'Timestamp',
+    [BSONTypes.MinKey]: 'MinKey',
+    [BSONTypes.MaxKey]: 'MaxKey',
+    [BSONTypes.Code]: 'Code',
+    [BSONTypes.CodeWithScope]: 'Code',
+    [BSONTypes.DBRef]: 'DBRef',
+    [BSONTypes.Map]: 'Map<string, unknown>',
+    [BSONTypes.Symbol]: 'symbol',
+};
+
+/** Converts a BSON type string to a TypeScript type string; unmapped types become 'unknown'. */
+function bsonTypeToTS(bsonType: string): string {
+    // Own-key check: inherited names such as "constructor" must not resolve to Object.prototype members.
+    const isOwnKey = Object.prototype.hasOwnProperty.call(bsonToTypeScriptMap, bsonType);
+    return (isOwnKey ? bsonToTypeScriptMap[bsonType] : undefined) ?? 'unknown';
+}
+
+/**
+ * Matches ASCII-only JavaScript/TypeScript identifiers: an ASCII letter, underscore, or dollar sign,
+ * followed by zero or more ASCII letters, digits, underscores, or dollar signs.
+ * Anything else, including non-ASCII identifiers, does not match.
+ */
+const JS_IDENTIFIER_PATTERN = /^[a-zA-Z_$][a-zA-Z0-9_$]*$/;
+
+/**
+ * Returns a safe TypeScript property name for use in interface definitions.
+ * If the name matches JS_IDENTIFIER_PATTERN, it is returned as-is.
+ * Otherwise it is emitted as a double-quoted string literal via JSON.stringify, which escapes
+ * quotes, backslashes, and control characters (e.g. a newline becomes \n).
+ *
+ * Examples:
+ *  - age → age (valid identifier, unchanged)
+ *  - order-items → "order-items" (dash)
+ *  - a.b → "a.b" (dot)
+ *  - my field → "my field" (space)
+ *  - say"hi" → "say\"hi\"" (embedded quotes escaped)
+ */
+function safePropertyName(name: string): string {
+    if (JS_IDENTIFIER_PATTERN.test(name)) {
+        return name;
+    }
+    return JSON.stringify(name);
+}
+
+/**
+ * Converts a collection name to PascalCase and appends "Document". Underscores and characters that
+ * are invalid in an identifier (".", "-", "/", spaces, ...) separate words; a leading `_` is added when
+ * the result would not start with a letter or `$`. Empty or separator-only names yield "CollectionDocument".
+ *
+ * Examples:
+ *  - "order_items" → "OrderItemsDocument"
+ *  - "fs.files" → "FsFilesDocument"
+ *  - "123abc" → "_123abcDocument"
+ *  - "---" → "CollectionDocument"
+ */
+function toInterfaceName(collectionName: string): string {
+    const pascal = collectionName
+        .split(/(?:[^\p{ID_Continue}$]|_)+/u)
+        .filter((s) => s.length > 0)
+        .map((segment) => segment.charAt(0).toUpperCase() + segment.slice(1))
+        .join('');
+
+    if (pascal.length === 0) {
+        return 'CollectionDocument';
+    }
+
+    // Digits, combining marks, etc. are valid inside an identifier but not as its first character.
+    const prefix = /^[\p{ID_Start}$]/u.test(pascal) ? '' : '_';
+    return `${prefix}${pascal}Document`;
+}
+
+/**
+ * Builds the source text of a TypeScript `interface` describing the documents of a collection,
+ * based on a JSONSchema produced by the SchemaAnalyzer.
+ *
+ * @param schema - The JSON Schema with x- extensions from SchemaAnalyzer
+ * @param collectionName - The MongoDB API collection name, used to derive the interface name
+ * @returns The interface declaration as a newline-joined string
+ */
+export function toTypeScriptDefinition(schema: JSONSchema, collectionName: string): string {
+    // Opening line first; renderProperties appends one entry per top-level field.
+    const output: string[] = [`interface ${toInterfaceName(collectionName)} {`];
+    const { properties } = schema;
+
+    if (properties) {
+        // Root document count is the baseline for deciding which top-level fields are optional.
+        const inspectedAtRoot = (schema['x-documentsInspected'] as number) ?? 0;
+        renderProperties(properties, inspectedAtRoot, 1, output);
+    }
+
+    output.push('}');
+    return output.join('\n');
+}
+
+/**
+ * Appends one `name?: type;` line per property to `lines`, indented for the given nesting level.
+ */
+function renderProperties(
+    properties: Record<string, JSONSchema | boolean>,
+    parentDocumentsInspected: number,
+    indentLevel: number,
+    lines: string[],
+): void {
+    const pad = '    '.repeat(indentLevel);
+
+    Object.entries(properties).forEach(([key, node]) => {
+        // Boolean schemas are skipped, so they produce no line.
+        if (typeof node === 'boolean') {
+            return;
+        }
+
+        const marker = isFieldOptional(node, parentDocumentsInspected) ? '?' : '';
+        const typeText = resolveTypeString(node, indentLevel);
+        lines.push(`${pad}${safePropertyName(key)}${marker}: ${typeText};`);
+    });
+}
+
+/**
+ * A field is optional when the parent inspected at least one document and the field appeared in fewer of them.
+ */
+function isFieldOptional(schemaNode: JSONSchema, parentDocumentsInspected: number): boolean {
+    const seenCount = (schemaNode['x-occurrence'] as number) ?? 0;
+    return parentDocumentsInspected > 0 ? seenCount < parentDocumentsInspected : false;
+}
+
+/**
+ * Builds the TypeScript type text for a schema node from its `anyOf` alternatives:
+ * each alternative is converted once, duplicates are dropped, and the rest are joined with ` | `.
+ * Yields 'unknown' when there are no usable alternatives.
+ */
+function resolveTypeString(schemaNode: JSONSchema, indentLevel: number): string {
+    const alternatives = schemaNode.anyOf;
+    if (!alternatives) {
+        return 'unknown';
+    }
+
+    // A Set keeps first-seen order while discarding repeated type texts.
+    const unique = new Set<string>();
+    for (const alternative of alternatives) {
+        if (typeof alternative !== 'boolean') {
+            const rendered = singleEntryToTS(alternative, indentLevel);
+            if (rendered) {
+                unique.add(rendered);
+            }
+        }
+    }
+
+    // An empty list, or one holding only boolean/empty alternatives, leaves the type unknown.
+    return unique.size > 0 ? [...unique].join(' | ') : 'unknown';
+}
+
+/**
+ * Produces the TypeScript type text for one `anyOf` alternative, trying the most specific form first.
+ */
+function singleEntryToTS(entry: JSONSchema, indentLevel: number): string {
+    const jsonTypeName = entry.type as string | undefined;
+    const bsonTypeName = (entry['x-bsonType'] as string) ?? '';
+
+    // Nested document: expand its fields as an inline `{ ... }` block.
+    if (jsonTypeName === 'object' && entry.properties) {
+        return renderInlineObject(entry, indentLevel);
+    }
+
+    // Arrays are recognised by either the JSON type or the BSON annotation.
+    const isArray = jsonTypeName === 'array' || bsonTypeName === (BSONTypes.Array as string);
+    if (isArray) {
+        return renderArrayType(entry, indentLevel);
+    }
+
+    // Any other BSON annotation goes through the lookup table.
+    if (bsonTypeName) {
+        return bsonTypeToTS(bsonTypeName);
+    }
+
+    // No BSON annotation: fall back to the raw JSON Schema type, if any.
+    if (jsonTypeName) {
+        return jsonTypeName;
+    }
+    return 'unknown';
+}
+
+/**
+ * Renders a nested object as a multi-line inline type: `{`, one line per field, then an indented `}`.
+ */
+function renderInlineObject(entry: JSONSchema, indentLevel: number): string {
+    const { properties } = entry;
+    const body: string[] = [];
+
+    if (properties) {
+        // Optionality inside the object is judged against the object's own document count.
+        const inspectedHere = (entry['x-documentsInspected'] as number) ?? 0;
+        renderProperties(properties, inspectedHere, indentLevel + 1, body);
+    }
+
+    // The closing brace is indented to `indentLevel`, one level shallower than the fields.
+    const closingBrace = `${'    '.repeat(indentLevel)}}`;
+
+    return ['{', ...body, closingBrace].join('\n');
+}
+
+/**
+ * Produces the TypeScript array type for an array entry, e.g. `string[]` or `(string | number)[]`.
+ * Element types come from the `anyOf` list of `items`, or from a BSON annotation directly on `items`.
+ * Falls back to `unknown[]` whenever no element type can be determined.
+ */
+function renderArrayType(entry: JSONSchema, indentLevel: number): string {
+    const fallbackType = 'unknown[]';
+    const items = entry.items;
+
+    // Missing, boolean, and tuple-style (array) `items` give no single element schema to describe.
+    if (!items || typeof items === 'boolean' || Array.isArray(items)) {
+        return fallbackType;
+    }
+
+    const elementSchema = items as JSONSchema;
+    const alternatives = elementSchema.anyOf;
+
+    if (!alternatives || alternatives.length === 0) {
+        // No `anyOf`: only a BSON annotation directly on the element schema can name the type.
+        const elementBsonType = (elementSchema['x-bsonType'] as string) ?? '';
+        return elementBsonType ? `${bsonTypeToTS(elementBsonType)}[]` : fallbackType;
+    }
+
+    // Convert each alternative, keeping the first occurrence of every distinct type text.
+    const distinct = new Set<string>();
+    for (const alternative of alternatives) {
+        if (typeof alternative === 'boolean') {
+            continue;
+        }
+        const rendered = singleEntryToTS(alternative, indentLevel);
+        if (rendered) {
+            distinct.add(rendered);
+        }
+    }
+
+    const elementTypes = [...distinct];
+    switch (elementTypes.length) {
+        case 0:
+            return fallbackType;
+        case 1:
+            return `${elementTypes[0]}[]`;
+        default:
+            // Parentheses keep the union together so `[]` applies to all members.
+            return `(${elementTypes.join(' | ')})[]`;
+    }
+}
diff --git a/src/utils/json/mongo/MongoBSONTypes.ts b/src/utils/json/mongo/MongoBSONTypes.ts
deleted file mode 100644
index fa97add9c..000000000
--- a/src/utils/json/mongo/MongoBSONTypes.ts
+++ /dev/null
@@ -1,200 +0,0 @@
-/*---------------------------------------------------------------------------------------------
- *  Copyright (c) Microsoft Corporation. All rights reserved.
- *  Licensed under the MIT License. See License.txt in the project root for license information.
- *--------------------------------------------------------------------------------------------*/
-
-import {
-    Binary,
-    BSONSymbol,
-    Code,
-    DBRef,
-    Decimal128,
-    Double,
-    Int32,
-    Long,
-    MaxKey,
-    MinKey,
-    ObjectId,
-    Timestamp,
-    UUID,
-} from 'mongodb';
-
-/**
- * Represents the different data types that can be stored in a MongoDB document.
- * The string representation is casesensitive and should match the MongoDB documentation.
- * https://www.mongodb.com/docs/manual/reference/bson-types/
- */
-export enum MongoBSONTypes {
-    String = 'string',
-    Number = 'number',
-    Int32 = 'int32',
-    Double = 'double',
-    Decimal128 = 'decimal128',
-    Long = 'long',
-    Boolean = 'boolean',
-    Object = 'object',
-    Array = 'array',
-    Null = 'null',
-    Undefined = 'undefined',
-    Date = 'date',
-    RegExp = 'regexp',
-    Binary = 'binary',
-    ObjectId = 'objectid',
-    Symbol = 'symbol',
-    Timestamp = 'timestamp',
-    UUID = 'uuid',
-    UUID_LEGACY = 'uuid-legacy', // old UUID subtype, used in some legacy data
-    MinKey = 'minkey',
-    MaxKey = 'maxkey',
-    DBRef = 'dbref',
-    Code = 'code',
-    CodeWithScope = 'codewithscope',
-    Map = 'map',
-    // Add any deprecated types if necessary
-    _UNKNOWN_ = '_unknown_', // Catch-all for unknown types
-}
-
-export namespace MongoBSONTypes {
-    const displayStringMap: Record<MongoBSONTypes, string> = {
-        [MongoBSONTypes.String]: 'String',
-        [MongoBSONTypes.Number]: 'Number',
-        [MongoBSONTypes.Int32]: 'Int32',
-        [MongoBSONTypes.Double]: 'Double',
-        [MongoBSONTypes.Decimal128]: 'Decimal128',
-        [MongoBSONTypes.Long]: 'Long',
-        [MongoBSONTypes.Boolean]: 'Boolean',
-        [MongoBSONTypes.Object]: 'Object',
-        [MongoBSONTypes.Array]: 'Array',
-        [MongoBSONTypes.Null]: 'Null',
-        [MongoBSONTypes.Undefined]: 'Undefined',
-        [MongoBSONTypes.Date]: 'Date',
-        [MongoBSONTypes.RegExp]: 'RegExp',
-        [MongoBSONTypes.Binary]: 'Binary',
-        [MongoBSONTypes.ObjectId]: 'ObjectId',
-        [MongoBSONTypes.Symbol]: 'Symbol',
-        [MongoBSONTypes.Timestamp]: 'Timestamp',
-        [MongoBSONTypes.MinKey]: 'MinKey',
-        [MongoBSONTypes.MaxKey]: 'MaxKey',
-        [MongoBSONTypes.DBRef]: 'DBRef',
-        [MongoBSONTypes.Code]: 'Code',
-        [MongoBSONTypes.CodeWithScope]: 'CodeWithScope',
-        [MongoBSONTypes.Map]: 'Map',
-        [MongoBSONTypes._UNKNOWN_]: 'Unknown',
-        [MongoBSONTypes.UUID]: 'UUID',
-        [MongoBSONTypes.UUID_LEGACY]: 'UUID (Legacy)',
-    };
-
-    export function toDisplayString(type: MongoBSONTypes): string {
-        return displayStringMap[type] || 'Unknown';
-    }
-
-    export function toString(type: MongoBSONTypes): string {
-        return type;
-    }
-
-    /**
-     * Converts a MongoDB data type to a case sensitive JSON data type
-     * @param type The MongoDB data type
-     * @returns A corresponding JSON data type (please note: it's case sensitive)
-     */
-    export function toJSONType(type: MongoBSONTypes): string {
-        switch (type) {
-            case MongoBSONTypes.String:
-            case MongoBSONTypes.Symbol:
-            case MongoBSONTypes.Date:
-            case MongoBSONTypes.Timestamp:
-            case MongoBSONTypes.ObjectId:
-            case MongoBSONTypes.RegExp:
-            case MongoBSONTypes.Binary:
-            case MongoBSONTypes.Code:
-            case MongoBSONTypes.UUID:
-            case MongoBSONTypes.UUID_LEGACY:
-                return 'string';
-
-            case MongoBSONTypes.Boolean:
-                return 'boolean';
-
-            case MongoBSONTypes.Int32:
-            case MongoBSONTypes.Long:
-            case MongoBSONTypes.Double:
-            case MongoBSONTypes.Decimal128:
-                return 'number';
-
-            case MongoBSONTypes.Object:
-            case MongoBSONTypes.Map:
-            case MongoBSONTypes.DBRef:
-            case MongoBSONTypes.CodeWithScope:
-                return 'object';
-
-            case MongoBSONTypes.Array:
-                return 'array';
-
-            case MongoBSONTypes.Null:
-            case MongoBSONTypes.Undefined:
-            case MongoBSONTypes.MinKey:
-            case MongoBSONTypes.MaxKey:
-                return 'null';
-
-            default:
-                return 'string'; // Default to string for unknown types
-        }
-    }
-
-    /**
-     * Accepts a value from a MongoDB 'Document' object and returns the inferred type.
-     * @param value The value of a field in a MongoDB 'Document' object
-     * @returns
-     */
-    export function inferType(value: unknown): MongoBSONTypes {
-        if (value === null) return MongoBSONTypes.Null;
-        if (value === undefined) return MongoBSONTypes.Undefined;
-
-        switch (typeof value) {
-            case 'string':
-                return MongoBSONTypes.String;
-            case 'number':
-                return MongoBSONTypes.Double; // JavaScript numbers are doubles
-            case 'boolean':
-                return MongoBSONTypes.Boolean;
-            case 'object':
-                if (Array.isArray(value)) {
-                    return MongoBSONTypes.Array;
-                }
-
-                // Check for common BSON types first
-                if (value instanceof ObjectId) return MongoBSONTypes.ObjectId;
-                if (value instanceof Int32) return MongoBSONTypes.Int32;
-                if (value instanceof Double) return MongoBSONTypes.Double;
-                if (value instanceof Date) return MongoBSONTypes.Date;
-                if (value instanceof Timestamp) return MongoBSONTypes.Timestamp;
-
-                // Less common types
-                if (value instanceof Decimal128) return MongoBSONTypes.Decimal128;
-                if (value instanceof Long) return MongoBSONTypes.Long;
-                if (value instanceof MinKey) return MongoBSONTypes.MinKey;
-                if (value instanceof MaxKey) return MongoBSONTypes.MaxKey;
-                if (value instanceof BSONSymbol) return MongoBSONTypes.Symbol;
-                if (value instanceof DBRef) return MongoBSONTypes.DBRef;
-                if (value instanceof Map) return MongoBSONTypes.Map;
-                if (value instanceof UUID && value.sub_type === Binary.SUBTYPE_UUID) return MongoBSONTypes.UUID;
-                if (value instanceof UUID && value.sub_type === Binary.SUBTYPE_UUID_OLD)
-                    return MongoBSONTypes.UUID_LEGACY;
-                if (value instanceof Buffer || value instanceof Binary) return MongoBSONTypes.Binary;
-                if (value instanceof RegExp) return MongoBSONTypes.RegExp;
-                if (value instanceof Code) {
-                    if (value.scope) {
-                        return MongoBSONTypes.CodeWithScope;
-                    } else {
-                        return MongoBSONTypes.Code;
-                    }
-                }
-
-                // Default to Object if none of the above match
-                return MongoBSONTypes.Object;
-            default:
-                // This should never happen, but if it does, we'll catch it here
-                // TODO: add telemetry somewhere to know when it happens (not here, this could get hit too often)
-                return MongoBSONTypes._UNKNOWN_;
-        }
-    }
-}
diff --git a/src/utils/json/mongo/SchemaAnalyzer.test.ts b/src/utils/json/mongo/SchemaAnalyzer.test.ts
deleted file mode 100644
index 731791611..000000000
--- a/src/utils/json/mongo/SchemaAnalyzer.test.ts
+++ /dev/null
@@ -1,255 +0,0 @@
-/*---------------------------------------------------------------------------------------------
- *  Copyright (c) Microsoft Corporation. All rights reserved.
- *  Licensed under the MIT License. See License.txt in the project root for license information.
- *--------------------------------------------------------------------------------------------*/
-
-import { type JSONSchema, type JSONSchemaRef } from '../JSONSchema';
-import { getPropertyNamesAtLevel, updateSchemaWithDocument } from './SchemaAnalyzer';
-import {
-    arraysWithDifferentDataTypes,
-    complexDocument,
-    complexDocumentsArray,
-    complexDocumentWithOddTypes,
-    embeddedDocumentOnly,
-    flatDocument,
-    sparseDocumentsArray,
-} from './mongoTestDocuments';
-
-describe('DocumentDB Schema Analyzer', () => {
-    it('prints out schema for testing', () => {
-        const schema: JSONSchema = {};
-        updateSchemaWithDocument(schema, embeddedDocumentOnly);
-        console.log(JSON.stringify(schema, null, 2));
-        expect(schema).toBeDefined();
-    });
-
-    it('supports many documents', () => {
-        const schema: JSONSchema = {};
-        sparseDocumentsArray.forEach((doc) => updateSchemaWithDocument(schema, doc));
-        expect(schema).toBeDefined();
-
-        // Check that 'x-documentsInspected' is correct
-        expect(schema['x-documentsInspected']).toBe(sparseDocumentsArray.length);
-
-        // Check that the schema has the correct root properties
-        const expectedRootProperties = new Set(['_id', 'name', 'age', 'email', 'isActive', 'score', 'description']);
-
-        expect(Object.keys(schema.properties || {})).toEqual(
-            expect.arrayContaining(Array.from(expectedRootProperties)),
-        );
-
-        // Check that the 'name' field is detected correctly
-        const nameField: JSONSchema = schema.properties?.['name'];
-        expect(nameField).toBeDefined();
-        expect(nameField?.['x-occurrence']).toBeGreaterThan(0);
-
-        // Access 'anyOf' to get the type entries
-        const nameFieldTypes = nameField.anyOf?.map((typeEntry) => typeEntry['type']);
-        expect(nameFieldTypes).toContain('string');
-
-        // Check that the 'age' field has the correct type
-        const ageField: JSONSchema = schema.properties?.['age'];
-        expect(ageField).toBeDefined();
-        const ageFieldTypes = ageField.anyOf?.map((typeEntry) => typeEntry['type']);
-        expect(ageFieldTypes).toContain('number');
-
-        // Check that the 'isActive' field is a boolean
-        const isActiveField: JSONSchema = schema.properties?.['isActive'];
-        expect(isActiveField).toBeDefined();
-        const isActiveTypes = isActiveField.anyOf?.map((typeEntry) => typeEntry['type']);
-        expect(isActiveTypes).toContain('boolean');
-
-        // Check that the 'description' field is optional (occurs in some documents)
-        const descriptionField = schema.properties?.['description'];
-        expect(descriptionField).toBeDefined();
-        expect(descriptionField?.['x-occurrence']).toBeLessThan(sparseDocumentsArray.length);
-    });
-
-    it('detects all BSON types from flatDocument', () => {
-        const schema: JSONSchema = {};
-        updateSchemaWithDocument(schema, flatDocument);
-
-        // Check that all fields are detected
-        const expectedFields = Object.keys(flatDocument);
-        expect(Object.keys(schema.properties || {})).toEqual(expect.arrayContaining(expectedFields));
-
-        // Helper function to get the 'x-bsonType' from a field
-        function getBsonType(fieldName: string): string | undefined {
-            const field = schema.properties?.[fieldName];
-            const anyOf = field?.anyOf;
-            return anyOf && anyOf[0]?.['x-bsonType'];
-        }
-
-        // Check that specific BSON types are correctly identified
-        expect(getBsonType('int32Field')).toBe('int32');
-        expect(getBsonType('doubleField')).toBe('double');
-        expect(getBsonType('decimalField')).toBe('decimal128');
-        expect(getBsonType('dateField')).toBe('date');
-        expect(getBsonType('objectIdField')).toBe('objectid');
-        expect(getBsonType('codeField')).toBe('code');
-        expect(getBsonType('uuidField')).toBe('uuid');
-        expect(getBsonType('uuidLegacyField')).toBe('uuid-legacy');
-    });
-
-    it('detects embedded objects correctly', () => {
-        const schema: JSONSchema = {};
-        updateSchemaWithDocument(schema, embeddedDocumentOnly);
-
-        // Check that the root properties are detected
-        expect(schema.properties).toHaveProperty('personalInfo');
-        expect(schema.properties).toHaveProperty('jobInfo');
-
-        // Access 'personalInfo' properties
-        const personalInfoAnyOf = schema.properties && schema.properties['personalInfo']?.anyOf;
-        const personalInfoProperties = personalInfoAnyOf?.[0]?.properties;
-        expect(personalInfoProperties).toBeDefined();
-        expect(personalInfoProperties).toHaveProperty('name');
-        expect(personalInfoProperties).toHaveProperty('age');
-        expect(personalInfoProperties).toHaveProperty('married');
-        expect(personalInfoProperties).toHaveProperty('address');
-
-        // Access 'address' properties within 'personalInfo'
-        const addressAnyOf = personalInfoProperties['address'].anyOf;
-        const addressProperties = addressAnyOf?.[0]?.properties;
-        expect(addressProperties).toBeDefined();
-        expect(addressProperties).toHaveProperty('street');
-        expect(addressProperties).toHaveProperty('city');
-        expect(addressProperties).toHaveProperty('zip');
-    });
-
-    it('detects arrays and their element types correctly', () => {
-        const schema: JSONSchema = {};
-        updateSchemaWithDocument(schema, arraysWithDifferentDataTypes);
-
-        // Check that arrays are detected
-        expect(schema.properties).toHaveProperty('integersArray');
-        expect(schema.properties).toHaveProperty('stringsArray');
-        expect(schema.properties).toHaveProperty('booleansArray');
-        expect(schema.properties).toHaveProperty('mixedArray');
-        expect(schema.properties).toHaveProperty('datesArray');
-
-        // Helper function to get item types from an array field
-        function getArrayItemTypes(fieldName: string): string[] | undefined {
-            const field = schema.properties?.[fieldName];
-            const anyOf = field?.anyOf;
-            const itemsAnyOf: JSONSchemaRef[] = anyOf?.[0]?.items?.anyOf;
-            return itemsAnyOf?.map((typeEntry) => typeEntry['type']);
-        }
-
-        // Check that 'integersArray' has elements of type 'number'
-        const integerItemTypes = getArrayItemTypes('integersArray');
-        expect(integerItemTypes).toContain('number');
-
-        // Check that 'stringsArray' has elements of type 'string'
-        const stringItemTypes = getArrayItemTypes('stringsArray');
-        expect(stringItemTypes).toContain('string');
-
-        // Check that 'mixedArray' contains multiple types
-        const mixedItemTypes = getArrayItemTypes('mixedArray');
-        expect(mixedItemTypes).toEqual(expect.arrayContaining(['number', 'string', 'boolean', 'object', 'null']));
-    });
-
-    it('handles arrays within objects and objects within arrays', () => {
-        const schema: JSONSchema = {};
-        updateSchemaWithDocument(schema, complexDocument);
-
-        // Access 'user.profile.hobbies'
-        const userProfile = schema.properties && schema.properties['user'].anyOf?.[0]?.properties?.['profile'];
-        const hobbies = userProfile?.anyOf?.[0]?.properties?.['hobbies'];
-        // eslint-disable-next-line @typescript-eslint/no-unsafe-call
-        const hobbiesItemTypes = hobbies?.anyOf?.[0]?.items?.anyOf?.map((typeEntry) => typeEntry['type']);
-        expect(hobbiesItemTypes).toContain('string');
-
-        // Access 'user.profile.addresses'
-        const addresses = userProfile?.anyOf?.[0]?.properties?.['addresses'];
-        // eslint-disable-next-line @typescript-eslint/no-unsafe-call
-        const addressItemTypes = addresses?.anyOf?.[0]?.items?.anyOf?.map((typeEntry) => typeEntry['type']);
-        expect(addressItemTypes).toContain('object');
-
-        // Check that 'orders' is an array
-        const orders = schema.properties && schema.properties['orders'];
-        expect(orders).toBeDefined();
-        const ordersType = orders.anyOf?.[0]?.type;
-        expect(ordersType).toBe('array');
-
-        // Access 'items' within 'orders'
-        const orderItems = orders.anyOf?.[0]?.items?.anyOf?.[0]?.properties?.['items'];
-        const orderItemsType = orderItems?.anyOf?.[0]?.type;
-        expect(orderItemsType).toBe('array');
-    });
-
-    it('updates schema correctly when processing multiple documents', () => {
-        const schema: JSONSchema = {};
-        complexDocumentsArray.forEach((doc) => updateSchemaWithDocument(schema, doc));
-
-        // Check that 'x-documentsInspected' is correct
-        expect(schema['x-documentsInspected']).toBe(complexDocumentsArray.length);
-
-        // Check that some fields are present from different documents
-        expect(schema.properties).toHaveProperty('stringField');
-        expect(schema.properties).toHaveProperty('personalInfo');
-        expect(schema.properties).toHaveProperty('integersArray');
-        expect(schema.properties).toHaveProperty('user');
-
-        // Check that 'integersArray' has correct min and max values
-        const integersArray = schema.properties && schema.properties['integersArray'];
-        const integerItemType = integersArray.anyOf?.[0]?.items?.anyOf?.[0];
-        expect(integerItemType?.['x-minValue']).toBe(1);
-        expect(integerItemType?.['x-maxValue']).toBe(5);
-
-        // Check that 'orders.items.price' is detected as Decimal128
-        const orders = schema.properties && schema.properties['orders'];
-        const orderItems = orders.anyOf?.[0]?.items?.anyOf?.[0]?.properties?.['items'];
-        const priceField = orderItems?.anyOf?.[0]?.items?.anyOf?.[0]?.properties?.['price'];
-        const priceFieldType = priceField?.anyOf?.[0];
-        expect(priceFieldType?.['x-bsonType']).toBe('decimal128');
-    });
-
-    describe('traverses schema', () => {
-        it('with valid paths', () => {
-            const schema: JSONSchema = {};
-            updateSchemaWithDocument(schema, complexDocument);
-
-            let propertiesAtRoot = getPropertyNamesAtLevel(schema, []);
-            expect(propertiesAtRoot).toHaveLength(4);
-
-            propertiesAtRoot = getPropertyNamesAtLevel(schema, ['user']);
-            expect(propertiesAtRoot).toHaveLength(3);
-
-            propertiesAtRoot = getPropertyNamesAtLevel(schema, ['user', 'profile']);
-            expect(propertiesAtRoot).toHaveLength(4);
-        });
-
-        it('with broken paths', () => {
-            const schema: JSONSchema = {};
-            updateSchemaWithDocument(schema, complexDocument);
-
-            const propertiesAtRoot = getPropertyNamesAtLevel(schema, []);
-            expect(propertiesAtRoot).toHaveLength(4);
-
-            expect(() => getPropertyNamesAtLevel(schema, ['no-entry'])).toThrow();
-
-            expect(() => getPropertyNamesAtLevel(schema, ['user', 'no-entry'])).toThrow();
-        });
-
-        it('with sparse docs and mixed types', () => {
-            const schema: JSONSchema = {};
-            updateSchemaWithDocument(schema, complexDocument);
-            updateSchemaWithDocument(schema, complexDocumentWithOddTypes);
-
-            let propertiesAtRoot = getPropertyNamesAtLevel(schema, []);
-            expect(propertiesAtRoot).toHaveLength(4);
-
-            propertiesAtRoot = getPropertyNamesAtLevel(schema, ['user']);
-            expect(propertiesAtRoot).toHaveLength(3);
-            expect(propertiesAtRoot).toEqual(['email', 'profile', 'username']);
-
-            propertiesAtRoot = getPropertyNamesAtLevel(schema, ['user', 'profile']);
-            expect(propertiesAtRoot).toHaveLength(4);
-            expect(propertiesAtRoot).toEqual(['addresses', 'firstName', 'hobbies', 'lastName']);
-
-            propertiesAtRoot = getPropertyNamesAtLevel(schema, ['history']);
-            expect(propertiesAtRoot).toHaveLength(6);
-        });
-    });
-});
diff --git a/src/utils/json/mongo/autocomplete/basicMongoFindFilterSchema.json b/src/utils/json/mongo/autocomplete/basicMongoFindFilterSchema.json
deleted file mode 100644
index a886411a4..000000000
--- a/src/utils/json/mongo/autocomplete/basicMongoFindFilterSchema.json
+++ /dev/null
@@ -1,173 +0,0 @@
-{
-  "$schema": "http://json-schema.org/draft-07/schema#",
-  "$id": "mongodb-generic-filter-schema",
-  "title": "MongoDB Generic Find Filter Schema",
-  "type": "object",
-  "additionalProperties": {
-    "oneOf": [
-      {
-        "title": "Direct Value",
-        "description": "A direct value for equality matching on any field.",
-        "examples": ["example", 42, true, null]
-      },
-      {
-        "title": "Operator-Based Query",
-        "$ref": "#/definitions/operatorObject",
-        "examples": [
-          { "$gt": 10 },
-          { "$lt": 100 },
-          { "$gte": 5 },
-          { "$lte": 50 },
-          { "$in": ["red", "blue", "green"] },
-          { "$nin": ["yellow", "purple"] },
-          { "$exists": false },
-          { "$regex": "^start.*end$" },
-          { "$gt": 10, "$lt": 20 },
-          { "$in": [1, 2, 3], "$nin": [4, 5] }
-        ]
-      }
-    ]
-  },
-  "properties": {
-    "$or": {
-      "type": "array",
-      "items": { "$ref": "#" },
-      "description": "Joins query clauses with a logical OR.",
-      "examples": [
-        [{ "status": "A" }, { "qty": { "$lt": 30 } }],
-        [{ "age": { "$gte": 18 } }, { "membership": "gold" }],
-        [{ "category": { "$in": ["electronics", "books"] } }, { "onSale": true }]
-      ]
-    },
-    "$and": {
-      "type": "array",
-      "items": { "$ref": "#" },
-      "description": "Joins query clauses with a logical AND.",
-      "examples": [
-        [{ "status": "A" }, { "qty": { "$gt": 20, "$lt": 50 } }],
-        [{ "verified": true }, { "email": { "$exists": true } }],
-        [{ "price": { "$gte": 100 } }, { "stock": { "$lte": 500 } }]
-      ]
-    },
-    "$not": {
-      "oneOf": [{ "$ref": "#" }],
-      "description": "Inverts the effect of a query expression.",
-      "examples": [
-        { "price": { "$gt": 100 } },
-        { "status": { "$eq": "inactive" } },
-        { "category": { "$in": ["outdated", "discontinued"] } }
-      ]
-    },
-    "$nor": {
-      "type": "array",
-      "items": { "$ref": "#" },
-      "description": "Joins query clauses with a logical NOR.",
-      "examples": [
-        [{ "price": 1.99 }, { "qty": { "$lt": 20 } }],
-        [{ "status": "A" }, { "onSale": true }],
-        [{ "rating": { "$gte": 4.5 } }, { "reviews": { "$gt": 100 } }]
-      ]
-    }
-  },
-  "definitions": {
-    "operatorObject": {
-      "type": "object",
-      "properties": {
-        "$eq": {
-          "description": "Matches values that are equal to a specified value.",
-          "examples": ["active", 100, true]
-        },
-        "$ne": {
-          "description": "Matches all values that are not equal to a specified value.",
-          "examples": ["inactive", 0, false]
-        },
-        "$gt": {
-          "description": "Matches values that are greater than a specified value.",
-          "examples": [10, 100]
-        },
-        "$gte": {
-          "description": "Matches values that are greater than or equal to a specified value.",
-          "examples": [5, 50]
-        },
-        "$lt": {
-          "description": "Matches values that are less than a specified value.",
-          "examples": [20, 80]
-        },
-        "$lte": {
-          "description": "Matches values that are less than or equal to a specified value.",
-          "examples": [15, 75]
-        },
-        "$in": {
-          "type": "array",
-          "description": "Matches any of the values specified in an array.",
-          "examples": [
-            ["red", "green", "blue"],
-            [1, 2, 3],
-            ["small", "medium", "large"]
-          ]
-        },
-        "$nin": {
-          "type": "array",
-          "description": "Matches none of the values specified in an array.",
-          "examples": [
-            ["yellow", "purple"],
-            [4, 5, 6],
-            ["extra-large", "xxl"]
-          ]
-        },
-        "$exists": {
-          "type": "boolean",
-          "description": "Matches documents that have the specified field.",
-          "examples": [true, false]
-        },
-        "$regex": {
-          "description": "Provides regular expression capabilities for pattern matching strings.",
-          "examples": ["^start", "end$", ".*pattern.*", "^[A-Z]{3}[0-9]{2}$"]
-        },
-        "$size": {
-          "type": "integer",
-          "description": "Matches any array with the specified number of elements.",
-          "examples": [0, 5, 10]
-        },
-        "$type": {
-          "description": "Matches values based on their BSON type.",
-          "examples": [1, "string", "object"]
-        },
-        "$all": {
-          "type": "array",
-          "description": "Matches arrays that contain all elements specified in the query.",
-          "examples": [
-            ["red", "blue"],
-            [10, 20],
-            ["feature1", "feature2"]
-          ]
-        },
-        "$elemMatch": {
-          "type": "object",
-          "description": "Matches documents that contain an array field with at least one element that matches the specified query criteria.",
-          "examples": [
-            { "score": { "$gt": 80 } },
-            { "dimensions": { "$lt": 50, "$gt": 20 } },
-            { "attributes": { "color": "red", "size": "M" } }
-          ]
-        }
-      },
-      "additionalProperties": false,
-      "description": "An object containing MongoDB query operators and their corresponding values.",
-      "minProperties": 1,
-      "examples": [
-        { "$gt": 10 },
-        { "$lt": 100 },
-        { "$gte": 5 },
-        { "$lte": 50 },
-        { "$in": ["value1", "value2"] },
-        { "$gt": 10, "$lt": 20 },
-        { "$exists": true },
-        { "$regex": "^[a-z]+$" },
-        { "$in": [1, 2, 3], "$nin": [4, 5, 6] },
-        { "$elemMatch": { "score": { "$gte": 80 } } }
-      ]
-    }
-  },
-  "description": "Generic schema for MongoDB find query filters without knowledge of specific fields."
-}
diff --git a/src/utils/json/mongo/autocomplete/generateMongoFindJsonSchema.ts b/src/utils/json/mongo/autocomplete/generateMongoFindJsonSchema.ts
deleted file mode 100644
index 0f0fa7bbe..000000000
--- a/src/utils/json/mongo/autocomplete/generateMongoFindJsonSchema.ts
+++ /dev/null
@@ -1,270 +0,0 @@
-/*---------------------------------------------------------------------------------------------
- *  Copyright (c) Microsoft Corporation. All rights reserved.
- *  Licensed under the MIT License. See License.txt in the project root for license information.
- *--------------------------------------------------------------------------------------------*/
-
-import { type FieldEntry } from './getKnownFields';
-
-/**
- * Generates a JSON schema for MongoDB find filter queries.
- *
- * This function is a short-term solution for providing autocompletion for MongoDB find filter queries.
- * A MongoDB find filter query is a JSON document that can range from simple to complex structures.
- * Basic autocompletion can be provided using a modified JSON schema, which is what we've done here.
- *
- * The long-term plan is to provide a more sophisticated auto-completion using, for example,
- * the suggestion API that Monaco provides. This will be looked at in the future.
- *
- * @param fieldEntries - An array of field entries where each entry contains:
- *   - path: A string representing the full path of the field in the dataset (e.g., "age", "address.city").
- *   - type: The most common or expected data type for that field (e.g., "number", "string").
- *
- * The data provided is supposed to contain all known data paths from the expected dataset,
- * focusing only on leaf nodes.
- *
- * The returned JSON schema can be directly added to the Monaco editor to activate autocompletion.
- *
- * @returns A JSON schema object that can be used for autocompletion in the Monaco editor.
- */
-export function generateMongoFindJsonSchema(fieldEntries: FieldEntry[]) {
-    // Initialize the base schema object
-    const schema = {
-        $schema: 'http://json-schema.org/draft-07/schema#',
-        $id: 'mongodb-filter-schema',
-        title: 'MongoDB Find Filter Schema',
-        type: 'object',
-        properties: {},
-        additionalProperties: {
-            oneOf: [
-                {
-                    title: 'Direct Value',
-                    description: 'A direct value for equality matching on an unknown field.',
-                    examples: ['value', 123, true, null],
-                },
-                {
-                    title: 'Operator-Based Query',
-                    $ref: '#/definitions/operatorObjectUnknown',
-                    examples: [{ $ne: 'inactive' }, { $exists: true }],
-                },
-            ],
-        },
-        definitions: {
-            operatorObject: {
-                type: 'object',
-                properties: {
-                    $eq: {
-                        description: 'Matches values that are equal to a specified value.',
-                        examples: [21, 'active', true],
-                    },
-                    $ne: {
-                        description: 'Matches all values that are not equal to a specified value.',
-                        examples: [30, 'inactive', false],
-                    },
-                    $gt: {
-                        description: 'Matches values that are greater than a specified value.',
-                        examples: [25, 100],
-                    },
-                    $gte: {
-                        description: 'Matches values that are greater than or equal to a specified value.',
-                        examples: [18, 50],
-                    },
-                    $lt: {
-                        description: 'Matches values that are less than a specified value.',
-                        examples: [65, 100],
-                    },
-                    $lte: {
-                        description: 'Matches values that are less than or equal to a specified value.',
-                        examples: [30, 75],
-                    },
-                    $in: {
-                        type: 'array',
-                        description: 'Matches any of the values specified in an array.',
-                        examples: [
-                            ['red', 'blue'],
-                            [21, 30, 40],
-                        ],
-                    },
-                    $nin: {
-                        type: 'array',
-                        description: 'Matches none of the values specified in an array.',
-                        examples: [['green'], [50, 60]],
-                    },
-                    $exists: {
-                        type: 'boolean',
-                        description: 'Matches documents that have the specified field.',
-                        examples: [true, false],
-                    },
-                    $regex: {
-                        description: 'Provides regular expression capabilities for pattern matching strings.',
-                        examples: ['^re', '.*blue$', '^[A-Z]+'],
-                    },
-                },
-                additionalProperties: false,
-                description: 'An object containing a MongoDB query operator and its corresponding value.',
-                minProperties: 1,
-            },
-            operatorObjectUnknown: {
-                $ref: '#/definitions/operatorObject',
-            },
-        },
-        description:
-            'Schema for MongoDB find query filters, supporting known fields with various operators for querying documents.',
-    };
-
-    // Set to collect all full paths
-    const fullPathsSet = new Set<string>();
-
-    // Function to generate examples based on type
-    function generateExamples(type: string): unknown[] {
-        let examples;
-        if (type === 'number') {
-            examples = [42, 100];
-            // eslint-disable-next-line @typescript-eslint/no-unsafe-call, @typescript-eslint/no-unsafe-member-access
-            examples.push(false); // odd type
-            // eslint-disable-next-line @typescript-eslint/no-unsafe-call, @typescript-eslint/no-unsafe-member-access
-            examples.push(null);
-        } else if (type === 'string') {
-            examples = ['red', 'blue'];
-            // eslint-disable-next-line @typescript-eslint/no-unsafe-call, @typescript-eslint/no-unsafe-member-access
-            examples.push(null);
-        } else if (type === 'boolean') {
-            examples = [true, false];
-            // eslint-disable-next-line @typescript-eslint/no-unsafe-call, @typescript-eslint/no-unsafe-member-access
-            examples.push(null);
-        } else {
-            examples = ['value', 123, true, null];
-        }
-        return examples as [];
-    }
-
-    // Function to generate examples for operator-based queries
-    function generateOperatorExamples(type: string): unknown[] {
-        let examples;
-        if (type === 'number') {
-            examples = [{ $gt: 25 }, { $in: [20, 30, 40] }];
-        } else if (type === 'string') {
-            examples = [{ $regex: '^re' }, { $ne: 'blue' }];
-        } else if (type === 'boolean') {
-            examples = [{ $eq: true }, { $ne: false }];
-        } else {
-            examples = [{ $exists: true }];
-        }
-        return examples as [];
-    }
-
-    // Function to create nested properties based on path components
-    function createNestedProperty(
-        // eslint-disable-next-line @typescript-eslint/no-explicit-any
-        obj: any,
-        pathComponents: string[],
-        type: string,
-        currentPath: string = '',
-    ) {
-        const fieldName = pathComponents[0];
-        const newPath = currentPath ? `${currentPath}.${fieldName}` : fieldName;
-
-        fullPathsSet.add(newPath);
-
-        if (pathComponents.length === 1) {
-            // Leaf node
-            const examples = generateExamples(type);
-            const operatorExamples = generateOperatorExamples(type);
-            // eslint-disable-next-line @typescript-eslint/no-unsafe-member-access
-            obj[fieldName] = {
-                oneOf: [
-                    {
-                        title: 'Direct Value',
-                        description: `A direct value for equality matching on the '${fieldName}' field.`,
-                        examples: examples,
-                    },
-                    {
-                        title: 'Operator-Based Query',
-                        $ref: '#/definitions/operatorObject',
-                        examples: operatorExamples,
-                    },
-                ],
-            };
-        } else {
-            // Nested object
-            // eslint-disable-next-line @typescript-eslint/no-unsafe-member-access
-            if (!obj[fieldName]) {
-                // eslint-disable-next-line @typescript-eslint/no-unsafe-member-access
-                obj[fieldName] = {
-                    type: 'object',
-                    properties: {},
-                    additionalProperties: false,
-                    description: `Embedded '${fieldName}' object containing fields.`,
-                };
-            }
-            createNestedProperty(
-                // eslint-disable-next-line @typescript-eslint/no-unsafe-member-access
-                obj[fieldName]['properties'],
-                pathComponents.slice(1),
-                type,
-                newPath,
-            );
-        }
-    }
-
-    // Process each fieldEntry
-    for (const fieldEntry of fieldEntries) {
-        const pathComponents = fieldEntry.path.split('.');
-        // eslint-disable-next-line @typescript-eslint/no-unsafe-member-access
-        createNestedProperty(schema['properties'], pathComponents, fieldEntry.type);
-    }
-
-    // Function to get type for a full path
-    function getTypeForFullPath(fullPath: string): string | undefined {
-        for (const fieldEntry of fieldEntries) {
-            if (fieldEntry.path === fullPath) {
-                return fieldEntry.type;
-            }
-        }
-        return undefined;
-    }
-
-    // Create properties with full paths at the root level
-    for (const fullPath of fullPathsSet) {
-        const type = getTypeForFullPath(fullPath) || 'string';
-        const examples = generateExamples(type);
-        const operatorExamples = generateOperatorExamples(type);
-
-        // eslint-disable-next-line @typescript-eslint/no-unsafe-member-access
-        schema['properties'][fullPath] = {
-            oneOf: [
-                {
-                    title: 'Direct Value',
-                    description: `A direct value for equality matching on the '${fullPath}' field.`,
-                    examples: examples,
-                },
-                {
-                    title: 'Operator-Based Query',
-                    $ref: '#/definitions/operatorObject',
-                    examples: operatorExamples,
-                },
-            ],
-        };
-    }
-
-    // Add logical operators
-    const logicalOperators = ['$or', '$and', '$not', '$nor'];
-    for (const operator of logicalOperators) {
-        if (operator === '$not') {
-            // eslint-disable-next-line @typescript-eslint/no-unsafe-member-access
-            schema['properties'][operator] = {
-                oneOf: [{ $ref: '#' }],
-                description: `Inverts the effect of a query expression.`,
-            };
-        } else {
-            // eslint-disable-next-line @typescript-eslint/no-unsafe-member-access
-            schema['properties'][operator] = {
-                type: 'array',
-                items: { $ref: '#' },
-                description: `Joins query clauses with a logical ${operator.toUpperCase().substring(1)}.`,
-            };
-        }
-    }
-
-    // eslint-disable-next-line @typescript-eslint/no-unsafe-return
-    return schema;
-}
diff --git a/src/utils/json/mongo/autocomplete/getKnownFields.ts b/src/utils/json/mongo/autocomplete/getKnownFields.ts
deleted file mode 100644
index a82277a73..000000000
--- a/src/utils/json/mongo/autocomplete/getKnownFields.ts
+++ /dev/null
@@ -1,95 +0,0 @@
-/*---------------------------------------------------------------------------------------------
- *  Copyright (c) Microsoft Corporation. All rights reserved.
- *  Licensed under the MIT License. See License.txt in the project root for license information.
- *--------------------------------------------------------------------------------------------*/
-
-import Denque from 'denque';
-import { type JSONSchema } from '../../JSONSchema';
-
-export interface FieldEntry {
-    path: string;
-    type: string;
-}
-
-/**
- * This function traverses our JSON Schema object and collects all leaf property paths
- * along with their most common data types.
- *
- * This information is needed for auto-completion support
- *
- * The approach is as follows:
- * - Initialize a queue with the root properties of the schema to perform a breadth-first traversal.
- * - While the queue is not empty:
- *   - Dequeue the next item, which includes the current schema node and its path.
- *   - Determine the most common type for the current node by looking at the 'x-typeOccurrence' field.
- *   - If the most common type is an object with properties:
- *     - Enqueue its child properties with their updated paths into the queue for further traversal.
- *   - Else if the most common type is a leaf type (e.g., string, number, boolean):
- *     - Add the current path and type to the result array as it represents a leaf property.
- * - Continue this process until all nodes have been processed.
- * - Return the result array containing objects with 'path' and 'type' for each leaf property.
- */
-export function getKnownFields(schema: JSONSchema): FieldEntry[] {
-    const result: Array<{ path: string; type: string }> = [];
-    type QueueItem = {
-        path: string;
-        schemaNode: JSONSchema;
-    };
-
-    const queue: Denque<QueueItem> = new Denque();
-
-    // Initialize the queue with root properties
-    if (schema.properties) {
-        for (const propName of Object.keys(schema.properties)) {
-            const propSchema = schema.properties[propName] as JSONSchema;
-            queue.push({ path: propName, schemaNode: propSchema });
-        }
-    }
-
-    while (queue.length > 0) {
-        const item = queue.shift();
-        if (!item) continue;
-
-        const { path, schemaNode } = item;
-        const mostCommonTypeEntry = getMostCommonTypeEntry(schemaNode);
-
-        if (mostCommonTypeEntry) {
-            if (mostCommonTypeEntry.type === 'object' && mostCommonTypeEntry.properties) {
-                // Not a leaf node, enqueue its properties
-                for (const childName of Object.keys(mostCommonTypeEntry.properties)) {
-                    const childSchema = mostCommonTypeEntry.properties[childName] as JSONSchema;
-                    queue.push({ path: `${path}.${childName}`, schemaNode: childSchema });
-                }
-            } else {
-                // Leaf node, add to result
-                result.push({ path: path, type: mostCommonTypeEntry.type as string });
-            }
-        }
-    }
-
-    return result;
-}
-
-/**
- * Helper function to get the most common type entry from a schema node.
- * It looks for the 'anyOf' array and selects the type with the highest 'x-typeOccurrence'.
- */
-function getMostCommonTypeEntry(schemaNode: JSONSchema): JSONSchema | null {
-    if (schemaNode.anyOf && schemaNode.anyOf.length > 0) {
-        let maxOccurrence = -1;
-        let mostCommonTypeEntry: JSONSchema | null = null;
-
-        for (const typeEntry of schemaNode.anyOf as JSONSchema[]) {
-            const occurrence = typeEntry['x-typeOccurrence'] || 0;
-            if (occurrence > maxOccurrence) {
-                maxOccurrence = occurrence;
-                mostCommonTypeEntry = typeEntry;
-            }
-        }
-        return mostCommonTypeEntry;
-    } else if (schemaNode.type) {
-        // If 'anyOf' is not present, use the 'type' field directly
-        return schemaNode;
-    }
-    return null;
-}
diff --git a/src/utils/slickgrid/mongo/toSlickGridTable.test.ts b/src/utils/slickgrid/mongo/toSlickGridTable.test.ts
index 69156bf1b..4b1d0af3f 100644
--- a/src/utils/slickgrid/mongo/toSlickGridTable.test.ts
+++ b/src/utils/slickgrid/mongo/toSlickGridTable.test.ts
@@ -76,7 +76,6 @@ describe('toSlickGridTable', () => {
 
     it('at a nested level', () => {
         const tableData = getDataAtPath(mongoDocuments, ['nestedDocument']);
-        console.log(tableData);
 
         expect(tableData).toHaveLength(5);
         expect(tableData[0]['key']).toBeDefined();
diff --git a/src/utils/slickgrid/mongo/toSlickGridTable.ts b/src/utils/slickgrid/mongo/toSlickGridTable.ts
index 737fcb7c0..5deb51fe0 100644
--- a/src/utils/slickgrid/mongo/toSlickGridTable.ts
+++ b/src/utils/slickgrid/mongo/toSlickGridTable.ts
@@ -3,11 +3,10 @@
  *  Licensed under the MIT License. See License.txt in the project root for license information.
  *--------------------------------------------------------------------------------------------*/
 
+import { BSONTypes, valueToDisplayString } from '@vscode-documentdb/schema-analyzer';
 import { EJSON } from 'bson';
 import { type Document, type WithId } from 'mongodb';
 import { type TableDataEntry } from '../../../documentdb/ClusterSession';
-import { MongoBSONTypes } from '../../json/mongo/MongoBSONTypes';
-import { valueToDisplayString } from '../../json/mongo/MongoValueFormatters';
 
 /**
  * Extracts data from a list of MongoDB documents at a specified path.
@@ -45,8 +44,8 @@ export function getDataAtPath(documents: WithId<Document>[], path: string[]): Ta
         // we also make sure that the '_id' field is always included in the data!
         if (doc._id) {
             row['_id'] = {
-                value: valueToDisplayString(doc._id, MongoBSONTypes.inferType(doc._id)),
-                type: MongoBSONTypes.inferType(doc._id),
+                value: valueToDisplayString(doc._id, BSONTypes.inferType(doc._id)),
+                type: BSONTypes.inferType(doc._id),
             };
             // TODO: problem here -> what if the user has a field with this name...
             row['x-objectid'] = EJSON.stringify(doc._id, { relaxed: false }); // this is crucial, we need to retain the _id field for future queries from the table view
@@ -72,13 +71,13 @@ export function getDataAtPath(documents: WithId<Document>[], path: string[]): Ta
                     continue;
                 } else {
                     const value: unknown = subdocument[key];
-                    const type: MongoBSONTypes = MongoBSONTypes.inferType(value);
+                    const type: BSONTypes = BSONTypes.inferType(value);
 
                     // eslint-disable-next-line
                     if (value instanceof Array) {
                         row[key] = {
                             value: `array[${value.length}]`,
-                            type: MongoBSONTypes.Array,
+                            type: BSONTypes.Array,
                         };
                     } else {
                         row[key] = { value: valueToDisplayString(value, type), type: type };
diff --git a/src/utils/slickgrid/mongo/toSlickGridTree.ts b/src/utils/slickgrid/mongo/toSlickGridTree.ts
index 9d3742cfe..849ad42b6 100644
--- a/src/utils/slickgrid/mongo/toSlickGridTree.ts
+++ b/src/utils/slickgrid/mongo/toSlickGridTree.ts
@@ -3,9 +3,8 @@
  *  Licensed under the MIT License. See License.txt in the project root for license information.
  *--------------------------------------------------------------------------------------------*/
 
+import { BSONTypes, valueToDisplayString } from '@vscode-documentdb/schema-analyzer';
 import { type Document, type ObjectId, type WithId } from 'mongodb';
-import { MongoBSONTypes } from '../../json/mongo/MongoBSONTypes';
-import { valueToDisplayString } from '../../json/mongo/MongoValueFormatters';
 
 /**
  * The data structure for a single node entry in the tree data structure for SlickGrid.
@@ -113,10 +112,10 @@ export function documentToSlickGridTree(document: WithId<Document>, idPrefix?: s
             continue;
         }
 
-        const dataType: MongoBSONTypes = MongoBSONTypes.inferType(stackEntry.value);
+        const dataType: BSONTypes = BSONTypes.inferType(stackEntry.value);
 
         switch (dataType) {
-            case MongoBSONTypes.Object: {
+            case BSONTypes.Object: {
                 tree.push({
                     id: globalEntryId,
                     field: `${stackEntry.key}`,
@@ -131,7 +130,7 @@ export function documentToSlickGridTree(document: WithId<Document>, idPrefix?: s
                 });
                 break;
             }
-            case MongoBSONTypes.Array: {
+            case BSONTypes.Array: {
                 const value = stackEntry.value as unknown[];
 
                 tree.push({
@@ -157,7 +156,7 @@ export function documentToSlickGridTree(document: WithId<Document>, idPrefix?: s
                     id: globalEntryId,
                     field: `${stackEntry.key}`,
                     value: valueToDisplayString(stackEntry.value, dataType),
-                    type: MongoBSONTypes.toDisplayString(MongoBSONTypes.inferType(stackEntry.value)),
+                    type: BSONTypes.toDisplayString(BSONTypes.inferType(stackEntry.value)),
                     parentId: stackEntry.parentId,
                 });
                 break;
diff --git a/src/webviews/components/MonacoAutoHeight.tsx b/src/webviews/components/MonacoAutoHeight.tsx
index 9625d8f01..e151b230b 100644
--- a/src/webviews/components/MonacoAutoHeight.tsx
+++ b/src/webviews/components/MonacoAutoHeight.tsx
@@ -196,14 +196,21 @@ export const MonacoAutoHeight = (props: MonacoAutoHeightProps) => {
     /**
      * Configures the Tab key behavior for the Monaco editor.
      *
-     * When called, this function sets up or removes a keydown handler for the Tab key.
-     * If `shouldTrap` is true, Tab/Shift+Tab are trapped within the editor (focus remains in editor).
-     * If `shouldTrap` is false, Tab/Shift+Tab move focus to the next/previous focusable element outside the editor.
+     * When `shouldTrap` is true, Tab/Shift+Tab are trapped within the editor
+     * (default Monaco behavior for code indentation).
+     *
+     * When `shouldTrap` is false, Tab/Shift+Tab move focus to the next/previous
+     * focusable element outside the editor — UNLESS the editor is in snippet
+     * tab-stop mode (`inSnippetMode`), in which case Tab navigates between
+     * snippet placeholders. After the snippet session ends (final tab stop or
+     * ESC), Tab reverts to moving focus out of the editor.
+     *
+     * Uses `editor.addAction` with a precondition context key expression
+     * (`!inSnippetMode`) rather than `onKeyDown` interception, so Monaco's
+     * built-in snippet navigation takes priority when a snippet is active.
      *
      * @param {monacoEditor.editor.IStandaloneCodeEditor} editor - The Monaco editor instance.
      * @param {boolean} shouldTrap - Whether to trap Tab key in the editor.
-     *   - true: Tab/Shift+Tab are trapped in the editor.
-     *   - false: Tab/Shift+Tab move focus to next/previous element.
      */
     const configureTabKeyMode = (editor: monacoEditor.editor.IStandaloneCodeEditor, shouldTrap: boolean) => {
         if (tabKeyDisposerRef.current) {
@@ -215,17 +222,30 @@ export const MonacoAutoHeight = (props: MonacoAutoHeightProps) => {
             return;
         }
 
-        tabKeyDisposerRef.current = editor.onKeyDown((event) => {
-            if (event.keyCode !== monacoEditor.KeyCode.Tab) {
-                return;
-            }
-
-            event.preventDefault();
-            event.stopPropagation();
+        // Register Tab and Shift+Tab actions that only fire when NOT in snippet mode.
+        // When inSnippetMode is true, Monaco's built-in snippet Tab handler takes over.
+        const tabAction = editor.addAction({
+            id: 'documentdb.tab.moveFocusNext',
+            label: 'Move Focus to Next Element',
+            keybindings: [monacoEditor.KeyCode.Tab],
+            precondition: '!inSnippetMode',
+            run: () => moveFocus(editor, 'next'),
+        });
 
-            const direction = event.browserEvent.shiftKey ? 'previous' : 'next';
-            moveFocus(editor, direction);
+        const shiftTabAction = editor.addAction({
+            id: 'documentdb.tab.moveFocusPrevious',
+            label: 'Move Focus to Previous Element',
+            keybindings: [monacoEditor.KeyMod.Shift | monacoEditor.KeyCode.Tab],
+            precondition: '!inSnippetMode',
+            run: () => moveFocus(editor, 'previous'),
         });
+
+        tabKeyDisposerRef.current = {
+            dispose: () => {
+                tabAction.dispose();
+                shiftTabAction.dispose();
+            },
+        };
     };
 
     /**
diff --git a/src/webviews/components/MonacoEditor.tsx b/src/webviews/components/MonacoEditor.tsx
index c08e2087d..7e6f84530 100644
--- a/src/webviews/components/MonacoEditor.tsx
+++ b/src/webviews/components/MonacoEditor.tsx
@@ -75,11 +75,20 @@ export const MonacoEditor = ({ onEscapeEditor, onMount, ...props }: MonacoEditor
             disposablesRef.current.forEach((d) => d.dispose());
             disposablesRef.current = [];
 
-            // Register Escape key handler to exit the editor
+            // Register Escape key handler to exit the editor.
+            // The context expression ensures ESC is only handled when:
+            // - The suggest (autocomplete) widget is NOT visible
+            // - The editor is NOT in snippet tab-stop mode
+            // This allows Monaco's built-in handlers to dismiss the suggest
+            // widget or exit snippet mode first, before our handler fires.
             if (onEscapeEditor) {
-                editor.addCommand(monacoInstance.KeyCode.Escape, () => {
-                    onEscapeEditor();
-                });
+                editor.addCommand(
+                    monacoInstance.KeyCode.Escape,
+                    () => {
+                        onEscapeEditor();
+                    },
+                    '!suggestWidgetVisible && !inSnippetMode',
+                );
             }
 
             // Announce escape hint once when editor gains focus
diff --git a/src/webviews/documentdb/collectionView/CollectionView.tsx b/src/webviews/documentdb/collectionView/CollectionView.tsx
index 23908bb93..863a07bed 100644
--- a/src/webviews/documentdb/collectionView/CollectionView.tsx
+++ b/src/webviews/documentdb/collectionView/CollectionView.tsx
@@ -12,6 +12,7 @@ import { Announcer } from '../../api/webview-client/accessibility';
 import { useConfiguration } from '../../api/webview-client/useConfiguration';
 import { useTrpcClient } from '../../api/webview-client/useTrpcClient';
 import { useSelectiveContextMenuPrevention } from '../../api/webview-client/utils/useSelectiveContextMenuPrevention';
+import { setCompletionContext } from '../../documentdbQuery';
 import './collectionView.scss';
 import {
     CollectionViewContext,
@@ -351,17 +352,24 @@ export const CollectionView = (): JSX.Element => {
     }
 
     function updateAutoCompletionData(): void {
-        trpcClient.mongoClusters.collectionView.getAutocompletionSchema
+        trpcClient.mongoClusters.collectionView.getFieldCompletionData
             .query()
-            .then(async (schema) => {
-                void (await currentContextRef.current.queryEditor?.setJsonSchema(schema));
+            .then((fields) => {
+                setCompletionContext(configuration.sessionId, { fields });
             })
             .catch((error) => {
-                void trpcClient.common.displayErrorMessage.mutate({
-                    message: l10n.t('Error while loading the autocompletion data'),
-                    modal: false,
-                    cause: error instanceof Error ? error.message : String(error),
-                });
+                console.debug('Failed to update field completion data:', error);
+                // Non-blocking — completion will work without fields
+                trpcClient.common.reportEvent
+                    .mutate({
+                        eventName: 'fieldCompletionDataFetchFailed',
+                        properties: {
+                            error: error instanceof Error ? error.message : String(error),
+                        },
+                    })
+                    .catch(() => {
+                        // best-effort telemetry, swallow errors
+                    });
             });
     }
 
diff --git a/src/webviews/documentdb/collectionView/collectionViewContext.ts b/src/webviews/documentdb/collectionView/collectionViewContext.ts
index 435396ce6..e3a64fa63 100644
--- a/src/webviews/documentdb/collectionView/collectionViewContext.ts
+++ b/src/webviews/documentdb/collectionView/collectionViewContext.ts
@@ -97,7 +97,6 @@ export type CollectionViewContextType = {
             skip: number;
             limit: number;
         };
-        setJsonSchema(schema: object): Promise<void>; //monacoEditor.languages.json.DiagnosticsOptions, but we don't want to import monacoEditor here
     };
     isAiRowVisible: boolean; // Controls visibility of the AI prompt row in QueryEditor
     queryInsights: QueryInsightsState; // Query insights state for progressive loading
diff --git a/src/webviews/documentdb/collectionView/collectionViewRouter.ts b/src/webviews/documentdb/collectionView/collectionViewRouter.ts
index fec8eb05d..8e00cfa70 100644
--- a/src/webviews/documentdb/collectionView/collectionViewRouter.ts
+++ b/src/webviews/documentdb/collectionView/collectionViewRouter.ts
@@ -4,15 +4,14 @@
  *--------------------------------------------------------------------------------------------*/
 
 import { callWithTelemetryAndErrorHandling, type IActionContext } from '@microsoft/vscode-azext-utils';
+import { type FieldEntry } from '@vscode-documentdb/schema-analyzer';
 import * as fs from 'fs';
 import { type Document } from 'mongodb';
 import * as path from 'path';
 import * as vscode from 'vscode';
-import { type JSONSchema } from 'vscode-json-languageservice';
 import { z } from 'zod';
 import { ClusterSession } from '../../../documentdb/ClusterSession';
 import { getConfirmationAsInSettings } from '../../../utils/dialogs/getConfirmation';
-import { getKnownFields, type FieldEntry } from '../../../utils/json/mongo/autocomplete/getKnownFields';
 import { publicProcedureWithTelemetry, router, type WithTelemetry } from '../../api/extension-server/trpc';
 
 import * as l10n from '@vscode/l10n';
@@ -39,9 +38,7 @@ import { Views } from '../../../documentdb/Views';
 import { ext } from '../../../extensionVariables';
 import { QueryInsightsAIService } from '../../../services/ai/QueryInsightsAIService';
 import { type CollectionItem } from '../../../tree/documentdb/CollectionItem';
-// eslint-disable-next-line import/no-internal-modules
-import basicFindQuerySchema from '../../../utils/json/mongo/autocomplete/basicMongoFindFilterSchema.json';
-import { generateMongoFindJsonSchema } from '../../../utils/json/mongo/autocomplete/generateMongoFindJsonSchema';
+import { toFieldCompletionItems } from '../../../utils/json/data-api/autocomplete/toFieldCompletionItems';
 import { promptAfterActionEventually } from '../../../utils/survey';
 import { UsageImpact } from '../../../utils/surveyTypes';
 import { type BaseRouterContext } from '../../api/configuration/appRouter';
@@ -234,25 +231,16 @@ export const collectionsViewRouter = router({
 
             return { documentCount: size };
         }),
-    getAutocompletionSchema: publicProcedureWithTelemetry
+    getFieldCompletionData: publicProcedureWithTelemetry
         // procedure type
         .query(({ ctx }) => {
             const myCtx = ctx as WithTelemetry<RouterContext>;
 
             const session: ClusterSession = ClusterSession.getSession(myCtx.sessionId);
 
-            const _currentJsonSchema = session.getCurrentSchema();
-            const autoCompletionData: FieldEntry[] = getKnownFields(_currentJsonSchema);
+            const fieldEntries: FieldEntry[] = session.getKnownFields();
 
-            let querySchema: JSONSchema;
-
-            if (autoCompletionData.length > 0) {
-                querySchema = generateMongoFindJsonSchema(autoCompletionData);
-            } else {
-                querySchema = basicFindQuerySchema;
-            }
-
-            return querySchema;
+            return toFieldCompletionItems(fieldEntries);
         }),
     getCurrentPageAsTable: publicProcedureWithTelemetry
         // parameters
diff --git a/src/webviews/documentdb/collectionView/components/queryEditor/QueryEditor.tsx b/src/webviews/documentdb/collectionView/components/queryEditor/QueryEditor.tsx
index 7b6d7e8ec..8bfaabc77 100644
--- a/src/webviews/documentdb/collectionView/components/queryEditor/QueryEditor.tsx
+++ b/src/webviews/documentdb/collectionView/components/queryEditor/QueryEditor.tsx
@@ -10,9 +10,16 @@ import { useContext, useEffect, useRef, useState, type JSX } from 'react';
 import { InputWithProgress } from '../../../../components/InputWithProgress';
 // eslint-disable-next-line import/no-internal-modules
 import type * as monacoEditor from 'monaco-editor/esm/vs/editor/editor.api';
-// eslint-disable-next-line import/no-internal-modules
-import basicFindQuerySchema from '../../../../../utils/json/mongo/autocomplete/basicMongoFindFilterSchema.json';
 import { useConfiguration } from '../../../../api/webview-client/useConfiguration';
+import {
+    buildEditorUri,
+    clearCompletionContext,
+    EditorType,
+    LANGUAGE_ID,
+    registerDocumentDBQueryLanguage,
+    validateExpression,
+    type Diagnostic,
+} from '../../../../documentdbQuery';
 import { type CollectionViewWebviewConfigurationType } from '../../collectionViewController';
 
 import { ArrowResetRegular, SendRegular, SettingsFilled, SettingsRegular } from '@fluentui/react-icons';
@@ -24,6 +31,31 @@ import { CollectionViewContext } from '../../collectionViewContext';
 import { useHideScrollbarsDuringResize } from '../../hooks/useHideScrollbarsDuringResize';
 import './queryEditor.scss';
 
+/**
+ * Convert an offset-based Diagnostic from the documentdb-query validator to line/column Monaco marker data.
+ */
+function toMonacoMarker(
+    diagnostic: Diagnostic,
+    model: monacoEditor.editor.ITextModel,
+    monaco: typeof monacoEditor,
+): monacoEditor.editor.IMarkerData {
+    const startPos = model.getPositionAt(diagnostic.startOffset);
+    const endPos = model.getPositionAt(diagnostic.endOffset);
+    return {
+        severity:
+            diagnostic.severity === 'error'
+                ? monaco.MarkerSeverity.Error
+                : diagnostic.severity === 'warning'
+                  ? monaco.MarkerSeverity.Warning
+                  : monaco.MarkerSeverity.Info, // any other severity falls back to Info
+        message: diagnostic.message,
+        startLineNumber: startPos.lineNumber,
+        startColumn: startPos.column,
+        endLineNumber: endPos.lineNumber,
+        endColumn: endPos.column,
+    };
+}
+
 interface QueryEditorProps {
     onExecuteRequest: () => void;
 }
@@ -46,7 +78,6 @@ export const QueryEditor = ({ onExecuteRequest }: QueryEditorProps): JSX.Element
     // AI prompt history (survives hide/show of AI input)
     const [aiPromptHistory, setAiPromptHistory] = useState<string[]>([]);
 
-    const schemaAbortControllerRef = useRef<AbortController | null>(null);
     const aiGenerationAbortControllerRef = useRef<AbortController | null>(null);
     const aiInputRef = useRef<HTMLInputElement | null>(null);
 
@@ -57,12 +88,162 @@ export const QueryEditor = ({ onExecuteRequest }: QueryEditorProps): JSX.Element
 
     const hideScrollbarsTemporarily = useHideScrollbarsDuringResize();
 
-    const handleEditorDidMount = (editor: monacoEditor.editor.IStandaloneCodeEditor, monaco: typeof monacoEditor) => {
-        editor.setValue('{  }');
+    /**
+     * Attaches a model at the URI built from editor type and session ID, reusing an existing one if present.
+     * This enables the completion provider to identify which editor the request is for.
+     */
+    const createEditorModel = (
+        editor: monacoEditor.editor.IStandaloneCodeEditor,
+        monaco: typeof monacoEditor,
+        editorType: EditorType,
+        initialValue: string,
+    ): monacoEditor.editor.ITextModel => {
+        const uri = monaco.Uri.parse(buildEditorUri(editorType, configuration.sessionId));
+        let model = monaco.editor.getModel(uri);
+        if (!model) {
+            model = monaco.editor.createModel(initialValue, LANGUAGE_ID, uri); // initialValue is only used for a new model
+        }
+        editor.setModel(model);
+        return model;
+    };
+
+    /**
+     * Sets up debounced (300 ms) validation on editor content changes.
+     * Returns a cleanup function that clears any pending timeout and disposes the change listener.
+     */
+    const setupValidation = (
+        editor: monacoEditor.editor.IStandaloneCodeEditor,
+        monaco: typeof monacoEditor,
+        model: monacoEditor.editor.ITextModel,
+    ): (() => void) => {
+        let validationTimeout: ReturnType<typeof setTimeout>;
+        const disposable = editor.onDidChangeModelContent(() => {
+            clearTimeout(validationTimeout); // restart the debounce window on every edit
+            validationTimeout = setTimeout(() => {
+                const diagnostics = validateExpression(editor.getValue());
+                const markers = diagnostics.map((d) => toMonacoMarker(d, model, monaco));
+                monaco.editor.setModelMarkers(model, 'documentdb-query', markers);
+            }, 300);
+        });
+        return () => {
+            clearTimeout(validationTimeout);
+            disposable.dispose();
+        };
+    };
+
+    /**
+     * Cancels any active snippet session on the given editor (no-op if `snippetController2` is unavailable).
+     *
+     * After a snippet completion (e.g., `fieldName: $1`), Monaco keeps the
+     * snippet session alive and highlights the tab-stop placeholder. If the
+     * user continues typing, the highlight grows — the "ghost selection"
+     * bug. Calling this function ends the snippet session cleanly.
+     */
+    const cancelSnippetSession = (editor: monacoEditor.editor.IStandaloneCodeEditor): void => {
+        const controller = editor.getContribution('snippetController2') as { cancel: () => void } | null | undefined;
+        controller?.cancel();
+    };
+
+    /** Characters that signal the end of a field-value pair and should exit snippet mode. */
+    const SNIPPET_EXIT_CHARS = new Set([',', '}', ']']);
+
+    /**
+     * Sets up pattern-based auto-trigger of completions.
+     * When a content change results in a trigger character followed by a
+     * space (`: `, `, `, `{ `, `[ `) at the end of the inserted text,
+     * completions are triggered automatically after a 50 ms delay. This
+     * handles both manual typing and completion acceptance.
+     *
+     * Also cancels any active snippet session when a delimiter character
+     * (`,`, `}`, `]`) is typed, when the editor text loses focus, or when
+     * Enter is pressed, so a stale tab-stop highlight cannot keep growing.
+     *
+     * Returns a cleanup function that clears the pending trigger and disposes all listeners.
+     */
+    const setupSmartTrigger = (editor: monacoEditor.editor.IStandaloneCodeEditor): (() => void) => {
+        let triggerTimeout: ReturnType<typeof setTimeout>;
+        const contentDisposable = editor.onDidChangeModelContent((e) => {
+            clearTimeout(triggerTimeout);
+
+            const change = e.changes[0]; // only the first change of the event is inspected
+            if (!change || change.text.length === 0) return;
+
+            // Cancel snippet session when the user *types* a delimiter character.
+            // Only applies to single-character edits (user keystrokes), not to
+            // multi-character completion insertions which may legitimately
+            // contain commas or braces as part of the snippet text.
+            if (change.text.length === 1 && SNIPPET_EXIT_CHARS.has(change.text)) {
+                cancelSnippetSession(editor);
+            }
+
+            const model = editor.getModel();
+            if (!model) return;
+
+            // Calculate the offset at the end of the inserted text in the new model
+            const endOffset = change.rangeOffset + change.text.length;
+
+            // We need at least 2 chars to check for a trigger pair such as ": " or ", "
+            if (endOffset < 2) return;
+
+            const fullText = model.getValue();
+            const lastTwo = fullText.substring(endOffset - 2, endOffset);
+            if (lastTwo === ': ' || lastTwo === ', ' || lastTwo === '{ ' || lastTwo === '[ ') {
+                triggerTimeout = setTimeout(() => {
+                    editor.trigger('smart-trigger', 'editor.action.triggerSuggest', {});
+                }, 50);
+            }
+        });
+
+        // Cancel snippet session when the editor text loses focus.
+        // If the user clicks away while a tab-stop is highlighted, the
+        // highlight should not persist when they return.
+        const blurDisposable = editor.onDidBlurEditorText(() => {
+            cancelSnippetSession(editor);
+        });
+
+        // Cancel snippet session on Enter, with or without modifiers (only the
+        // key name is checked). Enter commits the current line and should exit
+        // snippet mode. Ctrl+Enter / Cmd+Enter triggers query execution and should
+        // also exit it so the tab-stop highlight doesn't persist after running a query.
+        const keyDownDisposable = editor.onKeyDown((e) => {
+            if (e.browserEvent.key === 'Enter') {
+                cancelSnippetSession(editor);
+            }
+        });
+
+        return () => {
+            clearTimeout(triggerTimeout);
+            contentDisposable.dispose();
+            blurDisposable.dispose();
+            keyDownDisposable.dispose();
+        };
+    };
 
+    // Track validation and smart-trigger cleanup functions for each editor (filter, project, sort)
+    const filterValidationCleanupRef = useRef<(() => void) | null>(null);
+    const projectValidationCleanupRef = useRef<(() => void) | null>(null);
+    const sortValidationCleanupRef = useRef<(() => void) | null>(null);
+    const filterSmartTriggerCleanupRef = useRef<(() => void) | null>(null);
+    const projectSmartTriggerCleanupRef = useRef<(() => void) | null>(null);
+    const sortSmartTriggerCleanupRef = useRef<(() => void) | null>(null);
+    const handleEditorDidMount = (editor: monacoEditor.editor.IStandaloneCodeEditor, monaco: typeof monacoEditor) => {
         // Store the filter editor reference
         filterEditorRef.current = editor;
 
+        // Register the documentdb-query language (idempotent — safe to call on every mount).
+        // Pass the tRPC openUrl handler so hover links can be opened via the extension host,
+        // bypassing the webview sandbox's popup restrictions.
+        void registerDocumentDBQueryLanguage(monaco, (url) => void trpcClient.common.openUrl.mutate({ url }));
+
+        // Create model with URI scheme for contextual completions
+        const model = createEditorModel(editor, monaco, EditorType.Filter, '{  }');
+
+        // Set up debounced validation
+        filterValidationCleanupRef.current = setupValidation(editor, monaco, model);
+
+        // Set up smart-trigger for completions after ": ", ", ", "{ " and "[ "
+        filterSmartTriggerCleanupRef.current = setupSmartTrigger(editor);
+
         const getCurrentQueryFunction = () => ({
             filter: filterValue,
             project: projectValue,
@@ -76,78 +257,8 @@ export const QueryEditor = ({ onExecuteRequest }: QueryEditorProps): JSX.Element
             ...prev,
             queryEditor: {
                 getCurrentQuery: getCurrentQueryFunction,
-                /**
-                 * Dynamically sets the JSON schema for the Monaco editor's validation and autocompletion.
-                 *
-                 * NOTE: This function can encounter network errors if called immediately after the
-                 * editor mounts, as the underlying JSON web worker may not have finished loading.
-                 * To mitigate this, a delay is introduced before attempting to set the schema.
-                 *
-                 * A more robust long-term solution should be implemented to programmatically
-                 * verify that the JSON worker is initialized before this function proceeds.
-                 *
-                 * An AbortController is used to prevent race conditions when this function is
-                 * called in quick succession (e.g., rapid "refresh" clicks). It ensures that
-                 * any pending schema update is cancelled before a new one begins, guaranteeing
-                 * a clean, predictable state and allowing the Monaco worker to initialize correctly.
-                 */
-                setJsonSchema: async (schema) => {
-                    // Use the ref to cancel the previous operation
-                    if (schemaAbortControllerRef.current) {
-                        schemaAbortControllerRef.current.abort();
-                    }
-
-                    // Create and store the new AbortController in the ref
-                    const abortController = new AbortController();
-                    schemaAbortControllerRef.current = abortController;
-                    const signal = abortController.signal;
-
-                    try {
-                        // Wait for 2 seconds to give the worker time to initialize
-                        await new Promise((resolve) => setTimeout(resolve, 2000));
-
-                        // If the operation was cancelled during the delay, abort early
-                        if (signal.aborted) {
-                            return;
-                        }
-
-                        // Check if JSON language features are available and set the schema
-                        if (monaco.languages.json?.jsonDefaults) {
-                            monaco.languages.json.jsonDefaults.setDiagnosticsOptions({
-                                validate: false,
-                                schemas: [
-                                    {
-                                        uri: 'mongodb-filter-query-schema.json',
-                                        fileMatch: ['*'],
-                                        schema: schema,
-                                    },
-                                ],
-                            });
-                        }
-                    } catch (error) {
-                        // The error is likely an uncaught exception in the worker,
-                        // but we catch here just in case.
-                        console.warn('Error setting JSON schema:', error);
-                    }
-                },
             },
         }));
-
-        // initialize the monaco editor with the schema that's basic
-        // as we don't know the schema of the collection available
-        // this is a fallback for the case when the autocompletion feature fails.
-        monaco.languages.json.jsonDefaults.setDiagnosticsOptions({
-            validate: true,
-            schemas: [
-                {
-                    uri: 'mongodb-filter-query-schema.json', // Unique identifier
-                    fileMatch: ['*'], // Apply to all JSON files or specify as needed
-
-                    schema: basicFindQuerySchema,
-                    // schema: generateMongoFindJsonSchema(fieldEntries)
-                },
-            ],
-        });
     };
 
     const monacoOptions: editor.IStandaloneEditorConstructionOptions = {
@@ -173,19 +284,58 @@ export const QueryEditor = ({ onExecuteRequest }: QueryEditorProps): JSX.Element
         automaticLayout: false,
     };
 
+    // Intercept link clicks in Monaco hover tooltips.
+    // Monaco renders hover markdown links as <a> tags, but the webview CSP
+    // blocks direct navigation. Capture clicks and route through tRPC.
+    const editorContainerRef = useRef<HTMLDivElement | null>(null);
+    useEffect(() => {
+        const container = editorContainerRef.current;
+        if (!container) return;
+
+        const handleLinkClick = (e: MouseEvent): void => {
+            const target = e.target as HTMLElement;
+            const anchor = target.closest('a');
+            if (!anchor) return;
+
+            const href = anchor.getAttribute('href');
+            if (href && (href.startsWith('https://') || href.startsWith('http://'))) {
+                e.preventDefault();
+                e.stopPropagation();
+                void trpcClient.common.openUrl.mutate({ url: href });
+            }
+        };
+
+        container.addEventListener('click', handleLinkClick, true);
+        return () => container.removeEventListener('click', handleLinkClick, true);
+    }, [trpcClient]);
+
     // Cleanup any pending operations when component unmounts
     useEffect(() => {
         return () => {
-            if (schemaAbortControllerRef.current) {
-                schemaAbortControllerRef.current.abort();
-                schemaAbortControllerRef.current = null;
-            }
             if (aiGenerationAbortControllerRef.current) {
                 aiGenerationAbortControllerRef.current.abort();
                 aiGenerationAbortControllerRef.current = null;
             }
+
+            // Clean up validation timeouts
+            filterValidationCleanupRef.current?.();
+            projectValidationCleanupRef.current?.();
+            sortValidationCleanupRef.current?.();
+
+            // Clean up smart-trigger listeners
+            filterSmartTriggerCleanupRef.current?.();
+            projectSmartTriggerCleanupRef.current?.();
+            sortSmartTriggerCleanupRef.current?.();
+
+            // Dispose Monaco models
+            filterEditorRef.current?.getModel()?.dispose();
+            projectEditorRef.current?.getModel()?.dispose();
+            sortEditorRef.current?.getModel()?.dispose();
+
+            // Clear completion store for this session
+            clearCompletionContext(configuration.sessionId);
         };
-    }, []);
+    }, [configuration.sessionId]);
 
     // Update getCurrentQuery function whenever state changes
     useEffect(() => {
@@ -342,7 +492,7 @@ export const QueryEditor = ({ onExecuteRequest }: QueryEditorProps): JSX.Element
     };
 
     return (
-        <div className="queryEditor">
+        <div className="queryEditor" ref={editorContainerRef}>
             {/* Optional AI prompt row */}
             <Collapse visible={configuration.enableAIQueryGeneration && currentContext.isAiRowVisible} unmountOnExit>
                 <div className={`aiRow${isAiActive ? ' ai-active' : ''}`}>
@@ -397,7 +547,7 @@ export const QueryEditor = ({ onExecuteRequest }: QueryEditorProps): JSX.Element
                     <MonacoAutoHeight
                         height={'100%'}
                         width={'100%'}
-                        language="json"
+                        language={LANGUAGE_ID}
                         adaptiveHeight={{
                             enabled: true,
                             maxLines: 10,
@@ -409,14 +559,14 @@ export const QueryEditor = ({ onExecuteRequest }: QueryEditorProps): JSX.Element
                         }}
                         onMount={(editor, monaco) => {
                             handleEditorDidMount(editor, monaco);
-                            // Sync initial value
+                            // Sync editor content to state
                             editor.onDidChangeModelContent(() => {
                                 setFilterValue(editor.getValue());
                             });
                         }}
                         options={{
                             ...monacoOptions,
-                            ariaLabel: l10n.t('Filter: Enter the DocumentDB query filter in JSON format'),
+                            ariaLabel: l10n.t('Filter: Enter the DocumentDB query filter'),
                         }}
                     />
                 </div>
@@ -508,16 +658,31 @@ export const QueryEditor = ({ onExecuteRequest }: QueryEditorProps): JSX.Element
                             <MonacoAutoHeight
                                 height={'100%'}
                                 width={'100%'}
-                                language="json"
+                                language={LANGUAGE_ID}
                                 adaptiveHeight={{
                                     enabled: true,
                                     maxLines: 5,
                                     minLines: 1,
                                     lineHeight: 19,
                                 }}
-                                onMount={(editor) => {
+                                onMount={(editor, monaco) => {
+                                    // Register language (idempotent)
+                                    void registerDocumentDBQueryLanguage(
+                                        monaco,
+                                        (url) => void trpcClient.common.openUrl.mutate({ url }),
+                                    );
+
                                     projectEditorRef.current = editor;
-                                    editor.setValue(projectValue);
+
+                                    // Create model with URI scheme for project completions
+                                    const model = createEditorModel(editor, monaco, EditorType.Project, projectValue);
+
+                                    // Set up validation
+                                    projectValidationCleanupRef.current = setupValidation(editor, monaco, model);
+
+                                    // Set up smart-trigger
+                                    projectSmartTriggerCleanupRef.current = setupSmartTrigger(editor);
+
                                     editor.onDidChangeModelContent(() => {
                                         setProjectValue(editor.getValue());
                                     });
@@ -539,16 +704,31 @@ export const QueryEditor = ({ onExecuteRequest }: QueryEditorProps): JSX.Element
                             <MonacoAutoHeight
                                 height={'100%'}
                                 width={'100%'}
-                                language="json"
+                                language={LANGUAGE_ID}
                                 adaptiveHeight={{
                                     enabled: true,
                                     maxLines: 5,
                                     minLines: 1,
                                     lineHeight: 19,
                                 }}
-                                onMount={(editor) => {
+                                onMount={(editor, monaco) => {
+                                    // Register language (idempotent)
+                                    void registerDocumentDBQueryLanguage(
+                                        monaco,
+                                        (url) => void trpcClient.common.openUrl.mutate({ url }),
+                                    );
+
                                     sortEditorRef.current = editor;
-                                    editor.setValue(sortValue);
+
+                                    // Create model with URI scheme for sort completions
+                                    const model = createEditorModel(editor, monaco, EditorType.Sort, sortValue);
+
+                                    // Set up validation
+                                    sortValidationCleanupRef.current = setupValidation(editor, monaco, model);
+
+                                    // Set up smart-trigger
+                                    sortSmartTriggerCleanupRef.current = setupSmartTrigger(editor);
+
                                     editor.onDidChangeModelContent(() => {
                                         setSortValue(editor.getValue());
                                     });
diff --git a/src/webviews/documentdbQuery/completionStore.test.ts b/src/webviews/documentdbQuery/completionStore.test.ts
new file mode 100644
index 000000000..fde71ed1b
--- /dev/null
+++ b/src/webviews/documentdbQuery/completionStore.test.ts
@@ -0,0 +1,126 @@
+/*---------------------------------------------------------------------------------------------
+ *  Copyright (c) Microsoft Corporation. All rights reserved.
+ *  Licensed under the MIT License. See License.txt in the project root for license information.
+ *--------------------------------------------------------------------------------------------*/
+
+import {
+    clearAllCompletionContexts,
+    clearCompletionContext,
+    getCompletionContext,
+    setCompletionContext,
+} from './completionStore';
+
+describe('completionStore', () => {
+    beforeEach(() => {
+        clearAllCompletionContexts();
+    });
+
+    test('setCompletionContext then getCompletionContext round-trips correctly', () => {
+        const context = {
+            fields: [
+                {
+                    fieldName: 'name',
+                    displayType: 'String',
+                    bsonType: 'string',
+                    isSparse: false,
+                    insertText: 'name',
+                    referenceText: '$name',
+                },
+            ],
+        };
+
+        setCompletionContext('session-1', context);
+        expect(getCompletionContext('session-1')).toEqual(context);
+    });
+
+    test('getCompletionContext returns undefined for unknown session', () => {
+        expect(getCompletionContext('unknown')).toBeUndefined();
+    });
+
+    test('clearCompletionContext removes the entry', () => {
+        setCompletionContext('session-1', { fields: [] });
+        expect(getCompletionContext('session-1')).toBeDefined();
+
+        clearCompletionContext('session-1');
+        expect(getCompletionContext('session-1')).toBeUndefined();
+    });
+
+    test('clearCompletionContext is a no-op for unknown session', () => {
+        expect(() => clearCompletionContext('unknown')).not.toThrow();
+    });
+
+    test('clearAllCompletionContexts removes all entries', () => {
+        setCompletionContext('session-1', { fields: [] });
+        setCompletionContext('session-2', { fields: [] });
+
+        clearAllCompletionContexts();
+
+        expect(getCompletionContext('session-1')).toBeUndefined();
+        expect(getCompletionContext('session-2')).toBeUndefined();
+    });
+
+    test('setCompletionContext overwrites existing data', () => {
+        const original = {
+            fields: [
+                {
+                    fieldName: 'old',
+                    displayType: 'String',
+                    bsonType: 'string',
+                    isSparse: false,
+                    insertText: 'old',
+                    referenceText: '$old',
+                },
+            ],
+        };
+        const updated = {
+            fields: [
+                {
+                    fieldName: 'new',
+                    displayType: 'Number',
+                    bsonType: 'double',
+                    isSparse: true,
+                    insertText: 'new',
+                    referenceText: '$new',
+                },
+            ],
+        };
+
+        setCompletionContext('session-1', original);
+        setCompletionContext('session-1', updated);
+
+        expect(getCompletionContext('session-1')).toEqual(updated);
+    });
+
+    test('multiple sessions are independent', () => {
+        const ctx1 = {
+            fields: [
+                {
+                    fieldName: 'a',
+                    displayType: 'String',
+                    bsonType: 'string',
+                    isSparse: false,
+                    insertText: 'a',
+                    referenceText: '$a',
+                },
+            ],
+        };
+        const ctx2 = {
+            fields: [
+                {
+                    fieldName: 'b',
+                    displayType: 'Number',
+                    bsonType: 'int32',
+                    isSparse: true,
+                    insertText: 'b',
+                    referenceText: '$b',
+                },
+            ],
+        };
+
+        setCompletionContext('session-1', ctx1);
+        setCompletionContext('session-2', ctx2);
+
+        expect(getCompletionContext('session-1')).toEqual(ctx1);
+        expect(getCompletionContext('session-2')).toEqual(ctx2);
+    });
+});
diff --git a/src/webviews/documentdbQuery/completionStore.ts b/src/webviews/documentdbQuery/completionStore.ts
new file mode 100644
index 000000000..b97ed859d
--- /dev/null
+++ b/src/webviews/documentdbQuery/completionStore.ts
@@ -0,0 +1,36 @@
+/*---------------------------------------------------------------------------------------------
+ *  Copyright (c) Microsoft Corporation. All rights reserved.
+ *  Licensed under the MIT License. See License.txt in the project root for license information.
+ *--------------------------------------------------------------------------------------------*/
+
+import { type FieldCompletionData } from '../../utils/json/data-api/autocomplete/toFieldCompletionItems';
+
+/**
+ * Completion context for a single editor session.
+ * Holds dynamic field data fetched from the extension host after query execution.
+ */
+export interface CompletionContext {
+    fields: FieldCompletionData[];
+}
+
+const store = new Map<string, CompletionContext>();
+
+/** Update field data for a session (called after query execution). */
+export function setCompletionContext(sessionId: string, context: CompletionContext): void {
+    store.set(sessionId, context);
+}
+
+/** Get field data for a session. */
+export function getCompletionContext(sessionId: string): CompletionContext | undefined {
+    return store.get(sessionId);
+}
+
+/** Remove a session's data (called on tab close / dispose). */
+export function clearCompletionContext(sessionId: string): void {
+    store.delete(sessionId);
+}
+
+/** Clear all sessions (for testing). */
+export function clearAllCompletionContexts(): void {
+    store.clear();
+}
diff --git a/src/webviews/documentdbQuery/completions/README.md b/src/webviews/documentdbQuery/completions/README.md
new file mode 100644
index 000000000..bb060d369
--- /dev/null
+++ b/src/webviews/documentdbQuery/completions/README.md
@@ -0,0 +1,110 @@
+# Completions Module
+
+Context-sensitive completion items for the `documentdb-query` Monaco language.
+
+## Architecture
+
+```
+registerLanguage.ts
+  └─ provideCompletionItems()
+       │
+       ├─ cursorContext.ts        ← detect semantic cursor position
+       │
+       └─ completions/
+            ├─ createCompletionItems.ts   ← main entry, context routing
+            ├─ mapCompletionItems.ts      ← operator/field → CompletionItem
+            ├─ typeSuggestions.ts         ← type-aware value suggestions
+            ├─ snippetUtils.ts           ← snippet text manipulation
+            └─ completionKnowledge.ts    ← curated domain rules & constants
+```
+
+### Flow
+
+1. Monaco calls `provideCompletionItems()` (registered in `registerLanguage.ts`)
+2. `detectCursorContext()` scans backward from the cursor to determine the semantic position
+3. `createCompletionItems()` routes to the appropriate builder:
+   - **key / array-element** → field names + key-position operators
+   - **value** → type suggestions + operators (with braces) + BSON constructors + JS globals
+   - **operator** → operators only (braces stripped, type-aware sorting)
+   - **empty** (unknown + needsWrapping) → key-position completions with `{ }` wrapping
+   - **unknown** (ambiguous, no wrapping) → all completions (fields, all operators, BSON constructors, JS globals)
+
+## Sorting
+
+Completion items use `sortText` prefixes so Monaco displays them in the intended order. Lower prefixes appear higher in the list.
+
+### Empty position (no braces)
+
+Same as key position. All insertions wrapped with `{ }`.
+
+| Prefix | Content | Example |
+|--------|---------|---------|
+| `0_fieldName` | Schema field names (wrapped) | `{ age: $1 }`, `{ name: $1 }` |
+| `1_$and` | Key-position operators (with braces) | `{ $and: [...] }` |
+
+### Value position
+
+| Prefix | Content | Example |
+|--------|---------|---------|
+| `00_00` – `00_99` | Type suggestions | `true` / `false` for boolean fields |
+| `0_$eq` – `2_$op` | Query operators (type-aware) | `{ $eq: … }`, `{ $gt: … }` |
+| `3_ObjectId` | BSON constructors | `ObjectId(…)`, `ISODate(…)` |
+| `4_Date` | JS globals | `Date`, `Math`, `RegExp`, `Infinity` |
+
+### Key position
+
+| Prefix | Content | Example |
+|--------|---------|---------|
+| `0_fieldName` | Schema field names | `age`, `name`, `_id` |
+| `1_$and` | Key-position operators | `$and`, `$or`, `$nor` |
+
+### Operator position (type-aware)
+
+When the field's BSON type is known, operators are tiered by relevance:
+
+| Prefix | Tier | Meaning |
+|--------|------|---------|
+| `0_` | Type-relevant | Operator's `applicableBsonTypes` matches the field |
+| `1a_` | Comparison (universal) | `$eq`, `$ne`, `$gt`, `$in`, etc. — no type restriction, most commonly used |
+| `1b_` | Other universal | Element/evaluation/geospatial operators with no type restriction |
+| `2_` | Non-matching | Operator has type restrictions that don't match the field |
+
+Within each tier, operators sort alphabetically by name (`$eq` < `$gt` < `$in`).
+
+**Example — boolean field `isActive`:**
+- Tier `1a_`: `$eq`, `$gt`, `$gte`, `$in`, `$lt`, `$lte`, `$ne`, `$nin` (comparison)
+- Tier `1b_`: `$exists`, `$type`, `$mod` (other universal; key-position-only operators such as `$expr` and `$jsonSchema` are not offered at operator position)
+- Tier `2_`: `$regex` (string-only), `$elemMatch` (array-only), `$bitsAllSet` (int/long-only)
+
+### Decision matrix
+
+```
+Has field type info?
+├─ NO  → no sortText override (Monaco default alphabetical)
+├─ YES
+│   ├─ Operator has applicableBsonTypes matching field? → "0_"
+│   ├─ Operator has no applicableBsonTypes?
+│   │   ├─ Is comparison operator (meta = query:comparison)? → "1a_"
+│   │   └─ Other category? → "1b_"
+│   └─ Operator has applicableBsonTypes NOT matching field? → "2_"
+```
+
+## Key concepts
+
+### `completionKnowledge.ts`
+
+Curated domain rules that go beyond the auto-generated operator registry in `documentdb-constants`. Contains:
+
+- **`KEY_POSITION_OPERATORS`** — operators valid only at key position: the query root or an element of a `$and`/`$or`/`$nor` array (`$and`, `$or`, etc.)
+- **`LABEL_PLACEHOLDER`** — the `…` character used in display labels
+- **`INFO_INDICATOR`** — the `ℹ` character prepended to example descriptions
+
+### Snippet handling
+
+Operator snippets in `documentdb-constants` include outer braces: `{ $gt: ${1:value} }`.
+
+- **Empty position**: operators keep full braces (user has no braces); fields wrapped with `{ ... }`
+- **Value position**: inserted as-is (user is replacing the entire value)
+- **Operator position**: outer `{ }` stripped via `stripOuterBraces()` (user is already inside braces)
+- **Key position**: outer `{ }` stripped (user is already inside the query object)
+- **`$` escaping**: `escapeSnippetDollars()` prevents Monaco from treating `$gt` as a variable reference
diff --git a/src/webviews/documentdbQuery/completions/completionKnowledge.ts b/src/webviews/documentdbQuery/completions/completionKnowledge.ts
new file mode 100644
index 000000000..148539e60
--- /dev/null
+++ b/src/webviews/documentdbQuery/completions/completionKnowledge.ts
@@ -0,0 +1,98 @@
+/*---------------------------------------------------------------------------------------------
+ *  Copyright (c) Microsoft Corporation. All rights reserved.
+ *  Licensed under the MIT License. See License.txt in the project root for license information.
+ *--------------------------------------------------------------------------------------------*/
+
+/**
+ * Completion knowledge — curated domain rules for the completion provider.
+ *
+ * This file centralises "knowledge" that is **not** part of the generic
+ * DocumentDB operator registry (`documentdb-constants`) but is essential for
+ * producing high-quality, context-sensitive completions in the query editor.
+ *
+ * ### Why this file exists
+ *
+ * The `documentdb-constants` package is auto-generated from the official
+ * operator reference and is intentionally kept generic — it describes *what*
+ * operators exist, not *where* they are syntactically valid.
+ *
+ * However the completion provider needs to know additional rules:
+ *
+ * 1. **Which operators are only valid at key (root) position?**
+ *    `$and`, `$or`, `$nor`, etc. accept sub-queries, not field values.
+ *    Showing them inside a field's operator list (`{ age: { $and … } }`) is
+ *    misleading, so we need an explicit list to filter them out of
+ *    operator-position completions and include them in key-position completions.
+ *
+ * 2. **Placeholder character for labels**
+ *    A single Unicode character used in completion-list labels to represent
+ *    "user fills this in". Must render well in all editors and at any font size.
+ *
+ * Adding new knowledge here keeps the completion provider self-documented and
+ * avoids magic values scattered across multiple files.
+ */
+
+/**
+ * Operators that are syntactically valid only at the **key position** (the
+ * root level of a query document, or inside a `$and`/`$or`/`$nor` array
+ * element).
+ *
+ * These operators accept sub-expressions or arrays of sub-queries as their
+ * values — they do **not** operate on a specific field's BSON value. For
+ * example:
+ *
+ * ```js
+ * // ✅ Valid — key position
+ * { $and: [{ age: { $gt: 18 } }, { name: "Alice" }] }
+ *
+ * // ❌ Invalid — operator position on field 'age'
+ * { age: { $and: … } }
+ * ```
+ *
+ * **`$not` is intentionally excluded** — despite being a logical operator,
+ * `$not` is a field-level operator that wraps a single field's expression:
+ * `{ price: { $not: { $gt: 1.99 } } }`. It does NOT work at query root.
+ *
+ * The completion provider uses this set to:
+ * - **Include** these operators at key position and array-element position
+ * - **Exclude** them from operator position (inside `{ field: { … } }`)
+ * - **Exclude** them from value position
+ *
+ * Source: DocumentDB query language specification — logical and meta operators.
+ */
+export const KEY_POSITION_OPERATORS = new Set([
+    '$and',
+    '$or',
+    '$nor',
+    '$comment',
+    '$expr',
+    '$jsonSchema',
+    '$text',
+    '$where',
+]);
+
+/**
+ * Placeholder character used in completion-list **labels** to indicate where
+ * the user should type a value.
+ *
+ * This is purely cosmetic — the actual insertText uses Monaco snippet tab stops
+ * (`${1:placeholder}`). The label placeholder is what users see in the
+ * completion picker before selecting an item.
+ *
+ * We use the horizontal ellipsis `…` (U+2026) because:
+ * - It is universally understood as "something goes here"
+ * - It renders reliably across all monospace and proportional fonts
+ * - It is visually lightweight and does not distract from the operator syntax
+ *
+ * Previously we used `▪` (U+25AA, Black Small Square) but it was too subtle
+ * at small font sizes and less semantically clear.
+ */
+export const LABEL_PLACEHOLDER = '\u2026'; // … (horizontal ellipsis)
+
+/**
+ * Info indicator for completion descriptions that contain usage examples.
+ *
+ * Prepended to description strings that show example values to differentiate
+ * them from plain type labels (e.g., `"ℹ e.g. ends with '.com'"` vs `"string literal"`).
+ */
+export const INFO_INDICATOR = '\u2139'; // ℹ (information source)
diff --git a/src/webviews/documentdbQuery/completions/createCompletionItems.ts b/src/webviews/documentdbQuery/completions/createCompletionItems.ts
new file mode 100644
index 000000000..960c096b1
--- /dev/null
+++ b/src/webviews/documentdbQuery/completions/createCompletionItems.ts
@@ -0,0 +1,377 @@
+/*---------------------------------------------------------------------------------------------
+ *  Copyright (c) Microsoft Corporation. All rights reserved.
+ *  Licensed under the MIT License. See License.txt in the project root for license information.
+ *--------------------------------------------------------------------------------------------*/
+
+/**
+ * Context-sensitive completion item creation for the `documentdb-query` language.
+ *
+ * This module is the main entry point for the completion provider. It uses
+ * cursor context detection to determine which completions to show and delegates
+ * to specialized functions for each context (key, value, operator, etc.).
+ */
+
+import {
+    FILTER_COMPLETION_META,
+    getFilteredCompletions,
+    PROJECTION_COMPLETION_META,
+} from '@vscode-documentdb/documentdb-constants';
+// eslint-disable-next-line import/no-internal-modules
+import type * as monacoEditor from 'monaco-editor/esm/vs/editor/editor.api';
+import { getCompletionContext } from '../completionStore';
+import { type CursorContext } from '../cursorContext';
+import { EditorType } from '../languageConfig';
+import { KEY_POSITION_OPERATORS } from './completionKnowledge';
+import { createJsGlobalCompletionItems } from './jsGlobals';
+import { mapFieldToCompletionItem, mapOperatorToCompletionItem } from './mapCompletionItems';
+import { createTypeSuggestions } from './typeSuggestions';
+
+/**
+ * Parameters for creating completion items (input of `createCompletionItems`).
+ */
+export interface CreateCompletionItemsParams {
+    /** The editor type parsed from the model URI (undefined if URI doesn't match). */
+    editorType: EditorType | undefined;
+    /** The session ID for looking up dynamic field completions. */
+    sessionId: string | undefined;
+    /** The range to insert completions at. */
+    range: monacoEditor.IRange;
+    /** Whether the cursor is immediately after a '$' character (not read by `createCompletionItems`). */
+    isDollarPrefix: boolean;
+    /** The Monaco editor API. */
+    monaco: typeof monacoEditor;
+    /**
+     * Optional BSON types of the field the cursor is operating on. Used for type-aware
+     * operator sorting at operator position when `cursorContext.fieldBsonType` is not set.
+     */
+    fieldBsonTypes?: readonly string[];
+    /**
+     * When true, completion snippets should include outer `{ }` wrapping.
+     * Set when the editor content has no braces (user cleared the editor). Only
+     * consulted when `cursorContext` is missing or its position is `'unknown'`.
+     */
+    needsWrapping?: boolean;
+    /**
+     * Optional cursor context from the heuristic cursor position detector.
+     * When provided, completions are filtered based on the semantic position
+     * of the cursor. When undefined or `'unknown'`, all completions are shown
+     * (or wrapped key-position completions when `needsWrapping` is set).
+     */
+    cursorContext?: CursorContext;
+}
+
+// KEY_POSITION_OPERATORS is imported from ./completionKnowledge
+// Re-export for backwards compatibility and testing
+export { KEY_POSITION_OPERATORS } from './completionKnowledge';
+
+/**
+ * Picks the completion meta tags that apply to an editor type.
+ *
+ * Exported for testing.
+ */
+export function getMetaTagsForEditorType(editorType: EditorType | undefined): readonly string[] {
+    // Project and sort editors share the projection tag set.
+    const usesProjectionTags = editorType === EditorType.Project || editorType === EditorType.Sort;
+    if (usesProjectionTags) {
+        return PROJECTION_COMPLETION_META;
+    }
+
+    // Filter editor, an unrecognized type, or no editor type at all:
+    // all of these resolve to the filter tag set.
+    return FILTER_COMPLETION_META;
+}
+
+/**
+ * Builds the Monaco completion items for the current editor state.
+ *
+ * This is the entry point used by the CompletionItemProvider. The result
+ * depends on `cursorContext.position`:
+ * - **key** / **array-element**: field names + key-position operators ($and, $or, etc.)
+ * - **value**: value-position completions (type suggestions, operators with
+ *   braces, BSON constructors, JS globals; project/sort editors get fixed values)
+ * - **operator**: operators (without braces) with type-aware sorting
+ * - **unknown** or no context, with `needsWrapping`: key-position completions
+ *   wrapped in `{ }` (empty editor)
+ * - **unknown** or no context otherwise: all completions — discovery fallback
+ *
+ * @param params - editor, session, range and cursor information
+ */
+export function createCompletionItems(params: CreateCompletionItemsParams): monacoEditor.languages.CompletionItem[] {
+    const { editorType, sessionId, range, monaco, fieldBsonTypes, cursorContext, needsWrapping } = params;
+
+    // No usable context. An editor without braces gets key-position items wrapped
+    // in `{ }`; anything else gets the full discovery list.
+    if (!cursorContext || cursorContext.position === 'unknown') {
+        return needsWrapping
+            ? createEmptyEditorCompletions(editorType, sessionId, range, monaco)
+            : createAllCompletions(editorType, sessionId, range, monaco);
+    }
+
+    // Array elements accept the same items as object keys.
+    if (cursorContext.position === 'key' || cursorContext.position === 'array-element') {
+        return createKeyPositionCompletions(editorType, sessionId, range, monaco);
+    }
+
+    // Value position relies solely on the type detected at the cursor.
+    if (cursorContext.position === 'value') {
+        return createValuePositionCompletions(editorType, range, monaco, cursorContext.fieldBsonType);
+    }
+
+    if (cursorContext.position === 'operator') {
+        // Prefer the type detected at the cursor; otherwise use the caller-supplied types.
+        const detectedType = cursorContext.fieldBsonType;
+        const bsonTypes = detectedType ? [detectedType] : fieldBsonTypes;
+        return createOperatorPositionCompletions(editorType, range, monaco, bsonTypes);
+    }
+
+    // Any position not handled above falls back to the full list.
+    return createAllCompletions(editorType, sessionId, range, monaco);
+}
+
+// ---------- Context-specific completion builders ----------
+
+/**
+ * Completions for an editor that contains no braces (e.g. cleared content).
+ *
+ * Offers the same kinds of items as key position — fields and key-position
+ * operators — but every inserted snippet carries its own outer `{ }`:
+ * operator snippets keep theirs, field snippets get them added.
+ */
+function createEmptyEditorCompletions(
+    editorType: EditorType | undefined,
+    sessionId: string | undefined,
+    range: monacoEditor.IRange,
+    monaco: typeof monacoEditor,
+): monacoEditor.languages.CompletionItem[] {
+    const rootOperatorItems: monacoEditor.languages.CompletionItem[] = [];
+    for (const entry of getFilteredCompletions({ meta: [...getMetaTagsForEditorType(editorType)] })) {
+        if (!KEY_POSITION_OPERATORS.has(entry.value)) {
+            continue;
+        }
+        // Snippet keeps its outer braces here: the editor has none of its own.
+        const operatorItem = mapOperatorToCompletionItem(entry, range, monaco);
+        operatorItem.sortText = `1_${entry.value}`;
+        rootOperatorItems.push(operatorItem);
+    }
+
+    // Field snippets are wrapped in `{ ... }` so the inserted text is a valid query.
+    const wrappedFieldItems = getFieldCompletionItems(sessionId, range, monaco).map((fieldItem) => ({
+        ...fieldItem,
+        insertText: `{ ${fieldItem.insertText as string} }`,
+    }));
+
+    return [...wrappedFieldItems, ...rootOperatorItems];
+}
+
+/**
+ * Full discovery fallback for cursor positions that could not be classified.
+ *
+ * Returns fields, every standalone operator, BSON constructors (sort `3_`)
+ * and JS globals, in that order. Entries tagged `variable` and entries
+ * marked `standalone: false` are left out.
+ */
+function createAllCompletions(
+    editorType: EditorType | undefined,
+    sessionId: string | undefined,
+    range: monacoEditor.IRange,
+    monaco: typeof monacoEditor,
+): monacoEditor.languages.CompletionItem[] {
+    const entries = getFilteredCompletions({ meta: [...getMetaTagsForEditorType(editorType)] });
+    const fieldItems = getFieldCompletionItems(sessionId, range, monaco);
+
+    const operatorItems: monacoEditor.languages.CompletionItem[] = [];
+    const bsonItems: monacoEditor.languages.CompletionItem[] = [];
+    for (const entry of entries) {
+        if (entry.meta === 'bson') {
+            // BSON constructors always get the `3_` sort prefix.
+            const bsonItem = mapOperatorToCompletionItem(entry, range, monaco);
+            bsonItem.sortText = `3_${entry.value}`;
+            bsonItems.push(bsonItem);
+        } else if (entry.meta !== 'variable' && entry.standalone !== false) {
+            operatorItems.push(mapOperatorToCompletionItem(entry, range, monaco));
+        }
+    }
+
+    const jsGlobalItems = createJsGlobalCompletionItems(range, monaco);
+    return [...fieldItems, ...operatorItems, ...bsonItems, ...jsGlobalItems];
+}
+
+/** Key-position (and array-element) completions: field names first, then key-position operators. */
+function createKeyPositionCompletions(
+    editorType: EditorType | undefined,
+    sessionId: string | undefined,
+    range: monacoEditor.IRange,
+    monaco: typeof monacoEditor,
+): monacoEditor.languages.CompletionItem[] {
+    const entries = getFilteredCompletions({ meta: [...getMetaTagsForEditorType(editorType)] });
+    // The cursor already sits inside `{ }`, so each operator snippet is mapped with
+    // brace stripping enabled; inserting `{ $and: [...] }` as-is would double the braces.
+    const operatorItems: monacoEditor.languages.CompletionItem[] = [];
+    for (const entry of entries) {
+        if (KEY_POSITION_OPERATORS.has(entry.value)) {
+            const operatorItem = mapOperatorToCompletionItem(entry, range, monaco, undefined, true);
+            operatorItem.sortText = `1_${entry.value}`;
+            operatorItems.push(operatorItem);
+        }
+    }
+
+    return [...getFieldCompletionItems(sessionId, range, monaco), ...operatorItems];
+}
+
+/**
+ * Completions offered where a field's value is expected.
+ *
+ * - **Project editor**: only `1` (include) and `0` (exclude)
+ * - **Sort editor**: only `1` (ascending) and `-1` (descending)
+ * - **Any other editor** (filter is the default), in list order:
+ *   1. Type-aware suggestions for `fieldBsonType` (sort `00_`)
+ *   2. Query operators, snippets keeping their braces (sort `0_`–`2_`)
+ *   3. BSON constructors (sort `3_`), then JS globals such as Date and Math (sort `4_`)
+ */
+function createValuePositionCompletions(
+    editorType: EditorType | undefined,
+    range: monacoEditor.IRange,
+    monaco: typeof monacoEditor,
+    fieldBsonType: string | undefined,
+): monacoEditor.languages.CompletionItem[] {
+    // Project and sort editors offer a fixed pair of numeric values and nothing else.
+    switch (editorType) {
+        case EditorType.Project:
+            return createProjectValueCompletions(range, monaco);
+        case EditorType.Sort:
+            return createSortValueCompletions(range, monaco);
+        default:
+            break;
+    }
+
+    const entries = getFilteredCompletions({ meta: [...getMetaTagsForEditorType(editorType)] });
+
+    // Tier 1: suggestions derived from the field's BSON type.
+    const typeItems = createTypeSuggestions(fieldBsonType, range, monaco);
+
+    // Tiers 2 and 3 are collected in a single pass over the entries.
+    //  - Operators: key-position-only, `variable` and non-standalone entries are
+    //    skipped. The field type (when known) is handed to the mapper so it can
+    //    assign a type-aware sort prefix; items left without one default to `0_`.
+    //  - BSON constructors: always sorted under `3_`.
+    const typesForSorting = fieldBsonType ? [fieldBsonType] : undefined;
+    const operatorItems: monacoEditor.languages.CompletionItem[] = [];
+    const bsonItems: monacoEditor.languages.CompletionItem[] = [];
+    for (const entry of entries) {
+        if (entry.meta === 'bson') {
+            const bsonItem = mapOperatorToCompletionItem(entry, range, monaco);
+            bsonItem.sortText = `3_${entry.value}`;
+            bsonItems.push(bsonItem);
+            continue;
+        }
+
+        const isValueOperator =
+            entry.meta !== 'variable' && entry.standalone !== false && !KEY_POSITION_OPERATORS.has(entry.value);
+        if (!isValueOperator) {
+            continue;
+        }
+
+        const operatorItem = mapOperatorToCompletionItem(entry, range, monaco, typesForSorting);
+        if (!operatorItem.sortText) {
+            operatorItem.sortText = `0_${entry.value}`;
+        }
+        operatorItems.push(operatorItem);
+    }
+
+    // Tier 4: JS globals (Date, Math, RegExp, Infinity, NaN, undefined).
+    const jsGlobalItems = createJsGlobalCompletionItems(range, monaco);
+
+    return [...typeItems, ...operatorItems, ...bsonItems, ...jsGlobalItems];
+}
+
+/**
+ * Value completions for the **project** editor: `1` (include) and `0` (exclude).
+ *
+ * Only these two numeric values are offered here, with `1` preselected.
+ * Projection operators such as `$slice` and `$elemMatch` are not part of
+ * this list; they are surfaced through operator-position completions.
+ */
+function createProjectValueCompletions(
+    range: monacoEditor.IRange,
+    monaco: typeof monacoEditor,
+): monacoEditor.languages.CompletionItem[] {
+    const kind = monaco.languages.CompletionItemKind.Value;
+    const includeItem: monacoEditor.languages.CompletionItem = {
+        label: { label: '1', description: 'include field' },
+        kind,
+        insertText: '1',
+        sortText: '00_1',
+        preselect: true,
+        range,
+    };
+    const excludeItem: monacoEditor.languages.CompletionItem = {
+        label: { label: '0', description: 'exclude field' },
+        kind,
+        insertText: '0',
+        sortText: '00_0',
+        range,
+    };
+    return [includeItem, excludeItem];
+}
+
+/**
+ * Value completions for the **sort** editor: `1` (ascending, preselected) and `-1` (descending).
+ */
+function createSortValueCompletions(
+    range: monacoEditor.IRange,
+    monaco: typeof monacoEditor,
+): monacoEditor.languages.CompletionItem[] {
+    const kind = monaco.languages.CompletionItemKind.Value;
+    const ascendingItem: monacoEditor.languages.CompletionItem = {
+        label: { label: '1', description: 'ascending' },
+        kind,
+        insertText: '1',
+        sortText: '00_1',
+        preselect: true,
+        range,
+    };
+    const descendingItem: monacoEditor.languages.CompletionItem = {
+        label: { label: '-1', description: 'descending' },
+        kind,
+        insertText: '-1',
+        sortText: '00_-1',
+        range,
+    };
+    return [ascendingItem, descendingItem];
+}
+
+/** Operator-position completions: value operators mapped with outer-brace stripping enabled. */
+function createOperatorPositionCompletions(
+    editorType: EditorType | undefined,
+    range: monacoEditor.IRange,
+    monaco: typeof monacoEditor,
+    fieldBsonTypes: readonly string[] | undefined,
+): monacoEditor.languages.CompletionItem[] {
+    const items: monacoEditor.languages.CompletionItem[] = [];
+    for (const entry of getFilteredCompletions({ meta: [...getMetaTagsForEditorType(editorType)] })) {
+        // Leave out BSON constructors, variables, non-standalone entries and key-position-only operators.
+        const excluded = entry.meta === 'bson' || entry.meta === 'variable' || entry.standalone === false;
+        if (!excluded && !KEY_POSITION_OPERATORS.has(entry.value)) {
+            items.push(mapOperatorToCompletionItem(entry, range, monaco, fieldBsonTypes, true));
+        }
+    }
+
+    return items;
+}
+
+/**
+ * Maps the session's known fields to completion items; empty without a session or stored context.
+ */
+function getFieldCompletionItems(
+    sessionId: string | undefined,
+    range: monacoEditor.IRange,
+    monaco: typeof monacoEditor,
+): monacoEditor.languages.CompletionItem[] {
+    const context = sessionId ? getCompletionContext(sessionId) : undefined;
+    if (!context) {
+        return [];
+    }
+
+    // One item per field, in the order the context lists them.
+    return [...context.fields].map((field) => mapFieldToCompletionItem(field, range, monaco));
+}
diff --git a/src/webviews/documentdbQuery/completions/index.ts b/src/webviews/documentdbQuery/completions/index.ts
new file mode 100644
index 000000000..c9fa6db8c
--- /dev/null
+++ b/src/webviews/documentdbQuery/completions/index.ts
@@ -0,0 +1,33 @@
+/*---------------------------------------------------------------------------------------------
+ *  Copyright (c) Microsoft Corporation. All rights reserved.
+ *  Licensed under the MIT License. See License.txt in the project root for license information.
+ *--------------------------------------------------------------------------------------------*/
+
+/**
+ * Completion items for the `documentdb-query` language.
+ *
+ * This folder contains context-sensitive completion logic:
+ * - `createCompletionItems.ts` — main entry point, context branching
+ * - `mapCompletionItems.ts` — operator/field → CompletionItem mapping
+ * - `typeSuggestions.ts` — type-aware value suggestions (bool → true/false, etc.)
+ * - `jsGlobals.ts` — JS globals available in the shell-bson-parser sandbox (Date, Math, etc.)
+ * - `snippetUtils.ts` — snippet text manipulation (brace stripping, $ escaping)
+ */
+
+export { INFO_INDICATOR, LABEL_PLACEHOLDER } from './completionKnowledge';
+export {
+    KEY_POSITION_OPERATORS,
+    createCompletionItems,
+    getMetaTagsForEditorType,
+    type CreateCompletionItemsParams,
+} from './createCompletionItems';
+export { createJsGlobalCompletionItems } from './jsGlobals';
+export {
+    getCategoryLabel,
+    getCompletionKindForMeta,
+    getOperatorSortPrefix,
+    mapFieldToCompletionItem,
+    mapOperatorToCompletionItem,
+} from './mapCompletionItems';
+export { escapeSnippetDollars, stripOuterBraces } from './snippetUtils';
+export { createTypeSuggestions } from './typeSuggestions';
diff --git a/src/webviews/documentdbQuery/completions/jsGlobals.ts b/src/webviews/documentdbQuery/completions/jsGlobals.ts
new file mode 100644
index 000000000..483485f54
--- /dev/null
+++ b/src/webviews/documentdbQuery/completions/jsGlobals.ts
@@ -0,0 +1,172 @@
+/*---------------------------------------------------------------------------------------------
+ *  Copyright (c) Microsoft Corporation. All rights reserved.
+ *  Licensed under the MIT License. See License.txt in the project root for license information.
+ *--------------------------------------------------------------------------------------------*/
+
+/**
+ * JavaScript global completions for the `documentdb-query` language.
+ *
+ * The `documentdb-query` language uses `@mongodb-js/shell-bson-parser` to
+ * execute queries. That parser runs in a sandboxed scope that exposes a
+ * limited set of JavaScript globals beyond the BSON constructors (which are
+ * already registered in `documentdb-constants`).
+ *
+ * This module provides completion items for those JS globals so they appear
+ * in the value-position and fallback completion lists. They are NOT added to
+ * `documentdb-constants` because they are runtime JS constructs, not
+ * DocumentDB API operators.
+ *
+ * ### Supported JS globals (from shell-bson-parser's sandbox scope)
+ *
+ * **Class expressions** (object with whitelisted methods):
+ * - `Date` — `new Date()`, `Date()`, `Date.now()`, plus instance methods
+ * - `Math` — `Math.floor()`, `Math.min()`, `Math.max()`, etc.
+ *
+ * **Globals** (primitive values):
+ * - `Infinity`, `NaN`, `undefined`
+ *
+ * **Constructor functions** (SCOPE_ANY / SCOPE_NEW / SCOPE_CALL):
+ * - `RegExp` — already handled by the JS tokenizer, but listed for completeness
+ *
+ * Source: `node_modules/@mongodb-js/shell-bson-parser/dist/scope.js`
+ * (SCOPE_ANY, SCOPE_NEW, SCOPE_CALL, GLOBALS, ALLOWED_CLASS_EXPRESSIONS)
+ */
+
+// eslint-disable-next-line import/no-internal-modules
+import type * as monacoEditor from 'monaco-editor/esm/vs/editor/editor.api';
+import { escapeSnippetDollars } from './snippetUtils';
+
+/** A JS global completion definition (one entry of `JS_GLOBALS`). */
+interface JsGlobalDef {
+    /** Display label (e.g., "Date"); also used to build the item's sort key */
+    label: string;
+    /** Optional snippet to insert (otherwise label is used) */
+    snippet?: string;
+    /** Short description shown right-aligned in the completion list */
+    description: string;
+    /** Documentation shown in the details panel */
+    documentation: string;
+}
+
+/**
+ * JS globals available in shell-bson-parser's sandbox.
+ *
+ * These are the class expressions and global values that the parser's
+ * sandboxed eval supports. BSON constructors (ObjectId, ISODate, etc.)
+ * are already provided by `documentdb-constants` and are NOT duplicated here.
+ */
+const JS_GLOBALS: readonly JsGlobalDef[] = [
+    // -- Constructors and static calls (entries with a snippet) --
+    {
+        label: 'Date',
+        snippet: 'new Date(${1})',
+        description: 'JS global',
+        documentation:
+            'JavaScript Date constructor.\n\n' +
+            'Usages:\n' +
+            '- `new Date()` — current time\n' +
+            '- `new Date("2025-01-01")` — specific date\n' +
+            '- `new Date(Date.now() - 14 * 24 * 60 * 60 * 1000)` — 14 days ago',
+    },
+    {
+        label: 'Date.now()',
+        snippet: 'Date.now()',
+        description: 'JS global',
+        documentation:
+            'Returns milliseconds since Unix epoch (Jan 1, 1970).\n\nUseful for relative date queries:\n```\n{ $gt: new Date(Date.now() - 7 * 24 * 60 * 60 * 1000) }\n```',
+    },
+    {
+        label: 'RegExp',
+        snippet: 'RegExp("${1:pattern}")',
+        description: 'JS global',
+        documentation:
+            'JavaScript RegExp constructor.\n\nExample: `RegExp("^test")`\n\nPrefer regex literals: `/^test/`',
+    },
+
+    // -- Math methods (snippets with named tab stops for the arguments) --
+    {
+        label: 'Math.floor()',
+        snippet: 'Math.floor(${1:value})',
+        description: 'JS global',
+        documentation: 'Round down to the nearest integer.\n\nExample: `Math.floor(3.7)` → `3`',
+    },
+    {
+        label: 'Math.ceil()',
+        snippet: 'Math.ceil(${1:value})',
+        description: 'JS global',
+        documentation: 'Round up to the nearest integer.\n\nExample: `Math.ceil(3.2)` → `4`',
+    },
+    {
+        label: 'Math.round()',
+        snippet: 'Math.round(${1:value})',
+        description: 'JS global',
+        documentation: 'Round to the nearest integer.\n\nExample: `Math.round(3.5)` → `4`',
+    },
+    {
+        label: 'Math.min()',
+        snippet: 'Math.min(${1:a}, ${2:b})',
+        description: 'JS global',
+        documentation: 'Return the smaller of two values.\n\nExample: `Math.min(1.7, 2)` → `1.7`',
+    },
+    {
+        label: 'Math.max()',
+        snippet: 'Math.max(${1:a}, ${2:b})',
+        description: 'JS global',
+        documentation: 'Return the larger of two values.\n\nExample: `Math.max(1.7, 2)` → `2`',
+    },
+
+    // -- Primitive globals (no snippet: the label itself is the insert text) --
+    {
+        label: 'Infinity',
+        description: 'JS global',
+        documentation: 'Numeric value representing infinity.\n\nExample: `{ $lt: Infinity }`',
+    },
+    {
+        label: 'NaN',
+        description: 'JS global',
+        documentation: 'Numeric value representing Not-a-Number.\n\nExample: `{ $ne: NaN }`',
+    },
+    {
+        label: 'undefined',
+        description: 'JS global',
+        documentation: 'The undefined value.\n\nExample: `{ field: undefined }` — matches missing fields.',
+    },
+];
+
+/**
+ * Builds one completion item per `JS_GLOBALS` entry, each sorted under the `4_` prefix.
+ *
+ * @param range - the insertion range
+ * @param monaco - the Monaco API
+ */
+export function createJsGlobalCompletionItems(
+    range: monacoEditor.IRange,
+    monaco: typeof monacoEditor,
+): monacoEditor.languages.CompletionItem[] {
+    return JS_GLOBALS.map(({ label, snippet, description, documentation }) => {
+        const base = {
+            label: { label, description },
+            documentation: { value: documentation },
+            sortText: `4_${label}`,
+            range,
+        };
+
+        // Entries without a snippet insert their label verbatim as constants.
+        if (!snippet) {
+            return {
+                ...base,
+                kind: monaco.languages.CompletionItemKind.Constant,
+                insertText: label,
+                insertTextRules: undefined,
+            };
+        }
+
+        // Snippet entries: `$` before a letter is escaped so only tab stops stay live.
+        return {
+            ...base,
+            kind: monaco.languages.CompletionItemKind.Constructor,
+            insertText: escapeSnippetDollars(snippet),
+            insertTextRules: monaco.languages.CompletionItemInsertTextRule.InsertAsSnippet,
+        };
+    });
+}
diff --git a/src/webviews/documentdbQuery/completions/mapCompletionItems.ts b/src/webviews/documentdbQuery/completions/mapCompletionItems.ts
new file mode 100644
index 000000000..3f68537f5
--- /dev/null
+++ b/src/webviews/documentdbQuery/completions/mapCompletionItems.ts
@@ -0,0 +1,150 @@
+/*---------------------------------------------------------------------------------------------
+ *  Copyright (c) Microsoft Corporation. All rights reserved.
+ *  Licensed under the MIT License. See License.txt in the project root for license information.
+ *--------------------------------------------------------------------------------------------*/
+
+/**
+ * Functions for mapping operator and field data to Monaco CompletionItems.
+ */
+
+import { type OperatorEntry } from '@vscode-documentdb/documentdb-constants';
+// eslint-disable-next-line import/no-internal-modules
+import type * as monacoEditor from 'monaco-editor/esm/vs/editor/editor.api';
+import { type FieldCompletionData } from '../../../utils/json/data-api/autocomplete/toFieldCompletionItems';
+import { escapeSnippetDollars, stripOuterBraces } from './snippetUtils';
+
+/** Maps a meta tag to a Monaco CompletionItemKind: `query*`/`expr*` by prefix, other tags exactly, else Text. */
+export function getCompletionKindForMeta(
+    meta: string,
+    kinds: typeof monacoEditor.languages.CompletionItemKind,
+): number {
+    if (meta.startsWith('query')) return kinds.Operator;
+    if (meta.startsWith('expr')) return kinds.Function;
+    const exactKinds = new Map<string, number>([
+        ['bson', kinds.Constructor],
+        ['stage', kinds.Module],
+        ['accumulator', kinds.Method],
+        ['update', kinds.Property],
+        ['variable', kinds.Variable],
+        ['window', kinds.Event],
+        ['field:identifier', kinds.Field],
+    ]);
+    return exactKinds.has(meta) ? exactKinds.get(meta)! : kinds.Text;
+}
+
+/**
+ * Ranks an operator by how well it fits the BSON types of the current field.
+ *
+ * The returned prefix is prepended to sortText (lower sorts first):
+ * - `"0_"` — the operator's `applicableBsonTypes` shares a type with `fieldBsonTypes`
+ * - `"1a_"` — universal comparison operators (meta `query:comparison`)
+ * - `"1b_"` — all other universal operators (no `applicableBsonTypes` listed)
+ * - `"2_"` — `applicableBsonTypes` is listed but none matches
+ *
+ * @returns the prefix, or `undefined` when `fieldBsonTypes` is missing or empty
+ *   (no sorting override).
+ */
+export function getOperatorSortPrefix(
+    entry: OperatorEntry,
+    fieldBsonTypes: readonly string[] | undefined,
+): string | undefined {
+    // Without field type information there is nothing to rank against.
+    if (!fieldBsonTypes?.length) {
+        return undefined;
+    }
+
+    const applicableTypes = entry.applicableBsonTypes ?? [];
+    if (applicableTypes.length === 0) {
+        // Universal operator: comparison operators rank ahead of the rest.
+        return entry.meta === 'query:comparison' ? '1a_' : '1b_';
+    }
+
+    return applicableTypes.some((t) => fieldBsonTypes.includes(t)) ? '0_' : '2_';
+}
+
+/**
+ * Returns the part of a meta tag after its first `:`, or the whole tag if it has none.
+ * `'query:comparison'` → `'comparison'`, `'bson'` → `'bson'`
+ */
+export function getCategoryLabel(meta: string): string {
+    const separatorAt = meta.indexOf(':');
+    return separatorAt < 0 ? meta : meta.slice(separatorAt + 1);
+}
+
+/**
+ * Converts an OperatorEntry from documentdb-constants into a Monaco CompletionItem.
+ *
+ * Pure function — safe for unit testing without a Monaco runtime.
+ *
+ * Entries with a snippet are inserted as snippets; entries without one insert
+ * `entry.value` as plain text and leave `insertTextRules` undefined.
+ *
+ * @param entry - the operator entry to map
+ * @param range - the insertion range
+ * @param monaco - the Monaco API
+ * @param fieldBsonTypes - optional BSON types of the field for type-aware sorting
+ * @param shouldStripBraces - when true, strip outer `{ }` from snippets (for operator position)
+ * @returns the completion item; `sortText` is set only when a sort prefix was computed
+ */
+export function mapOperatorToCompletionItem(
+    entry: OperatorEntry,
+    range: monacoEditor.IRange,
+    monaco: typeof monacoEditor,
+    fieldBsonTypes?: readonly string[],
+    shouldStripBraces?: boolean,
+): monacoEditor.languages.CompletionItem {
+    // Snippet entries are inserted as Monaco snippets: braces are stripped first
+    // (when requested), then `$` before letters is escaped so operator names
+    // survive snippet expansion. Entries without a snippet insert their raw value.
+    let insertText = entry.value;
+    let insertTextRules: monacoEditor.languages.CompletionItemInsertTextRule | undefined;
+    if (entry.snippet) {
+        const body = shouldStripBraces ? stripOuterBraces(entry.snippet) : entry.snippet;
+        insertText = escapeSnippetDollars(body);
+        insertTextRules = monaco.languages.CompletionItemInsertTextRule.InsertAsSnippet;
+    }
+
+    // Description plus an optional markdown link to the operator's reference page.
+    const documentationValue = entry.link
+        ? `${entry.description}\n\n[ⓘ Documentation](${entry.link})`
+        : entry.description;
+
+    // sortText stays undefined unless field types produced a relevance tier.
+    const sortPrefix = getOperatorSortPrefix(entry, fieldBsonTypes);
+
+    return {
+        label: { label: entry.value, description: getCategoryLabel(entry.meta) },
+        kind: getCompletionKindForMeta(entry.meta, monaco.languages.CompletionItemKind),
+        insertText,
+        insertTextRules,
+        documentation: { value: documentationValue, isTrusted: true },
+        sortText: sortPrefix ? `${sortPrefix}${entry.value}` : undefined,
+        range,
+    };
+}
+
+/**
+ * Maps a FieldCompletionData entry to a Monaco CompletionItem.
+ *
+ * Fields get sort prefix `"0_"` so they appear before operators. The insert
+ * text is a snippet ending in `: $1` (cursor lands at the value position), so
+ * literal `$` and `\` in the field text are escaped to stay literal.
+ */
+export function mapFieldToCompletionItem(
+    field: FieldCompletionData,
+    range: monacoEditor.IRange,
+    monaco: typeof monacoEditor,
+): monacoEditor.languages.CompletionItem {
+    const sparseIndicator = field.isSparse ? ' (sparse)' : '';
+    return {
+        label: {
+            label: field.fieldName,
+            description: `${field.displayType}${sparseIndicator}`,
+        },
+        kind: monaco.languages.CompletionItemKind.Field,
+        insertText: `${field.insertText.replace(/[\\$]/g, '\\$&')}: $1`,
+        insertTextRules: monaco.languages.CompletionItemInsertTextRule.InsertAsSnippet,
+        sortText: `0_${field.fieldName}`,
+        range,
+    };
+}
diff --git a/src/webviews/documentdbQuery/completions/snippetUtils.ts b/src/webviews/documentdbQuery/completions/snippetUtils.ts
new file mode 100644
index 000000000..e43b7f70c
--- /dev/null
+++ b/src/webviews/documentdbQuery/completions/snippetUtils.ts
@@ -0,0 +1,42 @@
+/*---------------------------------------------------------------------------------------------
+ *  Copyright (c) Microsoft Corporation. All rights reserved.
+ *  Licensed under the MIT License. See License.txt in the project root for license information.
+ *--------------------------------------------------------------------------------------------*/
+
+/**
+ * Utility functions for manipulating Monaco snippet text.
+ */
+
+/**
+ * Removes the outermost `{ ` / ` }` pair from an operator snippet.
+ *
+ * Snippets in documentdb-constants are written for value position
+ * (e.g., `{ $gt: ${1:value} }`). Where the cursor is already inside braces,
+ * keeping the wrapper would nest the operator one level too deep.
+ *
+ * The wrapper is removed only when the snippet both starts with `'{ '` and
+ * ends with `' }'`; anything else is returned unchanged. Inner
+ * brackets/braces are untouched:
+ * - `{ $in: [${1:value}] }` → `$in: [${1:value}]`
+ * - `{ $gt: ${1:value} }` → `$gt: ${1:value}`
+ * - `$gt` → `$gt`
+ */
+export function stripOuterBraces(snippet: string): string {
+    const isWrapped = snippet.startsWith('{ ') && snippet.endsWith(' }');
+    return isWrapped ? snippet.slice(2, -2) : snippet;
+}
+
+/**
+ * Escapes literal `$` signs in snippet text that would be misinterpreted
+ * as Monaco snippet variables.
+ *
+ * In Monaco snippet syntax, `$name` is a variable reference (resolves to empty
+ * for unknown variables), where a name starts with a letter or underscore.
+ * Operator names like `$gt` would be consumed and produce empty output.
+ *
+ * This function escapes `$` when followed by a letter or `_` (`$gt` → `\$gt`)
+ * while preserving tab stop syntax (`${1:value}` and `$1` are unchanged).
+ */
+export function escapeSnippetDollars(snippet: string): string {
+    return snippet.replace(/\$(?=[a-zA-Z_])/g, '\\$');
+}
diff --git a/src/webviews/documentdbQuery/completions/typeSuggestions.ts b/src/webviews/documentdbQuery/completions/typeSuggestions.ts
new file mode 100644
index 000000000..391e3094f
--- /dev/null
+++ b/src/webviews/documentdbQuery/completions/typeSuggestions.ts
@@ -0,0 +1,248 @@
+/*---------------------------------------------------------------------------------------------
+ *  Copyright (c) Microsoft Corporation. All rights reserved.
+ *  Licensed under the MIT License. See License.txt in the project root for license information.
+ *--------------------------------------------------------------------------------------------*/
+
+/**
+ * Type-aware value suggestions for the completion provider.
+ *
+ * When the cursor is at a value position and the field's BSON type is known,
+ * this module provides contextual suggestions that match the field type:
+ * - Boolean fields → `true`, `false`
+ * - Number fields → range query snippet `{ $gt: ▪, $lt: ▪ }`
+ * - String fields → regex snippet, empty string literal
+ * - Date fields → ISODate constructor, date range snippet
+ * - ObjectId fields → ObjectId constructor
+ * - Null fields → `null`
+ * - Array fields → `$elemMatch` snippet
+ *
+ * These suggestions appear at the top of the completion list (sort prefix `00_`)
+ * to surface the most common patterns for each type.
+ */
+
+// eslint-disable-next-line import/no-internal-modules
+import type * as monacoEditor from 'monaco-editor/esm/vs/editor/editor.api';
+import { LABEL_PLACEHOLDER } from './completionKnowledge';
+import { escapeSnippetDollars } from './snippetUtils';
+
+/** Static description of one curated value suggestion for a BSON type. */
+interface TypeSuggestionDef {
+    /** Text shown as the completion's primary label */
+    label: string;
+    /** Literal text, or Monaco snippet source when `isSnippet` is true */
+    insertText: string;
+    /** True when `insertText` uses snippet syntax (tab stops) and must be inserted as a snippet */
+    isSnippet: boolean;
+    /** Short description rendered next to the label */
+    description: string;
+    /** Optional text for the details panel (handed to Monaco as `{ value }`) */
+    documentation?: string;
+}
+
+/**
+ * BSON type string → curated value suggestions (most likely first); snippet entries use Monaco tab stops.
+ */
+const TYPE_SUGGESTIONS: Record<string, readonly TypeSuggestionDef[]> = {
+    // BSONTypes.Boolean = 'boolean'
+    boolean: [
+        {
+            label: 'true',
+            insertText: 'true',
+            isSnippet: false,
+            description: 'boolean literal',
+            documentation: `Boolean literal \`true\`.\n\nExample: \`{ field: true }\``,
+        },
+        {
+            label: 'false',
+            insertText: 'false',
+            isSnippet: false,
+            description: 'boolean literal',
+            documentation: `Boolean literal \`false\`.\n\nExample: \`{ field: false }\``,
+        },
+    ],
+    // BSONTypes.Int32 = 'int32'
+    int32: numberSuggestions(),
+    // BSONTypes.Double = 'double'
+    double: numberSuggestions(),
+    // BSONTypes.Long = 'long'
+    long: numberSuggestions(),
+    // BSONTypes.Decimal128 = 'decimal128'
+    decimal128: numberSuggestions(),
+    // BSONTypes.Number = 'number' (generic number without specific subtype)
+    number: numberSuggestions(),
+    string: [
+        {
+            label: `{ $regex: /${LABEL_PLACEHOLDER}/ }`,
+            insertText: '{ $regex: /${1:pattern}/ }',
+            isSnippet: true,
+            description: 'pattern match',
+            documentation:
+                'Match string fields with a regex pattern.\n\n' +
+                'Example — ends with `.com`:\n```\n{ $regex: /\\.com$/ }\n```',
+        },
+        {
+            label: '{ $regex: /\\.com$/ }',
+            insertText: '{ $regex: /${1:\\.com$}/ }',
+            isSnippet: true,
+            description: `ends with .com - pattern match`,
+            documentation: 'Example pattern match for: ends with `.com`:\n```\n{ $regex: /\\.com$/ }\n```',
+        },
+        {
+            label: '""',
+            insertText: '"${1:text}"',
+            isSnippet: true,
+            description: 'string literal',
+            documentation: `Exact string match.\n\nExample: \`"active"\`, \`"pending"\``,
+        },
+    ],
+    // Getter: rebuilt on each lookup so the default dates never go stale in a long-lived webview.
+    get date(): readonly TypeSuggestionDef[] {
+        return [
+            {
+                label: `ISODate("${LABEL_PLACEHOLDER}")`,
+                insertText: `ISODate("\${1:${twoWeeksAgo()}}")`,
+                isSnippet: true,
+                description: 'date value',
+                documentation: `Match a specific date.\n\nExample: \`ISODate("${twoWeeksAgo()}")\``,
+            },
+            {
+                label: `{ $gt: ISODate("${LABEL_PLACEHOLDER}"), $lt: ISODate("${LABEL_PLACEHOLDER}") }`,
+                insertText: `{ $gt: ISODate("\${1:${twoWeeksAgo()}}"), $lt: ISODate("\${2:${todayISO()}}") }`,
+                isSnippet: true,
+                description: 'date range',
+                documentation: `Match dates within a range.\n\nExample: last 2 weeks — \`{ $gt: ISODate("${twoWeeksAgo()}"), $lt: ISODate("${todayISO()}") }\``,
+            },
+            {
+                label: `{ $gt: new Date(Date.now() - ${LABEL_PLACEHOLDER}) }`,
+                insertText: '{ $gt: new Date(Date.now() - ${1:14} * 24 * 60 * 60 * 1000) }',
+                isSnippet: true,
+                description: 'last N days',
+                documentation: `Match dates in the last N days relative to now.\n\nExample: last 14 days — \`{ $gt: new Date(Date.now() - 14 * 24 * 60 * 60 * 1000) }\``,
+            },
+        ];
+    },
+    objectid: [
+        {
+            label: `ObjectId("${LABEL_PLACEHOLDER}")`,
+            insertText: 'ObjectId("${1:hex}")',
+            isSnippet: true,
+            description: 'ObjectId value',
+            documentation: `Match by ObjectId.\n\nExample: \`ObjectId("507f1f77bcf86cd799439011")\``,
+        },
+    ],
+    null: [
+        {
+            label: 'null',
+            insertText: 'null',
+            isSnippet: false,
+            description: 'null literal',
+            documentation: `Match null or missing fields.\n\nExample: \`{ field: null }\``,
+        },
+    ],
+    array: [
+        {
+            label: `{ $elemMatch: { ${LABEL_PLACEHOLDER} } }`,
+            insertText: '{ $elemMatch: { ${1:query} } }',
+            isSnippet: true,
+            description: 'match element',
+            documentation: `Match arrays with at least one element satisfying the query.\n\nExample: \`{ $elemMatch: { status: "urgent" } }\``,
+        },
+        {
+            label: `{ $size: ${LABEL_PLACEHOLDER} }`,
+            insertText: '{ $size: ${1:length} }',
+            isSnippet: true,
+            description: 'array length',
+            documentation: `Match arrays with exactly N elements.\n\nExample: \`{ $size: 3 }\``,
+        },
+    ],
+};
+
+/** Range-style suggestions reused by every numeric BSON type (int32, double, long, decimal128, number). */
+function numberSuggestions(): readonly TypeSuggestionDef[] {
+    const rangeQuery: TypeSuggestionDef = {
+        label: `{ $gt: ${LABEL_PLACEHOLDER}, $lt: ${LABEL_PLACEHOLDER} }`,
+        insertText: '{ $gt: ${1:min}, $lt: ${2:max} }',
+        isSnippet: true,
+        description: 'range query',
+        documentation: `Match numbers within a range.\n\nExample: between 18 and 65 — \`{ $gt: 18, $lt: 65 }\``,
+    };
+    const minimumValue: TypeSuggestionDef = {
+        label: `{ $gte: ${LABEL_PLACEHOLDER} }`,
+        insertText: '{ $gte: ${1:value} }',
+        isSnippet: true,
+        description: 'minimum value',
+        documentation: `Match numbers greater than or equal to a value.\n\nExample: at least 100 — \`{ $gte: 100 }\``,
+    };
+    // Order matters: createTypeSuggestions derives sortText and preselect from the array index.
+    return [rangeQuery, minimumValue];
+}
+
+/**
+ * Builds the default "from" date: UTC midnight fourteen days before today,
+ * as an ISO 8601 string without the millisecond part (e.g. `2024-01-01T00:00:00Z`).
+ */
+function twoWeeksAgo(): string {
+    const now = new Date();
+    const midnightUtc = Date.UTC(now.getUTCFullYear(), now.getUTCMonth(), now.getUTCDate() - 14);
+    // toISOString() always ends in `.000Z` here because the milliseconds are zero.
+    return new Date(midnightUtc).toISOString().replace('.000Z', 'Z');
+}
+
+/**
+ * Builds the default "to" date: today at 23:59:59 UTC, ISO 8601 without milliseconds.
+ */
+function todayISO(): string {
+    const now = new Date();
+    const endOfDayUtc = Date.UTC(now.getUTCFullYear(), now.getUTCMonth(), now.getUTCDate(), 23, 59, 59);
+    return new Date(endOfDayUtc).toISOString().replace('.000Z', 'Z');
+}
+
+/**
+ * Creates type-aware value suggestions based on the field's BSON type.
+ *
+ * Items get sort prefix `00_` plus their index, so they list first at value
+ * position, and the first item is preselected. Snippet definitions are passed
+ * through `escapeSnippetDollars` so operator names survive snippet expansion.
+ *
+ * @param fieldBsonType - BSON type string from the schema (e.g., 'int32', 'string', 'boolean')
+ * @param range - the insertion range
+ * @param monaco - the Monaco API
+ * @returns completion items, or `[]` when the type is missing or has no curated suggestions
+ */
+export function createTypeSuggestions(
+    fieldBsonType: string | undefined,
+    range: monacoEditor.IRange,
+    monaco: typeof monacoEditor,
+): monacoEditor.languages.CompletionItem[] {
+    // Own-property check: a type string such as 'constructor' or 'toString' would otherwise
+    // resolve to an inherited Object.prototype member and crash on `.map` below.
+    if (!fieldBsonType || !Object.prototype.hasOwnProperty.call(TYPE_SUGGESTIONS, fieldBsonType)) {
+        return [];
+    }
+
+    const suggestions = TYPE_SUGGESTIONS[fieldBsonType];
+    if (!suggestions) {
+        return [];
+    }
+
+    return suggestions.map((def, index) => {
+        // Snippets contain literal `$operator` names that must not expand as snippet variables.
+        const insertText = def.isSnippet ? escapeSnippetDollars(def.insertText) : def.insertText;
+
+        return {
+            label: {
+                label: def.label,
+                description: def.description,
+            },
+            kind: def.isSnippet
+                ? monaco.languages.CompletionItemKind.Snippet
+                : monaco.languages.CompletionItemKind.Value,
+            insertText,
+            insertTextRules: def.isSnippet ? monaco.languages.CompletionItemInsertTextRule.InsertAsSnippet : undefined,
+            documentation: def.documentation ? { value: def.documentation } : undefined,
+            sortText: `00_${String(index).padStart(2, '0')}`,
+            preselect: index === 0,
+            range,
+        };
+    });
+}
diff --git a/src/webviews/documentdbQuery/cursorContext.test.ts b/src/webviews/documentdbQuery/cursorContext.test.ts
new file mode 100644
index 000000000..4ee6f57fe
--- /dev/null
+++ b/src/webviews/documentdbQuery/cursorContext.test.ts
@@ -0,0 +1,271 @@
+/*---------------------------------------------------------------------------------------------
+ *  Copyright (c) Microsoft Corporation. All rights reserved.
+ *  Licensed under the MIT License. See License.txt in the project root for license information.
+ *--------------------------------------------------------------------------------------------*/
+
+import { detectCursorContext, type CursorContext, type FieldTypeLookup } from './cursorContext';
+
+/**
+ * Test helper: splits a `|`-marked string into the text without the first
+ * `|` and the 0-based offset where that marker stood. Throws if none exists.
+ */
+function parseCursor(input: string): { text: string; offset: number } {
+    const offset = input.indexOf('|');
+    if (offset < 0) {
+        throw new Error(`Test input must contain a '|' cursor marker: "${input}"`);
+    }
+
+    // Only the first marker is removed; any later `|` stays part of the text.
+    const text = input.substring(0, offset) + input.substring(offset + 1);
+    return { text, offset };
+}
+
+/** Runs `detectCursorContext` on a `|`-marked string (marker removed, its index used as cursor). */
+function detect(input: string, fieldLookup?: FieldTypeLookup): CursorContext {
+    const marked = parseCursor(input);
+    return detectCursorContext(marked.text, marked.offset, fieldLookup);
+}
+
+describe('detectCursorContext', () => {
+    // ---------------------------------------------------------------
+    // Step 1: Core context detection (complete expressions)
+    // ---------------------------------------------------------------
+    describe('Step 1: Core context detection', () => {
+        describe('key position (root)', () => {
+            it('detects key position in empty object', () => {
+                const result = detect('{ | }');
+                expect(result).toEqual({ position: 'key', depth: 1 });
+            });
+
+            it('detects key position after opening brace', () => {
+                const result = detect('{|}');
+                expect(result).toEqual({ position: 'key', depth: 1 });
+            });
+
+            it('detects key position after comma in root object', () => {
+                const result = detect('{ name: "Alice", | }');
+                expect(result).toEqual({ position: 'key', depth: 1 });
+            });
+        });
+
+        describe('value position', () => {
+            it('detects value position after colon', () => {
+                const result = detect('{ _id: | }');
+                expect(result).toEqual({ position: 'value', fieldName: '_id' });
+            });
+
+            it('detects value position for quoted key', () => {
+                const result = detect('{ "my.field": | }');
+                expect(result).toEqual({ position: 'value', fieldName: 'my.field' });
+            });
+
+            it('detects value position for single-quoted key', () => {
+                const result = detect("{ 'address.city': | }");
+                expect(result).toEqual({ position: 'value', fieldName: 'address.city' });
+            });
+
+            it('includes bsonType when fieldLookup provides it', () => {
+                const lookup: FieldTypeLookup = (name) => (name === 'age' ? 'int32' : undefined);
+                const result = detect('{ age: | }', lookup);
+                expect(result).toEqual({ position: 'value', fieldName: 'age', fieldBsonType: 'int32' });
+            });
+
+            it('omits bsonType when fieldLookup returns undefined', () => {
+                const lookup: FieldTypeLookup = () => undefined;
+                const result = detect('{ age: | }', lookup);
+                expect(result).toEqual({ position: 'value', fieldName: 'age' });
+            });
+        });
+
+        describe('operator position (nested object)', () => {
+            it('detects operator position inside nested object', () => {
+                const result = detect('{ age: { | } }');
+                expect(result).toEqual({ position: 'operator', fieldName: 'age' });
+            });
+
+            it('detects operator position with bsonType', () => {
+                const lookup: FieldTypeLookup = (name) => (name === 'age' ? 'int32' : undefined);
+                const result = detect('{ age: { | } }', lookup);
+                expect(result).toEqual({ position: 'operator', fieldName: 'age', fieldBsonType: 'int32' });
+            });
+
+            it('detects operator position after comma in nested object', () => {
+                const result = detect('{ age: { $gt: 5, | } }');
+                expect(result).toEqual({ position: 'operator', fieldName: 'age' });
+            });
+        });
+
+        describe('array-element position', () => {
+            it('detects array-element inside $and', () => {
+                const result = detect('{ $and: [ | ] }');
+                expect(result).toEqual({ position: 'array-element', parentOperator: '$and' });
+            });
+
+            it('detects array-element inside $or', () => {
+                const result = detect('{ $or: [ | ] }');
+                expect(result).toEqual({ position: 'array-element', parentOperator: '$or' });
+            });
+
+            it('detects array-element inside $nor', () => {
+                const result = detect('{ $nor: [ | ] }');
+                expect(result).toEqual({ position: 'array-element', parentOperator: '$nor' });
+            });
+        });
+
+        describe('key inside logical operator array element', () => {
+            it('detects key inside $and array element object', () => {
+                const result = detect('{ $and: [ { | } ] }');
+                expect(result.position).toBe('key');
+            });
+
+            it('detects key inside $or array element object after comma', () => {
+                const result = detect('{ $or: [ { x: 1 }, { | } ] }');
+                expect(result.position).toBe('key');
+            });
+        });
+
+        describe('edge cases', () => {
+            it('returns unknown for empty string', () => {
+                expect(detectCursorContext('', 0)).toEqual({ position: 'unknown' });
+            });
+
+            it('returns unknown for cursor at offset 0', () => {
+                expect(detectCursorContext('{ age: 1 }', 0)).toEqual({ position: 'unknown' });
+            });
+
+            it('returns unknown for empty text even when the offset is positive', () => {
+                expect(detectCursorContext('', 5)).toEqual({ position: 'unknown' });
+            });
+
+            it('clamps cursor offset to text length', () => {
+                // Cursor past end of text — should still work
+                const result = detectCursorContext('{ age: ', 100);
+                expect(result).toEqual({ position: 'value', fieldName: 'age' });
+            });
+        });
+    });
+
+    // ---------------------------------------------------------------
+    // Step 1.5: Incomplete / broken input (mid-typing states)
+    // ---------------------------------------------------------------
+    describe('Step 1.5: Incomplete / broken input', () => {
+        it('{ age: | — colon just typed, no closing brace', () => {
+            const result = detect('{ age: |');
+            expect(result).toEqual({ position: 'value', fieldName: 'age' });
+        });
+
+        it('{ age: $| — started typing BSON constructor', () => {
+            const result = detect('{ age: $|');
+            expect(result).toEqual({ position: 'value', fieldName: 'age' });
+        });
+
+        it('{ age: $ |} — dollar with closing brace', () => {
+            const result = detect('{ age: $ |}');
+            expect(result).toEqual({ position: 'value', fieldName: 'age' });
+        });
+
+        it('{ age: {| — opened nested object, no close', () => {
+            const result = detect('{ age: {|');
+            expect(result).toEqual({ position: 'operator', fieldName: 'age' });
+        });
+
+        it('{ age: { $| — partially typed operator', () => {
+            const result = detect('{ age: { $|');
+            expect(result).toEqual({ position: 'operator', fieldName: 'age' });
+        });
+
+        it('{ age: { $ |} — incomplete operator inside nested object', () => {
+            const result = detect('{ age: { $ |}');
+            expect(result).toEqual({ position: 'operator', fieldName: 'age' });
+        });
+
+        it('{ age: { $g| — partially typed $gt', () => {
+            const result = detect('{ age: { $g|');
+            expect(result).toEqual({ position: 'operator', fieldName: 'age' });
+        });
+
+        it('{ | — opened root object, no field name yet', () => {
+            const result = detect('{ |');
+            expect(result).toEqual({ position: 'key', depth: 1 });
+        });
+
+        it('{ a| — partially typed field name', () => {
+            const result = detect('{ a|');
+            expect(result).toEqual({ position: 'key', depth: 1 });
+        });
+
+        it('{ name: "Alice", | — comma after first pair, new key expected', () => {
+            const result = detect('{ name: "Alice", |');
+            expect(result).toEqual({ position: 'key', depth: 1 });
+        });
+
+        it('{ name: "Alice", a| — partially typed second field name', () => {
+            const result = detect('{ name: "Alice", a|');
+            expect(result).toEqual({ position: 'key', depth: 1 });
+        });
+
+        it('{ $and: [| — opened array for logical operator', () => {
+            const result = detect('{ $and: [|');
+            expect(result).toEqual({ position: 'array-element', parentOperator: '$and' });
+        });
+
+        it('{ $and: [ {| — inside $and array element object', () => {
+            const result = detect('{ $and: [ {|');
+            expect(result.position).toBe('key');
+        });
+
+        it('{ age: { $gt: 5, | — after comma inside nested operator object', () => {
+            const result = detect('{ age: { $gt: 5, |');
+            expect(result).toEqual({ position: 'operator', fieldName: 'age' });
+        });
+
+        it('{| — just the opening brace', () => {
+            const result = detect('{|');
+            expect(result).toEqual({ position: 'key', depth: 1 });
+        });
+
+        it('whitespace only → unknown', () => {
+            expect(detectCursorContext('   ', 3)).toEqual({ position: 'unknown' });
+        });
+
+        it('handles fieldLookup with incomplete input', () => {
+            const lookup: FieldTypeLookup = (name) => (name === 'age' ? 'int32' : undefined);
+            const result = detect('{ age: { $|', lookup);
+            expect(result).toEqual({ position: 'operator', fieldName: 'age', fieldBsonType: 'int32' });
+        });
+
+        it('{ $or: [ { name: "x" }, {| — second element in $or array', () => {
+            const result = detect('{ $or: [ { name: "x" }, {|');
+            expect(result.position).toBe('key');
+        });
+    });
+
+    // ---------------------------------------------------------------
+    // Multi-line expressions
+    // ---------------------------------------------------------------
+    describe('multi-line expressions', () => {
+        it('key position in multi-line object', () => {
+            const result = detect(`{
+  name: "Alice",
+  |
+}`);
+            expect(result).toEqual({ position: 'key', depth: 1 });
+        });
+
+        it('value position in multi-line object', () => {
+            const result = detect(`{
+  age: |
+}`);
+            expect(result).toEqual({ position: 'value', fieldName: 'age' });
+        });
+
+        it('operator position in multi-line nested object', () => {
+            const result = detect(`{
+  age: {
+    |
+  }
+}`);
+            expect(result).toEqual({ position: 'operator', fieldName: 'age' });
+        });
+    });
+});
diff --git a/src/webviews/documentdbQuery/cursorContext.ts b/src/webviews/documentdbQuery/cursorContext.ts
new file mode 100644
index 000000000..606db0f39
--- /dev/null
+++ b/src/webviews/documentdbQuery/cursorContext.ts
@@ -0,0 +1,393 @@
+/*---------------------------------------------------------------------------------------------
+ *  Copyright (c) Microsoft Corporation. All rights reserved.
+ *  Licensed under the MIT License. See License.txt in the project root for license information.
+ *--------------------------------------------------------------------------------------------*/
+
+/**
+ * Cursor context detection for the `documentdb-query` language.
+ *
+ * Determines the semantic position of the cursor within a DocumentDB query
+ * expression (e.g., key position, value position, operator position) using
+ * a heuristic character-scanning approach.
+ *
+ * This module is a pure function with no Monaco or VS Code dependencies,
+ * making it fully unit-testable.
+ */
+
+/**
+ * The semantic position of the cursor within a query expression, discriminated
+ * by `position`. `fieldBsonType` is present only when a field lookup resolved it;
+ * `'unknown'` is the fallback when no context could be determined.
+ */
+export type CursorContext =
+    | { position: 'key'; depth: number }
+    | { position: 'value'; fieldName: string; fieldBsonType?: string }
+    | { position: 'operator'; fieldName: string; fieldBsonType?: string }
+    | { position: 'array-element'; parentOperator: string }
+    | { position: 'unknown' };
+
+/**
+ * A callback that resolves a field name to its BSON type string, or `undefined` when the
+ * type is not known. Used to enrich cursor context with type information from the completion store.
+ */
+export type FieldTypeLookup = (fieldName: string) => string | undefined;
+
+/**
+ * Classifies where the cursor sits inside a DocumentDB query expression.
+ *
+ * Scans backward from the cursor to the nearest structural character
+ * (`:`, `{`, `,` or `[`) and dispatches on it to decide between key, value,
+ * operator and array-element position. Yields `{ position: 'unknown' }` for
+ * empty text, a cursor offset of 0 or less, or when no context is found.
+ *
+ * @param text - the full text of the editor
+ * @param cursorOffset - the 0-based character offset of the cursor (clamped to the text length)
+ * @param fieldLookup - optional callback to resolve field names to BSON types
+ * @returns the detected cursor context
+ */
+export function detectCursorContext(text: string, cursorOffset: number, fieldLookup?: FieldTypeLookup): CursorContext {
+    const fallback: CursorContext = { position: 'unknown' };
+    if (!text || cursorOffset <= 0) {
+        return fallback;
+    }
+
+    // A cursor past the end of the text is treated as sitting at the end.
+    const offset = Math.min(cursorOffset, text.length);
+
+    // The nearest structural character before the cursor decides which resolver runs.
+    const anchor = scanBackward(text, offset);
+    if (anchor === undefined) {
+        return fallback;
+    }
+
+    const { char, index } = anchor;
+    if (char === ':') {
+        return resolveValueContext(text, index, fieldLookup);
+    }
+    if (char === '{') {
+        return resolveOpenBraceContext(text, index, fieldLookup);
+    }
+    if (char === ',') {
+        return resolveCommaContext(text, index, fieldLookup);
+    }
+    if (char === '[') {
+        return resolveOpenBracketContext(text, index);
+    }
+
+    // Defensive: the scan only reports the four characters handled above.
+    return fallback;
+}
+
+// ---------- Internal helpers ----------
+
+/** Characters a backward scan stops at and reports: `:`, `{`, `,` and `[`. */
+const STRUCTURAL_CHARS = new Set([':', '{', ',', '[']);
+
+interface ScanResult {
+    char: string; // the structural character that was found
+    index: number; // its 0-based index in the scanned text
+}
+
+// Known edge case: the backward scanner does not track whether characters
+// are inside quoted strings. A structural character that appears within a
+// string literal is still treated as structural. For example, in
+//   { msg: "{", | }
+// the `{` inside the string `"{"` would be found before the real opening
+// brace, causing a misclassification. This is acceptable for a completion
+// heuristic where rare edge cases degrade gracefully rather than break.
+
+/**
+ * Walks left from just before `offset` looking for the closest structural
+ * character, passing over anything `isSkippable` accepts (whitespace,
+ * identifier characters, quotes, ...).
+ *
+ * Skipping identifier characters lets a mid-word cursor such as `{ ag|`
+ * still resolve to the `{` that opened the object.
+ */
+function scanBackward(text: string, offset: number): ScanResult | undefined {
+    for (let pos = offset - 1; pos >= 0; pos--) {
+        const current = text[pos];
+        if (STRUCTURAL_CHARS.has(current)) {
+            return { char: current, index: pos };
+        }
+
+        if (!isSkippable(current)) {
+            // Anything else (e.g. '}', ']', ')') means the scan has left the
+            // current expression, so no context can be derived.
+            return undefined;
+        }
+    }
+
+    // Reached the start of the text without meeting a structural character.
+    return undefined;
+}
+
+/** Whitespace, word characters (a-z, A-Z, 0-9, _), `.`, `$`, quotes/backtick, `-` and `/`. */
+const SKIPPABLE_CHAR = /[\s\w.$"'`\-/]/;
+
+/**
+ * Tells whether a backward scan may pass over `ch`, i.e. whether it can
+ * appear between a structural character and the cursor (partial identifiers,
+ * quoted keys, negative numbers, ...).
+ */
+function isSkippable(ch: string): boolean {
+    return SKIPPABLE_CHAR.test(ch);
+}
+
+/**
+ * Handles a ':' anchor: the cursor is at the value of the key left of the colon.
+ *
+ * `{ _id: | }` → value, fieldName '_id'. Yields 'unknown' when no key can be
+ * read; `fieldBsonType` is attached only when the lookup returns a type.
+ */
+function resolveValueContext(text: string, colonIndex: number, fieldLookup?: FieldTypeLookup): CursorContext {
+    const fieldName = extractKeyBeforeColon(text, colonIndex);
+    if (!fieldName) {
+        return { position: 'unknown' };
+    }
+
+    const context: Extract<CursorContext, { position: 'value' }> = { position: 'value', fieldName };
+    const fieldBsonType = fieldLookup?.(fieldName);
+    if (fieldBsonType !== undefined) {
+        context.fieldBsonType = fieldBsonType;
+    }
+    return context;
+}
+
+/**
+ * Handles a '{' anchor by looking at what precedes the brace.
+ *
+ * - after `key:` → operator position for `key`, e.g. `{ age: { | } }`
+ *   (`fieldBsonType` is attached only when the lookup returns a type)
+ * - after `[` → delegated to `resolveKeyInsideArray`, e.g. `$and: [ { | } ]`
+ * - after `,` → delegated to `resolveCommaInsideArrayForBrace`,
+ *   e.g. `$and: [ {...}, { | } ]`
+ * - otherwise (root object, or nothing recognisable in front) → key position
+ *   whose depth counts the unmatched `{` before the brace plus the brace itself
+ * @returns the context for a cursor directly inside this brace
+ */
+function resolveOpenBraceContext(text: string, braceIndex: number, fieldLookup?: FieldTypeLookup): CursorContext {
+    // What sits in front of this '{' decides how its contents are classified.
+    const preceding = scanBackwardFrom(text, braceIndex);
+
+    if (preceding) {
+        if (preceding.char === ':') {
+            // `fieldName: { | }` — the object holds operators for that field.
+            // When no key can be read, fall through to the plain-key result below.
+            const fieldName = extractKeyBeforeColon(text, preceding.index);
+            if (fieldName) {
+                const context: Extract<CursorContext, { position: 'operator' }> = { position: 'operator', fieldName };
+                const fieldBsonType = fieldLookup?.(fieldName);
+                if (fieldBsonType !== undefined) {
+                    context.fieldBsonType = fieldBsonType;
+                }
+                return context;
+            }
+        } else if (preceding.char === '[') {
+            // `$and: [ { | } ]` — first object of an array.
+            return resolveKeyInsideArray(text, preceding.index);
+        } else if (preceding.char === ',') {
+            // `$and: [ {...}, { | } ]` — a later object of an array.
+            return resolveCommaInsideArrayForBrace(text, preceding.index);
+        }
+    }
+
+    // Root object, or the parent could not be determined. The brace at
+    // braceIndex is itself one level, hence the +1.
+    return { position: 'key', depth: computeDepth(text, braceIndex) + 1 };
+}
+
+/**
+ * Handles a ',' anchor by locating the bracket that encloses the comma.
+ *
+ * - enclosed by `{` → resolved like a cursor just inside that brace:
+ *   `{ name: "x", | }` → key, `{ age: { $gt: 5, | } }` → operator
+ * - enclosed by `[` → resolved like a cursor just inside that bracket:
+ *   `{ $and: [ {...}, | ] }` → array-element
+ * - no enclosing bracket → unknown
+ * @returns the context for the item that would follow the comma
+ */
+function resolveCommaContext(text: string, commaIndex: number, fieldLookup?: FieldTypeLookup): CursorContext {
+    // The innermost unmatched '[' or '{' that contains the comma decides the result.
+    const container = findEnclosingBracket(text, commaIndex);
+    if (container === undefined) {
+        return { position: 'unknown' };
+    }
+
+    switch (container.char) {
+        case '[':
+            // Array: the next element belongs to the same parent operator.
+            return resolveOpenBracketContext(text, container.index);
+        case '{':
+            // Object: same answer as a cursor right after its opening brace
+            // (root key vs. nested operator object).
+            return resolveOpenBraceContext(text, container.index, fieldLookup);
+        default:
+            return { position: 'unknown' };
+    }
+}
+
+/**
+ * Handles a '[' anchor: the cursor is directly inside an array.
+ *
+ * The array counts only when it is the value of a `$`-prefixed key, e.g.
+ * `{ $and: [ | ] }` → array-element with parentOperator '$and'. An array
+ * under a plain field name, or one with no readable `key:` in front of it,
+ * yields 'unknown'.
+ */
+function resolveOpenBracketContext(text: string, bracketIndex: number): CursorContext {
+    const preceding = scanBackwardFrom(text, bracketIndex);
+    // Only `$operator: [` arrays are recognised; the key is read from left of the ':'.
+    const parentKey = preceding?.char === ':' ? extractKeyBeforeColon(text, preceding.index) : undefined;
+
+    if (parentKey?.startsWith('$')) {
+        return { position: 'array-element', parentOperator: parentKey };
+    }
+    return { position: 'unknown' };
+}
+
+/**
+ * Resolves key context for an object that is an element of the array opened
+ * at `bracketIndex`, e.g. `$and: [ { | } ]`.
+ *
+ * The result is always key position. Depth is the number of unmatched `{`
+ * before the '[' plus one for the element's own brace, so the example above
+ * yields depth 2 when `$and` sits in the root object.
+ *
+ * Whether the array belongs to a `$`-prefixed operator makes no difference
+ * to the result, so the parent key is not inspected.
+ *
+ * @param text - the full editor text
+ * @param bracketIndex - index of the array's opening '['
+ * @returns key context one level below the array's own nesting depth
+ */
+function resolveKeyInsideArray(text: string, bracketIndex: number): CursorContext {
+    return { position: 'key', depth: computeDepth(text, bracketIndex) + 1 };
+}
+
+/**
+ * Handles a '{' that follows a ',': `$and: [ {...}, { | } ]`.
+ * When the comma is enclosed by '[', the object is an array element; otherwise
+ * the result is key position with depth 0.
+ */
+function resolveCommaInsideArrayForBrace(text: string, commaIndex: number): CursorContext {
+    const container = findEnclosingBracket(text, commaIndex);
+    return container?.char === '['
+        ? resolveKeyInsideArray(text, container.index)
+        : { position: 'key', depth: 0 };
+}
+
+// ---------- Character scanning utilities ----------
+
+/**
+ * Walks left from just before `index` to the nearest structural character,
+ * passing over characters `isSkippable` accepts. A closing `]` or `}`, or any
+ * other non-skippable character, ends the scan with no result.
+ */
+function scanBackwardFrom(text: string, index: number): ScanResult | undefined {
+    for (let pos = index - 1; pos >= 0; pos--) {
+        const current = text[pos];
+        if (current === ']' || current === '}') {
+            // A closing bracket belongs to a sibling expression — stop here.
+            return undefined;
+        }
+        if (STRUCTURAL_CHARS.has(current)) {
+            return { char: current, index: pos };
+        }
+        if (!isSkippable(current)) {
+            return undefined;
+        }
+    }
+    // Start of text reached without finding a structural character.
+    return undefined;
+}
+
+/**
+ * Searches leftward from just before `index` for the innermost opening `{`
+ * or `[` that has no matching closer between it and `index`.
+ *
+ * Braces and square brackets are balanced independently of each other, and
+ * brackets inside string literals are counted like any other.
+ * @returns the opener and its position, or `undefined` if none encloses `index`
+ */
+function findEnclosingBracket(text: string, index: number): ScanResult | undefined {
+    // Closers seen so far that still await their opener, per bracket kind.
+    let pendingBraces = 0;
+    let pendingBrackets = 0;
+
+    let pos = index - 1;
+    while (pos >= 0) {
+        const current = text[pos];
+        if (current === '}') {
+            pendingBraces += 1;
+        } else if (current === ']') {
+            pendingBrackets += 1;
+        } else if (current === '{') {
+            if (pendingBraces === 0) {
+                return { char: '{', index: pos };
+            }
+            pendingBraces -= 1;
+        } else if (current === '[') {
+            if (pendingBrackets === 0) {
+                return { char: '[', index: pos };
+            }
+            pendingBrackets -= 1;
+        }
+        pos -= 1;
+    }
+    return undefined;
+}
+
+/**
+ * Reads the key name that ends immediately before the colon at `colonIndex`
+ * (whitespace between key and colon is ignored).
+ *
+ * - Unquoted keys: `age:` → 'age', `$and:` → '$and', `a.b:` → 'a.b'
+ * - Single-quoted keys: `'my.field':` → 'my.field'
+ * - Double-quoted keys: `"my.field":` → 'my.field'
+ *
+ * @returns the key without quotes, or `undefined` when nothing precedes the
+ *          colon, the quote is unmatched, or no identifier characters are found
+ */
+function extractKeyBeforeColon(text: string, colonIndex: number): string | undefined {
+    // Position of the last non-whitespace character left of the colon.
+    let last = colonIndex - 1;
+    while (last >= 0 && /\s/.test(text[last])) {
+        last -= 1;
+    }
+    if (last < 0) {
+        return undefined;
+    }
+
+    const closing = text[last];
+    if (closing === '"' || closing === "'") {
+        // Quoted key: the nearest identical quote to the left opens it.
+        // (Escaped quotes inside the key are not recognised.)
+        let opening = last - 1;
+        while (opening >= 0 && text[opening] !== closing) {
+            opening -= 1;
+        }
+        return opening < 0 ? undefined : text.substring(opening + 1, last);
+    }
+
+    // Unquoted key: extend left over identifier characters (word chars, '$', '.').
+    let start = last;
+    while (start >= 0 && /[\w$.]/.test(text[start])) {
+        start -= 1;
+    }
+    const key = text.slice(start + 1, last + 1);
+    return key === '' ? undefined : key;
+}
+
+/**
+ * Brace nesting depth at `index`: the number of `{` minus the number of `}`
+ * in the text before `index`, floored at 0 (string contents are not excluded).
+ */
+function computeDepth(text: string, index: number): number {
+    let balance = 0;
+    for (let pos = 0; pos < index; pos++) {
+        const ch = text[pos];
+        balance += ch === '{' ? 1 : ch === '}' ? -1 : 0;
+    }
+    return balance > 0 ? balance : 0;
+}
diff --git a/src/webviews/documentdbQuery/documentdbQueryCompletionProvider.test.ts b/src/webviews/documentdbQuery/documentdbQueryCompletionProvider.test.ts
new file mode 100644
index 000000000..13521e3a0
--- /dev/null
+++ b/src/webviews/documentdbQuery/documentdbQueryCompletionProvider.test.ts
@@ -0,0 +1,1998 @@
+/*---------------------------------------------------------------------------------------------
+ *  Copyright (c) Microsoft Corporation. All rights reserved.
+ *  Licensed under the MIT License. See License.txt in the project root for license information.
+ *--------------------------------------------------------------------------------------------*/
+
+import {
+    FILTER_COMPLETION_META,
+    getFilteredCompletions,
+    PROJECTION_COMPLETION_META,
+    type OperatorEntry,
+} from '@vscode-documentdb/documentdb-constants';
+// eslint-disable-next-line import/no-internal-modules
+import type * as monacoEditor from 'monaco-editor/esm/vs/editor/editor.api';
+import { clearAllCompletionContexts, setCompletionContext } from './completionStore';
+import { type CursorContext } from './cursorContext';
+import {
+    createCompletionItems,
+    createTypeSuggestions,
+    escapeSnippetDollars,
+    getCategoryLabel,
+    getCompletionKindForMeta,
+    getMetaTagsForEditorType,
+    getOperatorSortPrefix,
+    mapFieldToCompletionItem,
+    mapOperatorToCompletionItem,
+    stripOuterBraces,
+} from './documentdbQueryCompletionProvider';
+import { EditorType } from './languageConfig';
+
+/**
+ * Stand-in for `monaco.languages.CompletionItemKind` so tests never load the Monaco runtime.
+ * Members are numbered 0–27 in declaration order; the values are intended to mirror Monaco's enum.
+ */
+const mockCompletionItemKind: typeof monacoEditor.languages.CompletionItemKind = {
+    Method: 0,
+    Function: 1,
+    Constructor: 2,
+    Field: 3,
+    Variable: 4,
+    Class: 5,
+    Struct: 6,
+    Interface: 7,
+    Module: 8,
+    Property: 9,
+    Event: 10,
+    Operator: 11,
+    Unit: 12,
+    Value: 13,
+    Constant: 14,
+    Enum: 15,
+    EnumMember: 16,
+    Keyword: 17,
+    Text: 18,
+    Color: 19,
+    File: 20,
+    Reference: 21,
+    Customcolor: 22,
+    Folder: 23,
+    TypeParameter: 24,
+    User: 25,
+    Issue: 26,
+    Snippet: 27,
+};
+
+/** Stand-in for `monaco.languages.CompletionItemInsertTextRule` (InsertAsSnippet uses Monaco's value, 4). */
+const mockInsertTextRule = {
+    None: 0,
+    KeepWhitespace: 1,
+    InsertAsSnippet: 4,
+} as typeof monacoEditor.languages.CompletionItemInsertTextRule;
+
+/**
+ * Builds a Monaco stub exposing only `languages.CompletionItemKind` and `languages.CompletionItemInsertTextRule`.
+ */
+function createMockMonaco(): typeof monacoEditor {
+    const languages = {
+        CompletionItemKind: mockCompletionItemKind,
+        CompletionItemInsertTextRule: mockInsertTextRule,
+    };
+    // Double cast: the stub deliberately omits the rest of the Monaco API surface.
+    return { languages } as unknown as typeof monacoEditor;
+}
+
+/**
+ * Returns the display text of a completion label, whether it is a plain string or a `CompletionItemLabel`.
+ */
+function getLabelText(label: string | monacoEditor.languages.CompletionItemLabel): string {
+    if (typeof label === 'string') return label;
+    return label.label;
+}
+
+/** Default single-point range (line 1, column 1) for tests that do not need a specific range. */
+const testRange: monacoEditor.IRange = {
+    startLineNumber: 1,
+    startColumn: 1,
+    endLineNumber: 1,
+    endColumn: 1,
+};
+
+describe('documentdbQueryCompletionProvider', () => {
+    describe('getCompletionKindForMeta', () => {
+        const kindEnum = mockCompletionItemKind;
+
+        test('maps query operators to Operator kind', () => {
+            expect(getCompletionKindForMeta('query', kindEnum)).toBe(kindEnum.Operator);
+            expect(getCompletionKindForMeta('query:comparison', kindEnum)).toBe(kindEnum.Operator);
+            expect(getCompletionKindForMeta('query:logical', kindEnum)).toBe(kindEnum.Operator);
+        });
+
+        test('maps expression operators to Function kind', () => {
+            expect(getCompletionKindForMeta('expr:arith', kindEnum)).toBe(kindEnum.Function);
+            expect(getCompletionKindForMeta('expr:string', kindEnum)).toBe(kindEnum.Function);
+        });
+
+        test('maps BSON constructors to Constructor kind', () => {
+            expect(getCompletionKindForMeta('bson', kindEnum)).toBe(kindEnum.Constructor);
+        });
+
+        test('maps stages to Module kind', () => {
+            expect(getCompletionKindForMeta('stage', kindEnum)).toBe(kindEnum.Module);
+        });
+
+        test('maps accumulators to Method kind', () => {
+            expect(getCompletionKindForMeta('accumulator', kindEnum)).toBe(kindEnum.Method);
+        });
+
+        test('maps update operators to Property kind', () => {
+            expect(getCompletionKindForMeta('update', kindEnum)).toBe(kindEnum.Property);
+        });
+
+        test('maps variables to Variable kind', () => {
+            expect(getCompletionKindForMeta('variable', kindEnum)).toBe(kindEnum.Variable);
+        });
+
+        test('maps window operators to Event kind', () => {
+            expect(getCompletionKindForMeta('window', kindEnum)).toBe(kindEnum.Event);
+        });
+
+        test('maps field identifiers to Field kind', () => {
+            expect(getCompletionKindForMeta('field:identifier', kindEnum)).toBe(kindEnum.Field);
+        });
+
+        test('maps unknown meta to Text kind', () => {
+            expect(getCompletionKindForMeta('unknown', kindEnum)).toBe(kindEnum.Text);
+        });
+    });
+
+    describe('mapOperatorToCompletionItem', () => {
+        const monacoStub = createMockMonaco();
+
+        test('maps a simple operator entry without snippet', () => {
+            const operator: OperatorEntry = {
+                value: '$eq',
+                meta: 'query:comparison',
+                description: 'Matches values equal to a specified value.',
+            };
+
+            const completion = mapOperatorToCompletionItem(operator, testRange, monacoStub);
+
+            expect(getLabelText(completion.label)).toBe('$eq');
+            expect(completion.kind).toBe(mockCompletionItemKind.Operator);
+            expect(completion.insertText).toBe('$eq');
+            expect(completion.insertTextRules).toBeUndefined();
+            expect((completion.documentation as { value: string }).value).toContain(
+                'Matches values equal to a specified value.',
+            );
+            expect(completion.range).toBe(testRange);
+        });
+
+        test('maps an operator entry with snippet', () => {
+            const operator: OperatorEntry = {
+                value: '$gt',
+                meta: 'query:comparison',
+                description: 'Greater than',
+                snippet: '{ $gt: ${1:value} }',
+            };
+
+            const completion = mapOperatorToCompletionItem(operator, testRange, monacoStub);
+
+            expect(getLabelText(completion.label)).toBe('$gt');
+            expect(completion.insertText).toBe('{ \\$gt: ${1:value} }');
+            expect(completion.insertTextRules).toBe(mockInsertTextRule.InsertAsSnippet);
+        });
+
+        test('maps a BSON constructor with link', () => {
+            const operator: OperatorEntry = {
+                value: 'ObjectId',
+                meta: 'bson',
+                description: 'Creates a new ObjectId value.',
+                snippet: 'ObjectId("${1:hex}")',
+                link: 'https://docs.example.com/objectid',
+            };
+
+            const completion = mapOperatorToCompletionItem(operator, testRange, monacoStub);
+
+            expect(getLabelText(completion.label)).toBe('ObjectId');
+            expect(completion.kind).toBe(mockCompletionItemKind.Constructor);
+            expect(completion.insertText).toBe('ObjectId("${1:hex}")');
+            expect(completion.insertTextRules).toBe(mockInsertTextRule.InsertAsSnippet);
+            const markdown = (completion.documentation as { value: string }).value;
+            expect(markdown).toContain('Creates a new ObjectId value.');
+            expect(markdown).toContain('https://docs.example.com/objectid');
+        });
+
+        test('uses the provided range', () => {
+            const explicitRange: monacoEditor.IRange = {
+                startLineNumber: 3,
+                endLineNumber: 3,
+                startColumn: 5,
+                endColumn: 10,
+            };
+
+            const operator: OperatorEntry = {
+                value: '$in',
+                meta: 'query:comparison',
+                description: 'Matches any value in an array.',
+            };
+
+            const completion = mapOperatorToCompletionItem(operator, explicitRange, monacoStub);
+            expect(completion.range).toBe(explicitRange);
+        });
+    });
+
+    describe('getMetaTagsForEditorType', () => {
+        test('returns FILTER_COMPLETION_META for Filter editor type', () => {
+            const metaTags = getMetaTagsForEditorType(EditorType.Filter);
+            expect(metaTags).toBe(FILTER_COMPLETION_META);
+        });
+
+        test('returns PROJECTION_COMPLETION_META for Project editor type', () => {
+            const metaTags = getMetaTagsForEditorType(EditorType.Project);
+            expect(metaTags).toBe(PROJECTION_COMPLETION_META);
+        });
+
+        test('returns PROJECTION_COMPLETION_META for Sort editor type', () => {
+            const metaTags = getMetaTagsForEditorType(EditorType.Sort);
+            expect(metaTags).toBe(PROJECTION_COMPLETION_META);
+        });
+
+        test('returns FILTER_COMPLETION_META for undefined (fallback)', () => {
+            const metaTags = getMetaTagsForEditorType(undefined);
+            expect(metaTags).toBe(FILTER_COMPLETION_META);
+        });
+    });
+
+    describe('createCompletionItems', () => {
+        const mockMonaco = createMockMonaco();
+
+        afterEach(() => {
+            clearAllCompletionContexts();
+        });
+
+        test('returns items for filter context using documentdb-constants', () => {
+            const items = createCompletionItems({
+                editorType: EditorType.Filter,
+                sessionId: undefined,
+                range: testRange,
+                isDollarPrefix: false,
+                monaco: mockMonaco,
+            });
+
+            // Should return the filter completions from documentdb-constants
+            expect(items.length).toBeGreaterThan(0);
+
+            // All items should have required CompletionItem properties
+            for (const item of items) {
+                expect(item.label).toBeDefined();
+                expect(getLabelText(item.label)).toBeDefined();
+                expect(item.kind).toBeDefined();
+                expect(item.insertText).toBeDefined();
+                expect(item.range).toBe(testRange);
+            }
+        });
+
+        test('filter completions include query operators like $eq, $gt, $in at value position', () => {
+            const items = createCompletionItems({
+                editorType: EditorType.Filter,
+                sessionId: undefined,
+                range: testRange,
+                isDollarPrefix: false,
+                monaco: mockMonaco,
+                cursorContext: { position: 'value', fieldName: 'x' },
+            });
+
+            const labels = items.map((item) => getLabelText(item.label));
+            expect(labels).toContain('$eq');
+            expect(labels).toContain('$gt');
+            expect(labels).toContain('$in');
+        });
+
+        test('filter completions include BSON constructors like ObjectId at value position', () => {
+            const items = createCompletionItems({
+                editorType: EditorType.Filter,
+                sessionId: undefined,
+                range: testRange,
+                isDollarPrefix: false,
+                monaco: mockMonaco,
+                cursorContext: { position: 'value', fieldName: 'x' },
+            });
+
+            const labels = items.map((item) => getLabelText(item.label));
+            expect(labels).toContain('ObjectId');
+            expect(labels).toContain('UUID');
+            expect(labels).toContain('ISODate');
+        });
+
+        test('filter completions do NOT include JS globals like console, Math, function', () => {
+            const items = createCompletionItems({
+                editorType: EditorType.Filter,
+                sessionId: undefined,
+                range: testRange,
+                isDollarPrefix: false,
+                monaco: mockMonaco,
+            });
+
+            const labels = items.map((item) => getLabelText(item.label));
+            expect(labels).not.toContain('console');
+            expect(labels).not.toContain('Math');
+            expect(labels).not.toContain('function');
+            expect(labels).not.toContain('window');
+            expect(labels).not.toContain('document');
+            expect(labels).not.toContain('Array');
+            expect(labels).not.toContain('Object');
+            expect(labels).not.toContain('String');
+        });
+
+        test('filter completions do NOT include aggregation stages', () => {
+            const items = createCompletionItems({
+                editorType: EditorType.Filter,
+                sessionId: undefined,
+                range: testRange,
+                isDollarPrefix: false,
+                monaco: mockMonaco,
+            });
+
+            const labels = items.map((item) => getLabelText(item.label));
+            // $group, $unwind and $lookup exist only as aggregation stages, so a filter must not offer them
+            expect(labels).not.toContain('$group');
+            expect(labels).not.toContain('$unwind');
+            expect(labels).not.toContain('$lookup');
+        });
+
+        test('filter completions at value position never outnumber getFilteredCompletions for FILTER_COMPLETION_META', () => {
+            const items = createCompletionItems({
+                editorType: EditorType.Filter,
+                sessionId: undefined,
+                range: testRange,
+                isDollarPrefix: false,
+                monaco: mockMonaco,
+                cursorContext: { position: 'value', fieldName: 'x' },
+            });
+
+            const expected = getFilteredCompletions({ meta: [...FILTER_COMPLETION_META] });
+            // Value position includes operators + BSON constructors (minus key-position operators)
+            expect(items.length).toBeGreaterThan(0);
+            expect(items.length).toBeLessThanOrEqual(expected.length);
+        });
+
+        test('default (undefined editor type) matches filter completions', () => {
+            const filterItems = createCompletionItems({
+                editorType: EditorType.Filter,
+                sessionId: undefined,
+                range: testRange,
+                isDollarPrefix: false,
+                monaco: mockMonaco,
+            });
+
+            const defaultItems = createCompletionItems({
+                editorType: undefined,
+                sessionId: undefined,
+                range: testRange,
+                isDollarPrefix: false,
+                monaco: mockMonaco,
+            });
+
+            expect(defaultItems).toHaveLength(filterItems.length);
+        });
+    });
+
+    describe('mapFieldToCompletionItem', () => {
+        const monacoStub = createMockMonaco();
+
+        test('maps a simple field to a CompletionItem', () => {
+            const fieldData = {
+                fieldName: 'age',
+                displayType: 'Number',
+                bsonType: 'int32',
+                isSparse: false,
+                insertText: 'age',
+                referenceText: '$age',
+            };
+
+            const completion = mapFieldToCompletionItem(fieldData, testRange, monacoStub);
+
+            expect(completion.label).toEqual({ label: 'age', description: 'Number' });
+            expect(completion.kind).toBe(mockCompletionItemKind.Field);
+            expect(completion.insertText).toBe('age: $1');
+            expect(completion.insertTextRules).toBe(mockInsertTextRule.InsertAsSnippet);
+            expect(completion.sortText).toBe('0_age');
+            expect(completion.range).toBe(testRange);
+        });
+
+        test('includes (sparse) indicator for sparse fields', () => {
+            const fieldData = {
+                fieldName: 'optionalField',
+                displayType: 'String',
+                bsonType: 'string',
+                isSparse: true,
+                insertText: 'optionalField',
+                referenceText: '$optionalField',
+            };
+
+            const completion = mapFieldToCompletionItem(fieldData, testRange, monacoStub);
+
+            expect((completion.label as { description: string }).description).toBe('String (sparse)');
+        });
+
+        test('uses pre-escaped insertText for special field names', () => {
+            const fieldData = {
+                fieldName: 'address.city',
+                displayType: 'String',
+                bsonType: 'string',
+                isSparse: false,
+                insertText: '"address.city"',
+                referenceText: '$address.city',
+            };
+
+            const completion = mapFieldToCompletionItem(fieldData, testRange, monacoStub);
+
+            expect((completion.label as { label: string }).label).toBe('address.city');
+            expect(completion.insertText).toBe('"address.city": $1');
+        });
+    });
+
+    describe('field completions via store', () => {
+        const monacoStub = createMockMonaco();
+
+        afterEach(() => {
+            clearAllCompletionContexts();
+        });
+
+        test('field completions appear when store has data', () => {
+            setCompletionContext('test-session', {
+                fields: [
+                    {
+                        fieldName: 'name',
+                        displayType: 'String',
+                        bsonType: 'string',
+                        isSparse: false,
+                        insertText: 'name',
+                        referenceText: '$name',
+                    },
+                    {
+                        fieldName: 'age',
+                        displayType: 'Number',
+                        bsonType: 'int32',
+                        isSparse: false,
+                        insertText: 'age',
+                        referenceText: '$age',
+                    },
+                ],
+            });
+
+            const completions = createCompletionItems({
+                editorType: EditorType.Filter,
+                sessionId: 'test-session',
+                range: testRange,
+                isDollarPrefix: false,
+                monaco: monacoStub,
+            });
+
+            const labelTexts = completions.map((c) => getLabelText(c.label));
+            expect(labelTexts).toContain('name');
+            expect(labelTexts).toContain('age');
+        });
+
+        test('field completions have sortText prefix so they sort first', () => {
+            setCompletionContext('test-session', {
+                fields: [
+                    {
+                        fieldName: 'name',
+                        displayType: 'String',
+                        bsonType: 'string',
+                        isSparse: false,
+                        insertText: 'name',
+                        referenceText: '$name',
+                    },
+                ],
+            });
+
+            const completions = createCompletionItems({
+                editorType: EditorType.Filter,
+                sessionId: 'test-session',
+                range: testRange,
+                isDollarPrefix: false,
+                monaco: monacoStub,
+            });
+
+            const nameCompletion = completions.find((c) => getLabelText(c.label) === 'name');
+            expect(nameCompletion?.sortText).toBe('0_name');
+        });
+
+        test('empty store returns all operator completions', () => {
+            const completions = createCompletionItems({
+                editorType: EditorType.Filter,
+                sessionId: 'nonexistent-session',
+                range: testRange,
+                isDollarPrefix: false,
+                monaco: monacoStub,
+            });
+
+            // No cursorContext is passed; key-level and value-level operators are both expected
+            const labelTexts = completions.map((c) => getLabelText(c.label));
+            expect(labelTexts).toContain('$and');
+            expect(labelTexts).toContain('$or');
+            expect(labelTexts).toContain('$gt');
+        });
+
+        test('undefined sessionId returns all operator completions', () => {
+            const completions = createCompletionItems({
+                editorType: EditorType.Filter,
+                sessionId: undefined,
+                range: testRange,
+                isDollarPrefix: false,
+                monaco: monacoStub,
+            });
+
+            // No cursorContext is passed; key-level and value-level operators are both expected
+            const labelTexts = completions.map((c) => getLabelText(c.label));
+            expect(labelTexts).toContain('$and');
+            expect(labelTexts).toContain('$or');
+            expect(labelTexts).toContain('$gt');
+        });
+    });
+
+    describe('getOperatorSortPrefix', () => {
+        test('returns undefined when no fieldBsonTypes provided', () => {
+            const operator: OperatorEntry = {
+                value: '$eq',
+                meta: 'query:comparison',
+                description: 'Equals',
+            };
+            expect(getOperatorSortPrefix(operator, undefined)).toBeUndefined();
+            expect(getOperatorSortPrefix(operator, [])).toBeUndefined();
+        });
+
+        test('returns "1a_" for universal comparison operator (no applicableBsonTypes)', () => {
+            const operator: OperatorEntry = {
+                value: '$eq',
+                meta: 'query:comparison',
+                description: 'Equals',
+            };
+            expect(getOperatorSortPrefix(operator, ['string'])).toBe('1a_');
+        });
+
+        test('returns "1b_" for universal non-comparison operator', () => {
+            const operator: OperatorEntry = {
+                value: '$exists',
+                meta: 'query:element',
+                description: 'Exists',
+            };
+            expect(getOperatorSortPrefix(operator, ['string'])).toBe('1b_');
+        });
+
+        test('returns "0_" for type-relevant operator (applicableBsonTypes matches)', () => {
+            const operator: OperatorEntry = {
+                value: '$regex',
+                meta: 'query:evaluation',
+                description: 'Regex match',
+                applicableBsonTypes: ['string'],
+            };
+            expect(getOperatorSortPrefix(operator, ['string'])).toBe('0_');
+        });
+
+        test('returns "2_" for non-matching operator (applicableBsonTypes does not match)', () => {
+            const operator: OperatorEntry = {
+                value: '$regex',
+                meta: 'query:evaluation',
+                description: 'Regex match',
+                applicableBsonTypes: ['string'],
+            };
+            expect(getOperatorSortPrefix(operator, ['int32'])).toBe('2_');
+        });
+
+        test('handles polymorphic fields (multiple bsonTypes)', () => {
+            const regexOperator: OperatorEntry = {
+                value: '$regex',
+                meta: 'query:evaluation',
+                description: 'Regex match',
+                applicableBsonTypes: ['string'],
+            };
+            // One of the field's observed types (string) is applicable, so $regex counts as relevant
+            expect(getOperatorSortPrefix(regexOperator, ['int32', 'string'])).toBe('0_');
+        });
+
+        test('returns "2_" when operator types and field types have no intersection', () => {
+            const sizeOperator: OperatorEntry = {
+                value: '$size',
+                meta: 'query:array',
+                description: 'Array size',
+                applicableBsonTypes: ['array'],
+            };
+            expect(getOperatorSortPrefix(sizeOperator, ['string', 'int32'])).toBe('2_');
+        });
+    });
+
+    describe('type-aware operator sorting in mapOperatorToCompletionItem', () => {
+        const monacoStub = createMockMonaco();
+
+        test('sortText is undefined when no fieldBsonTypes provided', () => {
+            const operator: OperatorEntry = {
+                value: '$eq',
+                meta: 'query:comparison',
+                description: 'Equals',
+            };
+            const completion = mapOperatorToCompletionItem(operator, testRange, monacoStub);
+            expect(completion.sortText).toBeUndefined();
+        });
+
+        test('sortText is undefined when empty fieldBsonTypes provided', () => {
+            const operator: OperatorEntry = {
+                value: '$eq',
+                meta: 'query:comparison',
+                description: 'Equals',
+            };
+            const completion = mapOperatorToCompletionItem(operator, testRange, monacoStub, []);
+            expect(completion.sortText).toBeUndefined();
+        });
+
+        test('universal comparison operator gets "1a_" prefix when fieldBsonTypes provided', () => {
+            const operator: OperatorEntry = {
+                value: '$eq',
+                meta: 'query:comparison',
+                description: 'Equals',
+            };
+            const completion = mapOperatorToCompletionItem(operator, testRange, monacoStub, ['int32']);
+            expect(completion.sortText).toBe('1a_$eq');
+        });
+
+        test('type-relevant operator gets "0_" prefix', () => {
+            const operator: OperatorEntry = {
+                value: '$regex',
+                meta: 'query:evaluation',
+                description: 'Regex match',
+                applicableBsonTypes: ['string'],
+            };
+            const completion = mapOperatorToCompletionItem(operator, testRange, monacoStub, ['string']);
+            expect(completion.sortText).toBe('0_$regex');
+        });
+
+        test('non-matching operator gets "2_" prefix (demoted, not hidden)', () => {
+            const operator: OperatorEntry = {
+                value: '$regex',
+                meta: 'query:evaluation',
+                description: 'Regex match',
+                applicableBsonTypes: ['string'],
+            };
+            const completion = mapOperatorToCompletionItem(operator, testRange, monacoStub, ['int32']);
+            expect(completion.sortText).toBe('2_$regex');
+        });
+    });
+
+    describe('type-aware sorting via createCompletionItems', () => {
+        const mockMonaco = createMockMonaco();
+
+        afterEach(() => {
+            clearAllCompletionContexts();
+        });
+
+        test('without fieldBsonTypes, operators get the plain "0_" sort prefix at value position', () => {
+            const items = createCompletionItems({
+                editorType: EditorType.Filter,
+                sessionId: undefined,
+                range: testRange,
+                isDollarPrefix: false,
+                monaco: mockMonaco,
+                cursorContext: { position: 'value', fieldName: 'x' },
+            });
+
+            const regexItem = items.find((i) => getLabelText(i.label) === '$regex');
+            // No type information was supplied, so every operator shares the same "0_" prefix
+            expect(regexItem?.sortText).toBe('0_$regex');
+
+            const eqItem = items.find((i) => getLabelText(i.label) === '$eq');
+            expect(eqItem?.sortText).toBe('0_$eq');
+        });
+
+        test('with fieldBsonTypes=["string"] at operator position, $regex gets "0_" and $size gets "2_"', () => {
+            const items = createCompletionItems({
+                editorType: EditorType.Filter,
+                sessionId: undefined,
+                range: testRange,
+                isDollarPrefix: false,
+                monaco: mockMonaco,
+                fieldBsonTypes: ['string'],
+                cursorContext: { position: 'operator', fieldName: 'x' },
+            });
+
+            const regexItem = items.find((i) => getLabelText(i.label) === '$regex');
+            expect(regexItem?.sortText).toBe('0_$regex');
+
+            const sizeItem = items.find((i) => getLabelText(i.label) === '$size');
+            expect(sizeItem?.sortText).toBe('2_$size');
+
+            // Comparison operators like $eq get "1a_" (promoted over other universals)
+            const eqItem = items.find((i) => getLabelText(i.label) === '$eq');
+            expect(eqItem?.sortText).toBe('1a_$eq');
+        });
+
+        test('with fieldBsonTypes=["int32"] at operator position, $regex gets "2_" (demoted, still present)', () => {
+            const context: CursorContext = { position: 'operator', fieldName: 'x' };
+            const items = createCompletionItems({
+                editorType: EditorType.Filter,
+                sessionId: undefined,
+                range: testRange,
+                isDollarPrefix: false,
+                monaco: mockMonaco,
+                fieldBsonTypes: ['int32'],
+                cursorContext: context,
+            });
+
+            const labels = items.map((i) => getLabelText(i.label));
+            // $regex is still in the list, just demoted
+            expect(labels).toContain('$regex');
+
+            const regexItem = items.find((i) => getLabelText(i.label) === '$regex');
+            expect(regexItem?.sortText).toBe('2_$regex');
+
+            // Bitwise operators are expected to be type-relevant for an int32 field
+            const bitsAllSetItem = items.find((i) => getLabelText(i.label) === '$bitsAllSet');
+            expect(bitsAllSetItem?.sortText).toBe('0_$bitsAllSet');
+        });
+
+        test('all operators still present regardless of fieldBsonTypes at operator position', () => {
+            const itemsWithoutType = createCompletionItems({
+                editorType: EditorType.Filter,
+                sessionId: undefined,
+                range: testRange,
+                isDollarPrefix: false,
+                monaco: mockMonaco,
+                cursorContext: { position: 'operator', fieldName: 'x' },
+            });
+
+            const itemsWithType = createCompletionItems({
+                editorType: EditorType.Filter,
+                sessionId: undefined,
+                range: testRange,
+                isDollarPrefix: false,
+                monaco: mockMonaco,
+                fieldBsonTypes: ['int32'],
+                cursorContext: { position: 'operator', fieldName: 'x' },
+            });
+
+            // Same number of items — nothing filtered out
+            expect(itemsWithType).toHaveLength(itemsWithoutType.length);
+        });
+
+        test('field items still get "0_" prefix even when fieldBsonTypes is set', () => {
+            setCompletionContext('test-session', {
+                fields: [
+                    {
+                        fieldName: 'age',
+                        displayType: 'Number',
+                        bsonType: 'int32',
+                        isSparse: false,
+                        insertText: 'age',
+                        referenceText: '$age',
+                    },
+                ],
+            });
+
+            const items = createCompletionItems({
+                editorType: EditorType.Filter,
+                sessionId: 'test-session',
+                range: testRange,
+                isDollarPrefix: false,
+                monaco: mockMonaco,
+                fieldBsonTypes: ['int32'],
+                cursorContext: { position: 'key', depth: 1 },
+            });
+
+            const fieldItem = items.find((i) => getLabelText(i.label) === 'age');
+            expect(fieldItem?.sortText).toBe('0_age');
+        });
+    });
+
+    describe('stripOuterBraces', () => {
+        test('strips outer { } from operator snippets', () => {
+            expect(stripOuterBraces('{ $gt: ${1:value} }')).toBe('$gt: ${1:value}');
+        });
+        // Only the outermost "{ " … " }" wrapper is expected to go; nested brackets and braces stay intact.
+        test('preserves inner brackets', () => {
+            expect(stripOuterBraces('{ $in: [${1:value}] }')).toBe('$in: [${1:value}]');
+        });
+
+        test('preserves inner braces', () => {
+            expect(stripOuterBraces('{ $elemMatch: { ${1:query} } }')).toBe('$elemMatch: { ${1:query} }');
+        });
+        // Input that is not wrapped in braces is expected back unchanged.
+        test('returns unchanged if not wrapped', () => {
+            expect(stripOuterBraces('ObjectId("${1:hex}")')).toBe('ObjectId("${1:hex}")');
+        });
+
+        test('returns unchanged for non-matching patterns', () => {
+            expect(stripOuterBraces('$gt')).toBe('$gt');
+        });
+    });
+
+    describe('getCategoryLabel', () => {
+        test('extracts sub-category from qualified meta tag', () => {
+            expect(getCategoryLabel('query:comparison')).toBe('comparison');
+            expect(getCategoryLabel('query:logical')).toBe('logical');
+            expect(getCategoryLabel('query:element')).toBe('element');
+            expect(getCategoryLabel('query:array')).toBe('array');
+        });
+        // A tag without ":" has no sub-category part, so the tag itself is the expected label.
+        test('returns whole tag when no colon', () => {
+            expect(getCategoryLabel('bson')).toBe('bson');
+            expect(getCategoryLabel('variable')).toBe('variable');
+        });
+    });
+
+    describe('escapeSnippetDollars', () => {
+        test('escapes $ before operator names in snippets', () => {
+            expect(escapeSnippetDollars('{ $gt: ${1:value} }')).toBe('{ \\$gt: ${1:value} }'); // '\\$' is \$ at runtime
+        });
+
+        test('preserves tab stop syntax', () => {
+            expect(escapeSnippetDollars('${1:value}')).toBe('${1:value}'); // ${n:placeholder} form is unchanged
+            expect(escapeSnippetDollars('$1')).toBe('$1'); // bare $n form is unchanged
+        });
+
+        test('escapes multiple operator names', () => {
+            expect(escapeSnippetDollars('{ $and: [{ $gt: ${1:value} }] }')).toBe('{ \\$and: [{ \\$gt: ${1:value} }] }');
+        });
+
+        test('does not escape BSON constructor snippets', () => {
+            expect(escapeSnippetDollars('ObjectId("${1:hex}")')).toBe('ObjectId("${1:hex}")'); // only $ is the placeholder's
+        });
+
+        test('escapes stripped operator snippets', () => {
+            expect(escapeSnippetDollars('$gt: ${1:value}')).toBe('\\$gt: ${1:value}'); // input has no outer { }
+            expect(escapeSnippetDollars('$in: [${1:value}]')).toBe('\\$in: [${1:value}]');
+        });
+    });
+
+    // ---------------------------------------------------------------
+    // Context-sensitive completions (Step 4.5)
+    // ---------------------------------------------------------------
+    describe('context-sensitive completions', () => {
+        const mockMonaco = createMockMonaco(); // passed as `monaco` by the tests nested in this describe
+
+        afterEach(() => {
+            clearAllCompletionContexts(); // presumably resets contexts set by setCompletionContext — confirm
+        });
+
+        describe('key position', () => {
+            const keyContext: CursorContext = { position: 'key', depth: 1 }; // reused by every test in this describe
+
+            test('shows field names when store has data', () => {
+                setCompletionContext('test-session', {
+                    fields: [
+                        {
+                            fieldName: 'name',
+                            displayType: 'String',
+                            bsonType: 'string',
+                            isSparse: false,
+                            insertText: 'name',
+                            referenceText: '$name',
+                        },
+                    ],
+                });
+
+                const items = createCompletionItems({
+                    editorType: EditorType.Filter,
+                    sessionId: 'test-session', // same id that was given to setCompletionContext above
+                    range: testRange,
+                    isDollarPrefix: false,
+                    monaco: mockMonaco,
+                    cursorContext: keyContext,
+                });
+
+                const labels = items.map((i) => getLabelText(i.label));
+                expect(labels).toContain('name');
+            });
+
+            test('shows key-position operators ($and, $or, $nor)', () => {
+                const items = createCompletionItems({
+                    editorType: EditorType.Filter,
+                    sessionId: undefined, // no session id; only operator labels are asserted below
+                    range: testRange,
+                    isDollarPrefix: false,
+                    monaco: mockMonaco,
+                    cursorContext: keyContext,
+                });
+
+                const labels = items.map((i) => getLabelText(i.label));
+                expect(labels).toContain('$and');
+                expect(labels).toContain('$or');
+                expect(labels).toContain('$nor');
+                expect(labels).toContain('$comment'); // also expected although not named in the test title
+                expect(labels).toContain('$expr'); // also expected although not named in the test title
+                // $not is a field-level operator, NOT a key-position operator
+                expect(labels).not.toContain('$not');
+            });
+
+            test('does NOT show value-level operators ($gt, $lt, $regex, $eq)', () => {
+                const items = createCompletionItems({
+                    editorType: EditorType.Filter,
+                    sessionId: undefined,
+                    range: testRange,
+                    isDollarPrefix: false,
+                    monaco: mockMonaco,
+                    cursorContext: keyContext,
+                });
+
+                const labels = items.map((i) => getLabelText(i.label));
+                expect(labels).not.toContain('$gt');
+                expect(labels).not.toContain('$lt');
+                expect(labels).not.toContain('$regex');
+                expect(labels).not.toContain('$eq');
+                expect(labels).not.toContain('$in'); // checked in addition to the operators in the title
+                expect(labels).not.toContain('$exists'); // checked in addition to the operators in the title
+            });
+
+            test('does NOT show BSON constructors', () => {
+                const items = createCompletionItems({
+                    editorType: EditorType.Filter,
+                    sessionId: undefined,
+                    range: testRange,
+                    isDollarPrefix: false,
+                    monaco: mockMonaco,
+                    cursorContext: keyContext,
+                });
+
+                const labels = items.map((i) => getLabelText(i.label));
+                expect(labels).not.toContain('ObjectId');
+                expect(labels).not.toContain('UUID');
+                expect(labels).not.toContain('ISODate');
+            });
+
+            test('fields sort before operators', () => {
+                setCompletionContext('test-session', {
+                    fields: [
+                        {
+                            fieldName: 'age',
+                            displayType: 'Number',
+                            bsonType: 'int32',
+                            isSparse: false,
+                            insertText: 'age',
+                            referenceText: '$age',
+                        },
+                    ],
+                });
+
+                const items = createCompletionItems({
+                    editorType: EditorType.Filter,
+                    sessionId: 'test-session',
+                    range: testRange,
+                    isDollarPrefix: false,
+                    monaco: mockMonaco,
+                    cursorContext: keyContext,
+                });
+
+                const fieldItem = items.find((i) => getLabelText(i.label) === 'age');
+                const andItem = items.find((i) => getLabelText(i.label) === '$and');
+                expect(fieldItem?.sortText).toBe('0_age'); // '0_' compares lower than the operator's '1_'
+                expect(andItem?.sortText).toBe('1_$and');
+            });
+        });
+
+        describe('value position', () => {
+            const valueContext: CursorContext = { position: 'value', fieldName: 'age' }; // no fieldBsonType given
+
+            test('shows BSON constructors', () => {
+                const items = createCompletionItems({
+                    editorType: EditorType.Filter,
+                    sessionId: undefined,
+                    range: testRange,
+                    isDollarPrefix: false,
+                    monaco: mockMonaco,
+                    cursorContext: valueContext,
+                });
+
+                const labels = items.map((i) => getLabelText(i.label));
+                expect(labels).toContain('ObjectId');
+                expect(labels).toContain('UUID');
+                expect(labels).toContain('ISODate');
+            });
+
+            test('shows query operators (with brace-wrapping snippets)', () => {
+                const items = createCompletionItems({
+                    editorType: EditorType.Filter,
+                    sessionId: undefined,
+                    range: testRange,
+                    isDollarPrefix: false,
+                    monaco: mockMonaco,
+                    cursorContext: valueContext,
+                });
+
+                const labels = items.map((i) => getLabelText(i.label));
+                expect(labels).toContain('$gt');
+                expect(labels).toContain('$eq');
+                expect(labels).toContain('$in');
+
+                // Operators should have their full brace-wrapping snippets at value position
+                const gtItem = items.find((i) => getLabelText(i.label) === '$gt');
+                expect(gtItem?.insertText).toBe('{ \\$gt: ${1:value} }'); // braces kept, operator $ escaped
+            });
+
+            test('operators sort before BSON constructors', () => {
+                const items = createCompletionItems({
+                    editorType: EditorType.Filter,
+                    sessionId: undefined,
+                    range: testRange,
+                    isDollarPrefix: false,
+                    monaco: mockMonaco,
+                    cursorContext: valueContext,
+                });
+
+                const gtItem = items.find((i) => getLabelText(i.label) === '$gt');
+                const objectIdItem = items.find((i) => getLabelText(i.label) === 'ObjectId');
+                expect(gtItem?.sortText).toBe('0_$gt'); // '0_' compares lower than '3_'
+                expect(objectIdItem?.sortText).toBe('3_ObjectId');
+            });
+
+            test('includes JS globals and common methods after BSON constructors', () => {
+                const items = createCompletionItems({
+                    editorType: EditorType.Filter,
+                    sessionId: undefined,
+                    range: testRange,
+                    isDollarPrefix: false,
+                    monaco: mockMonaco,
+                    cursorContext: valueContext,
+                });
+
+                const labels = items.map((i) => getLabelText(i.label));
+                // Class constructors
+                expect(labels).toContain('Date');
+                expect(labels).toContain('RegExp');
+                // Static methods (labels include the trailing parentheses)
+                expect(labels).toContain('Date.now()');
+                expect(labels).toContain('Math.floor()');
+                expect(labels).toContain('Math.min()');
+                expect(labels).toContain('Math.max()');
+                // Global value
+                expect(labels).toContain('Infinity');
+
+                // JS globals sort after BSON constructors (4_ > 3_)
+                const dateItem = items.find((i) => getLabelText(i.label) === 'Date');
+                expect(dateItem?.sortText).toBe('4_Date');
+            });
+
+            test('does NOT show key-position operators ($and, $or)', () => {
+                const items = createCompletionItems({
+                    editorType: EditorType.Filter,
+                    sessionId: undefined,
+                    range: testRange,
+                    isDollarPrefix: false,
+                    monaco: mockMonaco,
+                    cursorContext: valueContext,
+                });
+
+                const labels = items.map((i) => getLabelText(i.label));
+                expect(labels).not.toContain('$and');
+                expect(labels).not.toContain('$or');
+            });
+
+            test('does NOT show field names', () => {
+                setCompletionContext('test-session', {
+                    fields: [
+                        {
+                            fieldName: 'name',
+                            displayType: 'String',
+                            bsonType: 'string',
+                            isSparse: false,
+                            insertText: 'name',
+                            referenceText: '$name',
+                        },
+                    ],
+                });
+
+                const items = createCompletionItems({
+                    editorType: EditorType.Filter,
+                    sessionId: 'test-session', // field data is supplied for this session, yet must not be offered
+                    range: testRange,
+                    isDollarPrefix: false,
+                    monaco: mockMonaco,
+                    cursorContext: valueContext,
+                });
+
+                const labels = items.map((i) => getLabelText(i.label));
+                expect(labels).not.toContain('name');
+            });
+        });
+
+        describe('operator position', () => {
+            const operatorContext: CursorContext = { position: 'operator', fieldName: 'age' }; // no fieldBsonType given
+
+            test('shows comparison operators ($gt, $lt, $eq, $in) and $not', () => {
+                const items = createCompletionItems({
+                    editorType: EditorType.Filter,
+                    sessionId: undefined,
+                    range: testRange,
+                    isDollarPrefix: false,
+                    monaco: mockMonaco,
+                    cursorContext: operatorContext,
+                });
+
+                const labels = items.map((i) => getLabelText(i.label));
+                expect(labels).toContain('$gt');
+                expect(labels).toContain('$lt');
+                expect(labels).toContain('$eq');
+                expect(labels).toContain('$in');
+                expect(labels).toContain('$exists'); // also expected although not named in the test title
+                expect(labels).toContain('$regex'); // also expected although not named in the test title
+                // $not is a field-level operator, valid at operator position
+                expect(labels).toContain('$not');
+            });
+
+            test('does NOT show key-position operators ($and, $or)', () => {
+                const items = createCompletionItems({
+                    editorType: EditorType.Filter,
+                    sessionId: undefined,
+                    range: testRange,
+                    isDollarPrefix: false,
+                    monaco: mockMonaco,
+                    cursorContext: operatorContext,
+                });
+
+                const labels = items.map((i) => getLabelText(i.label));
+                expect(labels).not.toContain('$and');
+                expect(labels).not.toContain('$or');
+                expect(labels).not.toContain('$nor');
+            });
+
+            test('does NOT show BSON constructors', () => {
+                const items = createCompletionItems({
+                    editorType: EditorType.Filter,
+                    sessionId: undefined,
+                    range: testRange,
+                    isDollarPrefix: false,
+                    monaco: mockMonaco,
+                    cursorContext: operatorContext,
+                });
+
+                const labels = items.map((i) => getLabelText(i.label));
+                expect(labels).not.toContain('ObjectId');
+                expect(labels).not.toContain('UUID');
+            });
+
+            test('does NOT show field names', () => {
+                setCompletionContext('test-session', {
+                    fields: [
+                        {
+                            fieldName: 'name',
+                            displayType: 'String',
+                            bsonType: 'string',
+                            isSparse: false,
+                            insertText: 'name',
+                            referenceText: '$name',
+                        },
+                    ],
+                });
+
+                const items = createCompletionItems({
+                    editorType: EditorType.Filter,
+                    sessionId: 'test-session', // field data is supplied for this session, yet must not be offered
+                    range: testRange,
+                    isDollarPrefix: false,
+                    monaco: mockMonaco,
+                    cursorContext: operatorContext,
+                });
+
+                const labels = items.map((i) => getLabelText(i.label));
+                expect(labels).not.toContain('name');
+            });
+
+            test('applies type-aware sorting when fieldBsonType is available', () => {
+                const typedContext: CursorContext = {
+                    position: 'operator',
+                    fieldName: 'age',
+                    fieldBsonType: 'int32', // the only difference from operatorContext
+                };
+
+                const items = createCompletionItems({
+                    editorType: EditorType.Filter,
+                    sessionId: undefined,
+                    range: testRange,
+                    isDollarPrefix: false,
+                    monaco: mockMonaco,
+                    cursorContext: typedContext,
+                });
+
+                // $regex has applicableBsonTypes=['string'], doesn't match 'int32' → demoted
+                const regexItem = items.find((i) => getLabelText(i.label) === '$regex');
+                expect(regexItem?.sortText).toBe('2_$regex');
+
+                // $bitsAllSet has applicableBsonTypes containing 'int32' → promoted
+                const bitsItem = items.find((i) => getLabelText(i.label) === '$bitsAllSet');
+                expect(bitsItem?.sortText).toBe('0_$bitsAllSet');
+
+                // $eq is universal comparison → promoted tier
+                const eqItem = items.find((i) => getLabelText(i.label) === '$eq');
+                expect(eqItem?.sortText).toBe('1a_$eq'); // '1a_' falls between the '0_' and '2_' prefixes above
+            });
+
+            test('strips outer braces from operator snippets (Issue A fix)', () => {
+                const items = createCompletionItems({
+                    editorType: EditorType.Filter,
+                    sessionId: undefined,
+                    range: testRange,
+                    isDollarPrefix: false,
+                    monaco: mockMonaco,
+                    cursorContext: operatorContext,
+                });
+
+                // At operator position, snippets should NOT have outer { }
+                const gtItem = items.find((i) => getLabelText(i.label) === '$gt');
+                expect(gtItem?.insertText).toBe('\\$gt: ${1:value}'); // operator $ escaped, placeholder intact
+
+                const inItem = items.find((i) => getLabelText(i.label) === '$in');
+                expect(inItem?.insertText).toBe('\\$in: [${1:value}]');
+
+                const regexItem = items.find((i) => getLabelText(i.label) === '$regex');
+                expect(regexItem?.insertText).toBe('\\$regex: /${1:pattern}/');
+            });
+        });
+
+        describe('array-element position', () => {
+            const arrayContext: CursorContext = { position: 'array-element', parentOperator: '$and' }; // used below
+
+            test('behaves like key position (shows fields + key operators)', () => {
+                setCompletionContext('test-session', {
+                    fields: [
+                        {
+                            fieldName: 'age',
+                            displayType: 'Number',
+                            bsonType: 'int32',
+                            isSparse: false,
+                            insertText: 'age',
+                            referenceText: '$age',
+                        },
+                    ],
+                });
+
+                const items = createCompletionItems({
+                    editorType: EditorType.Filter,
+                    sessionId: 'test-session',
+                    range: testRange,
+                    isDollarPrefix: false,
+                    monaco: mockMonaco,
+                    cursorContext: arrayContext,
+                });
+
+                const labels = items.map((i) => getLabelText(i.label));
+                // Should include fields
+                expect(labels).toContain('age');
+                // Should include key-position operators
+                expect(labels).toContain('$and');
+                expect(labels).toContain('$or');
+                // Should NOT include value-level operators
+                expect(labels).not.toContain('$gt');
+                expect(labels).not.toContain('$regex');
+                // Should NOT include BSON constructors
+                expect(labels).not.toContain('ObjectId');
+            });
+        });
+
+        describe('unknown position', () => {
+            const unknownContext: CursorContext = { position: 'unknown' }; // explicit context with an unknown position
+
+            test('falls back to all completions', () => {
+                const itemsWithContext = createCompletionItems({
+                    editorType: EditorType.Filter,
+                    sessionId: undefined,
+                    range: testRange,
+                    isDollarPrefix: false,
+                    monaco: mockMonaco,
+                    cursorContext: unknownContext,
+                });
+
+                const itemsWithoutContext = createCompletionItems({
+                    editorType: EditorType.Filter,
+                    sessionId: undefined,
+                    range: testRange,
+                    isDollarPrefix: false,
+                    monaco: mockMonaco,
+                }); // identical call except that cursorContext is omitted
+
+                // Both calls should yield lists of equal length (only the length is compared, not the items)
+                expect(itemsWithContext).toHaveLength(itemsWithoutContext.length);
+                const labels = itemsWithContext.map((i) => getLabelText(i.label));
+                // All completions include key-position operators
+                expect(labels).toContain('$and');
+                expect(labels).toContain('$or');
+                // Also include value-position operators and BSON constructors
+                expect(labels).toContain('$gt');
+                expect(labels).toContain('ObjectId');
+            });
+        });
+
+        describe('no cursorContext (undefined)', () => {
+            test('falls back to all completions (fields + operators + BSON + JS globals)', () => {
+                const items = createCompletionItems({
+                    editorType: EditorType.Filter,
+                    sessionId: undefined,
+                    range: testRange,
+                    isDollarPrefix: false,
+                    monaco: mockMonaco,
+                    cursorContext: undefined, // passed explicitly as undefined rather than omitted
+                });
+
+                // Spot-check only: fields and JS globals from the test title are not asserted here
+                const labels = items.map((i) => getLabelText(i.label));
+                expect(labels).toContain('$and');
+                expect(labels).toContain('$or');
+                expect(labels).toContain('$gt');
+                expect(labels).toContain('ObjectId');
+            });
+        });
+
+        describe('needsWrapping (empty editor, no braces)', () => {
+            test('field insertText is wrapped with { } when needsWrapping is true', () => {
+                setCompletionContext('test-session', {
+                    fields: [
+                        {
+                            fieldName: 'name',
+                            displayType: 'String',
+                            bsonType: 'string',
+                            isSparse: false,
+                            insertText: 'name',
+                            referenceText: '$name',
+                        },
+                    ],
+                });
+
+                const items = createCompletionItems({
+                    editorType: EditorType.Filter,
+                    sessionId: 'test-session',
+                    range: testRange,
+                    isDollarPrefix: false,
+                    monaco: mockMonaco,
+                    cursorContext: { position: 'unknown' },
+                    needsWrapping: true,
+                });
+
+                const fieldItem = items.find((i) => getLabelText(i.label) === 'name');
+                expect(fieldItem?.insertText).toBe('{ name: $1 }'); // braces added around the 'name: $1' snippet
+            });
+
+            test('field insertText is NOT wrapped when needsWrapping is false', () => {
+                setCompletionContext('test-session', {
+                    fields: [
+                        {
+                            fieldName: 'name',
+                            displayType: 'String',
+                            bsonType: 'string',
+                            isSparse: false,
+                            insertText: 'name',
+                            referenceText: '$name',
+                        },
+                    ],
+                });
+
+                const items = createCompletionItems({
+                    editorType: EditorType.Filter,
+                    sessionId: 'test-session',
+                    range: testRange,
+                    isDollarPrefix: false,
+                    monaco: mockMonaco,
+                    cursorContext: { position: 'unknown' },
+                    needsWrapping: false, // the only argument that differs from the previous test
+                });
+
+                const fieldItem = items.find((i) => getLabelText(i.label) === 'name');
+                expect(fieldItem?.insertText).toBe('name: $1'); // same snippet without the outer braces
+            });
+
+            test('operators keep full brace-wrapping snippets when needsWrapping is true', () => {
+                const items = createCompletionItems({
+                    editorType: EditorType.Filter,
+                    sessionId: undefined,
+                    range: testRange,
+                    isDollarPrefix: false,
+                    monaco: mockMonaco,
+                    cursorContext: { position: 'unknown' },
+                    needsWrapping: true,
+                });
+
+                // Operator snippets include { } already — they should NOT be stripped
+                const andItem = items.find((i) => getLabelText(i.label) === '$and');
+                expect(andItem?.insertText).toContain('{'); // presence check only; exact snippet is not pinned
+                expect(andItem?.insertText).toContain('}');
+            });
+        });
+
+        // ---------------------------------------------------------------
+        // Category coverage: verify operator categories at each position
+        // ---------------------------------------------------------------
+        describe('operator category coverage by position', () => {
+            test('key position: only key-position operators, no field-level operators', () => {
+                const items = createCompletionItems({
+                    editorType: EditorType.Filter,
+                    sessionId: undefined, // no session id; only operator labels are asserted below
+                    range: testRange,
+                    isDollarPrefix: false,
+                    monaco: mockMonaco,
+                    cursorContext: { position: 'key', depth: 1 },
+                });
+
+                const labels = items.map((i) => getLabelText(i.label));
+                // Key-position: logical combinators and meta operators
+                expect(labels).toContain('$and'); // query:logical
+                expect(labels).toContain('$or'); // query:logical
+                expect(labels).toContain('$nor'); // query:logical
+                expect(labels).toContain('$comment'); // query:comment
+                expect(labels).toContain('$expr'); // query:expr
+                // Field-level operators must NOT appear at key position
+                expect(labels).not.toContain('$all'); // query:array — field-level
+                expect(labels).not.toContain('$elemMatch'); // query:array — field-level
+                expect(labels).not.toContain('$size'); // query:array — field-level
+                expect(labels).not.toContain('$gt'); // query:comparison
+                expect(labels).not.toContain('$regex'); // query:evaluation
+                expect(labels).not.toContain('$exists'); // query:element
+                expect(labels).not.toContain('$not'); // query:logical — field-level
+            });
+
+            test('value position: includes operators from all field-level categories', () => {
+                const items = createCompletionItems({
+                    editorType: EditorType.Filter,
+                    sessionId: undefined,
+                    range: testRange,
+                    isDollarPrefix: false,
+                    monaco: mockMonaco,
+                    cursorContext: { position: 'value', fieldName: 'x' },
+                });
+
+                const labels = items.map((i) => getLabelText(i.label));
+                // Should include field-level operators from every category
+                expect(labels).toContain('$gt'); // query:comparison
+                expect(labels).toContain('$eq'); // query:comparison
+                expect(labels).toContain('$in'); // query:comparison
+                expect(labels).toContain('$regex'); // query:evaluation
+                expect(labels).toContain('$exists'); // query:element
+                expect(labels).toContain('$type'); // query:element
+                expect(labels).toContain('$all'); // query:array
+                expect(labels).toContain('$elemMatch'); // query:array
+                expect(labels).toContain('$size'); // query:array
+                expect(labels).toContain('$not'); // query:logical (field-level)
+                // Key-position operators should NOT be at value position
+                expect(labels).not.toContain('$and'); // key-position only
+                expect(labels).not.toContain('$or'); // key-position only
+            });
+
+            test('operator position: same field-level categories as value position', () => {
+                const items = createCompletionItems({
+                    editorType: EditorType.Filter,
+                    sessionId: undefined,
+                    range: testRange,
+                    isDollarPrefix: false,
+                    monaco: mockMonaco,
+                    cursorContext: { position: 'operator', fieldName: 'x' },
+                });
+
+                const labels = items.map((i) => getLabelText(i.label)); // one sample operator per category below
+                expect(labels).toContain('$gt'); // query:comparison
+                expect(labels).toContain('$regex'); // query:evaluation
+                expect(labels).toContain('$exists'); // query:element
+                expect(labels).toContain('$all'); // query:array
+                expect(labels).toContain('$not'); // query:logical (field-level)
+                expect(labels).not.toContain('$and'); // key-position only
+            });
+        });
+    });
+
+    // ---------------------------------------------------------------
+    // Type-aware value suggestions
+    // ---------------------------------------------------------------
+    describe('createTypeSuggestions', () => {
+        const mockMonaco = createMockMonaco(); // passed as the third argument in every test below
+
+        test('returns empty array for undefined bsonType', () => {
+            const items = createTypeSuggestions(undefined, testRange, mockMonaco);
+            expect(items).toHaveLength(0);
+        });
+
+        test('returns empty array for unknown bsonType', () => {
+            const items = createTypeSuggestions('unknownType', testRange, mockMonaco);
+            expect(items).toHaveLength(0);
+        });
+
+        test('returns true/false for boolean fields', () => {
+            const items = createTypeSuggestions('boolean', testRange, mockMonaco);
+            expect(items).toHaveLength(2); // exactly the two literals checked below
+
+            const labels = items.map((i) => getLabelText(i.label));
+            expect(labels).toContain('true');
+            expect(labels).toContain('false');
+
+            // Plain text, not snippets
+            const trueItem = items.find((i) => getLabelText(i.label) === 'true');
+            expect(trueItem?.insertText).toBe('true');
+            expect(trueItem?.insertTextRules).toBeUndefined(); // no insert-text rule is set on the item
+            expect(trueItem?.kind).toBe(mockCompletionItemKind.Value);
+        });
+
+        test('returns range query for int fields', () => {
+            const items = createTypeSuggestions('int32', testRange, mockMonaco);
+            expect(items.length).toBeGreaterThanOrEqual(1);
+
+            const labels = items.map((i) => getLabelText(i.label));
+            expect(labels[0]).toContain('$gt'); // the first label must mention both bounds
+            expect(labels[0]).toContain('$lt');
+
+            // Should be a snippet
+            expect(items[0].kind).toBe(mockCompletionItemKind.Snippet);
+        });
+
+        test('returns regex and empty string for string fields', () => {
+            const items = createTypeSuggestions('string', testRange, mockMonaco);
+            expect(items.length).toBeGreaterThanOrEqual(1);
+
+            const labels = items.map((i) => getLabelText(i.label));
+            expect(labels).toContain('{ $regex: /…/ }'); // NOTE(review): title's empty-string case is not asserted
+        });
+
+        test('returns ISODate for date fields', () => {
+            const items = createTypeSuggestions('date', testRange, mockMonaco);
+            expect(items.length).toBeGreaterThanOrEqual(1);
+
+            const labels = items.map((i) => getLabelText(i.label));
+            expect(labels).toContain('ISODate("…")');
+        });
+
+        test('returns ObjectId for objectid fields', () => {
+            const items = createTypeSuggestions('objectid', testRange, mockMonaco);
+            expect(items).toHaveLength(1);
+
+            expect(getLabelText(items[0].label)).toBe('ObjectId("…")');
+        });
+
+        test('returns null for null fields', () => {
+            const items = createTypeSuggestions('null', testRange, mockMonaco);
+            expect(items).toHaveLength(1);
+
+            expect(getLabelText(items[0].label)).toBe('null');
+            expect(items[0].insertText).toBe('null'); // label and inserted text are the same literal
+        });
+
+        test('returns elemMatch and size for array fields', () => {
+            const items = createTypeSuggestions('array', testRange, mockMonaco);
+            expect(items.length).toBeGreaterThanOrEqual(2);
+
+            const labels = items.map((i) => getLabelText(i.label));
+            expect(labels).toContain('{ $elemMatch: { … } }');
+            expect(labels).toContain('{ $size: … }');
+        });
+
+        test('suggestions have sort prefix 00_ (highest priority)', () => {
+            const items = createTypeSuggestions('boolean', testRange, mockMonaco);
+            for (const item of items) {
+                expect(item.sortText).toMatch(/^00_/); // checked on every returned item, not just the first
+            }
+        });
+
+        test('first suggestion is preselected', () => {
+            const items = createTypeSuggestions('int32', testRange, mockMonaco);
+            expect(items[0].preselect).toBe(true); // only the first item is inspected
+        });
+    });
+
+    describe('type suggestions in value position integration', () => {
+        const mockMonaco = createMockMonaco();
+        // End-to-end checks: createCompletionItems must merge type suggestions with the regular value items.
+        test('boolean field at value position shows true/false first', () => {
+            const context: CursorContext = { position: 'value', fieldName: 'isActive', fieldBsonType: 'boolean' };
+            const items = createCompletionItems({
+                editorType: EditorType.Filter,
+                sessionId: undefined,
+                range: testRange,
+                isDollarPrefix: false,
+                monaco: mockMonaco,
+                cursorContext: context,
+            });
+
+            const labels = items.map((i) => getLabelText(i.label));
+            // The type-aware literals for a boolean field must be offered...
+            expect(labels).toContain('true');
+            expect(labels).toContain('false');
+
+            // ...alongside the regular operator completions
+            expect(labels).toContain('$eq');
+            expect(labels).toContain('$gt');
+
+            // String comparison of sortText: the type suggestions' "00_" prefix orders before the operators' "0_"
+            const trueItem = items.find((i) => getLabelText(i.label) === 'true');
+            const eqItem = items.find((i) => getLabelText(i.label) === '$eq');
+            expect(trueItem!.sortText! < eqItem!.sortText!).toBe(true);
+        });
+
+        test('int field at value position shows range query first', () => {
+            const context: CursorContext = { position: 'value', fieldName: 'age', fieldBsonType: 'int32' };
+            const items = createCompletionItems({
+                editorType: EditorType.Filter,
+                sessionId: undefined,
+                range: testRange,
+                isDollarPrefix: false,
+                monaco: mockMonaco,
+                cursorContext: context,
+            });
+
+            // The range-query suggestion must lead the returned list and carry the sort key "00_00"
+            const first = items[0];
+            expect(getLabelText(first.label)).toContain('$gt');
+            expect(first.sortText).toBe('00_00');
+        });
+
+        test('unknown type at value position has no type suggestions', () => {
+            const context: CursorContext = { position: 'value', fieldName: 'data' };
+            const items = createCompletionItems({
+                editorType: EditorType.Filter,
+                sessionId: undefined,
+                range: testRange,
+                isDollarPrefix: false,
+                monaco: mockMonaco,
+                cursorContext: context,
+            });
+
+            // The context carries no fieldBsonType; operators and BSON constructors must still be offered
+            const labels = items.map((i) => getLabelText(i.label));
+            expect(labels).toContain('$eq');
+            expect(labels).toContain('ObjectId');
+
+            // Type suggestions are the items with the "00_" sort prefix, so none may appear here
+            expect(items.filter((i) => i.sortText?.startsWith('00_'))).toHaveLength(0);
+        });
+    });
+
+    // ---------------------------------------------------------------
+    // Project and Sort value completions
+    // ---------------------------------------------------------------
+    describe('project editor value completions', () => {
+        const mockMonaco = createMockMonaco();
+        // A Project editor value position is expected to yield exactly two items: 1 (include) and 0 (exclude).
+        test('shows 1 (include) and 0 (exclude) at value position', () => {
+            const context: CursorContext = { position: 'value', fieldName: 'name' };
+            const items = createCompletionItems({
+                editorType: EditorType.Project,
+                sessionId: undefined,
+                range: testRange,
+                isDollarPrefix: false,
+                monaco: mockMonaco,
+                cursorContext: context,
+            });
+
+            expect(items).toHaveLength(2);
+            const labels = items.map((i) => getLabelText(i.label));
+            expect(labels).toContain('1');
+            expect(labels).toContain('0');
+        });
+
+        test('1 (include) has description "include field"', () => {
+            const context: CursorContext = { position: 'value', fieldName: 'name' };
+            const items = createCompletionItems({
+                editorType: EditorType.Project,
+                sessionId: undefined,
+                range: testRange,
+                isDollarPrefix: false,
+                monaco: mockMonaco,
+                cursorContext: context,
+            });
+            // The cast treats the label as an object so that its description can be asserted.
+            const includeItem = items.find((i) => getLabelText(i.label) === '1');
+            expect((includeItem?.label as { description: string }).description).toBe('include field');
+        });
+
+        test('does NOT show operators, BSON constructors, or JS globals', () => {
+            const context: CursorContext = { position: 'value', fieldName: 'name' };
+            const items = createCompletionItems({
+                editorType: EditorType.Project,
+                sessionId: undefined,
+                range: testRange,
+                isDollarPrefix: false,
+                monaco: mockMonaco,
+                cursorContext: context,
+            });
+
+            const labels = items.map((i) => getLabelText(i.label));
+            expect(labels).not.toContain('$gt');
+            expect(labels).not.toContain('ObjectId');
+            expect(labels).not.toContain('Date');
+        });
+
+        test('1 is preselected', () => {
+            const context: CursorContext = { position: 'value', fieldName: 'name' };
+            const items = createCompletionItems({
+                editorType: EditorType.Project,
+                sessionId: undefined,
+                range: testRange,
+                isDollarPrefix: false,
+                monaco: mockMonaco,
+                cursorContext: context,
+            });
+
+            const includeItem = items.find((i) => getLabelText(i.label) === '1');
+            expect(includeItem?.preselect).toBe(true);
+        });
+    });
+
+    describe('sort editor value completions', () => {
+        const mockMonaco = createMockMonaco();
+        // A Sort editor value position is expected to yield exactly two items: 1 (ascending) and -1 (descending).
+        test('shows 1 (ascending) and -1 (descending) at value position', () => {
+            const context: CursorContext = { position: 'value', fieldName: 'age' };
+            const items = createCompletionItems({
+                editorType: EditorType.Sort,
+                sessionId: undefined,
+                range: testRange,
+                isDollarPrefix: false,
+                monaco: mockMonaco,
+                cursorContext: context,
+            });
+
+            expect(items).toHaveLength(2);
+            const labels = items.map((i) => getLabelText(i.label));
+            expect(labels).toContain('1');
+            expect(labels).toContain('-1');
+        });
+
+        test('-1 has description "descending"', () => {
+            const context: CursorContext = { position: 'value', fieldName: 'age' };
+            const items = createCompletionItems({
+                editorType: EditorType.Sort,
+                sessionId: undefined,
+                range: testRange,
+                isDollarPrefix: false,
+                monaco: mockMonaco,
+                cursorContext: context,
+            });
+            // The cast treats the label as an object so that its description can be asserted.
+            const descItem = items.find((i) => getLabelText(i.label) === '-1');
+            expect((descItem?.label as { description: string }).description).toBe('descending');
+        });
+
+        test('does NOT show operators, BSON constructors, or JS globals', () => {
+            const context: CursorContext = { position: 'value', fieldName: 'age' };
+            const items = createCompletionItems({
+                editorType: EditorType.Sort,
+                sessionId: undefined,
+                range: testRange,
+                isDollarPrefix: false,
+                monaco: mockMonaco,
+                cursorContext: context,
+            });
+
+            const labels = items.map((i) => getLabelText(i.label));
+            expect(labels).not.toContain('$gt');
+            expect(labels).not.toContain('ObjectId');
+            expect(labels).not.toContain('Date');
+        });
+
+        test('1 is preselected', () => {
+            const context: CursorContext = { position: 'value', fieldName: 'age' };
+            const items = createCompletionItems({
+                editorType: EditorType.Sort,
+                sessionId: undefined,
+                range: testRange,
+                isDollarPrefix: false,
+                monaco: mockMonaco,
+                cursorContext: context,
+            });
+
+            const ascItem = items.find((i) => getLabelText(i.label) === '1');
+            expect(ascItem?.preselect).toBe(true);
+        });
+    });
+
+    // ---------------------------------------------------------------
+    // Category-based completion coverage by cursor position
+    // ---------------------------------------------------------------
+    describe('completion categories by cursor position', () => {
+        const mockMonaco = createMockMonaco();
+
+        /**
+         * Helper: reads the description (category label) off a CompletionItem label.
+         * A plain string label has no description, so `undefined` is returned for it.
+         * Operator items use getCategoryLabel(meta), e.g., "comparison", "array"; JS globals use "JS global".
+         * Field items use the field type, e.g., "Number".
+         */
+        function getDescription(label: string | monacoEditor.languages.CompletionItemLabel): string | undefined {
+            return typeof label === 'string' ? undefined : label.description;
+        }
+
+        /** Collects the distinct, non-empty label descriptions (categories) of a completion list into a Set. */
+        function getCategories(items: monacoEditor.languages.CompletionItem[]): Set<string> {
+            const categories = new Set<string>();
+            for (const item of items) {
+                const desc = getDescription(item.label);
+                if (desc) categories.add(desc);
+            }
+            return categories;
+        }
+
+        describe('key position ({ <cursor> })', () => {
+            const keyContext: CursorContext = { position: 'key', depth: 1 };
+            // Shared context for this group: the cursor is at a key slot, as in { <cursor> }.
+            test('includes "logical" category operators', () => {
+                const items = createCompletionItems({
+                    editorType: EditorType.Filter,
+                    sessionId: undefined,
+                    range: testRange,
+                    isDollarPrefix: false,
+                    monaco: mockMonaco,
+                    cursorContext: keyContext,
+                });
+
+                const categories = getCategories(items);
+                expect(categories.has('logical')).toBe(true);
+            });
+
+            test('does NOT include purely field-level categories (comparison, array, element)', () => {
+                const items = createCompletionItems({
+                    editorType: EditorType.Filter,
+                    sessionId: undefined,
+                    range: testRange,
+                    isDollarPrefix: false,
+                    monaco: mockMonaco,
+                    cursorContext: keyContext,
+                });
+
+                const categories = getCategories(items);
+                // None of the operators in these categories is listed in KEY_POSITION_OPERATORS
+                expect(categories.has('comparison')).toBe(false);
+                expect(categories.has('array')).toBe(false);
+                expect(categories.has('element')).toBe(false);
+                // 'evaluation' is omitted on purpose: $expr, $jsonSchema, $text are key-position, so it IS present
+            });
+
+            test('does NOT include "bson" or "JS global"', () => {
+                const items = createCompletionItems({
+                    editorType: EditorType.Filter,
+                    sessionId: undefined,
+                    range: testRange,
+                    isDollarPrefix: false,
+                    monaco: mockMonaco,
+                    cursorContext: keyContext,
+                });
+
+                const categories = getCategories(items);
+                expect(categories.has('bson')).toBe(false);
+                expect(categories.has('JS global')).toBe(false);
+            });
+        });
+
+        describe('value position ({ field: <cursor> })', () => {
+            const valueContext: CursorContext = { position: 'value', fieldName: 'x' };
+            // Shared context for this group: the cursor follows a field name, as in { x: <cursor> }.
+            test('includes field-level categories: comparison, array, evaluation, element', () => {
+                const items = createCompletionItems({
+                    editorType: EditorType.Filter,
+                    sessionId: undefined,
+                    range: testRange,
+                    isDollarPrefix: false,
+                    monaco: mockMonaco,
+                    cursorContext: valueContext,
+                });
+
+                const categories = getCategories(items);
+                expect(categories.has('comparison')).toBe(true);
+                expect(categories.has('array')).toBe(true);
+                expect(categories.has('evaluation')).toBe(true);
+                expect(categories.has('element')).toBe(true);
+            });
+
+            test('includes "bson" and "JS global"', () => {
+                const items = createCompletionItems({
+                    editorType: EditorType.Filter,
+                    sessionId: undefined,
+                    range: testRange,
+                    isDollarPrefix: false,
+                    monaco: mockMonaco,
+                    cursorContext: valueContext,
+                });
+
+                const categories = getCategories(items);
+                expect(categories.has('bson')).toBe(true);
+                expect(categories.has('JS global')).toBe(true);
+            });
+
+            test('does NOT include key-position-only operators', () => {
+                const items = createCompletionItems({
+                    editorType: EditorType.Filter,
+                    sessionId: undefined,
+                    range: testRange,
+                    isDollarPrefix: false,
+                    monaco: mockMonaco,
+                    cursorContext: valueContext,
+                });
+
+                const labels = items.map((i) => getLabelText(i.label));
+                expect(labels).not.toContain('$and');
+                expect(labels).not.toContain('$or');
+                expect(labels).not.toContain('$nor');
+            });
+        });
+
+        describe('operator position ({ field: { <cursor> } })', () => {
+            const operatorContext: CursorContext = { position: 'operator', fieldName: 'x' };
+            // Shared context for this group: the cursor is inside a field's operator object, { x: { <cursor> } }.
+            test('includes field-level categories: comparison, array, evaluation, element', () => {
+                const items = createCompletionItems({
+                    editorType: EditorType.Filter,
+                    sessionId: undefined,
+                    range: testRange,
+                    isDollarPrefix: false,
+                    monaco: mockMonaco,
+                    cursorContext: operatorContext,
+                });
+
+                const categories = getCategories(items);
+                expect(categories.has('comparison')).toBe(true);
+                expect(categories.has('array')).toBe(true);
+                expect(categories.has('evaluation')).toBe(true);
+                expect(categories.has('element')).toBe(true);
+            });
+
+            test('does NOT include "bson", "JS global", or key-position operators', () => {
+                const items = createCompletionItems({
+                    editorType: EditorType.Filter,
+                    sessionId: undefined,
+                    range: testRange,
+                    isDollarPrefix: false,
+                    monaco: mockMonaco,
+                    cursorContext: operatorContext,
+                });
+
+                const categories = getCategories(items);
+                expect(categories.has('bson')).toBe(false);
+                expect(categories.has('JS global')).toBe(false);
+
+                const labels = items.map((i) => getLabelText(i.label));
+                expect(labels).not.toContain('$and');
+                expect(labels).not.toContain('$or');
+            });
+        });
+
+        describe('unknown position (genuinely ambiguous — shows everything)', () => {
+            const unknownContext: CursorContext = { position: 'unknown' };
+            // Shared context for this group: the cursor position is reported as 'unknown'.
+            test('includes all categories (full discovery fallback)', () => {
+                const items = createCompletionItems({
+                    editorType: EditorType.Filter,
+                    sessionId: undefined,
+                    range: testRange,
+                    isDollarPrefix: false,
+                    monaco: mockMonaco,
+                    cursorContext: unknownContext,
+                });
+
+                const categories = getCategories(items);
+                // Discovery fallback: key-level, field-level, BSON and JS global items are all expected
+                expect(categories.has('logical')).toBe(true);
+                expect(categories.has('comparison')).toBe(true);
+                expect(categories.has('array')).toBe(true);
+                expect(categories.has('bson')).toBe(true);
+                expect(categories.has('JS global')).toBe(true);
+            });
+
+            test('includes field names if store has data', () => {
+                setCompletionContext('test-session', {
+                    fields: [
+                        {
+                            fieldName: 'name',
+                            displayType: 'String',
+                            bsonType: 'string',
+                            isSparse: false,
+                            insertText: 'name',
+                            referenceText: '$name',
+                        },
+                    ],
+                });
+                // NOTE(review): presumably writes to a store shared across tests — confirm 'test-session' is cleared.
+                const items = createCompletionItems({
+                    editorType: EditorType.Filter,
+                    sessionId: 'test-session',
+                    range: testRange,
+                    isDollarPrefix: false,
+                    monaco: mockMonaco,
+                    cursorContext: unknownContext,
+                });
+
+                const labels = items.map((i) => getLabelText(i.label));
+                expect(labels).toContain('name');
+            });
+        });
+    });
+});
diff --git a/src/webviews/documentdbQuery/documentdbQueryCompletionProvider.ts b/src/webviews/documentdbQuery/documentdbQueryCompletionProvider.ts
new file mode 100644
index 000000000..3d3b74643
--- /dev/null
+++ b/src/webviews/documentdbQuery/documentdbQueryCompletionProvider.ts
@@ -0,0 +1,32 @@
+/*---------------------------------------------------------------------------------------------
+ *  Copyright (c) Microsoft Corporation. All rights reserved.
+ *  Licensed under the MIT License. See License.txt in the project root for license information.
+ *--------------------------------------------------------------------------------------------*/
+
+/**
+ * Barrel re-export for the completions module.
+ *
+ * The completion provider logic has been refactored into the `completions/` folder:
+ * - `completions/createCompletionItems.ts` — main entry point, context branching
+ * - `completions/mapCompletionItems.ts` — operator/field → CompletionItem mapping
+ * - `completions/typeSuggestions.ts` — type-aware value suggestions
+ * - `completions/snippetUtils.ts` — snippet text manipulation
+ *
+ * This file preserves the original import path for existing consumers.
+ */
+
+// eslint-disable-next-line no-restricted-exports -- deliberate barrel: keeps the original import path working
+export {
+    KEY_POSITION_OPERATORS,
+    createCompletionItems,
+    createTypeSuggestions,
+    escapeSnippetDollars,
+    getCategoryLabel,
+    getCompletionKindForMeta,
+    getMetaTagsForEditorType,
+    getOperatorSortPrefix,
+    mapFieldToCompletionItem,
+    mapOperatorToCompletionItem,
+    stripOuterBraces,
+    type CreateCompletionItemsParams,
+} from './completions';
diff --git a/src/webviews/documentdbQuery/documentdbQueryHoverProvider.test.ts b/src/webviews/documentdbQuery/documentdbQueryHoverProvider.test.ts
new file mode 100644
index 000000000..720d7b72a
--- /dev/null
+++ b/src/webviews/documentdbQuery/documentdbQueryHoverProvider.test.ts
@@ -0,0 +1,197 @@
+/*---------------------------------------------------------------------------------------------
+ *  Copyright (c) Microsoft Corporation. All rights reserved.
+ *  Licensed under the MIT License. See License.txt in the project root for license information.
+ *--------------------------------------------------------------------------------------------*/
+
+import { type FieldCompletionData } from '../../utils/json/data-api/autocomplete/toFieldCompletionItems';
+import { getHoverContent, type FieldDataLookup } from './documentdbQueryHoverProvider';
+
+/** Builds a FieldDataLookup stub: returns the first entry whose fieldName equals the word, else undefined. */
+function createFieldLookup(fields: FieldCompletionData[]): FieldDataLookup {
+    return (word: string) => fields.find((f) => f.fieldName === word);
+}
+
+describe('documentdbQueryHoverProvider', () => {
+    describe('getHoverContent', () => {
+        test('returns hover for known operator $gt', () => {
+            const hover = getHoverContent('$gt');
+            expect(hover).not.toBeNull();
+            expect(hover!.contents).toHaveLength(1);
+            // The cast reads the Markdown text (`value`) of the single hover content entry.
+            const content = (hover!.contents[0] as { value: string }).value;
+            expect(content).toContain('**$gt**');
+        });
+
+        test('returns hover with description for $eq', () => {
+            const hover = getHoverContent('$eq');
+            expect(hover).not.toBeNull();
+            // More than one line means the hover carries text beyond the bold operator name.
+            const content = (hover!.contents[0] as { value: string }).value;
+            expect(content).toContain('**$eq**');
+            expect(content.split('\n').length).toBeGreaterThan(1);
+        });
+
+        test('returns hover for BSON constructor ObjectId', () => {
+            const hover = getHoverContent('ObjectId');
+            expect(hover).not.toBeNull();
+
+            const content = (hover!.contents[0] as { value: string }).value;
+            expect(content).toContain('**ObjectId**');
+        });
+
+        test('returns null for unknown word', () => {
+            const hover = getHoverContent('foo');
+            expect(hover).toBeNull();
+        });
+
+        test('returns null for arbitrary text that is not an operator', () => {
+            const hover = getHoverContent('somethingRandom123');
+            expect(hover).toBeNull();
+        });
+
+        test('word without $ prefix matches operator when prefixed', () => {
+            const hover = getHoverContent('gt');
+            expect(hover).not.toBeNull();
+            // The bare word "gt" is expected to resolve to the "$gt" operator entry.
+            const content = (hover!.contents[0] as { value: string }).value;
+            expect(content).toContain('**$gt**');
+        });
+
+        test('includes doc link when available', () => {
+            const hover = getHoverContent('$gt');
+            expect(hover).not.toBeNull();
+
+            const content = (hover!.contents[0] as { value: string }).value;
+            expect(content).toContain('Documentation]');
+        });
+
+        test('operator hover has isTrusted set for clickable links', () => {
+            const hover = getHoverContent('$gt');
+            expect(hover).not.toBeNull();
+
+            const hoverContent = hover!.contents[0] as { isTrusted?: boolean };
+            expect(hoverContent.isTrusted).toBe(true);
+        });
+
+        test('returns hover for UUID constructor', () => {
+            const hover = getHoverContent('UUID');
+            expect(hover).not.toBeNull();
+
+            const content = (hover!.contents[0] as { value: string }).value;
+            expect(content).toContain('**UUID**');
+        });
+    });
+
+    describe('field hover', () => {
+        const fields: FieldCompletionData[] = [
+            {
+                fieldName: 'age',
+                displayType: 'Number',
+                bsonType: 'int32',
+                isSparse: false,
+                insertText: 'age',
+                referenceText: '$age',
+            },
+            {
+                fieldName: 'nickname',
+                displayType: 'String',
+                bsonType: 'string',
+                isSparse: true,
+                insertText: 'nickname',
+                referenceText: '$nickname',
+            },
+            {
+                fieldName: 'rating',
+                displayType: 'Double',
+                bsonType: 'double',
+                bsonTypes: ['double', 'int32'],
+                displayTypes: ['Double', 'Int32'],
+                isSparse: true,
+                insertText: 'rating',
+                referenceText: '$rating',
+            },
+        ];
+
+        test('returns hover for a known field name', () => {
+            const hover = getHoverContent('age', createFieldLookup(fields));
+            expect(hover).not.toBeNull();
+
+            const content = (hover!.contents[0] as { value: string }).value;
+            expect(content).toContain('**age**');
+        });
+
+        test('shows "Inferred Type" section with type list', () => {
+            const hover = getHoverContent('age', createFieldLookup(fields));
+            expect(hover).not.toBeNull();
+
+            const content = (hover!.contents[0] as { value: string }).value;
+            expect(content).toContain('Inferred Type');
+            expect(content).toContain('Number');
+        });
+
+        test('shows multiple types for polymorphic fields', () => {
+            const hover = getHoverContent('rating', createFieldLookup(fields));
+            expect(hover).not.toBeNull();
+            // "rating" is the fixture entry that lists two display types: Double and Int32.
+            const content = (hover!.contents[0] as { value: string }).value;
+            expect(content).toContain('Inferred Type');
+            expect(content).toContain('Double');
+            expect(content).toContain('Int32');
+        });
+
+        test('shows sparse indicator for sparse fields', () => {
+            const hover = getHoverContent('nickname', createFieldLookup(fields));
+            expect(hover).not.toBeNull();
+
+            const content = (hover!.contents[0] as { value: string }).value;
+            expect(content).toContain('**nickname**');
+            expect(content).toContain('sparse');
+            expect(content).toContain('not present in all documents');
+        });
+
+        test('does NOT show sparse indicator for non-sparse fields', () => {
+            const hover = getHoverContent('age', createFieldLookup(fields));
+            expect(hover).not.toBeNull();
+
+            const content = (hover!.contents[0] as { value: string }).value;
+            expect(content).not.toContain('sparse');
+        });
+
+        test('field hover does NOT set isTrusted (user data is not trusted)', () => {
+            const hover = getHoverContent('age', createFieldLookup(fields));
+            expect(hover).not.toBeNull();
+            // Field hovers are built from user data, so the isTrusted flag is expected to be absent.
+            const hoverContent = hover!.contents[0] as { isTrusted?: boolean };
+            expect(hoverContent.isTrusted).toBeUndefined();
+        });
+
+        test('returns null for unknown field when no operator match', () => {
+            const hover = getHoverContent('unknownField', createFieldLookup(fields));
+            expect(hover).toBeNull();
+        });
+
+        test('operators take priority over field names', () => {
+            const fieldsWithOperatorName: FieldCompletionData[] = [
+                {
+                    fieldName: 'gt',
+                    displayType: 'String',
+                    bsonType: 'string',
+                    isSparse: false,
+                    insertText: 'gt',
+                    referenceText: '$gt',
+                },
+            ];
+            // The field is literally named "gt": the "$gt" operator hover must win over the field lookup.
+            const hover = getHoverContent('gt', createFieldLookup(fieldsWithOperatorName));
+            expect(hover).not.toBeNull();
+
+            const content = (hover!.contents[0] as { value: string }).value;
+            expect(content).toContain('**$gt**');
+        });
+
+        test('returns null for field when no fieldLookup provided', () => {
+            const hover = getHoverContent('age');
+            expect(hover).toBeNull();
+        });
+    });
+});
diff --git a/src/webviews/documentdbQuery/documentdbQueryHoverProvider.ts b/src/webviews/documentdbQuery/documentdbQueryHoverProvider.ts
new file mode 100644
index 000000000..23207293a
--- /dev/null
+++ b/src/webviews/documentdbQuery/documentdbQueryHoverProvider.ts
@@ -0,0 +1,104 @@
+/*---------------------------------------------------------------------------------------------
+ *  Copyright (c) Microsoft Corporation. All rights reserved.
+ *  Licensed under the MIT License. See License.txt in the project root for license information.
+ *--------------------------------------------------------------------------------------------*/
+
+/**
+ * Hover provider logic for the `documentdb-query` language.
+ *
+ * Provides inline documentation when hovering over operators,
+ * BSON constructors, and field names. Uses `documentdb-constants` for
+ * the operator registry and the completion store for field type info.
+ */
+
+import { getAllCompletions } from '@vscode-documentdb/documentdb-constants';
+// eslint-disable-next-line import/no-internal-modules
+import type * as monacoEditor from 'monaco-editor/esm/vs/editor/editor.api';
+import { type FieldCompletionData } from '../../utils/json/data-api/autocomplete/toFieldCompletionItems';
+import { escapeMarkdown } from '../utils/escapeMarkdown';
+
+/**
+ * Callback that resolves a word to its field data (from the completion store), or `undefined` when no field matches.
+ */
+export type FieldDataLookup = (word: string) => FieldCompletionData | undefined;
+
+/**
+ * Returns hover content for a word under the cursor.
+ *
+ * Lookup order:
+ * 1. Operator / BSON constructor entries from `documentdb-constants`. A word without
+ *    a leading `$` is tried as `$word` first (the cursor may sit on `gt` of `$gt`)
+ *    and then verbatim (e.g. `ObjectId`).
+ * 2. Field data via `fieldLookup`, consulted only when step 1 found nothing.
+ *
+ * Operators/BSON constructors therefore take priority over field names.
+ *
+ * @param word - The word at the cursor position
+ * @param fieldLookup - optional callback to resolve field names to field data
+ * @returns A Monaco Hover or null if no match
+ */
+export function getHoverContent(word: string, fieldLookup?: FieldDataLookup): monacoEditor.languages.Hover | null {
+    const registry = getAllCompletions();
+
+    // `$`-prefixed words are looked up as-is; bare words get the `$` variant tried first.
+    const lookupKeys = word.startsWith('$') ? [word] : ['$' + word, word];
+
+    for (const key of lookupKeys) {
+        const entry = registry.find((item) => item.value === key);
+        if (!entry) {
+            continue;
+        }
+
+        const { description, link } = entry;
+        const sections: string[] = [`**${entry.value}**`];
+
+        // Separator between the title and the body, emitted only when a body exists.
+        if (description || link) {
+            sections.push('---', '<br>');
+        }
+        if (description) {
+            sections.push(description);
+        }
+        if (link) {
+            sections.push(`[ⓘ Documentation](${link})`);
+        }
+
+        // Entries come from the `documentdb-constants` registry; this hover is marked trusted.
+        return {
+            contents: [{ value: sections.join('\n\n'), isTrusted: true, supportHtml: true }],
+        };
+    }
+
+    // No operator/constructor matched: fall back to field data, if a lookup was supplied.
+    const fieldData = fieldLookup ? fieldLookup(word) : undefined;
+    if (!fieldData) {
+        return null;
+    }
+    return buildFieldHover(fieldData);
+}
+
+/**
+ * Builds a hover tooltip for a field name.
+ *
+ * Layout: the markdown-escaped field name in bold (followed by a "sparse" note
+ * when `field.isSparse` is set), a separator, and the inferred type(s) rendered
+ * as inline code. Unlike operator hovers, `isTrusted` is not set here.
+ *
+ * @param field - completion data describing the field
+ * @returns the Monaco hover for the field
+ */
+function buildFieldHover(field: FieldCompletionData): monacoEditor.languages.Hover {
+    const sparseNote = field.isSparse ? ' &nbsp;&nbsp; <small>sparse: not present in all documents</small>' : '';
+    const sections: string[] = [`**${escapeMarkdown(field.fieldName)}**${sparseNote}`];
+
+    // Prefer the list of display types; otherwise fall back to the single display type.
+    const inferredTypes =
+        field.displayTypes && field.displayTypes.length > 0 ? field.displayTypes : [field.displayType];
+    if (inferredTypes && inferredTypes.length > 0) {
+        const formatted = inferredTypes.map((entry) => '`' + escapeMarkdown(entry) + '`').join(', ');
+        sections.push('---', '<br>', `Inferred Type: ${formatted}`);
+    }
+
+    const value = sections.join('\n\n');
+    return { contents: [{ value, supportHtml: true }] };
+}
diff --git a/src/webviews/documentdbQuery/documentdbQueryValidator.test.ts b/src/webviews/documentdbQuery/documentdbQueryValidator.test.ts
new file mode 100644
index 000000000..40f807e10
--- /dev/null
+++ b/src/webviews/documentdbQuery/documentdbQueryValidator.test.ts
@@ -0,0 +1,243 @@
+/*---------------------------------------------------------------------------------------------
+ *  Copyright (c) Microsoft Corporation. All rights reserved.
+ *  Licensed under the MIT License. See License.txt in the project root for license information.
+ *--------------------------------------------------------------------------------------------*/
+
+import { levenshteinDistance, validateExpression } from './documentdbQueryValidator';
+
+describe('documentdbQueryValidator', () => {
+    describe('validateExpression', () => {
+        test('valid expression { age: { $gt: 25 } } produces no diagnostics', () => {
+            const diagnostics = validateExpression('{ age: { $gt: 25 } }');
+            expect(diagnostics).toHaveLength(0);
+        });
+
+        test('valid expression with multiple fields produces no diagnostics', () => {
+            const diagnostics = validateExpression('{ name: "Alice", age: 30 }');
+            expect(diagnostics).toHaveLength(0);
+        });
+
+        test('valid expression with BSON constructor produces no diagnostics', () => {
+            const diagnostics = validateExpression('{ _id: ObjectId("507f1f77bcf86cd799439011") }');
+            expect(diagnostics).toHaveLength(0);
+        });
+
+        test('valid expression with UUID constructor produces no diagnostics', () => {
+            const diagnostics = validateExpression('{ id: UUID("123e4567-e89b-12d3-a456-426614174000") }');
+            expect(diagnostics).toHaveLength(0);
+        });
+
+        test('valid expression with nested objects produces no diagnostics', () => {
+            const diagnostics = validateExpression('{ a: { b: { c: 1 } } }');
+            expect(diagnostics).toHaveLength(0);
+        });
+
+        test('syntax error { age: { $gt: } produces error diagnostic', () => {
+            const diagnostics = validateExpression('{ age: { $gt: } }');
+            expect(diagnostics.length).toBeGreaterThan(0);
+
+            const errorDiag = diagnostics.find((d) => d.severity === 'error');
+            expect(errorDiag).toBeDefined();
+        });
+
+        test('syntax error with unclosed brace produces error diagnostic', () => {
+            const diagnostics = validateExpression('{ age: 25');
+            expect(diagnostics.length).toBeGreaterThan(0);
+            expect(diagnostics[0].severity).toBe('error');
+        });
+
+        test('typo UUUD("...") produces warning "Did you mean UUID?"', () => {
+            const diagnostics = validateExpression('{ id: UUUD("abc") }');
+
+            const warnings = diagnostics.filter((d) => d.severity === 'warning');
+            expect(warnings.length).toBeGreaterThan(0);
+            expect(warnings[0].message).toContain('UUID');
+            expect(warnings[0].message).toContain('Did you mean');
+        });
+
+        test('typo ObjctId("...") produces warning "Did you mean ObjectId?"', () => {
+            const diagnostics = validateExpression('{ id: ObjctId("abc") }');
+
+            const warnings = diagnostics.filter((d) => d.severity === 'warning');
+            expect(warnings.length).toBeGreaterThan(0);
+            expect(warnings[0].message).toContain('ObjectId');
+        });
+
+        test('unknown identifier foo used as function produces error', () => {
+            // "foo" is not close to any known identifier (Levenshtein > 2)
+            const diagnostics = validateExpression('{ id: foo("abc") }');
+            const errors = diagnostics.filter((d) => d.severity === 'error');
+            expect(errors).toHaveLength(1);
+            expect(errors[0].message).toContain("Unknown function 'foo'");
+        });
+
+        test('unknown identifier as field name is not flagged', () => {
+            // Field names (non-function identifiers) should never produce diagnostics
+            const diagnostics = validateExpression('{ unknownField: 1 }');
+            expect(diagnostics).toHaveLength(0);
+        });
+
+        test('unknown field name ___id is not flagged (field validation is out of scope)', () => {
+            // The validator does not validate field names against the schema.
+            // That requires integration with the completion store (known fields).
+            const diagnostics = validateExpression('{ ___id: 1 }');
+            expect(diagnostics).toHaveLength(0);
+        });
+
+        test('empty string produces no diagnostics', () => {
+            const diagnostics = validateExpression('');
+            expect(diagnostics).toHaveLength(0);
+        });
+
+        test('whitespace-only string produces no diagnostics', () => {
+            const diagnostics = validateExpression('   ');
+            expect(diagnostics).toHaveLength(0);
+        });
+
+        test('valid expression with Math.min produces no diagnostics', () => {
+            const diagnostics = validateExpression('{ rating: Math.min(1.7, 2) }');
+            expect(diagnostics).toHaveLength(0);
+        });
+
+        test('valid expression with Date.now produces no diagnostics', () => {
+            const diagnostics = validateExpression('{ ts: Date.now() }');
+            expect(diagnostics).toHaveLength(0);
+        });
+
+        test('typo Daate.now() produces warning "Did you mean Date?"', () => {
+            const diagnostics = validateExpression('{ _id: Daate.now() }');
+
+            const warnings = diagnostics.filter((d) => d.severity === 'warning');
+            expect(warnings.length).toBeGreaterThan(0);
+            expect(warnings[0].message).toContain('Date');
+            expect(warnings[0].message).toContain('Did you mean');
+        });
+
+        test('typo Maht.min() produces warning "Did you mean Math?"', () => {
+            const diagnostics = validateExpression('{ val: Maht.min(1, 2) }');
+
+            const warnings = diagnostics.filter((d) => d.severity === 'warning');
+            expect(warnings.length).toBeGreaterThan(0);
+            expect(warnings[0].message).toContain('Math');
+        });
+
+        test('typo Nubmer.parseInt() produces warning "Did you mean Number?"', () => {
+            const diagnostics = validateExpression('{ x: Nubmer.parseInt("42") }');
+
+            const warnings = diagnostics.filter((d) => d.severity === 'warning');
+            expect(warnings.length).toBeGreaterThan(0);
+            expect(warnings[0].message).toContain('Number');
+        });
+
+        test('completely unknown member call UdddddduaD.now() produces error', () => {
+            const diagnostics = validateExpression('{ _id: UdddddduaD.now() }');
+            const errors = diagnostics.filter((d) => d.severity === 'error');
+            expect(errors).toHaveLength(1);
+            expect(errors[0].message).toContain("Unknown identifier 'UdddddduaD'");
+        });
+
+        test('completely unknown direct call XyzAbc() produces error', () => {
+            const diagnostics = validateExpression('{ _id: XyzAbc("123") }');
+            const errors = diagnostics.filter((d) => d.severity === 'error');
+            expect(errors).toHaveLength(1);
+            expect(errors[0].message).toContain("Unknown function 'XyzAbc'");
+        });
+
+        test('new Daddddte() produces error for unknown constructor', () => {
+            const diagnostics = validateExpression(
+                '{ date: { $gt: new Daddddte(Date.now() - 14 * 24 * 60 * 60 * 1000) } }',
+            );
+            const errors = diagnostics.filter((d) => d.severity === 'error');
+            expect(errors).toHaveLength(1);
+            expect(errors[0].message).toContain("Unknown constructor 'Daddddte'");
+        });
+
+        test('new Dae() produces warning for near-miss constructor', () => {
+            const diagnostics = validateExpression('{ date: new Dae("2025-01-01") }');
+            const warnings = diagnostics.filter((d) => d.severity === 'warning');
+            expect(warnings).toHaveLength(1);
+            expect(warnings[0].message).toContain('Date');
+        });
+
+        test('new Date() produces no diagnostics', () => {
+            const diagnostics = validateExpression('{ date: new Date() }');
+            expect(diagnostics).toHaveLength(0);
+        });
+
+        test('new RegExp() produces no diagnostics', () => {
+            const diagnostics = validateExpression('{ name: { $regex: new RegExp("^test") } }');
+            expect(diagnostics).toHaveLength(0);
+        });
+
+        test('Date.nodw() does NOT produce a warning (method validation is out of scope)', () => {
+            // We validate the object (Date) but not individual method names.
+            // Date is a known global, so no warning. The .nodw() method name
+            // is not validated — that would require method-level knowledge.
+            const diagnostics = validateExpression('{ _id: Date.nodw() }');
+            expect(diagnostics).toHaveLength(0);
+        });
+
+        test('valid expression with ISODate constructor produces no diagnostics', () => {
+            const diagnostics = validateExpression('{ ts: ISODate("2024-01-01") }');
+            expect(diagnostics).toHaveLength(0);
+        });
+
+        test('valid expression with MinKey produces no diagnostics', () => {
+            const diagnostics = validateExpression('{ start: MinKey() }');
+            expect(diagnostics).toHaveLength(0);
+        });
+
+        test('valid expression with MaxKey produces no diagnostics', () => {
+            const diagnostics = validateExpression('{ end: MaxKey() }');
+            expect(diagnostics).toHaveLength(0);
+        });
+
+        test('valid expression with regex produces no diagnostics', () => {
+            const diagnostics = validateExpression('{ name: /^alice/i }');
+            expect(diagnostics).toHaveLength(0);
+        });
+
+        test('valid expression with array produces no diagnostics', () => {
+            const diagnostics = validateExpression('{ tags: { $in: ["a", "b"] } }');
+            expect(diagnostics).toHaveLength(0);
+        });
+
+        test('diagnostics have valid offsets within the input range', () => {
+            const code = '{ age: { $gt: } }';
+            const diagnostics = validateExpression(code);
+
+            for (const d of diagnostics) {
+                expect(d.startOffset).toBeGreaterThanOrEqual(0);
+                expect(d.endOffset).toBeLessThanOrEqual(code.length);
+                expect(d.startOffset).toBeLessThanOrEqual(d.endOffset);
+            }
+        });
+    });
+
+    describe('levenshteinDistance', () => {
+        test('identical strings have distance 0', () => {
+            expect(levenshteinDistance('UUID', 'UUID')).toBe(0);
+        });
+
+        test('one character difference has distance 1', () => {
+            expect(levenshteinDistance('UUID', 'UUUD')).toBe(1);
+        });
+
+        test('two character difference has distance 2', () => {
+            expect(levenshteinDistance('Date', 'Dtae')).toBe(2);
+        });
+
+        test('completely different strings have high distance', () => {
+            expect(levenshteinDistance('UUID', 'something')).toBeGreaterThan(2);
+        });
+
+        test('empty string vs non-empty has distance equal to length', () => {
+            expect(levenshteinDistance('', 'abc')).toBe(3);
+            expect(levenshteinDistance('abc', '')).toBe(3);
+        });
+
+        test('both empty strings have distance 0', () => {
+            expect(levenshteinDistance('', '')).toBe(0);
+        });
+    });
+});
diff --git a/src/webviews/documentdbQuery/documentdbQueryValidator.ts b/src/webviews/documentdbQuery/documentdbQueryValidator.ts
new file mode 100644
index 000000000..95690247c
--- /dev/null
+++ b/src/webviews/documentdbQuery/documentdbQueryValidator.ts
@@ -0,0 +1,320 @@
+/*---------------------------------------------------------------------------------------------
+ *  Copyright (c) Microsoft Corporation. All rights reserved.
+ *  Licensed under the MIT License. See License.txt in the project root for license information.
+ *--------------------------------------------------------------------------------------------*/
+
+/**
+ * Validator for `documentdb-query` editor content.
+ *
+ * Uses `acorn` to parse the expression and `acorn-walk` to traverse the AST.
+ * Produces diagnostics for:
+ * - Syntax errors, and unknown identifiers in call positions (severity: error)
+ * - Near-miss typos of BSON constructors and known globals (severity: warning)
+ *
+ * This module is pure and testable — it does not depend on Monaco.
+ * The mapping from Diagnostic[] to Monaco markers happens in the editor mount handler.
+ */
+
+import { getAllCompletions } from '@vscode-documentdb/documentdb-constants';
+import * as acorn from 'acorn';
+import * as walk from 'acorn-walk';
+
+/**
+ * A diagnostic produced by the validator; offsets refer to the original (unwrapped) input.
+ */
+export interface Diagnostic {
+    /** 0-based start character offset in the original input (inclusive) */
+    startOffset: number;
+    /** 0-based end character offset in the original input (exclusive) */
+    endOffset: number;
+    /** Severity of the finding; `message` is its human-readable description */
+    severity: 'error' | 'warning' | 'info';
+    message: string;
+}
+
+/**
+ * Identifiers that are NOT flagged in call positions (exact match) and that serve as near-miss
+ * suggestions. Seeded with JS globals; BSON constructors are added by ensureBsonConstructors().
+ */
+const KNOWN_GLOBALS = new Set([
+    // BSON constructors are not listed here; ensureBsonConstructors() adds them at runtime
+    // JS globals available in shell-bson-parser's sandbox
+    'Math',
+    'Date',
+    'ISODate',
+    'RegExp',
+    'Infinity',
+    'NaN',
+    'undefined',
+    'true',
+    'false',
+    'null',
+    'Map',
+    'Symbol',
+    // Common JS builtins that might appear in expressions
+    'Number',
+    'String',
+    'Boolean',
+    'Array',
+    'Object',
+    'parseInt',
+    'parseFloat',
+    'isNaN',
+    'isFinite',
+]);
+
+// Guards the one-time merge of the BSON constructors from documentdb-constants into KNOWN_GLOBALS.
+let bsonConstructorsLoaded = false;
+
+/**
+ * Adds the value of every `bson` completion entry to KNOWN_GLOBALS. Only the first call does work.
+ */
+function ensureBsonConstructors(): void {
+    if (!bsonConstructorsLoaded) {
+        bsonConstructorsLoaded = true;
+        getAllCompletions()
+            .filter((completion) => completion.meta === 'bson')
+            .forEach((completion) => KNOWN_GLOBALS.add(completion.value));
+    }
+}
+
+/**
+ * Computes the Levenshtein edit distance between two strings: the minimum number of
+ * single-character insertions, deletions, and substitutions that turn `a` into `b`.
+ * Used for near-miss detection of BSON constructor typos.
+ *
+ * @param a - first string
+ * @param b - second string
+ * @returns the edit distance (0 for identical strings)
+ */
+export function levenshteinDistance(a: string, b: string): number {
+    if (a.length === 0) return b.length;
+    if (b.length === 0) return a.length;
+
+    // Only two rows of the DP table are kept: `previous[j]` is the distance between
+    // the first i - 1 characters of `a` and the first j characters of `b`.
+    let previous = Array.from({ length: b.length + 1 }, (_unused, j) => j);
+
+    for (let i = 1; i <= a.length; i++) {
+        const current = [i];
+        for (let j = 1; j <= b.length; j++) {
+            const diagonal = previous[j - 1];
+            current.push(a[i - 1] === b[j - 1] ? diagonal : 1 + Math.min(previous[j], current[j - 1], diagonal));
+        }
+        previous = current;
+    }
+
+    return previous[b.length];
+}
+
+/**
+ * Finds the known identifier whose spelling is closest to `name`.
+ *
+ * Candidates are compared case-insensitively by Levenshtein distance, in this order:
+ * BSON constructor entries from documentdb-constants first, then KNOWN_GLOBALS
+ * (Date, Math, RegExp, etc.). The closest candidate wins; on equal distance the
+ * candidate seen first is kept.
+ *
+ * @param name - identifier as written in the expression
+ * @returns the closest candidate and its distance when that distance is at most 2,
+ *          otherwise undefined
+ */
+function findNearMissKnownIdentifier(name: string): { match: string; distance: number } | undefined {
+    ensureBsonConstructors();
+
+    // Candidates further away than this many edits are not treated as typos.
+    const maxDistance = 2;
+    // Lower-cased once up front; every comparison below is case-insensitive.
+    const needle = name.toLowerCase();
+
+    // BSON constructors come first, so they win ties against KNOWN_GLOBALS.
+    const bsonNames = getAllCompletions()
+        .filter((entry) => entry.meta === 'bson')
+        .map((entry) => entry.value);
+
+    let closest: { match: string; distance: number } | undefined;
+    for (const candidate of [...bsonNames, ...KNOWN_GLOBALS]) {
+        const distance = levenshteinDistance(needle, candidate.toLowerCase());
+        if (distance > maxDistance) {
+            continue;
+        }
+
+        // Strictly-less comparison keeps the first candidate on ties.
+        if (closest === undefined || distance < closest.distance) {
+            closest = { match: candidate, distance };
+        }
+    }
+
+    return closest;
+}
+
+/**
+ * Call position an identifier was found in; doubles as the noun in "Unknown <kind> '...'" errors.
+ */
+type CallTargetKind = 'function' | 'identifier' | 'constructor';
+
+/** Callee shape shared by the CallExpression and NewExpression visitors. */
+type CalleeNode = acorn.Node & {
+    name?: string;
+    type: string;
+    object?: acorn.Node & { name?: string; type: string };
+};
+
+/**
+ * Builds the error message for a call target that is neither known nor a near-miss.
+ *
+ * @param kind - call position the name was found in
+ * @param name - the unknown identifier as written
+ * @returns the message text for the `error` diagnostic
+ */
+function unknownTargetMessage(kind: CallTargetKind, name: string): string {
+    if (kind === 'identifier') {
+        return `Unknown identifier '${name}'. Expected a known global (e.g., Date, Math).`;
+    }
+
+    // 'function' and 'constructor' differ only in the noun and the second example global.
+    const exampleGlobal = kind === 'function' ? 'Math' : 'RegExp';
+    return `Unknown ${kind} '${name}'. Expected a BSON constructor (e.g., ObjectId, ISODate) or a known global (e.g., Date, ${exampleGlobal}).`;
+}
+
+/**
+ * Converts a parse failure of the wrapped text into a diagnostic on the original input.
+ *
+ * @param error - the SyntaxError thrown by acorn (may carry a numeric `pos`)
+ * @param code - the original, unwrapped editor text
+ * @returns a single `error` diagnostic whose range lies within `code`
+ */
+function syntaxErrorToDiagnostic(error: SyntaxError, code: string): Diagnostic {
+    const { pos } = error as SyntaxError & { pos?: number };
+
+    // `pos` is an offset into the wrapped text: shift by one for the added '(' and
+    // clamp, because an error at the added ')' would otherwise point past the input.
+    const shifted = pos !== undefined ? pos - 1 : 0;
+    const startOffset = Math.max(0, Math.min(shifted, code.length));
+    const endOffset = Math.min(startOffset + 1, code.length);
+
+    // Drop the "(line:column)" suffix: those coordinates refer to the wrapped text.
+    const message = error.message.replace(/\(\d+:\d+\)/, '').trim();
+
+    return { startOffset, endOffset, severity: 'error', message };
+}
+
+/**
+ * Checks one identifier found in a call position and records a diagnostic when it
+ * is not a known global: a `warning` with a suggestion for near-misses, an `error`
+ * for everything else.
+ *
+ * @param target - the identifier node (offsets relative to the wrapped text)
+ * @param kind - call position the identifier was found in; selects the error wording
+ * @param diagnostics - output array the diagnostic is appended to
+ */
+function checkCallTarget(
+    target: { start: number; end: number; name?: string },
+    kind: CallTargetKind,
+    diagnostics: Diagnostic[],
+): void {
+    const name = target.name;
+    if (!name || KNOWN_GLOBALS.has(name)) {
+        return;
+    }
+
+    // Subtract 1 to undo the offset introduced by the wrapping '('.
+    const startOffset = target.start - 1;
+    const endOffset = target.end - 1;
+
+    const nearMiss = findNearMissKnownIdentifier(name);
+    if (nearMiss) {
+        diagnostics.push({
+            startOffset,
+            endOffset,
+            severity: 'warning',
+            message: `Did you mean '${nearMiss.match}'?`,
+        });
+        return;
+    }
+
+    // No near-miss found — the unknown name will fail at runtime.
+    diagnostics.push({
+        startOffset,
+        endOffset,
+        severity: 'error',
+        message: unknownTargetMessage(kind, name),
+    });
+}
+
+/**
+ * Validates a documentdb-query expression and returns diagnostics.
+ *
+ * The input is wrapped in parentheses and parsed as a single expression with
+ * `acorn`. A parse failure yields exactly one `error` diagnostic. Otherwise the
+ * AST is walked and identifiers in call positions are checked:
+ * - `Name(...)` and `new Name(...)`: `Name` must be a known identifier.
+ * - `Name.method(...)`: `Name` must be a known identifier; `method` is not checked.
+ *
+ * An unknown name that is a near-miss of a known one produces a `warning`
+ * ("Did you mean ...?"); any other unknown name produces an `error`.
+ * Identifiers outside call positions (e.g. the field name in `{ age: 1 }`) are
+ * intentionally never flagged, and `new` with a member callee (`new a.B()`) is
+ * not inspected.
+ *
+ * @param code - The expression text from the editor (e.g., `{ age: { $gt: 25 } }`)
+ * @returns Array of diagnostics (empty if the expression is valid); offsets are
+ *          relative to `code`, not to the wrapped text
+ */
+export function validateExpression(code: string): Diagnostic[] {
+    ensureBsonConstructors();
+
+    if (code.trim().length === 0) {
+        return [];
+    }
+
+    // Wrap in parentheses so acorn parses `{ ... }` as an object expression.
+    // The closing parenthesis is placed on its own line: appended directly, a
+    // trailing `// comment` in the input would swallow it and the query would be
+    // reported as a syntax error. Only the leading '(' shifts offsets.
+    const wrapped = `(${code}\n)`;
+
+    let ast: acorn.Node;
+    try {
+        ast = acorn.parseExpressionAt(wrapped, 0, {
+            ecmaVersion: 'latest',
+            sourceType: 'module',
+        });
+    } catch (error) {
+        if (error instanceof SyntaxError) {
+            return [syntaxErrorToDiagnostic(error, code)];
+        }
+        // Any other failure is not reported as a diagnostic.
+        return [];
+    }
+
+    const diagnostics: Diagnostic[] = [];
+
+    // Walk the AST and check identifiers in call positions. Bare identifiers get
+    // no visitor on purpose: in DocumentDB queries most of them are field names.
+    try {
+        walk.simple(ast, {
+            CallExpression(node: acorn.Node & { callee: CalleeNode }) {
+                const callee = node.callee;
+                if (callee.type === 'Identifier') {
+                    // Direct call — e.g., ObjctId("abc")
+                    checkCallTarget(callee, 'function', diagnostics);
+                } else if (callee.type === 'MemberExpression' && callee.object?.type === 'Identifier') {
+                    // Member call — e.g., Daate.now(), Maht.min(); only the object is checked.
+                    checkCallTarget(callee.object, 'identifier', diagnostics);
+                }
+            },
+            // NewExpression has the same callee shape as CallExpression,
+            // e.g. `new Daddddte()` — the callee is an Identifier node.
+            NewExpression(node: acorn.Node & { callee: CalleeNode }) {
+                if (node.callee.type === 'Identifier') {
+                    checkCallTarget(node.callee, 'constructor', diagnostics);
+                }
+            },
+        });
+    } catch {
+        // Best effort: if walking fails, return whatever was collected so far.
+    }
+
+    return diagnostics;
+}
diff --git a/src/webviews/documentdbQuery/extractQuotedKey.test.ts b/src/webviews/documentdbQuery/extractQuotedKey.test.ts
new file mode 100644
index 000000000..8c4f9e3ec
--- /dev/null
+++ b/src/webviews/documentdbQuery/extractQuotedKey.test.ts
@@ -0,0 +1,79 @@
+/*---------------------------------------------------------------------------------------------
+ *  Copyright (c) Microsoft Corporation. All rights reserved.
+ *  Licensed under the MIT License. See License.txt in the project root for license information.
+ *--------------------------------------------------------------------------------------------*/
+
+import { extractQuotedKey } from './extractQuotedKey';
+
+describe('extractQuotedKey', () => {
+    test('extracts double-quoted key when cursor is inside', () => {
+        const line = '{ "address.street": "value" }';
+        //            01234567890123456789
+        const col = 5; // on the second 'd' of address
+        const result = extractQuotedKey(line, col);
+        expect(result).not.toBeNull();
+        expect(result!.key).toBe('address.street');
+    });
+
+    test('extracts single-quoted key when cursor is inside', () => {
+        const line = "{ 'address.street': 'value' }";
+        const col = 5;
+        const result = extractQuotedKey(line, col);
+        expect(result).not.toBeNull();
+        expect(result!.key).toBe('address.street');
+    });
+
+    test('returns null when cursor is not inside quotes', () => {
+        const line = '{ name: "value" }';
+        const col = 3; // on 'a' of name (unquoted)
+        const result = extractQuotedKey(line, col);
+        expect(result).toBeNull();
+    });
+
+    test('returns null when cursor is on a structural character', () => {
+        const line = '{ "key": "value" }';
+        const col = 0; // on '{'
+        const result = extractQuotedKey(line, col);
+        expect(result).toBeNull();
+    });
+
+    test('returns correct start/end for range highlighting', () => {
+        const line = '{ "address.street": 1 }';
+        //            0123456789012345678
+        const col = 10; // somewhere inside the quoted string
+        const result = extractQuotedKey(line, col);
+        expect(result).not.toBeNull();
+        expect(result!.start).toBe(2); // position of opening "
+        expect(result!.end).toBe(18); // position after closing "
+        expect(result!.key).toBe('address.street');
+    });
+
+    test('handles escaped quotes inside key', () => {
+        const line = '{ "key\\"name": 1 }';
+        const col = 5;
+        const result = extractQuotedKey(line, col);
+        expect(result).not.toBeNull();
+        expect(result!.key).toBe('key\\"name');
+    });
+
+    test('cursor on opening quote still works', () => {
+        const line = '{ "address.street": 1 }';
+        const col = 2; // on the opening "
+        const result = extractQuotedKey(line, col);
+        expect(result).not.toBeNull();
+        expect(result!.key).toBe('address.street');
+    });
+
+    test('cursor on closing quote still works', () => {
+        const line = '{ "address.street": 1 }';
+        const col = 17; // on the closing "
+        const result = extractQuotedKey(line, col);
+        expect(result).not.toBeNull();
+        expect(result!.key).toBe('address.street');
+    });
+
+    test('returns null for empty line', () => {
+        const result = extractQuotedKey('', 0);
+        expect(result).toBeNull();
+    });
+});
diff --git a/src/webviews/documentdbQuery/extractQuotedKey.ts b/src/webviews/documentdbQuery/extractQuotedKey.ts
new file mode 100644
index 000000000..b3e412f58
--- /dev/null
+++ b/src/webviews/documentdbQuery/extractQuotedKey.ts
@@ -0,0 +1,92 @@
+/*---------------------------------------------------------------------------------------------
+ *  Copyright (c) Microsoft Corporation. All rights reserved.
+ *  Licensed under the MIT License. See License.txt in the project root for license information.
+ *--------------------------------------------------------------------------------------------*/
+
+/**
+ * Extracts the quoted string under the cursor, if there is one.
+ *
+ * For `{ "address.street": 1 }`, when the cursor is anywhere from the opening
+ * quote to the closing quote (inclusive), returns the text between the quotes
+ * (`address.street`) together with the 0-based span of the full quoted string:
+ * `start` is the index of the opening quote and `end` is the index just past
+ * the closing quote, so the span can be used directly for hover highlighting.
+ *
+ * Examples (cursor column in parentheses):
+ * - `{ "address.street": 1 }` (10) → `{ key: 'address.street', start: 2, end: 18 }`
+ * - `{ "a:b": 1 }` (5)             → `{ key: 'a:b', start: 2, end: 7 }`
+ * - `{ "key": "value" }` (0)       → `null` (cursor on `{`)
+ *
+ * The line is tokenized left to right instead of probing outward from the
+ * cursor. Probing backward cannot tell an opening quote from a closing one and
+ * cannot tell structural `{ } : ,` from the same characters inside a string,
+ * so keys such as `"a:b"` would be missed and the gap between two strings
+ * (`f("a") + g("b")`) would be reported as a string.
+ *
+ * Notes:
+ * - Both `"` and `'` delimit strings; a string ends at the next unescaped quote
+ *   of the same kind. A backslash escapes the character that follows it.
+ * - Escape sequences are not decoded; `key` is the raw text between the quotes.
+ * - A quote with no closing partner on the line is skipped as a stray
+ *   character (e.g. an apostrophe in a comment), so scanning continues.
+ * - Only a single line is examined; strings spanning lines are not supported.
+ *
+ * @param line - the full line content
+ * @param col0 - 0-based column position of the cursor
+ * @returns the raw key and its span, or null when the cursor is not on or
+ *          inside a complete quoted string
+ */
+export function extractQuotedKey(line: string, col0: number): { key: string; start: number; end: number } | null {
+    if (col0 < 0 || col0 >= line.length) return null;
+
+    let i = 0;
+    // Strings that start after the cursor cannot contain it, so stop at col0.
+    while (i <= col0) {
+        const ch = line[i];
+        if (ch !== '"' && ch !== "'") {
+            i++;
+            continue;
+        }
+
+        const closeQuoteIdx = findClosingQuote(line, i, ch);
+        if (closeQuoteIdx < 0) {
+            // Stray/unterminated quote: treat it as an ordinary character.
+            i++;
+            continue;
+        }
+
+        // Here i <= col0 is guaranteed by the loop condition, so the cursor is
+        // within this string exactly when it is not past the closing quote.
+        if (col0 <= closeQuoteIdx) {
+            return { key: line.substring(i + 1, closeQuoteIdx), start: i, end: closeQuoteIdx + 1 };
+        }
+
+        // Cursor is to the right of this string; resume after its closing quote.
+        i = closeQuoteIdx + 1;
+    }
+
+    return null;
+}
+
+/**
+ * Finds the quote that closes the string opened at `openIdx`.
+ *
+ * @param line - the full line content
+ * @param openIdx - index of the opening quote
+ * @param quoteChar - the quote character that opened the string
+ * @returns index of the matching unescaped closing quote, or -1 if none
+ */
+function findClosingQuote(line: string, openIdx: number, quoteChar: string): number {
+    for (let i = openIdx + 1; i < line.length; i++) {
+        const ch = line[i];
+        if (ch === '\\') {
+            // Skip the escaped character so `\"` and `\\` are handled alike.
+            i++;
+            continue;
+        }
+        if (ch === quoteChar) {
+            return i;
+        }
+    }
+    return -1;
+}
diff --git a/src/webviews/documentdbQuery/index.ts b/src/webviews/documentdbQuery/index.ts
new file mode 100644
index 000000000..cb349f84a
--- /dev/null
+++ b/src/webviews/documentdbQuery/index.ts
@@ -0,0 +1,29 @@
+/*---------------------------------------------------------------------------------------------
+ *  Copyright (c) Microsoft Corporation. All rights reserved.
+ *  Licensed under the MIT License. See License.txt in the project root for license information.
+ *--------------------------------------------------------------------------------------------*/
+
+/**
+ * DocumentDB Query Language for Monaco Editor.
+ *
+ * This module provides the `documentdb-query` custom language that reuses
+ * the JavaScript Monarch tokenizer for syntax highlighting while providing
+ * custom completions from the `documentdb-constants` package.
+ *
+ * Usage:
+ * ```typescript
+ * import { registerDocumentDBQueryLanguage, LANGUAGE_ID } from './documentdbQuery';
+ *
+ * // During Monaco initialization:
+ * await registerDocumentDBQueryLanguage(monaco);
+ *
+ * // In editor props:
+ * <Editor language={LANGUAGE_ID} />
+ * ```
+ */
+
+export { clearCompletionContext, getCompletionContext, setCompletionContext } from './completionStore';
+export { detectCursorContext, type CursorContext, type FieldTypeLookup } from './cursorContext';
+export { validateExpression, type Diagnostic } from './documentdbQueryValidator';
+export { EditorType, LANGUAGE_ID, URI_SCHEME, buildEditorUri, parseEditorUri } from './languageConfig';
+export { registerDocumentDBQueryLanguage } from './registerLanguage';
diff --git a/src/webviews/documentdbQuery/isCursorInsideString.test.ts b/src/webviews/documentdbQuery/isCursorInsideString.test.ts
new file mode 100644
index 000000000..1c5468aff
--- /dev/null
+++ b/src/webviews/documentdbQuery/isCursorInsideString.test.ts
@@ -0,0 +1,87 @@
+/*---------------------------------------------------------------------------------------------
+ *  Copyright (c) Microsoft Corporation. All rights reserved.
+ *  Licensed under the MIT License. See License.txt in the project root for license information.
+ *--------------------------------------------------------------------------------------------*/
+
+import { isCursorInsideString } from './isCursorInsideString';
+
+describe('isCursorInsideString', () => {
+    test('returns false for empty text', () => {
+        expect(isCursorInsideString('', 0)).toBe(false);
+    });
+
+    test('returns false when cursor is outside any string', () => {
+        const text = '{ name: "Alice", age: 30 }';
+        // offset just past the comma that follows the closed "Alice" string
+        const cursorOffset = text.indexOf(',') + 1;
+        expect(isCursorInsideString(text, cursorOffset)).toBe(false);
+    });
+
+    test('returns true when cursor is inside a double-quoted string', () => {
+        const text = '{ name: "Ali';
+        expect(isCursorInsideString(text, text.length)).toBe(true);
+    });
+
+    test('returns true when cursor is inside a single-quoted string', () => {
+        const text = "{ name: 'Ali";
+        expect(isCursorInsideString(text, text.length)).toBe(true);
+    });
+
+    test('returns false when cursor is after a closed string', () => {
+        const text = '{ name: "Alice" }';
+        // searching from index 9 skips the opening quote at index 8; +1 lands just past the closing quote
+        const cursorOffset = text.indexOf('"', 9) + 1;
+        expect(isCursorInsideString(text, cursorOffset)).toBe(false);
+    });
+
+    test('handles escaped quotes inside strings', () => {
+        const text = '{ name: "has\\"quote';
+        // cursor is still inside the string (the \" is escaped)
+        expect(isCursorInsideString(text, text.length)).toBe(true);
+    });
+
+    test('returns false after escaped quote followed by closing quote', () => {
+        const text = '{ name: "has\\"quote" }';
+        // cursor after the closing quote
+        const closingQuoteIdx = text.lastIndexOf('"');
+        expect(isCursorInsideString(text, closingQuoteIdx + 1)).toBe(false);
+    });
+
+    // Trigger characters (':', ',', '[') inside vs. outside strings; each text ends where the cursor sits
+    test('{ name: "Alice", | } — cursor outside string after comma', () => {
+        const text = '{ name: "Alice", ';
+        expect(isCursorInsideString(text, text.length)).toBe(false);
+    });
+
+    test('{ name: "has:colon" } — cursor inside string at colon', () => {
+        const text = '{ name: "has:';
+        expect(isCursorInsideString(text, text.length)).toBe(true);
+    });
+
+    test('{ name: "has:colon", | } — cursor outside string after comma', () => {
+        const text = '{ name: "has:colon", ';
+        expect(isCursorInsideString(text, text.length)).toBe(false);
+    });
+
+    test('{ tags: ["a", | ] } — cursor outside string in array', () => {
+        const text = '{ tags: ["a", ';
+        expect(isCursorInsideString(text, text.length)).toBe(false);
+    });
+
+    test('{ msg: "has[bracket" } — cursor inside string at bracket', () => {
+        const text = '{ msg: "has[';
+        expect(isCursorInsideString(text, text.length)).toBe(true);
+    });
+
+    test('{ $and: [ | ] } — cursor outside string in array', () => {
+        const text = '{ $and: [ ';
+        expect(isCursorInsideString(text, text.length)).toBe(false);
+    });
+
+    test('handles mixed quote types correctly', () => {
+        const text = '{ name: "it\'s" }';
+        // The single quote inside double quotes doesn't close anything
+        const cursorAfterClosingDouble = text.indexOf('"', 9) + 1;
+        expect(isCursorInsideString(text, cursorAfterClosingDouble)).toBe(false);
+    });
+});
diff --git a/src/webviews/documentdbQuery/isCursorInsideString.ts b/src/webviews/documentdbQuery/isCursorInsideString.ts
new file mode 100644
index 000000000..7c605d1fe
--- /dev/null
+++ b/src/webviews/documentdbQuery/isCursorInsideString.ts
@@ -0,0 +1,47 @@
+/*---------------------------------------------------------------------------------------------
+ *  Copyright (c) Microsoft Corporation. All rights reserved.
+ *  Licensed under the MIT License. See License.txt in the project root for license information.
+ *--------------------------------------------------------------------------------------------*/
+
+/**
+ * Reports whether the given offset falls inside a string literal.
+ *
+ * Walks `text` left to right, stopping at `cursorOffset` (or the end of the
+ * text, whichever comes first), and remembers which quote character — `'` or
+ * `"` — opened the string currently being read, if any. Inside a string, a
+ * backslash consumes the character after it, so `\"` and `\'` never close the
+ * string; the other kind of quote is treated as ordinary content.
+ *
+ * Cheap heuristic used to suppress completions auto-triggered by `:`, `,` or
+ * `[` when that character is typed inside a string value rather than as syntax.
+ *
+ * @param text - the full text of the editor
+ * @param cursorOffset - the 0-based character offset of the cursor
+ * @returns true if the cursor is inside a string literal
+ */
+export function isCursorInsideString(text: string, cursorOffset: number): boolean {
+    const scanEnd = Math.min(cursorOffset, text.length);
+    let openQuote: string | undefined;
+    let pos = 0;
+    while (pos < scanEnd) {
+        const current = text[pos];
+        if (openQuote === undefined) {
+            // Outside a string: only a quote character changes state.
+            if (current === '"' || current === "'") {
+                openQuote = current;
+            }
+            pos += 1;
+        } else if (current === '\\') {
+            // Escape sequence: step over the backslash and the escaped character.
+            pos += 2;
+        } else {
+            // Inside a string: only the matching quote closes it.
+            if (current === openQuote) {
+                openQuote = undefined;
+            }
+            pos += 1;
+        }
+    }
+
+    return openQuote !== undefined;
+}
diff --git a/src/webviews/documentdbQuery/languageConfig.test.ts b/src/webviews/documentdbQuery/languageConfig.test.ts
new file mode 100644
index 000000000..97f276b52
--- /dev/null
+++ b/src/webviews/documentdbQuery/languageConfig.test.ts
@@ -0,0 +1,112 @@
+/*---------------------------------------------------------------------------------------------
+ *  Copyright (c) Microsoft Corporation. All rights reserved.
+ *  Licensed under the MIT License. See License.txt in the project root for license information.
+ *--------------------------------------------------------------------------------------------*/
+
+import { buildEditorUri, EditorType, LANGUAGE_ID, parseEditorUri, URI_SCHEME } from './languageConfig';
+
+describe('languageConfig', () => {
+    describe('constants', () => {
+        test('LANGUAGE_ID is "documentdb-query"', () => {
+            expect(LANGUAGE_ID).toBe('documentdb-query');
+        });
+
+        test('URI_SCHEME is "documentdb"', () => {
+            expect(URI_SCHEME).toBe('documentdb');
+        });
+    });
+
+    describe('EditorType', () => {
+        test('has expected enum values', () => { // these values appear verbatim in the URIs asserted below
+            expect(EditorType.Filter).toBe('filter');
+            expect(EditorType.Project).toBe('project');
+            expect(EditorType.Sort).toBe('sort');
+            expect(EditorType.Aggregation).toBe('aggregation');
+        });
+    });
+
+    describe('buildEditorUri', () => {
+        test('builds filter URI with session ID', () => {
+            const uri = buildEditorUri(EditorType.Filter, 'session-abc-123');
+            expect(uri).toBe('documentdb://filter/session-abc-123');
+        });
+
+        test('builds project URI with session ID', () => {
+            const uri = buildEditorUri(EditorType.Project, 'my-session');
+            expect(uri).toBe('documentdb://project/my-session');
+        });
+
+        test('builds sort URI with session ID', () => {
+            const uri = buildEditorUri(EditorType.Sort, 'sess-1');
+            expect(uri).toBe('documentdb://sort/sess-1');
+        });
+
+        test('builds aggregation URI with session ID', () => {
+            const uri = buildEditorUri(EditorType.Aggregation, 'agg-session');
+            expect(uri).toBe('documentdb://aggregation/agg-session');
+        });
+    });
+
+    describe('parseEditorUri', () => {
+        test('parses valid filter URI', () => {
+            const result = parseEditorUri('documentdb://filter/session-abc-123');
+            expect(result).toEqual({
+                editorType: EditorType.Filter,
+                sessionId: 'session-abc-123',
+            });
+        });
+
+        test('parses valid project URI', () => {
+            const result = parseEditorUri('documentdb://project/my-session');
+            expect(result).toEqual({
+                editorType: EditorType.Project,
+                sessionId: 'my-session',
+            });
+        });
+
+        test('parses valid sort URI', () => {
+            const result = parseEditorUri('documentdb://sort/sess-1');
+            expect(result).toEqual({
+                editorType: EditorType.Sort,
+                sessionId: 'sess-1',
+            });
+        });
+
+        test('parses valid aggregation URI', () => {
+            const result = parseEditorUri('documentdb://aggregation/agg-123');
+            expect(result).toEqual({
+                editorType: EditorType.Aggregation,
+                sessionId: 'agg-123',
+            });
+        });
+
+        test('returns undefined for unrecognized scheme', () => {
+            const result = parseEditorUri('vscode://filter/session-1'); // valid shape, wrong scheme
+            expect(result).toBeUndefined();
+        });
+
+        test('returns undefined for unknown editor type', () => {
+            const result = parseEditorUri('documentdb://unknown/session-1');
+            expect(result).toBeUndefined();
+        });
+
+        test('returns undefined for malformed URI (no session)', () => {
+            const result = parseEditorUri('documentdb://filter'); // no "/<sessionId>" segment
+            expect(result).toBeUndefined();
+        });
+
+        test('returns undefined for empty string', () => {
+            const result = parseEditorUri('');
+            expect(result).toBeUndefined();
+        });
+
+        test('roundtrips with buildEditorUri', () => {
+            for (const editorType of Object.values(EditorType)) { // every EditorType member
+                const sessionId = `test-session-${editorType}`;
+                const uri = buildEditorUri(editorType, sessionId);
+                const parsed = parseEditorUri(uri);
+                expect(parsed).toEqual({ editorType, sessionId });
+            }
+        });
+    });
+});
diff --git a/src/webviews/documentdbQuery/languageConfig.ts b/src/webviews/documentdbQuery/languageConfig.ts
new file mode 100644
index 000000000..5ad101a25
--- /dev/null
+++ b/src/webviews/documentdbQuery/languageConfig.ts
@@ -0,0 +1,67 @@
+/*---------------------------------------------------------------------------------------------
+ *  Copyright (c) Microsoft Corporation. All rights reserved.
+ *  Licensed under the MIT License. See License.txt in the project root for license information.
+ *--------------------------------------------------------------------------------------------*/
+
+/**
+ * Constants and configuration for the `documentdb-query` custom Monaco language.
+ *
+ * This language reuses the JavaScript Monarch tokenizer for syntax highlighting
+ * but does NOT attach the TypeScript/JavaScript language service worker.
+ * Completions are driven entirely by custom providers using `documentdb-constants`.
+ */
+
+/** The language identifier registered with Monaco for query editors. */
+export const LANGUAGE_ID = 'documentdb-query';
+
+/** URI scheme of query editor model URIs (the part before `://`). */
+export const URI_SCHEME = 'documentdb';
+
+/**
+ * Known editor types for URI-based routing. Each member's string value is the
+ * segment that follows `://` in an editor URI; the completion provider
+ * inspects `model.uri` to determine which completions to offer.
+ */
+export enum EditorType {
+    Filter = 'filter',
+    Project = 'project',
+    Sort = 'sort',
+    Aggregation = 'aggregation',
+}
+
+/**
+ * Composes the Monaco model URI that identifies one query editor instance.
+ * @param editorType - the type of query editor (any `EditorType` member)
+ * @param sessionId - unique session identifier for this editor instance
+ * @returns a URI string like `documentdb://filter/session-abc-123`
+ */
+export function buildEditorUri(editorType: EditorType, sessionId: string): string {
+    const editorPath = `${editorType}/${sessionId}`;
+    return `${URI_SCHEME}://${editorPath}`;
+}
+
+/**
+ * Splits a Monaco model URI back into its editor type and session ID.
+ *
+ * @param uri - the URI string (e.g., `documentdb://filter/session-abc-123`)
+ * @returns the editor type and session ID, or undefined when the scheme, the
+ *          overall shape, or the editor type is not recognized
+ */
+export function parseEditorUri(uri: string): { editorType: EditorType; sessionId: string } | undefined {
+    // String() is a no-op for strings and stringifies anything else (e.g. a URI object).
+    const pattern = new RegExp(`^${URI_SCHEME}://([^/]+)/(.+)$`);
+    const captured = pattern.exec(String(uri));
+    if (captured === null) {
+        return undefined;
+    }
+
+    const [, typeSegment, sessionId] = captured;
+
+    // Accept the first segment only when it equals one of the EditorType values.
+    const knownTypes: string[] = Object.values(EditorType);
+    if (knownTypes.indexOf(typeSegment) === -1) {
+        return undefined;
+    }
+
+    return { editorType: typeSegment as EditorType, sessionId };
+}
diff --git a/src/webviews/documentdbQuery/registerLanguage.ts b/src/webviews/documentdbQuery/registerLanguage.ts
new file mode 100644
index 000000000..231eb9461
--- /dev/null
+++ b/src/webviews/documentdbQuery/registerLanguage.ts
@@ -0,0 +1,241 @@
+/*---------------------------------------------------------------------------------------------
+ *  Copyright (c) Microsoft Corporation. All rights reserved.
+ *  Licensed under the MIT License. See License.txt in the project root for license information.
+ *--------------------------------------------------------------------------------------------*/
+
+/**
+ * Registers the `documentdb-query` custom language with Monaco Editor.
+ *
+ * This module:
+ * 1. Registers the language ID with Monaco
+ * 2. Imports the JavaScript Monarch tokenizer for syntax highlighting
+ * 3. Registers a custom CompletionItemProvider scoped to `documentdb-query`
+ * 4. Registers a HoverProvider (operator, constructor, and field info) and a link opener for hover links
+ *
+ * The JS tokenizer provides correct highlighting for:
+ * - Unquoted identifiers: `{ name: 1 }`
+ * - Single-quoted strings: `{ 'name': 1 }`
+ * - Double-quoted strings: `{ "name": 1 }`
+ * - BSON constructors: `ObjectId("...")`
+ * - Regex literals: `/^alice/i`
+ * - Comments, template literals, function bodies (for future $function support)
+ *
+ * Because this is a custom language ID, the TypeScript worker is NOT loaded,
+ * keeping the bundle ~400-600 KB lighter and ensuring a clean completion slate.
+ */
+
+// eslint-disable-next-line import/no-internal-modules
+import type * as monacoEditor from 'monaco-editor/esm/vs/editor/editor.api';
+import { getCompletionContext } from './completionStore';
+import { detectCursorContext } from './cursorContext';
+import { createCompletionItems } from './documentdbQueryCompletionProvider';
+import { getHoverContent } from './documentdbQueryHoverProvider';
+import { extractQuotedKey } from './extractQuotedKey';
+import { isCursorInsideString } from './isCursorInsideString';
+import { LANGUAGE_ID, parseEditorUri } from './languageConfig';
+
+/** In-flight or completed registration; cleared again if the registration fails. */
+let registrationPromise: Promise<void> | undefined;
+
+/** Callback used to open external URLs via the extension host. */
+let openUrlHandler: ((url: string) => void) | undefined;
+
+/**
+ * Registers the `documentdb-query` language with Monaco.
+ *
+ * Safe to call multiple times — calls made while a registration is pending or
+ * after it succeeded share one promise; a failed registration is forgotten so
+ * a later call can retry. `openUrl` replaces the stored callback when supplied.
+ * @param monaco - the Monaco editor API instance
+ * @param openUrl - callback to open a URL via the extension host (avoids webview sandbox restrictions)
+ */
+export function registerDocumentDBQueryLanguage(
+    monaco: typeof monacoEditor,
+    openUrl?: (url: string) => void,
+): Promise<void> {
+    openUrlHandler = openUrl ?? openUrlHandler;
+    registrationPromise ??= doRegisterLanguage(monaco).catch((error: unknown) => {
+        registrationPromise = undefined; // a cached rejection would block every retry
+        throw error;
+    });
+    return registrationPromise;
+}
+
+/** One-time setup: language ID, JS tokenizer and config, link opener, completion provider, hover provider. */
+async function doRegisterLanguage(monaco: typeof monacoEditor): Promise<void> {
+    // Step 1: Register the language ID
+    monaco.languages.register({ id: LANGUAGE_ID });
+
+    // Step 2: Import the JS Monarch tokenizer
+    // This path has been stable since Monaco 0.20 and exports { conf, language }
+    // eslint-disable-next-line import/no-internal-modules
+    const jsLanguage = (await import('monaco-editor/esm/vs/basic-languages/javascript/javascript.js')) as {
+        language: monacoEditor.languages.IMonarchLanguage;
+        conf: monacoEditor.languages.LanguageConfiguration;
+    };
+
+    // Step 3: Apply the JS tokenizer and language configuration to our custom language
+    monaco.languages.setMonarchTokensProvider(LANGUAGE_ID, jsLanguage.language);
+    monaco.languages.setLanguageConfiguration(LANGUAGE_ID, jsLanguage.conf);
+
+    // Step 4: Register a link opener so that documentation links in hover tooltips
+    // are opened via the extension host (which calls vscode.env.openExternal).
+    // VS Code webview sandboxing blocks window.open/popups, so we route through
+    // the tRPC openUrl mutation when available, or fall back to window.open.
+    monaco.editor.registerLinkOpener({
+        open(resource) {
+            const url = resource.toString(true);
+            if (openUrlHandler) {
+                openUrlHandler(url);
+            } else {
+                window.open(url, '_blank');
+            }
+            return true;
+        },
+    });
+
+    // Step 5: Register the completion provider
+    monaco.languages.registerCompletionItemProvider(LANGUAGE_ID, {
+        triggerCharacters: ['$', '"', "'", '{', '.', ':', ',', '['],
+        provideCompletionItems: (
+            model: monacoEditor.editor.ITextModel,
+            position: monacoEditor.Position,
+        ): monacoEditor.languages.CompletionList => {
+            // Parse the model URI to determine editor context
+            const uriString = model.uri.toString();
+            const parsed = parseEditorUri(uriString);
+
+            // Get the word at the current position for range calculation
+            const wordInfo = model.getWordUntilPosition(position);
+            let range: monacoEditor.IRange = {
+                startLineNumber: position.lineNumber,
+                endLineNumber: position.lineNumber,
+                startColumn: wordInfo.startColumn,
+                endColumn: wordInfo.endColumn,
+            };
+
+            // Check if cursor is preceded by '$' (for operator completions)
+            // Monaco's getWordUntilPosition() does not treat '$' as part of a word boundary.
+            // When the user types '$g', wordInfo.startColumn points to 'g', not '$'.
+            // Without this fix, selecting '$gt' would insert '$$gt' (double dollar).
+            const lineContent = model.getLineContent(position.lineNumber);
+            // -2 because columns are 1-based: e.g. startColumn=1 → index -1 → undefined (safe).
+            // JS returns undefined for out-of-bounds array access, so (undefined === '$') → false.
+            const charBefore = lineContent[wordInfo.startColumn - 2];
+            if (charBefore === '$') {
+                range = { ...range, startColumn: range.startColumn - 1 };
+            }
+
+            // Detect cursor context for context-sensitive completions
+            const text = model.getValue();
+            const cursorOffset = model.getOffsetAt(position);
+
+            // Suppress completions when the cursor is inside a string literal.
+            // This prevents trigger characters like ':', ',', '[' from firing
+            // inside strings like { name: "has:colon" } or { msg: "has[bracket" }.
+            if (isCursorInsideString(text, cursorOffset)) {
+                return { suggestions: [] };
+            }
+
+            const sessionId = parsed?.sessionId;
+
+            // Build field lookup from completion store to enrich context with BSON types
+            const fieldLookup = (fieldName: string): string | undefined => {
+                if (!sessionId) return undefined;
+                const ctx = getCompletionContext(sessionId);
+                return ctx?.fields.find((f) => f.fieldName === fieldName)?.bsonType;
+            };
+
+            const cursorContext = detectCursorContext(text, cursorOffset, fieldLookup);
+
+            // Detect whether the editor content has braces. When the user clears
+            // the editor (deleting initial `{  }`), completions need to include
+            // wrapping braces so inserted snippets produce valid query syntax.
+            const needsWrapping = !text.includes('{');
+
+            // Build completion items based on context
+            const items = createCompletionItems({
+                editorType: parsed?.editorType,
+                sessionId,
+                range,
+                isDollarPrefix: charBefore === '$',
+                monaco,
+                cursorContext,
+                needsWrapping,
+            });
+
+            return { suggestions: items };
+        },
+    });
+
+    // Step 6: Register the hover provider
+    monaco.languages.registerHoverProvider(LANGUAGE_ID, {
+        provideHover: (
+            model: monacoEditor.editor.ITextModel,
+            position: monacoEditor.Position,
+        ): monacoEditor.languages.Hover | null => {
+            // Build field lookup from completion store for field hover info
+            const uriString = model.uri.toString();
+            const parsedUri = parseEditorUri(uriString);
+            const hoverFieldLookup = parsedUri?.sessionId
+                ? (word: string) => {
+                      const ctx = getCompletionContext(parsedUri.sessionId);
+                      return ctx?.fields.find((f) => f.fieldName === word);
+                  }
+                : undefined;
+
+            // Try to extract a quoted string key (e.g., "address.street")
+            // Monaco's getWordAtPosition treats quotes and dots as word boundaries,
+            // so for { "address.street": 1 } hovering on "address" would only match
+            // "address", not the full field name "address.street".
+            const lineContent = model.getLineContent(position.lineNumber);
+            const col0 = position.column - 1; // 0-based
+
+            const quotedResult = extractQuotedKey(lineContent, col0);
+            if (quotedResult) {
+                const hover = getHoverContent(quotedResult.key, hoverFieldLookup);
+                if (hover) {
+                    return {
+                        ...hover,
+                        range: {
+                            startLineNumber: position.lineNumber,
+                            endLineNumber: position.lineNumber,
+                            startColumn: quotedResult.start + 1, // 1-based
+                            endColumn: quotedResult.end + 1, // 1-based
+                        },
+                    };
+                }
+            }
+
+            // No hover content for a quoted key: fall back to standard word-based hover
+            const wordAtPosition = model.getWordAtPosition(position);
+            if (!wordAtPosition) {
+                return null;
+            }
+
+            const hover = getHoverContent(wordAtPosition.word, hoverFieldLookup);
+            if (!hover) {
+                return null;
+            }
+
+            // Set the range for the hover highlight
+            return {
+                ...hover,
+                range: {
+                    startLineNumber: position.lineNumber,
+                    endLineNumber: position.lineNumber,
+                    startColumn: wordAtPosition.startColumn,
+                    endColumn: wordAtPosition.endColumn,
+                },
+            };
+        },
+    });
+}
+
+/**
+ * @internal For testing only: clears the cached registration and the stored `openUrl` callback.
+ */
+export function _resetRegistration(): void {
+    registrationPromise = undefined;
+    openUrlHandler = undefined; // otherwise a handler from an earlier test leaks into the next one
+}
diff --git a/src/webviews/documentdbQuery/tdd/README.md b/src/webviews/documentdbQuery/tdd/README.md
new file mode 100644
index 000000000..27cbe2a63
--- /dev/null
+++ b/src/webviews/documentdbQuery/tdd/README.md
@@ -0,0 +1,33 @@
+# TDD Behavior Tests
+
+This folder contains **behavior-driven TDD tests** for the `documentdb-query`
+autocompletion system. These tests define the expected behavior contract — they
+were written BEFORE the implementation and drive the design.
+
+## Important: TDD Contract
+
+> **⚠️ LLM/Agent Instruction:**
+> These tests represent intentional behavior specifications. If a test in this
+> folder fails after a code change, **do NOT automatically fix the test**.
+> Instead, alert the user that a TDD behavior contract has been violated.
+> The failure likely means the implementation change broke an expected behavior.
+> The user must decide whether to update the test (changing the contract) or
+> fix the implementation.
+
+## Test Files
+
+| File | What it tests |
+|------|---------------|
+| `completionBehavior.test.ts` | Which completion categories appear at each cursor position, sorting order, and snippet wrapping |
+
+## Specification
+
+See [readme.completionBehavior.md](readme.completionBehavior.md) for the full
+behavior specification with ASCII art examples.
+
+## Why a separate folder?
+
+These tests verify cross-cutting **behavior** (the completion matrix), not a
+single class or module. They sit at the `documentdbQuery/tdd/` level because
+they test the combined output of `cursorContext`, `createCompletionItems`,
+`mapCompletionItems`, and `completionKnowledge` working together.
diff --git a/src/webviews/documentdbQuery/tdd/completionBehavior.test.ts b/src/webviews/documentdbQuery/tdd/completionBehavior.test.ts
new file mode 100644
index 000000000..b2187b114
--- /dev/null
+++ b/src/webviews/documentdbQuery/tdd/completionBehavior.test.ts
@@ -0,0 +1,534 @@
+/*---------------------------------------------------------------------------------------------
+ *  Copyright (c) Microsoft Corporation. All rights reserved.
+ *  Licensed under the MIT License. See License.txt in the project root for license information.
+ *--------------------------------------------------------------------------------------------*/
+
+/**
+ * TDD Behavior Tests — Completion Categories by Cursor Position
+ *
+ * These tests define the expected behavior CONTRACT for the autocompletion
+ * system. They were written BEFORE the implementation (TDD red→green cycle)
+ * and verify WHAT completions appear at each cursor position, not HOW they
+ * are produced internally.
+ *
+ * ⚠️ LLM/Agent Instruction:
+ * If these tests fail after a code change, do NOT automatically fix the tests.
+ * Alert the user that a TDD behavior contract has been violated. The failure
+ * means the implementation broke expected behavior. The user must decide
+ * whether to update the spec or fix the implementation.
+ *
+ * Tests are category-based (not specific-operator-based) to be resilient to
+ * changes in documentdb-constants. Categories come from the `description`
+ * field in CompletionItemLabel (e.g., "comparison", "logical", "bson").
+ *
+ * See: readme.completionBehavior.md for the full specification.
+ */
+
+// eslint-disable-next-line import/no-internal-modules
+import type * as monacoEditor from 'monaco-editor/esm/vs/editor/editor.api';
+import { clearAllCompletionContexts, setCompletionContext } from '../completionStore';
+import { type CursorContext } from '../cursorContext';
+import { createCompletionItems } from '../documentdbQueryCompletionProvider';
+import { EditorType } from '../languageConfig';
+
+// ---------- Test infrastructure ----------
+
+const mockCompletionItemKind: typeof monacoEditor.languages.CompletionItemKind = {
+    Method: 0,
+    Function: 1,
+    Constructor: 2,
+    Field: 3,
+    Variable: 4,
+    Class: 5,
+    Struct: 6,
+    Interface: 7,
+    Module: 8,
+    Property: 9,
+    Event: 10,
+    Operator: 11,
+    Unit: 12,
+    Value: 13,
+    Constant: 14,
+    Enum: 15,
+    EnumMember: 16,
+    Keyword: 17,
+    Text: 18,
+    Color: 19,
+    File: 20,
+    Reference: 21,
+    Customcolor: 22,
+    Folder: 23,
+    TypeParameter: 24,
+    User: 25,
+    Issue: 26,
+    Snippet: 27,
+};
+
+const mockInsertTextRule = {
+    InsertAsSnippet: 4,
+    KeepWhitespace: 1,
+    None: 0,
+} as typeof monacoEditor.languages.CompletionItemInsertTextRule;
+
+function createMockMonaco(): typeof monacoEditor {
+    return {
+        languages: {
+            CompletionItemKind: mockCompletionItemKind,
+            CompletionItemInsertTextRule: mockInsertTextRule,
+        },
+    } as unknown as typeof monacoEditor;
+}
+
+const testRange: monacoEditor.IRange = {
+    startLineNumber: 1,
+    endLineNumber: 1,
+    startColumn: 1,
+    endColumn: 1,
+};
+
+// ---------- Helpers ----------
+
+/** Yields a label's description (its category); plain-string labels have none, so they give undefined. */
+function getDescription(label: string | monacoEditor.languages.CompletionItemLabel): string | undefined {
+    return typeof label !== 'string' ? label.description : undefined;
+}
+
+/** Collects every distinct category (label description) present in a completion list. */
+function getCategories(items: monacoEditor.languages.CompletionItem[]): Set<string> {
+    const seen = new Set<string>();
+    items.forEach((entry) => {
+        const category = getDescription(entry.label);
+        if (category) seen.add(category);
+    });
+    return seen;
+}
+
+/** Yields the display text of a CompletionItem label, whether it is a plain string or a label object. */
+function getLabelText(label: string | monacoEditor.languages.CompletionItemLabel): string {
+    return typeof label !== 'string' ? label.label : label;
+}
+
+/**
+ * Returns the distinct sortText prefixes in a completion list. A prefix is everything up to and
+ * INCLUDING the first underscore (e.g. `0_`); sortText without one, or starting with one, is ignored.
+ */
+function getSortPrefixes(items: monacoEditor.languages.CompletionItem[]): Set<string> {
+    const prefixes = new Set<string>();
+    for (const item of items) {
+        if (item.sortText) {
+            const underscoreIdx = item.sortText.indexOf('_');
+            if (underscoreIdx > 0) {
+                prefixes.add(item.sortText.substring(0, underscoreIdx + 1));
+            }
+        }
+    }
+    return prefixes;
+}
+
+// ---------- Field data for tests ----------
+
+const testFields = [
+    {
+        fieldName: 'name',
+        displayType: 'String',
+        bsonType: 'string',
+        isSparse: false,
+        insertText: 'name',
+        referenceText: '$name',
+    },
+    {
+        fieldName: 'age',
+        displayType: 'Number',
+        bsonType: 'int32',
+        isSparse: false,
+        insertText: 'age',
+        referenceText: '$age',
+    },
+];
+
+// ---------- Key-position operator categories ----------
+// These are the categories that should appear at KEY / EMPTY positions.
+// We test by category name, not specific operators, for resilience.
+// (Used in assertions, not as a lookup — individual tests check specific categories.)
+
+// Field-level categories that should NOT appear at key/empty positions.
+// These categories have NO operators in KEY_POSITION_OPERATORS.
+// Note: 'logical' and 'evaluation' are shared — they have both key-position
+// operators ($and/$or for logical, $expr/$text for evaluation) and field-level
+// operators ($not for logical, $regex/$mod for evaluation).
+const FIELD_LEVEL_ONLY_CATEGORIES = ['comparison', 'array', 'element', 'bitwise', 'geospatial'];
+
+// =====================================================================
+// Tests
+// =====================================================================
+
+describe('TDD: Completion Behavior', () => {
+    const mockMonaco = createMockMonaco();
+
+    beforeAll(() => {
+        console.warn(
+            '\n⚠️  TDD CONTRACT TESTS — If any test below fails, do NOT auto-fix the test.\n' +
+                '    Alert the user that a TDD behavior contract has been violated.\n' +
+                '    The user must decide whether to update the spec or fix the implementation.\n',
+        );
+    });
+
+    afterEach(() => {
+        clearAllCompletionContexts();
+    });
+
+    // -----------------------------------------------------------------
+    // EMPTY position — no braces in editor
+    // -----------------------------------------------------------------
+    describe('EMPTY position (no braces, needsWrapping=true)', () => {
+        /**
+         * ┌──────────────────────────┐
+         * │ |                        │  ← cursor, no braces
+         * └──────────────────────────┘
+         *
+         * Expected: fields + key operators, all wrapped with { }
+         * NOT expected: comparison, array, element, field-level evaluation ($regex, $mod), bson, JS global
+         */
+
+        function getEmptyCompletions(sessionId?: string): monacoEditor.languages.CompletionItem[] {
+            return createCompletionItems({
+                editorType: EditorType.Filter,
+                sessionId,
+                range: testRange,
+                isDollarPrefix: false,
+                monaco: mockMonaco,
+                cursorContext: { position: 'unknown' },
+                needsWrapping: true,
+            });
+        }
+
+        test('includes field names when store has data', () => {
+            setCompletionContext('s1', { fields: testFields });
+            const items = getEmptyCompletions('s1');
+            const labels = items.map((i) => getLabelText(i.label));
+            expect(labels).toContain('name');
+            expect(labels).toContain('age');
+        });
+
+        test('field insertText is wrapped with { }', () => {
+            setCompletionContext('s1', { fields: testFields });
+            const items = getEmptyCompletions('s1');
+            const nameItem = items.find((i) => getLabelText(i.label) === 'name');
+            expect(nameItem?.insertText).toMatch(/^\{.*\}$/);
+        });
+
+        test('includes key-position operator categories (logical)', () => {
+            const items = getEmptyCompletions();
+            const categories = getCategories(items);
+            expect(categories.has('logical')).toBe(true);
+        });
+
+        test('does NOT include field-level categories', () => {
+            const items = getEmptyCompletions();
+            const categories = getCategories(items);
+            for (const cat of FIELD_LEVEL_ONLY_CATEGORIES) {
+                expect(categories.has(cat)).toBe(false);
+            }
+        });
+
+        test('does NOT include "bson"', () => {
+            const items = getEmptyCompletions();
+            const categories = getCategories(items);
+            expect(categories.has('bson')).toBe(false);
+        });
+
+        test('does NOT include "JS global"', () => {
+            const items = getEmptyCompletions();
+            const categories = getCategories(items);
+            expect(categories.has('JS global')).toBe(false);
+        });
+
+        test('fields sort before operators (0_ < 1_)', () => {
+            setCompletionContext('s1', { fields: testFields });
+            const items = getEmptyCompletions('s1');
+            const fieldItem = items.find((i) => getLabelText(i.label) === 'name');
+            const operatorItems = items.filter((i) => getDescription(i.label) === 'logical');
+            expect(fieldItem?.sortText).toMatch(/^0_/);
+            expect(operatorItems.length).toBeGreaterThan(0);
+            expect(operatorItems[0]?.sortText).toMatch(/^1_/);
+        });
+    });
+
+    // -----------------------------------------------------------------
+    // KEY position — inside { }
+    // -----------------------------------------------------------------
+    describe('KEY position ({ | })', () => {
+        /**
+         * ┌──────────────────────────┐
+         * │ { |  }                   │  ← cursor inside braces
+         * └──────────────────────────┘
+         *
+         * Expected: fields + key operators
+         * NOT expected: comparison, array, element, field-level evaluation ($regex, $mod), bson, JS global
+         */
+
+        const keyContext: CursorContext = { position: 'key', depth: 1 };
+
+        function getKeyCompletions(sessionId?: string): monacoEditor.languages.CompletionItem[] {
+            return createCompletionItems({
+                editorType: EditorType.Filter,
+                sessionId,
+                range: testRange,
+                isDollarPrefix: false,
+                monaco: mockMonaco,
+                cursorContext: keyContext,
+            });
+        }
+
+        test('includes key-position operator categories', () => {
+            const categories = getCategories(getKeyCompletions());
+            expect(categories.has('logical')).toBe(true);
+        });
+
+        test('does NOT include field-level categories', () => {
+            const categories = getCategories(getKeyCompletions());
+            for (const cat of FIELD_LEVEL_ONLY_CATEGORIES) {
+                expect(categories.has(cat)).toBe(false);
+            }
+        });
+
+        test('does NOT include "bson" or "JS global"', () => {
+            const categories = getCategories(getKeyCompletions());
+            expect(categories.has('bson')).toBe(false);
+            expect(categories.has('JS global')).toBe(false);
+        });
+
+        test('field sortText starts with 0_, operator sortText starts with 1_', () => {
+            setCompletionContext('s1', { fields: testFields });
+            const items = getKeyCompletions('s1');
+
+            // Every field item should have sortText starting with 0_
+            const fieldItems = items.filter((i) => getLabelText(i.label) === 'name' || getLabelText(i.label) === 'age');
+            for (const item of fieldItems) {
+                expect(item.sortText).toMatch(/^0_/);
+            }
+
+            // Every operator item should have sortText starting with 1_
+            const operatorItems = items.filter((i) => {
+                const desc = getDescription(i.label);
+                return desc === 'logical' || desc === 'evaluation' || desc === 'misc';
+            });
+            for (const item of operatorItems) {
+                expect(item.sortText).toMatch(/^1_/);
+            }
+        });
+    });
+
+    // -----------------------------------------------------------------
+    // VALUE position — { field: | }
+    // -----------------------------------------------------------------
+    describe('VALUE position ({ field: | })', () => {
+        /**
+         * ┌──────────────────────────┐
+         * │ { age: |  }              │  ← cursor at value position
+         * └──────────────────────────┘
+         *
+         * Expected: type suggestions + field-level operators + bson + JS globals
+         * NOT expected: key-position operators ($and, $or at root)
+         */
+
+        const valueContext: CursorContext = { position: 'value', fieldName: 'age' };
+
+        function getValueCompletions(): monacoEditor.languages.CompletionItem[] {
+            return createCompletionItems({
+                editorType: EditorType.Filter,
+                sessionId: undefined,
+                range: testRange,
+                isDollarPrefix: false,
+                monaco: mockMonaco,
+                cursorContext: valueContext,
+            });
+        }
+
+        test('includes field-level categories', () => {
+            const categories = getCategories(getValueCompletions());
+            for (const cat of FIELD_LEVEL_ONLY_CATEGORIES) {
+                expect(categories.has(cat)).toBe(true);
+            }
+        });
+
+        test('includes "bson" and "JS global"', () => {
+            const categories = getCategories(getValueCompletions());
+            expect(categories.has('bson')).toBe(true);
+            expect(categories.has('JS global')).toBe(true);
+        });
+
+        test('does NOT include key-position operators by label', () => {
+            const labels = getValueCompletions().map((i) => getLabelText(i.label));
+            // Check just a couple representative key operators
+            expect(labels).not.toContain('$and');
+            expect(labels).not.toContain('$or');
+        });
+
+        test('sort order: operators (0_) before bson (3_) before JS globals (4_)', () => {
+            const prefixes = getSortPrefixes(getValueCompletions());
+            expect(prefixes.has('0_')).toBe(true);
+            expect(prefixes.has('3_')).toBe(true);
+            expect(prefixes.has('4_')).toBe(true);
+        });
+
+        test('project editor shows only 1/0 at value position', () => {
+            const items = createCompletionItems({
+                editorType: EditorType.Project,
+                sessionId: undefined,
+                range: testRange,
+                isDollarPrefix: false,
+                monaco: mockMonaco,
+                cursorContext: valueContext,
+            });
+            expect(items).toHaveLength(2);
+            const labels = items.map((i) => getLabelText(i.label));
+            expect(labels).toContain('1');
+            expect(labels).toContain('0');
+        });
+
+        test('sort editor shows only 1/-1 at value position', () => {
+            const items = createCompletionItems({
+                editorType: EditorType.Sort,
+                sessionId: undefined,
+                range: testRange,
+                isDollarPrefix: false,
+                monaco: mockMonaco,
+                cursorContext: valueContext,
+            });
+            expect(items).toHaveLength(2);
+            const labels = items.map((i) => getLabelText(i.label));
+            expect(labels).toContain('1');
+            expect(labels).toContain('-1');
+        });
+    });
+
+    // -----------------------------------------------------------------
+    // OPERATOR position — { field: { | } }
+    // -----------------------------------------------------------------
+    describe('OPERATOR position ({ field: { | } })', () => {
+        /**
+         * ┌──────────────────────────┐
+         * │ { age: { |  } }          │  ← cursor inside operator object
+         * └──────────────────────────┘
+         *
+         * Expected: field-level operators (braces stripped)
+         * NOT expected: bson, JS global, key-position operators
+         */
+
+        const operatorContext: CursorContext = { position: 'operator', fieldName: 'age' };
+
+        function getOperatorCompletions(): monacoEditor.languages.CompletionItem[] {
+            return createCompletionItems({
+                editorType: EditorType.Filter,
+                sessionId: undefined,
+                range: testRange,
+                isDollarPrefix: false,
+                monaco: mockMonaco,
+                cursorContext: operatorContext,
+            });
+        }
+
+        test('includes field-level categories', () => {
+            const categories = getCategories(getOperatorCompletions());
+            for (const cat of FIELD_LEVEL_ONLY_CATEGORIES) {
+                expect(categories.has(cat)).toBe(true);
+            }
+        });
+
+        test('does NOT include "bson" or "JS global"', () => {
+            const categories = getCategories(getOperatorCompletions());
+            expect(categories.has('bson')).toBe(false);
+            expect(categories.has('JS global')).toBe(false);
+        });
+
+        test('does NOT include key-position operators', () => {
+            const labels = getOperatorCompletions().map((i) => getLabelText(i.label));
+            expect(labels).not.toContain('$and');
+            expect(labels).not.toContain('$or');
+        });
+    });
+
+    // -----------------------------------------------------------------
+    // ARRAY-ELEMENT position — { $and: [|] }
+    // -----------------------------------------------------------------
+    describe('ARRAY-ELEMENT position ({ $and: [|] })', () => {
+        /**
+         * Same behavior as KEY position
+         */
+
+        const arrayContext: CursorContext = { position: 'array-element', parentOperator: '$and' };
+
+        function getArrayElementCompletions(sessionId?: string): monacoEditor.languages.CompletionItem[] {
+            return createCompletionItems({
+                editorType: EditorType.Filter,
+                sessionId,
+                range: testRange,
+                isDollarPrefix: false,
+                monaco: mockMonaco,
+                cursorContext: arrayContext,
+            });
+        }
+
+        test('behaves like KEY: includes logical, excludes field-level categories', () => {
+            const categories = getCategories(getArrayElementCompletions());
+            expect(categories.has('logical')).toBe(true);
+            for (const cat of FIELD_LEVEL_ONLY_CATEGORIES) {
+                expect(categories.has(cat)).toBe(false);
+            }
+        });
+
+        test('includes fields when store has data', () => {
+            setCompletionContext('s1', { fields: testFields });
+            const labels = getArrayElementCompletions('s1').map((i) => getLabelText(i.label));
+            expect(labels).toContain('name');
+        });
+    });
+
+    // -----------------------------------------------------------------
+    // UNKNOWN position — genuinely ambiguous (show everything)
+    // -----------------------------------------------------------------
+    describe('UNKNOWN position (ambiguous, needsWrapping=false)', () => {
+        /**
+         * Genuinely unknown cursor position. Show everything as discovery.
+         * This is the fallback when the parser can't determine position AND
+         * the editor is not empty (has some content with braces but ambiguous).
+         */
+
+        function getUnknownCompletions(sessionId?: string): monacoEditor.languages.CompletionItem[] {
+            return createCompletionItems({
+                editorType: EditorType.Filter,
+                sessionId,
+                range: testRange,
+                isDollarPrefix: false,
+                monaco: mockMonaco,
+                cursorContext: { position: 'unknown' },
+                needsWrapping: false,
+            });
+        }
+
+        test('includes key-position categories', () => {
+            const categories = getCategories(getUnknownCompletions());
+            expect(categories.has('logical')).toBe(true);
+        });
+
+        test('includes field-level categories (full discovery)', () => {
+            const categories = getCategories(getUnknownCompletions());
+            for (const cat of FIELD_LEVEL_ONLY_CATEGORIES) {
+                expect(categories.has(cat)).toBe(true);
+            }
+        });
+
+        test('includes "bson" and "JS global"', () => {
+            const categories = getCategories(getUnknownCompletions());
+            expect(categories.has('bson')).toBe(true);
+            expect(categories.has('JS global')).toBe(true);
+        });
+
+        test('includes fields when store has data', () => {
+            setCompletionContext('s1', { fields: testFields });
+            const labels = getUnknownCompletions('s1').map((i) => getLabelText(i.label));
+            expect(labels).toContain('name');
+        });
+    });
+});
diff --git a/src/webviews/documentdbQuery/tdd/readme.completionBehavior.md b/src/webviews/documentdbQuery/tdd/readme.completionBehavior.md
new file mode 100644
index 000000000..575f4f9f8
--- /dev/null
+++ b/src/webviews/documentdbQuery/tdd/readme.completionBehavior.md
@@ -0,0 +1,135 @@
+# Completion Behavior Specification
+
+> **⚠️ LLM/Agent Instruction:**
+> This specification defines the expected completion behavior contract.
+> If tests based on this spec fail after a code change, **do NOT automatically
+> fix the tests**. Alert the user that a TDD behavior contract has been violated.
+> The user must decide whether the behavior change is intentional.
+
+## Cursor Positions & Expected Completions
+
+The completion system shows different items depending on the cursor's semantic
+position within a query expression. This spec defines what categories appear,
+their sort order, and how snippets are wrapped at each position.
+
+### Categories
+
+Completions are grouped by category (shown as the `description` field in the
+completion item label). The categories come from the operator's `meta` tag:
+
+| Category | Source | Example operators |
+|----------|--------|-------------------|
+| `logical` | `query:logical` | `$and`, `$or`, `$nor` |
+| `comparison` | `query:comparison` | `$eq`, `$gt`, `$in` |
+| `array` | `query:array` | `$all`, `$elemMatch`, `$size` |
+| `evaluation` | `query:evaluation` | `$regex`, `$mod` |
+| `element` | `query:element` | `$exists`, `$type` |
+| `bson` | `bson` | `ObjectId`, `UUID`, `ISODate` |
+| `JS global` | (hardcoded) | `Date`, `Math`, `RegExp` |
+| (field type) | field data | `String`, `Number`, etc. |
+
+### Position: EMPTY (no braces in editor)
+
+```
+┌──────────────────────────┐
+│ |                        │   ← cursor, editor has no braces
+└──────────────────────────┘
+```
+
+**Shows:** Fields + key-position operators only (same items as KEY)
+**Wrapping:** All insertions wrapped with `{ ... }`
+**Sort:** `0_` fields, `1_` key operators
+
+```
+Expected completions:
+  name      String     ← field, inserts: { name: $1 }
+  age       Number     ← field, inserts: { age: $1 }
+  $and      logical    ← key operator, inserts: { $and: [...] }
+  $or       logical    ← key operator
+  $nor      logical    ← key operator
+
+NOT shown:
+  $gt       comparison ← field-level, invalid at root
+  $all      array      ← field-level, invalid at root
+  ObjectId  bson       ← not valid at root key position
+  Date      JS global  ← not valid at root key position
+```
+
+### Position: KEY (`{ | }`)
+
+```
+┌──────────────────────────┐
+│ { |  }                   │   ← cursor inside braces
+└──────────────────────────┘
+```
+
+**Shows:** Fields + key-position operators
+**Wrapping:** None (already inside braces)
+**Sort:** `0_` fields, `1_` key operators
+**Snippets:** Outer `{ }` stripped from operator snippets
+
+```
+NOT shown: comparison, array, element, field-level evaluation ($regex, $mod), bson, JS global
+```
+
+### Position: VALUE (`{ field: | }`)
+
+```
+┌──────────────────────────┐
+│ { age: |  }              │   ← cursor at value position
+└──────────────────────────┘
+```
+
+**Shows:** Type suggestions + field-level operators + BSON constructors + JS globals
+**Sort:** `00_` type suggestions, `0_`–`2_` operators, `3_` BSON, `4_` JS globals
+**Special:** Project editor → `1`/`0` only. Sort editor → `1`/`-1` only.
+
+```
+Shown categories: comparison, array, evaluation, element, logical ($not), bson, JS global
+NOT shown: key-position operators ($and, $or, $nor at root)
+```
+
+### Position: OPERATOR (`{ field: { | } }`)
+
+```
+┌──────────────────────────┐
+│ { age: { |  } }          │   ← cursor inside operator object
+└──────────────────────────┘
+```
+
+**Shows:** Field-level operators only (braces stripped)
+**Sort:** `0_` type-relevant, `1a_` comparison, `1b_` other universal, `2_` non-matching
+**Snippets:** Outer `{ }` stripped
+
+```
+Shown categories: comparison, array, evaluation, element, logical ($not)
+NOT shown: bson, JS global, key-position operators
+```
+
+### Position: ARRAY-ELEMENT (`{ $and: [|] }`)
+
+**Shows:** Same as KEY position
+**Sort:** Same as KEY position
+
+### Position: UNKNOWN (genuinely ambiguous)
+
+**Shows:** ALL completions (fields + all operators + BSON + JS globals)
+**Purpose:** Discovery fallback for positions the parser can't classify
+
+```
+Shown: everything — logical, comparison, array, evaluation, element, bson, JS global
+```
+
+## Sort Order Contract
+
+Each position has a defined sort prefix hierarchy. Items with lower prefixes
+appear higher in the completion list.
+
+| Position | Sort hierarchy |
+|----------|---------------|
+| EMPTY | `0_` fields → `1_` key operators |
+| KEY | `0_` fields → `1_` key operators |
+| VALUE | `00_` type suggestions → `0_`–`2_` operators → `3_` BSON → `4_` JS globals |
+| OPERATOR | `0_` type-relevant → `1a_` comparison → `1b_` universal → `2_` non-matching |
+| ARRAY-ELEMENT | same as KEY |
+| UNKNOWN | no enforced sort (Monaco default) |
diff --git a/src/webviews/index.scss b/src/webviews/index.scss
index f5fb4a7cd..561d1c072 100644
--- a/src/webviews/index.scss
+++ b/src/webviews/index.scss
@@ -87,3 +87,13 @@ $media-breakpoint-query-control-area: 1024px;
     @include input-focus-animation;
     @include input-hover;
 }
+
+/**
+ * Monaco suggest-details panel: ensure links show a pointer cursor.
+ * The hover widget applies this automatically, but the completion
+ * documentation panel does not — VS Code's webview CSS reset overrides it.
+ */
+.monaco-editor .suggest-details a,
+.monaco-editor .suggest-details-container a {
+    cursor: pointer;
+}
diff --git a/src/webviews/utils/escapeMarkdown.test.ts b/src/webviews/utils/escapeMarkdown.test.ts
new file mode 100644
index 000000000..4dbdfc1d5
--- /dev/null
+++ b/src/webviews/utils/escapeMarkdown.test.ts
@@ -0,0 +1,41 @@
+/*---------------------------------------------------------------------------------------------
+ *  Copyright (c) Microsoft Corporation. All rights reserved.
+ *  Licensed under the MIT License. See License.txt in the project root for license information.
+ *--------------------------------------------------------------------------------------------*/
+
+import { escapeMarkdown } from './escapeMarkdown';
+
+describe('escapeMarkdown', () => {
+    test('returns plain text unchanged', () => {
+        expect(escapeMarkdown('age')).toBe('age');
+    });
+
+    test('escapes markdown bold characters', () => {
+        expect(escapeMarkdown('**bold**')).toBe('\\*\\*bold\\*\\*');
+    });
+
+    test('escapes markdown link syntax', () => {
+        expect(escapeMarkdown('[click](https://evil.com)')).toBe('\\[click\\]\\(https://evil\\.com\\)');
+    });
+
+    test('escapes angle brackets (HTML tags)', () => {
+        expect(escapeMarkdown('<script>alert(1)</script>')).toBe('\\<script\\>alert\\(1\\)\\</script\\>');
+    });
+
+    test('escapes backticks', () => {
+        expect(escapeMarkdown('`code`')).toBe('\\`code\\`');
+    });
+
+    test('escapes ampersands', () => {
+        expect(escapeMarkdown('a&b')).toBe('a\\&b');
+    });
+
+    test('handles dotted field names', () => {
+        expect(escapeMarkdown('address.street')).toBe('address\\.street');
+    });
+
+    test('passes through digits but escapes underscores', () => {
+        // Underscore is a markdown metacharacter (emphasis), so it is escaped; digits are left as-is.
+        expect(escapeMarkdown('field_1')).toBe('field\\_1');
+    });
+});
diff --git a/src/webviews/utils/escapeMarkdown.ts b/src/webviews/utils/escapeMarkdown.ts
new file mode 100644
index 000000000..245a63a29
--- /dev/null
+++ b/src/webviews/utils/escapeMarkdown.ts
@@ -0,0 +1,15 @@
+/*---------------------------------------------------------------------------------------------
+ *  Copyright (c) Microsoft Corporation. All rights reserved.
+ *  Licensed under the MIT License. See License.txt in the project root for license information.
+ *--------------------------------------------------------------------------------------------*/
+
+/**
+ * Backslash-escapes every markdown metacharacter in `text` so that user-supplied
+ * data is rendered literally instead of being interpreted as Markdown/HTML.
+ *
+ * Escaped characters: `\`, `*`, `_`, `{`, `}`, `[`, `]`, `(`, `)`, `#`, `+`, `-`, `.`,
+ * `!`, `|`, `<`, `>`, `` ` ``, `~`, `&`. All other characters pass through unchanged.
+ */
+export function escapeMarkdown(text: string): string {
+    return text.replace(/[\\*_{}[\]()#+\-.!|<>`~&]/g, (metaChar) => `\\${metaChar}`);
+}
diff --git a/test/mongoGetCommand.test.ts b/test/mongoGetCommand.test.ts
index 7b4ce3f4d..bf34fa867 100644
--- a/test/mongoGetCommand.test.ts
+++ b/test/mongoGetCommand.test.ts
@@ -797,7 +797,6 @@ suite('scrapbook parsing Tests', () => {
         const commands: MongoCommand[] = getAllCommandsFromText(text);
         const command: MongoCommand = findCommandAtPosition(commands, new Position(0, 0));
         const generatedRegExp = (<any>nonNullProp(command, 'argumentObjects')[0]).sku;
-        console.log('generatedRegExp', generatedRegExp);
         assert.deepEqual(generatedRegExp.options, 'i');
         assert.deepEqual(generatedRegExp.pattern, '789$');
     });
@@ -838,11 +837,8 @@ suite('scrapbook parsing Tests', () => {
     // The regex parsing tests following this test should help zero-in on which case isn't handled properly.
     test('test regular expression parsing - with many special cases', () => {
         const text = `db.test1.beep.find({ sku:  /^(hello?= world).*[^0-9]+|(world\\b\\*){0,2}$/ })`;
-        console.log(text);
         const commands: MongoCommand[] = getAllCommandsFromText(text);
-        console.log('commands', commands);
         const command: MongoCommand = findCommandAtPosition(commands, new Position(0, 0));
-        console.log('command', command);
         const generatedRegExp = (<any>nonNullProp(command, 'argumentObjects')[0]).sku;
         assert.deepEqual(generatedRegExp.options, '');
         assert.deepEqual(generatedRegExp.pattern, '^(hello?= world).*[^0-9]+|(world\\b\\*){0,2}$');
diff --git a/tsconfig.json b/tsconfig.json
index 894220ad0..f8f79d3a5 100644
--- a/tsconfig.json
+++ b/tsconfig.json
@@ -30,5 +30,6 @@
     ]
       */
   },
-  "exclude": ["node_modules", ".vscode-test"]
+  "exclude": ["node_modules", ".vscode-test", "packages/*/dist"],
+  "references": [{ "path": "packages/schema-analyzer" }, { "path": "packages/documentdb-constants" }]
 }