Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,9 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/).
### Changed
- Updated Coana CLI to v14.12.148.

### Fixed
- Prevent out-of-memory (heap exhaustion) crashes in large monorepo scans by filtering file paths as they are streamed, instead of accumulating every path in memory before filtering.

## [2.1.0](https://github.com/SocketDev/socket-cli/releases/tag/v2.1.0) - 2025-11-02

### Added
Expand Down
45 changes: 35 additions & 10 deletions packages/cli/src/utils/fs/glob.mts
Original file line number Diff line number Diff line change
Expand Up @@ -165,6 +165,14 @@ export function filterBySupportedScanFiles(
return filepaths.filter(p => micromatch.some(p, patterns, { dot: true }))
}

/**
 * Builds a reusable predicate that reports whether a file path matches any of
 * the supported scan-file patterns.
 *
 * The pattern list is derived once, when the predicate is created, so each
 * later call only performs the match itself.
 *
 * @param supportedFiles - Supported-files payload from the Socket SDK.
 * @returns A function returning `true` when the given path matches at least
 *   one supported pattern (dotfiles are matched too).
 */
export function createSupportedFilesFilter(
  supportedFiles: SocketSdkSuccessResult<'getReportSupportedFiles'>['data'],
): (filepath: string) => boolean {
  const supportedPatterns = getSupportedFilePatterns(supportedFiles)

  function isSupportedFile(filepath: string): boolean {
    return micromatch.some(filepath, supportedPatterns, { dot: true })
  }

  return isSupportedFile
}

export function getSupportedFilePatterns(
supportedFiles: SocketSdkSuccessResult<'getReportSupportedFiles'>['data'],
): string[] {
Expand All @@ -179,6 +187,10 @@ export function getSupportedFilePatterns(
}

/**
 * Options accepted by `globWithGitIgnore`: every `GlobOptions` field plus the
 * extra fields declared below.
 */
type GlobWithGitIgnoreOptions = GlobOptions & {
// Optional predicate applied to each path as it is streamed from the glob.
// Only paths for which it returns true are added to the result array, so
// non-matching paths are never accumulated in memory.
// This is critical for memory efficiency when scanning large monorepos.
// Supplying a filter also makes `globWithGitIgnore` skip its non-streaming
// fast path and take the streaming path instead.
filter?: ((filepath: string) => boolean) | undefined
// NOTE(review): presumably the parsed Socket YAML config; how it is consumed
// is not visible in this hunk — confirm against `globWithGitIgnore`.
socketConfig?: SocketYml | undefined
}

Expand All @@ -188,6 +200,7 @@ export async function globWithGitIgnore(
): Promise<string[]> {
const {
cwd = process.cwd(),
filter,
socketConfig,
...additionalOptions
} = { __proto__: null, ...options } as GlobWithGitIgnoreOptions
Expand Down Expand Up @@ -244,27 +257,39 @@ export async function globWithGitIgnore(
...additionalOptions,
} as GlobOptions

if (!hasNegatedPattern) {
// When no filter is provided and no negated patterns exist, use the fast path.
if (!hasNegatedPattern && !filter) {
return await fastGlob.glob(patterns as string[], globOptions)
}

// Add support for negated "ignore" patterns which many globbing libraries,
// including 'fast-glob', 'globby', and 'tinyglobby', lack support for.
const filtered: string[] = []
const ig = ignore().add([...ignores])
// Use streaming to avoid unbounded memory accumulation.
// This is critical for large monorepos with 100k+ files.
const results: string[] = []
const ig = hasNegatedPattern ? ignore().add([...ignores]) : null
const stream = fastGlob.globStream(
patterns as string[],
globOptions,
) as AsyncIterable<string>
for await (const p of stream) {
// Note: the input files must be INSIDE the cwd. If you get strange looking
// relative path errors here, most likely your path is outside the given cwd.
const relPath = globOptions.absolute ? path.relative(cwd, p) : p
if (!ig.ignores(relPath)) {
filtered.push(p)
// Check gitignore patterns with negation support.
if (ig) {
// Note: the input files must be INSIDE the cwd. If you get strange looking
// relative path errors here, most likely your path is outside the given cwd.
const relPath = globOptions.absolute ? path.relative(cwd, p) : p
if (ig.ignores(relPath)) {
continue
}
}
// Apply the optional filter to reduce memory usage.
// When scanning large monorepos, this filters early (e.g., to manifest files only)
// instead of accumulating all 100k+ files and filtering later.
if (filter && !filter(p)) {
continue
}
results.push(p)
}
return filtered
return results
}

export async function globWorkspace(
Expand Down
12 changes: 8 additions & 4 deletions packages/cli/src/utils/fs/path-resolve.mts
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ import { WIN32 } from '@socketsecurity/lib/constants/platform'
import { isDirSync } from '@socketsecurity/lib/fs'

import {
filterBySupportedScanFiles,
createSupportedFilesFilter,
globWithGitIgnore,
pathsToGlobPatterns,
} from './glob.mts'
Expand Down Expand Up @@ -127,13 +127,17 @@ export async function getPackageFilesForScan(
...options,
} as PackageFilesForScanOptions

const filepaths = await globWithGitIgnore(
// Apply the supported files filter during streaming to avoid accumulating
// all files in memory. This is critical for large monorepos with 100k+ files
// where accumulating all paths before filtering causes OOM errors.
const filter = createSupportedFilesFilter(supportedFiles)

return await globWithGitIgnore(
pathsToGlobPatterns(inputPaths, options?.cwd),
{
cwd,
filter,
socketConfig,
},
)

return filterBySupportedScanFiles(filepaths!, supportedFiles)
}