
Commit 483c95b

fix: Gracefully handle large number of paths returned by pip show (#193)
execSync in NodeJS takes a maxBuffer argument; it does not dynamically grow the output buffer. When running `pip show` as part of path listing, we could hit an ENOBUFS error if the buffer was too small. That leaves two options:

1. Use exec (or spawn): this requires CPS-ing the code all the way up to main.
2. Use a bigger buffer.

We go with option 2 because it is not worth converting everything to callbacks just for this; all the other I/O uses sync operations anyway. For the bigger buffer, we add retry logic because it is essentially free: it doesn't make sense to expose a CLI flag for the user to tune, since all they would do is look at the error and increase the value, so we just do that for them.
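For context, a minimal sketch of the failure mode (not part of the commit): spawnSync surfaces an overflow as result.error with code 'ENOBUFS' instead of growing the buffer. The child command here is just the running Node binary, so nothing outside Node is assumed.

    import * as child_process from 'child_process';

    // A ~1 KiB payload from a child process, with maxBuffer capped well below
    // that so spawnSync reports ENOBUFS instead of returning the output.
    const result = child_process.spawnSync(process.execPath, ['-e', 'console.log("x".repeat(1024))'], {
        encoding: 'utf8',
        maxBuffer: 64,
    });

    const error = result.error as NodeJS.ErrnoException | null;
    if (error && error.code === 'ENOBUFS') {
        // This is the condition the new spawnSyncWithRetry helper retries on,
        // with a 10x larger buffer each time.
        console.log('stdout exceeded maxBuffer:', error.code);
    }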
1 parent 88377ca commit 483c95b

1 file changed (+51 −6 lines)


packages/pyright-scip/src/virtualenv/environment.ts

Lines changed: 51 additions & 6 deletions
@@ -1,5 +1,6 @@
 import * as fs from 'fs';
 import * as child_process from 'child_process';
+import * as os from 'os';
 import PythonPackage from './PythonPackage';
 import PythonEnvironment from './PythonEnvironment';
 import { withStatus } from 'src/status';
@@ -28,16 +29,60 @@ let getPipCommand = () => {
     return pipCommand;
 };
 
+function spawnSyncWithRetry(command: string, args: string[]): child_process.SpawnSyncReturns<string> {
+    let maxBuffer = 1 * 1024 * 1024; // Start with 1MB (original default)
+    const maxMemory = os.totalmem() * 0.1; // Don't use more than 10% of total system memory
+
+    while (true) {
+        const result = child_process.spawnSync(command, args, {
+            encoding: 'utf8',
+            maxBuffer: maxBuffer,
+        });
+
+        const error = result.error as NodeJS.ErrnoException | null;
+        if (error && error.code === 'ENOBUFS') {
+            const nextBuffer = maxBuffer * 10;
+            if (nextBuffer > maxMemory) {
+                throw new Error(
+                    `Command output too large: final attempt maxBuffer ${(maxBuffer / 1024 / 1024).toFixed(
+                        1
+                    )}MB (total memory: ${(maxMemory / 1024 / 1024).toFixed(1)}MB)`
+                );
+            }
+            maxBuffer = nextBuffer;
+            continue; // Retry with larger buffer
+        }
+
+        return result;
+    }
+}
+
 function pipList(): PipInformation[] {
-    return JSON.parse(child_process.execSync(`${getPipCommand()} list --format=json`).toString()) as PipInformation[];
+    const result = spawnSyncWithRetry(getPipCommand(), ['list', '--format=json']);
+
+    if (result.status !== 0) {
+        throw new Error(`pip list failed with code ${result.status}: ${result.stderr}`);
+    }
+
+    return JSON.parse(result.stdout) as PipInformation[];
 }
 
+// pipBulkShow returns the results of 'pip show', one for each package.
+//
+// It doesn't cross-check if the length of the output matches that of the input.
 function pipBulkShow(names: string[]): string[] {
-    // TODO: This probably breaks with enough names. Should batch them into 512 or whatever the max for bash would be
-    return child_process
-        .execSync(`${getPipCommand()} show -f ${names.join(' ')}`)
-        .toString()
-        .split('\n---');
+    // FIXME: The performance of this scales with the number of packages that
+    // are installed in the Python distribution, not just the number of packages
+    // that are requested. If 10K packages are installed, this can take several
+    // minutes. However, it's not super obvious if there is a more performant
+    // way to do this without hand-rolling the functionality ourselves.
+    const result = spawnSyncWithRetry(getPipCommand(), ['show', '-f', ...names]);
+
+    if (result.status !== 0) {
+        throw new Error(`pip show failed with code ${result.status}: ${result.stderr}`);
+    }
+
+    return result.stdout.split('\n---').filter((pkg) => pkg.trim());
 }
 
 export default function getEnvironment(
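As a small illustration (not part of the diff) of the parsing the new pipBulkShow return relies on: pip separates the records of `pip show` for multiple packages with a line containing only `---`, so splitting on '\n---' and filtering blanks yields one chunk per package. The sample output below is fabricated.

    // Illustrative only: a fabricated two-package 'pip show -f' style output.
    const sampleOutput = [
        'Name: requests\nVersion: 2.31.0\nFiles:\n  requests/__init__.py',
        'Name: urllib3\nVersion: 2.0.4\nFiles:\n  urllib3/__init__.py',
    ].join('\n---\n');

    // Same split-and-filter as the new pipBulkShow return value.
    const perPackage = sampleOutput.split('\n---').filter((pkg) => pkg.trim());
    console.log(perPackage.length); // 2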
