Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
20 commits
Select commit Hold shift + click to select a range
24b8cb2
Harden ls builtin: skip symlinks in -R, add MaxDirEntries limit, add …
matt-dz Mar 12, 2026
aa27d8f
Address review: add clarifying comment for MaxDirEntries check
matt-dz Mar 12, 2026
e76efda
Address review: MaxDirEntries warning instead of hard error
matt-dz Mar 12, 2026
fd87034
Early-exit directory counting to prevent OOM on oversized directories
matt-dz Mar 12, 2026
ef74af5
Paginated ls with truncated output on oversized directories
matt-dz Mar 12, 2026
00cb899
Merge branch 'main' into matt-dz/harden-ls
matt-dz Mar 12, 2026
aee9e01
Address review: fix negative offset crash, dot-dot limit slots, swall…
matt-dz Mar 12, 2026
b804c1b
Address review: cap offset at MaxDirEntries, decouple maxRead from of…
matt-dz Mar 12, 2026
ce6c63e
Address review: ignore --limit in recursive mode, document DoS tradeoffs
matt-dz Mar 12, 2026
7689681
Address review: streaming offset for O(n) pagination, ignore paginati…
matt-dz Mar 12, 2026
31e957e
Add comprehensive regression tests for ls pagination and DoS protection
matt-dz Mar 12, 2026
dce9a03
Remove PR review tag comments from regression tests
matt-dz Mar 12, 2026
9b235fd
Add ls pagination scenario tests
matt-dz Mar 12, 2026
3e8d677
Address review: sort dot entries with other entries, continue recursi…
matt-dz Mar 12, 2026
e66a826
Revert: keep early return after truncation, do not recurse past MaxDi…
matt-dz Mar 12, 2026
fef7ce1
Document intentional bash divergence: no recursion after truncation
matt-dz Mar 12, 2026
f151963
Address review: preserve ReadDir errors when truncation occurs
matt-dz Mar 12, 2026
acc7367
Extract collectDirEntries for testability, add error-during-truncatio…
matt-dz Mar 12, 2026
b5835fe
Fix Effective Go findings: use errors.Is for EOF, pre-allocate slice
matt-dz Mar 12, 2026
2f2887e
format files
matt-dz Mar 12, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion SHELL_FEATURES.md
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ Blocked features are rejected before execution with exit code 2.
- ✅ `false` — return exit code 1
- ✅ `grep [-EFGivclLnHhoqsxw] [-e PATTERN] [-m NUM] [-A NUM] [-B NUM] [-C NUM] PATTERN [FILE]...` — print lines that match patterns; uses RE2 regex engine (linear-time, no backtracking)
- ✅ `head [-n N|-c N] [-q|-v] [FILE]...` — output the first part of files (default: first 10 lines); `-z`/`--zero-terminated` and `--follow` are rejected
- ✅ `ls [-1aAdFhlpRrSt] [FILE]...` — list directory contents
- ✅ `ls [-1aAdFhlpRrSt] [--offset N] [--limit N] [FILE]...` — list directory contents; `--offset`/`--limit` are non-standard pagination flags (single-directory only, silently ignored with `-R` or multiple arguments, capped at 1,000 entries per call); offset operates on filesystem order (not sorted order) for O(n) memory
- ✅ `printf FORMAT [ARGUMENT]...` — format and print data to stdout; supports `%s`, `%b`, `%c`, `%d`, `%i`, `%o`, `%u`, `%x`, `%X`, `%e`, `%E`, `%f`, `%F`, `%g`, `%G`, `%%`; format reuse for excess arguments; `%n` rejected (security risk); `-v` rejected
- ✅ `strings [-a] [-n MIN] [-t o|d|x] [-o] [-f] [-s SEP] [FILE]...` — print printable character sequences in files (default min length 4); offsets via `-t`/`-o`; filename prefix via `-f`; custom separator via `-s`
- ✅ `tail [-n N|-c N] [-q|-v] [-z] [FILE]...` — output the last part of files (default: last 10 lines); supports `+N` offset mode; `-f`/`--follow` is rejected
Expand Down
93 changes: 93 additions & 0 deletions interp/allowed_paths.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ package interp

import (
"context"
"errors"
"fmt"
"io"
"io/fs"
Expand Down Expand Up @@ -192,6 +193,98 @@ func (s *pathSandbox) readDir(ctx context.Context, path string) ([]fs.DirEntry,
return entries, nil
}

// readDirLimited reads directory entries, skipping the first offset entries
// and returning up to maxRead entries sorted by name within the read window.
// Returns (entries, truncated, error). When truncated is true, the directory
// contained more entries beyond the returned set.
//
// The offset applies to raw directory order, before sorting: pages of a
// sorted listing may therefore overlap or miss names. That tradeoff keeps
// memory at O(n) for any offset, where n = min(maxRead, entries).
func (s *pathSandbox) readDirLimited(ctx context.Context, path string, offset, maxRead int) ([]fs.DirEntry, bool, error) {
	absPath := toAbs(path, HandlerCtx(ctx).Dir)
	root, relPath, ok := s.resolve(absPath)
	if !ok {
		return nil, false, &os.PathError{Op: "readdir", Path: path, Err: os.ErrPermission}
	}
	dirFile, err := root.Open(relPath)
	if err != nil {
		return nil, false, portablePathError(err)
	}
	defer dirFile.Close()

	// Defense-in-depth: clamp non-positive values. The open above still
	// runs first so sandbox/permission errors surface even for maxRead<=0.
	if offset < 0 {
		offset = 0
	}
	if maxRead <= 0 {
		return nil, false, nil
	}

	const batchSize = 256
	reader := func(n int) ([]fs.DirEntry, error) { return dirFile.ReadDir(n) }
	entries, truncated, readErr := collectDirEntries(reader, batchSize, offset, maxRead)
	if readErr != nil {
		return entries, truncated, portablePathError(readErr)
	}
	return entries, truncated, nil
}

// collectDirEntries pulls directory entries from readBatch in chunks of
// batchSize, discards the first offset raw entries, and accumulates at most
// maxRead entries. It reports (entries, truncated, lastErr); entries come
// back sorted by name, and truncated signals the directory held more entries
// than were returned.
//
// NOTE: Truncation happens before the whole directory is read, so sorting is
// only correct within the window actually read — names smaller than anything
// returned may exist past the cutoff. Reading everything to sort globally
// would reopen the DoS hole this limit closes (a directory with millions of
// files would OOM or stall); the truncation flag lets callers warn instead.
func collectDirEntries(readBatch func(n int) ([]fs.DirEntry, error), batchSize, offset, maxRead int) ([]fs.DirEntry, bool, error) {
	collected := make([]fs.DirEntry, 0, maxRead)
	var (
		truncated bool
		skipCount int
		readErr   error
	)

	for {
		batch, batchErr := readBatch(batchSize)
		for _, entry := range batch {
			if skipCount < offset {
				skipCount++
				continue
			}
			collected = append(collected, entry)
		}
		// A batch can carry both entries and a failure; remember real
		// failures (io.EOF only means end-of-directory) before deciding
		// whether this batch pushed us past the limit, so an error that
		// arrives alongside the truncating batch is still reported.
		if batchErr != nil && !errors.Is(batchErr, io.EOF) {
			readErr = batchErr
		}
		if len(collected) > maxRead {
			truncated = true
			break
		}
		if batchErr != nil {
			break
		}
	}

	// Name-sort whatever was read...
	slices.SortFunc(collected, func(a, b fs.DirEntry) int {
		return strings.Compare(a.Name(), b.Name())
	})

	// ...then drop the overshoot so callers see at most maxRead entries.
	if truncated && len(collected) > maxRead {
		collected = collected[:maxRead]
	}

	return collected, truncated, readErr
}

// stat implements the restricted stat policy. It uses os.Root.Stat for
// metadata-only access — no file descriptor is opened, so it works on
// unreadable files and does not block on special files (e.g. FIFOs).
Expand Down
238 changes: 238 additions & 0 deletions interp/allowed_paths_internal_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,18 +9,41 @@ import (
"bytes"
"context"
"errors"
"fmt"
"io"
"io/fs"
"os"
"os/exec"
"path/filepath"
"runtime"
"strings"
"testing"
"time"

"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
"mvdan.cc/sh/v3/syntax"
)

// fakeDirEntry is a minimal fs.DirEntry for testing collectDirEntries.
// It always reports a regular (non-directory) file.
type fakeDirEntry struct {
	name string
}

func (e fakeDirEntry) Name() string               { return e.name }
func (e fakeDirEntry) IsDir() bool                { return false }
func (e fakeDirEntry) Type() fs.FileMode          { return 0 }
func (e fakeDirEntry) Info() (fs.FileInfo, error) { return fakeFileInfo{name: e.name}, nil }

// fakeFileInfo is the fs.FileInfo returned by fakeDirEntry.Info:
// an empty 0644 regular file with a zero ModTime.
type fakeFileInfo struct{ name string }

func (i fakeFileInfo) Name() string       { return i.name }
func (i fakeFileInfo) Size() int64        { return 0 }
func (i fakeFileInfo) Mode() fs.FileMode  { return 0644 }
func (i fakeFileInfo) ModTime() time.Time { return time.Time{} }
func (i fakeFileInfo) IsDir() bool        { return false }
func (i fakeFileInfo) Sys() any           { return nil }

func systemExecAllowedPaths(t *testing.T) []string {
t.Helper()
if runtime.GOOS == "windows" {
Expand Down Expand Up @@ -197,3 +220,218 @@ func TestPathSandboxOpenRejectsWriteFlags(t *testing.T) {
require.NoError(t, err)
f.Close()
}

// TestReadDirLimited covers pathSandbox.readDirLimited against a real temp
// directory: pagination (offset/maxRead), truncation reporting, sandbox
// enforcement, EOF handling, and clamping of out-of-range arguments.
func TestReadDirLimited(t *testing.T) {
	dir := t.TempDir()

	// Create 10 files.
	for i := range 10 {
		require.NoError(t, os.WriteFile(filepath.Join(dir, fmt.Sprintf("f%02d", i)), nil, 0644))
	}

	sb, err := newPathSandbox([]string{dir})
	require.NoError(t, err)
	defer sb.Close()

	// Relative paths given to readDirLimited resolve against HandlerContext.Dir.
	ctx := context.WithValue(context.Background(), handlerCtxKey{}, HandlerContext{Dir: dir})

	t.Run("maxRead below count returns truncated with first N entries", func(t *testing.T) {
		entries, truncated, err := sb.readDirLimited(ctx, ".", 0, 5)
		require.NoError(t, err)
		assert.True(t, truncated)
		assert.Len(t, entries, 5)
		// Entries are sorted within the read window.
		for i := 1; i < len(entries); i++ {
			assert.True(t, entries[i-1].Name() < entries[i].Name(), "entries should be sorted")
		}
	})

	t.Run("maxRead above count returns all entries not truncated", func(t *testing.T) {
		entries, truncated, err := sb.readDirLimited(ctx, ".", 0, 20)
		require.NoError(t, err)
		assert.False(t, truncated)
		assert.Len(t, entries, 10)
	})

	t.Run("empty directory", func(t *testing.T) {
		emptyDir := filepath.Join(dir, "empty")
		require.NoError(t, os.Mkdir(emptyDir, 0755))

		entries, truncated, err := sb.readDirLimited(ctx, "empty", 0, 10)
		require.NoError(t, err)
		assert.False(t, truncated)
		assert.Empty(t, entries)
	})

	t.Run("path outside sandbox returns permission error", func(t *testing.T) {
		outsideDir := t.TempDir()
		_, _, err := sb.readDirLimited(ctx, outsideDir, 0, 10)
		require.Error(t, err)
		assert.ErrorIs(t, err, os.ErrPermission)
	})

	t.Run("io.EOF is not returned as error", func(t *testing.T) {
		// Use a fresh directory to avoid interference from other subtests.
		eofDir := filepath.Join(dir, "eoftest")
		require.NoError(t, os.Mkdir(eofDir, 0755))
		for i := range 5 {
			require.NoError(t, os.WriteFile(filepath.Join(eofDir, fmt.Sprintf("g%02d", i)), nil, 0644))
		}
		entries, truncated, err := sb.readDirLimited(ctx, "eoftest", 0, 1000)
		require.NoError(t, err, "io.EOF should not be returned as error")
		assert.False(t, truncated)
		assert.Len(t, entries, 5)
	})

	t.Run("non-positive maxRead returns empty", func(t *testing.T) {
		entries, truncated, err := sb.readDirLimited(ctx, ".", 0, 0)
		require.NoError(t, err)
		assert.False(t, truncated)
		assert.Empty(t, entries)

		entries, truncated, err = sb.readDirLimited(ctx, ".", 0, -5)
		require.NoError(t, err)
		assert.False(t, truncated)
		assert.Empty(t, entries)
	})

	t.Run("offset skips entries", func(t *testing.T) {
		// Use a fresh directory to avoid interference from other subtests.
		// NOTE(review): later subtests reuse this "offsettest" directory, so
		// subtest order matters within this test function.
		offsetDir := filepath.Join(dir, "offsettest")
		require.NoError(t, os.Mkdir(offsetDir, 0755))
		for i := range 10 {
			require.NoError(t, os.WriteFile(filepath.Join(offsetDir, fmt.Sprintf("h%02d", i)), nil, 0644))
		}

		// Read all 10 entries with no offset for reference.
		all, _, err := sb.readDirLimited(ctx, "offsettest", 0, 100)
		require.NoError(t, err)
		assert.Len(t, all, 10)

		// Skip first 5 entries, read up to 100.
		entries, truncated, err := sb.readDirLimited(ctx, "offsettest", 5, 100)
		require.NoError(t, err)
		assert.False(t, truncated)
		assert.Len(t, entries, 5, "should return remaining 5 entries after skipping 5")
	})

	t.Run("offset beyond count returns empty", func(t *testing.T) {
		// Relies on "offsettest" (10 files) created by the subtest above.
		entries, truncated, err := sb.readDirLimited(ctx, "offsettest", 100, 10)
		require.NoError(t, err)
		assert.False(t, truncated)
		assert.Empty(t, entries)
	})

	t.Run("offset plus maxRead with truncation", func(t *testing.T) {
		// Skip 3, read 3 out of 10 => should get 3 entries, truncated.
		entries, truncated, err := sb.readDirLimited(ctx, "offsettest", 3, 3)
		require.NoError(t, err)
		assert.True(t, truncated, "should be truncated since 10 - 3 > 3")
		assert.Len(t, entries, 3)
		// Entries should be sorted within the window.
		for i := 1; i < len(entries); i++ {
			assert.True(t, entries[i-1].Name() < entries[i].Name(), "entries should be sorted")
		}
	})

	t.Run("negative offset clamped to zero", func(t *testing.T) {
		entries, truncated, err := sb.readDirLimited(ctx, "offsettest", -10, 100)
		require.NoError(t, err)
		assert.False(t, truncated)
		assert.Len(t, entries, 10, "negative offset should be treated as 0")
	})
}

// TestCollectDirEntries unit-tests collectDirEntries with a scripted batch
// reader, pinning error propagation (including an error arriving in the same
// batch that triggers truncation), io.EOF swallowing, offset skipping across
// batches, and name sorting of the collected window.
func TestCollectDirEntries(t *testing.T) {
	// makeEntries builds fs.DirEntry fixtures (fakeDirEntry) from bare names.
	makeEntries := func(names ...string) []fs.DirEntry {
		out := make([]fs.DirEntry, len(names))
		for i, n := range names {
			out[i] = fakeDirEntry{name: n}
		}
		return out
	}

	t.Run("error in same batch as truncation is preserved", func(t *testing.T) {
		ioErr := errors.New("disk I/O error")
		callCount := 0
		reader := func(n int) ([]fs.DirEntry, error) {
			callCount++
			// First batch: 5 entries, no error.
			// Second batch: 3 entries + I/O error.
			// With maxRead=6, the second batch pushes us to 8 > 6, triggering truncation.
			// The error must still be preserved.
			if callCount == 1 {
				return makeEntries("f01", "f02", "f03", "f04", "f05"), nil
			}
			return makeEntries("f06", "f07", "f08"), ioErr
		}

		entries, truncated, err := collectDirEntries(reader, 10, 0, 6)
		assert.True(t, truncated, "should be truncated")
		assert.Len(t, entries, 6, "should trim to maxRead")
		assert.ErrorIs(t, err, ioErr, "I/O error must be preserved even when truncation occurs")
	})

	t.Run("EOF is not returned as error", func(t *testing.T) {
		callCount := 0
		reader := func(n int) ([]fs.DirEntry, error) {
			callCount++
			if callCount == 1 {
				return makeEntries("f01", "f02"), io.EOF
			}
			return nil, io.EOF
		}

		entries, truncated, err := collectDirEntries(reader, 10, 0, 100)
		assert.False(t, truncated)
		assert.Len(t, entries, 2)
		assert.NoError(t, err, "io.EOF should not be returned as error")
	})

	t.Run("offset skips entries across batches", func(t *testing.T) {
		// Offset 2 spans the first batch boundary: f01/f02 are skipped,
		// f03 from batch one and f04/f05 from batch two are kept.
		callCount := 0
		reader := func(n int) ([]fs.DirEntry, error) {
			callCount++
			if callCount == 1 {
				return makeEntries("f01", "f02", "f03"), nil
			}
			if callCount == 2 {
				return makeEntries("f04", "f05"), io.EOF
			}
			return nil, io.EOF
		}

		entries, truncated, err := collectDirEntries(reader, 10, 2, 100)
		assert.False(t, truncated)
		assert.NoError(t, err)
		assert.Len(t, entries, 3, "should skip first 2, return remaining 3")
		assert.Equal(t, "f03", entries[0].Name())
		assert.Equal(t, "f04", entries[1].Name())
		assert.Equal(t, "f05", entries[2].Name())
	})

	t.Run("error without truncation is preserved", func(t *testing.T) {
		ioErr := errors.New("permission denied")
		reader := func(n int) ([]fs.DirEntry, error) {
			return makeEntries("f01", "f02"), ioErr
		}

		entries, truncated, err := collectDirEntries(reader, 10, 0, 100)
		assert.False(t, truncated)
		assert.Len(t, entries, 2)
		assert.ErrorIs(t, err, ioErr)
	})

	t.Run("entries are sorted by name", func(t *testing.T) {
		// Reader yields names out of order; output must be sorted.
		reader := func(n int) ([]fs.DirEntry, error) {
			return makeEntries("cherry", "apple", "banana"), io.EOF
		}

		entries, truncated, err := collectDirEntries(reader, 10, 0, 100)
		assert.False(t, truncated)
		assert.NoError(t, err)
		assert.Equal(t, "apple", entries[0].Name())
		assert.Equal(t, "banana", entries[1].Name())
		assert.Equal(t, "cherry", entries[2].Name())
	})
}
6 changes: 6 additions & 0 deletions interp/builtins/builtins.go
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,12 @@ type CallContext struct {
// Entries are returned sorted by name.
ReadDir func(ctx context.Context, path string) ([]fs.DirEntry, error)

// ReadDirLimited reads directory entries, skipping the first offset entries
// and returning up to maxRead entries sorted by name within the read window.
// Returns (entries, truncated, error). When truncated is true, the directory
// contained more entries beyond the returned set.
ReadDirLimited func(ctx context.Context, path string, offset, maxRead int) ([]fs.DirEntry, bool, error)

// StatFile returns file info within the shell's path restrictions (follows symlinks).
StatFile func(ctx context.Context, path string) (fs.FileInfo, error)

Expand Down
Loading
Loading