Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion cycode/cli/apps/scan/pre_receive/pre_receive_command.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,9 @@ def pre_receive_command(
timeout = configuration_manager.get_pre_receive_command_timeout(command_scan_type)
with TimeoutAfter(timeout):
branch_update_details = parse_pre_receive_input()
commit_range = calculate_pre_receive_commit_range(branch_update_details)
commit_range = calculate_pre_receive_commit_range(
repo_path=os.getcwd(), branch_update_details=branch_update_details
)
if not commit_range:
logger.info(
'No new commits found for pushed branch, %s',
Expand Down
34 changes: 27 additions & 7 deletions cycode/cli/files_collector/commit_range_documents.py
Original file line number Diff line number Diff line change
Expand Up @@ -104,19 +104,27 @@ def collect_commit_range_diff_documents(
return commit_documents_to_scan


def calculate_pre_receive_commit_range(repo_path: str, branch_update_details: str) -> Optional[str]:
    """Build the commit range that a pre-receive scan should cover.

    Args:
        repo_path: Path of the repository that is receiving the push.
        branch_update_details: A single branch update line from the pre-receive hook input.

    Returns:
        ``None`` when the branch is being deleted or when no not-yet-updated commit is found;
        the end commit alone when the oldest not-yet-updated commit has no parent;
        otherwise ``'<oldest>~1...<end>'``.
    """
    end_commit = _get_end_commit_from_branch_update_details(branch_update_details)

    # branch is deleted, no need to perform scan
    if end_commit == consts.EMPTY_COMMIT_SHA:
        return None

    repo = git_proxy.get_repo(repo_path)
    start_commit = _get_oldest_unupdated_commit_for_branch(repo, end_commit)

    # no new commit to update found
    if not start_commit:
        return None

    # If the oldest not-yet-updated commit has no parent (root commit or orphaned history),
    # using '~1' will fail. In that case, scan from the end commit, which effectively
    # includes the entire history reachable from it (which is exactly what we need here).
    if not repo.commit(start_commit).parents:
        return end_commit

    return f'{start_commit}~1...{end_commit}'


Expand All @@ -126,10 +134,10 @@ def _get_end_commit_from_branch_update_details(update_details: str) -> str:
return end_commit


def _get_oldest_unupdated_commit_for_branch(commit: str) -> Optional[str]:
def _get_oldest_unupdated_commit_for_branch(repo: 'Repo', commit: str) -> Optional[str]:
# get a list of commits by chronological order that are not in the remote repository yet
# more info about rev-list command: https://git-scm.com/docs/git-rev-list
repo = git_proxy.get_repo(os.getcwd())

not_updated_commits = repo.git.rev_list(commit, '--topo-order', '--reverse', '--not', '--all')

commits = not_updated_commits.splitlines()
Expand Down Expand Up @@ -199,8 +207,7 @@ def parse_pre_receive_input() -> str:

:return: First branch update details (input's first line)
"""
# FIXME(MarshalX): this blocks main thread forever if called outside of pre-receive hook
pre_receive_input = sys.stdin.read().strip()
pre_receive_input = _read_hook_input_from_stdin()
if not pre_receive_input:
raise ValueError(
'Pre receive input was not found. Make sure that you are using this command only in pre-receive hook'
Expand All @@ -222,7 +229,7 @@ def parse_pre_push_input() -> str:

:return: First, push update details (input's first line)
""" # noqa: E501
pre_push_input = sys.stdin.read().strip()
pre_push_input = _read_hook_input_from_stdin()
if not pre_push_input:
raise ValueError(
'Pre push input was not found. Make sure that you are using this command only in pre-push hook'
Expand All @@ -232,6 +239,19 @@ def parse_pre_push_input() -> str:
return pre_push_input.splitlines()[0]


def _read_hook_input_from_stdin() -> str:
    """Read input from stdin when called from a hook.

    If called manually from the command line, return an empty string so it doesn't block the main thread.

    Returns:
        Input from stdin with surrounding whitespace stripped, or an empty string when stdin is
        missing or attached to a terminal.
    """
    # sys.stdin can be None when the interpreter is started without a standard input stream;
    # treat that the same as "no hook input" instead of failing on the attribute access.
    if sys.stdin is None or sys.stdin.isatty():
        return ''
    return sys.stdin.read().strip()


def _get_default_branches_for_merge_base(repo: 'Repo') -> list[str]:
"""Get a list of default branches to try for merge base calculation.

Expand Down
176 changes: 176 additions & 0 deletions tests/cli/files_collector/test_commit_range_documents.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,13 +12,22 @@
from cycode.cli.files_collector.commit_range_documents import (
_get_default_branches_for_merge_base,
calculate_pre_push_commit_range,
calculate_pre_receive_commit_range,
get_diff_file_path,
get_safe_head_reference_for_diff,
parse_commit_range,
parse_pre_push_input,
parse_pre_receive_input,
)
from cycode.cli.utils.path_utils import get_path_by_os

DUMMY_SHA_0 = '0' * 40
DUMMY_SHA_1 = '1' * 40
DUMMY_SHA_2 = '2' * 40
DUMMY_SHA_A = 'a' * 40
DUMMY_SHA_B = 'b' * 40
DUMMY_SHA_C = 'c' * 40


@contextmanager
def git_repository(path: str) -> Generator[Repo, None, None]:
Expand Down Expand Up @@ -871,3 +880,170 @@ def test_single_commit_spec(self) -> None:

parsed_from, parsed_to = parse_commit_range(a, temp_dir)
assert (parsed_from, parsed_to) == (a, c)


class TestParsePreReceiveInput:
    """Test the parse_pre_receive_input function with various pre-receive hook input scenarios."""

    def test_parse_single_update_input(self) -> None:
        """Test parsing a single branch update input."""
        pre_receive_input = f'{DUMMY_SHA_1} {DUMMY_SHA_2} refs/heads/main'

        with patch('sys.stdin', StringIO(pre_receive_input)):
            result = parse_pre_receive_input()
            assert result == pre_receive_input

    def test_parse_multiple_update_input_returns_first_line(self) -> None:
        """Test parsing multiple branch updates returns only the first line."""
        # The second line intentionally starts at column 0: it is part of the string literal.
        pre_receive_input = f"""{DUMMY_SHA_0} {DUMMY_SHA_A} refs/heads/main
{DUMMY_SHA_B} {DUMMY_SHA_C} refs/heads/feature"""

        with patch('sys.stdin', StringIO(pre_receive_input)):
            result = parse_pre_receive_input()
            assert result == f'{DUMMY_SHA_0} {DUMMY_SHA_A} refs/heads/main'

    def test_parse_empty_input_raises_error(self) -> None:
        """Test that empty input raises ValueError."""
        match = 'Pre receive input was not found'
        with patch('sys.stdin', StringIO('')), pytest.raises(ValueError, match=match):
            parse_pre_receive_input()


class TestCalculatePreReceiveCommitRange:
    """Test the calculate_pre_receive_commit_range function with representative scenarios.

    Repository-backed tests pair a bare "server" repository, whose path is passed to the function
    under test, with a "work" repository that creates commits and pushes them to the server.
    """

    @staticmethod
    @contextmanager
    def _server_and_work_repos() -> Generator[tuple[str, Repo, Repo], None, None]:
        """Yield ``(server_dir, server_repo, work_repo)`` backed by temporary directories.

        The server repository is bare. The work repository is initialized on ``main`` and has the
        server registered as its ``origin`` remote. Both repositories are closed before their
        temporary directories are removed.
        """
        with tempfile.TemporaryDirectory() as server_dir, tempfile.TemporaryDirectory() as work_dir:
            server_repo = Repo.init(server_dir, bare=True)
            try:
                work_repo = Repo.init(work_dir, b='main')
                try:
                    work_repo.create_remote('origin', server_dir)
                    yield server_dir, server_repo, work_repo
                finally:
                    work_repo.close()
            finally:
                server_repo.close()

    @staticmethod
    def _commit_file(repo: Repo, file_name: str, content: str, message: str):  # noqa: ANN205
        """Write ``content`` to ``file_name`` in the working tree, stage it and return the new commit."""
        with open(os.path.join(repo.working_tree_dir, file_name), 'w') as f:
            f.write(content)
        repo.index.add([file_name])
        return repo.index.commit(message)

    @staticmethod
    def _push_objects_without_ref(work_repo: Repo, server_repo: Repo, commit_sha: str) -> None:
        """Push objects to a temporary ref and delete it so the server has the objects but no ref to them."""
        work_repo.remotes.origin.push(f'{commit_sha}:refs/tmp/hold')
        # Reuse the already-open server repo handle instead of opening a second one that is never closed.
        server_repo.git.update_ref('-d', 'refs/tmp/hold')

    def test_branch_deletion_returns_none(self) -> None:
        """When end commit is all zeros (deletion), no scan is needed."""
        update_details = f'{DUMMY_SHA_A} {consts.EMPTY_COMMIT_SHA} refs/heads/feature'
        assert calculate_pre_receive_commit_range(os.getcwd(), update_details) is None

    def test_no_new_commits_returns_none(self) -> None:
        """When there are no commits not in remote, return None."""
        with self._server_and_work_repos() as (server_dir, _, work_repo):
            # Create a single commit and push it to the server as main (end commit is already on a ref)
            end_commit = self._commit_file(work_repo, 'file.txt', 'base', 'initial')
            work_repo.remotes.origin.push('main:main')

            update_details = f'{DUMMY_SHA_A} {end_commit.hexsha} refs/heads/main'
            assert calculate_pre_receive_commit_range(server_dir, update_details) is None

    def test_returns_triple_dot_range_from_oldest_unupdated(self) -> None:
        """Returns '<oldest>~1...<end>' when there are new commits to scan."""
        with self._server_and_work_repos() as (server_dir, server_repo, work_repo):
            # Create commit A and push it to server as main (server has A on a ref)
            self._commit_file(work_repo, 'a.txt', 'A', 'A')
            work_repo.remotes.origin.push('main:main')

            # Create commits B and C locally (not yet on server ref)
            b_commit = self._commit_file(work_repo, 'b.txt', 'B', 'B')
            end_commit = self._commit_file(work_repo, 'c.txt', 'C', 'C')

            # The objects exist on the server but are not reachable from any ref.
            self._push_objects_without_ref(work_repo, server_repo, end_commit.hexsha)

            update_details = f'{DUMMY_SHA_A} {end_commit.hexsha} refs/heads/main'
            result = calculate_pre_receive_commit_range(server_dir, update_details)
            assert result == f'{b_commit.hexsha}~1...{end_commit.hexsha}'

    def test_initial_oldest_commit_without_parent_returns_single_commit_range(self) -> None:
        """If oldest commit has no parent, avoid '~1' and scan from end commit only."""
        with self._server_and_work_repos() as (server_dir, server_repo, work_repo):
            # Create a single root commit locally
            end_commit = self._commit_file(work_repo, 'root.txt', 'root', 'root')

            # Server ends up with the objects but no refs
            self._push_objects_without_ref(work_repo, server_repo, end_commit.hexsha)

            update_details = f'{DUMMY_SHA_A} {end_commit.hexsha} refs/heads/main'
            result = calculate_pre_receive_commit_range(server_dir, update_details)
            assert result == end_commit.hexsha

    def test_initial_oldest_commit_without_parent_with_two_commits_returns_single_commit_range(self) -> None:
        """If there are two new commits and the oldest has no parent, avoid '~1' and scan from end commit only."""
        with self._server_and_work_repos() as (server_dir, server_repo, work_repo):
            # Create two commits locally: oldest has no parent, second on top
            self._commit_file(work_repo, 'a.txt', 'A', 'A')
            end_commit = self._commit_file(work_repo, 'd.txt', 'D', 'D')

            # Server ends up with the objects but no refs
            self._push_objects_without_ref(work_repo, server_repo, end_commit.hexsha)

            update_details = f'{consts.EMPTY_COMMIT_SHA} {end_commit.hexsha} refs/heads/main'
            result = calculate_pre_receive_commit_range(server_dir, update_details)
            assert result == end_commit.hexsha