From 91a73980e48adc0735b5a10330ec6fda4839e455 Mon Sep 17 00:00:00 2001 From: gotbadger Date: Fri, 21 Nov 2025 09:38:25 +0000 Subject: [PATCH] CM-55207: improve testing and cover some edge cases in commit pre-receive --- .../scan/pre_receive/pre_receive_command.py | 4 +- .../files_collector/commit_range_documents.py | 34 +++- .../test_commit_range_documents.py | 176 ++++++++++++++++++ 3 files changed, 206 insertions(+), 8 deletions(-) diff --git a/cycode/cli/apps/scan/pre_receive/pre_receive_command.py b/cycode/cli/apps/scan/pre_receive/pre_receive_command.py index 3b85dc9e..f6265fd2 100644 --- a/cycode/cli/apps/scan/pre_receive/pre_receive_command.py +++ b/cycode/cli/apps/scan/pre_receive/pre_receive_command.py @@ -47,7 +47,9 @@ def pre_receive_command( timeout = configuration_manager.get_pre_receive_command_timeout(command_scan_type) with TimeoutAfter(timeout): branch_update_details = parse_pre_receive_input() - commit_range = calculate_pre_receive_commit_range(branch_update_details) + commit_range = calculate_pre_receive_commit_range( + repo_path=os.getcwd(), branch_update_details=branch_update_details + ) if not commit_range: logger.info( 'No new commits found for pushed branch, %s', diff --git a/cycode/cli/files_collector/commit_range_documents.py b/cycode/cli/files_collector/commit_range_documents.py index 8f8eccac..2ca54dd5 100644 --- a/cycode/cli/files_collector/commit_range_documents.py +++ b/cycode/cli/files_collector/commit_range_documents.py @@ -104,19 +104,27 @@ def collect_commit_range_diff_documents( return commit_documents_to_scan -def calculate_pre_receive_commit_range(branch_update_details: str) -> Optional[str]: +def calculate_pre_receive_commit_range(repo_path: str, branch_update_details: str) -> Optional[str]: end_commit = _get_end_commit_from_branch_update_details(branch_update_details) # branch is deleted, no need to perform scan if end_commit == consts.EMPTY_COMMIT_SHA: return None - start_commit = _get_oldest_unupdated_commit_for_branch(end_commit) + repo = git_proxy.get_repo(repo_path) + start_commit = _get_oldest_unupdated_commit_for_branch(repo, end_commit) # no new commit to update found if not start_commit: return None + # If the oldest not-yet-updated commit has no parent (root commit or orphaned history), + # using '~1' will fail. In that case, scan from the end commit, which effectively + # includes the entire history reachable from it (which is exactly what we need here). + + if not bool(repo.commit(start_commit).parents): + return f'{end_commit}' + return f'{start_commit}~1...{end_commit}' @@ -126,10 +134,10 @@ def _get_end_commit_from_branch_update_details(update_details: str) -> str: return end_commit -def _get_oldest_unupdated_commit_for_branch(commit: str) -> Optional[str]: +def _get_oldest_unupdated_commit_for_branch(repo: 'Repo', commit: str) -> Optional[str]: # get a list of commits by chronological order that are not in the remote repository yet # more info about rev-list command: https://git-scm.com/docs/git-rev-list - repo = git_proxy.get_repo(os.getcwd()) + not_updated_commits = repo.git.rev_list(commit, '--topo-order', '--reverse', '--not', '--all') commits = not_updated_commits.splitlines() @@ -199,8 +207,7 @@ def parse_pre_receive_input() -> str: :return: First branch update details (input's first line) """ - # FIXME(MarshalX): this blocks main thread forever if called outside of pre-receive hook - pre_receive_input = sys.stdin.read().strip() + pre_receive_input = _read_hook_input_from_stdin() if not pre_receive_input: raise ValueError( 'Pre receive input was not found. Make sure that you are using this command only in pre-receive hook' @@ -222,7 +229,7 @@ def parse_pre_push_input() -> str: :return: First, push update details (input's first line) """ # noqa: E501 - pre_push_input = sys.stdin.read().strip() + pre_push_input = _read_hook_input_from_stdin() if not pre_push_input: raise ValueError( 'Pre push input was not found. Make sure that you are using this command only in pre-push hook' @@ -232,6 +239,19 @@ def parse_pre_push_input() -> str: return pre_push_input.splitlines()[0] +def _read_hook_input_from_stdin() -> str: + """Read input from stdin when called from a hook. + + If called manually from the command line, return an empty string so it doesn't block the main thread. + + Returns: + Input from stdin + """ + if sys.stdin.isatty(): + return '' + return sys.stdin.read().strip() + + def _get_default_branches_for_merge_base(repo: 'Repo') -> list[str]: """Get a list of default branches to try for merge base calculation. diff --git a/tests/cli/files_collector/test_commit_range_documents.py b/tests/cli/files_collector/test_commit_range_documents.py index cd30d1eb..d27d24a3 100644 --- a/tests/cli/files_collector/test_commit_range_documents.py +++ b/tests/cli/files_collector/test_commit_range_documents.py @@ -12,13 +12,22 @@ from cycode.cli.files_collector.commit_range_documents import ( _get_default_branches_for_merge_base, calculate_pre_push_commit_range, + calculate_pre_receive_commit_range, get_diff_file_path, get_safe_head_reference_for_diff, parse_commit_range, parse_pre_push_input, + parse_pre_receive_input, ) from cycode.cli.utils.path_utils import get_path_by_os +DUMMY_SHA_0 = '0' * 40 +DUMMY_SHA_1 = '1' * 40 +DUMMY_SHA_2 = '2' * 40 +DUMMY_SHA_A = 'a' * 40 +DUMMY_SHA_B = 'b' * 40 +DUMMY_SHA_C = 'c' * 40 + @contextmanager def git_repository(path: str) -> Generator[Repo, None, None]: @@ -871,3 +880,170 @@ def test_single_commit_spec(self) -> None: parsed_from, parsed_to = parse_commit_range(a, temp_dir) assert (parsed_from, parsed_to) == (a, c) + + +class TestParsePreReceiveInput: + """Test the parse_pre_receive_input function with various pre-receive hook input scenarios.""" + + def test_parse_single_update_input(self) -> None: + """Test parsing a single branch update input.""" + pre_receive_input = f'{DUMMY_SHA_1} {DUMMY_SHA_2} refs/heads/main' + + with patch('sys.stdin', StringIO(pre_receive_input)): + result = parse_pre_receive_input() + assert result == pre_receive_input + + def test_parse_multiple_update_input_returns_first_line(self) -> None: + """Test parsing multiple branch updates returns only the first line.""" + pre_receive_input = f"""{DUMMY_SHA_0} {DUMMY_SHA_A} refs/heads/main +{DUMMY_SHA_B} {DUMMY_SHA_C} refs/heads/feature""" + + with patch('sys.stdin', StringIO(pre_receive_input)): + result = parse_pre_receive_input() + assert result == f'{DUMMY_SHA_0} {DUMMY_SHA_A} refs/heads/main' + + def test_parse_empty_input_raises_error(self) -> None: + """Test that empty input raises ValueError.""" + match = 'Pre receive input was not found' + with patch('sys.stdin', StringIO('')), pytest.raises(ValueError, match=match): + parse_pre_receive_input() + + +class TestCalculatePreReceiveCommitRange: + """Test the calculate_pre_receive_commit_range function with representative scenarios.""" + + def test_branch_deletion_returns_none(self) -> None: + """When end commit is all zeros (deletion), no scan is needed.""" + update_details = f'{DUMMY_SHA_A} {consts.EMPTY_COMMIT_SHA} refs/heads/feature' + assert calculate_pre_receive_commit_range(os.getcwd(), update_details) is None + + def test_no_new_commits_returns_none(self) -> None: + """When there are no commits not in remote, return None.""" + with tempfile.TemporaryDirectory() as server_dir: + server_repo = Repo.init(server_dir, bare=True) + try: + with tempfile.TemporaryDirectory() as work_dir: + work_repo = Repo.init(work_dir, b='main') + try: + # Create a single commit and push it to the server as main (end commit is already on a ref) + test_file = os.path.join(work_dir, 'file.txt') + with open(test_file, 'w') as f: + f.write('base') + work_repo.index.add(['file.txt']) + end_commit = work_repo.index.commit('initial') + + work_repo.create_remote('origin', server_dir) + work_repo.remotes.origin.push('main:main') + + update_details = f'{DUMMY_SHA_A} {end_commit.hexsha} refs/heads/main' + assert calculate_pre_receive_commit_range(server_dir, update_details) is None + finally: + work_repo.close() + finally: + server_repo.close() + + def test_returns_triple_dot_range_from_oldest_unupdated(self) -> None: + """Returns '~1...' when there are new commits to scan.""" + with tempfile.TemporaryDirectory() as server_dir: + server_repo = Repo.init(server_dir, bare=True) + try: + with tempfile.TemporaryDirectory() as work_dir: + work_repo = Repo.init(work_dir, b='main') + try: + # Create commit A and push it to server as main (server has A on a ref) + a_path = os.path.join(work_dir, 'a.txt') + with open(a_path, 'w') as f: + f.write('A') + work_repo.index.add(['a.txt']) + work_repo.index.commit('A') + + work_repo.create_remote('origin', server_dir) + work_repo.remotes.origin.push('main:main') + + # Create commits B and C locally (not yet on server ref) + b_path = os.path.join(work_dir, 'b.txt') + with open(b_path, 'w') as f: + f.write('B') + work_repo.index.add(['b.txt']) + b_commit = work_repo.index.commit('B') + + c_path = os.path.join(work_dir, 'c.txt') + with open(c_path, 'w') as f: + f.write('C') + work_repo.index.add(['c.txt']) + end_commit = work_repo.index.commit('C') + + # Push the objects to a temporary ref and then delete that ref on server, + # so the objects exist but are not reachable from any ref. + work_repo.remotes.origin.push(f'{end_commit.hexsha}:refs/tmp/hold') + Repo(server_dir).git.update_ref('-d', 'refs/tmp/hold') + + update_details = f'{DUMMY_SHA_A} {end_commit.hexsha} refs/heads/main' + result = calculate_pre_receive_commit_range(server_dir, update_details) + assert result == f'{b_commit.hexsha}~1...{end_commit.hexsha}' + finally: + work_repo.close() + finally: + server_repo.close() + + def test_initial_oldest_commit_without_parent_returns_single_commit_range(self) -> None: + """If oldest commit has no parent, avoid '~1' and scan from end commit only.""" + with tempfile.TemporaryDirectory() as server_dir: + server_repo = Repo.init(server_dir, bare=True) + try: + with tempfile.TemporaryDirectory() as work_dir: + work_repo = Repo.init(work_dir, b='main') + try: + # Create a single root commit locally + p = os.path.join(work_dir, 'root.txt') + with open(p, 'w') as f: + f.write('root') + work_repo.index.add(['root.txt']) + end_commit = work_repo.index.commit('root') + + work_repo.create_remote('origin', server_dir) + # Push objects to a temporary ref and delete it so server has objects but no refs + work_repo.remotes.origin.push(f'{end_commit.hexsha}:refs/tmp/hold') + Repo(server_dir).git.update_ref('-d', 'refs/tmp/hold') + + update_details = f'{DUMMY_SHA_A} {end_commit.hexsha} refs/heads/main' + result = calculate_pre_receive_commit_range(server_dir, update_details) + assert result == end_commit.hexsha + finally: + work_repo.close() + finally: + server_repo.close() + + def test_initial_oldest_commit_without_parent_with_two_commits_returns_single_commit_range(self) -> None: + """If there are two new commits and the oldest has no parent, avoid '~1' and scan from end commit only.""" + with tempfile.TemporaryDirectory() as server_dir: + server_repo = Repo.init(server_dir, bare=True) + try: + with tempfile.TemporaryDirectory() as work_dir: + work_repo = Repo.init(work_dir, b='main') + try: + # Create two commits locally: oldest has no parent, second on top + a_path = os.path.join(work_dir, 'a.txt') + with open(a_path, 'w') as f: + f.write('A') + work_repo.index.add(['a.txt']) + work_repo.index.commit('A') + + d_path = os.path.join(work_dir, 'd.txt') + with open(d_path, 'w') as f: + f.write('D') + work_repo.index.add(['d.txt']) + end_commit = work_repo.index.commit('D') + + work_repo.create_remote('origin', server_dir) + # Push objects to a temporary ref and delete it so server has objects but no refs + work_repo.remotes.origin.push(f'{end_commit.hexsha}:refs/tmp/hold') + Repo(server_dir).git.update_ref('-d', 'refs/tmp/hold') + + update_details = f'{consts.EMPTY_COMMIT_SHA} {end_commit.hexsha} refs/heads/main' + result = calculate_pre_receive_commit_range(server_dir, update_details) + assert result == end_commit.hexsha + finally: + work_repo.close() + finally: + server_repo.close()