Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
37 changes: 19 additions & 18 deletions src/cluecode/copyrights.py
Original file line number Diff line number Diff line change
Expand Up @@ -4282,35 +4282,36 @@ def is_candidate(prepared_line):
return False

if is_only_digit_and_punct(prepared_line):
if TRACE:
logger_debug(f'is_candidate: is_only_digit_and_punct:\n{prepared_line!r}')

return False

if gibberish_detector.detect_gibberish(prepared_line):
if TRACE:
logger_debug(f'is_candidate: gibberish_detector.detect_gibberish:\n{prepared_line!r}')
return False

lowered = prepared_line.lower()

# ----------------------------------------------------------
# Ignore (c) ONLY when it appears inside a URL path
# ----------------------------------------------------------
if '(c)' in lowered:
# remove spaces to reconstruct possible broken URL
compact = lowered.replace(' ', '')

# match http://.../(c)/...
if re.search(r'https?://[^ ]*\(c\)[^ ]*', compact):
return False

# ----------------------------------------------------------
# Original logic continues
# ----------------------------------------------------------
if copyrights_hint.years(prepared_line):
return True
else:
pass
prepared_line = prepared_line.lower()

for marker in copyrights_hint.statement_markers:
if marker in prepared_line:
if marker in lowered:
return True

return False

def is_inside_statement(
    chars_only_line,
    markers=('copyright', 'copyrights', 'copyrightby',) + copyrights_hint.all_years,
):
    """
    Tell whether ``chars_only_line`` finishes with one of the ``markers``
    suffixes, which signals that a statement is still in progress.

    ``markers`` is a tuple of suffixes passed as-is to ``str.endswith``.
    A falsy ``chars_only_line`` (such as an empty string) is handed back
    unchanged rather than converted to a boolean.
    """
    if not chars_only_line:
        # nothing to inspect: propagate the falsy value itself
        return chars_only_line
    return chars_only_line.endswith(markers)


def is_end_of_statement(chars_only_line):
Expand Down
21 changes: 20 additions & 1 deletion tests/cluecode/test_copyrights_basic.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,26 @@
from commoncode.testcase import FileBasedTesting
from cluecode import copyrights
from cluecode.copyrights import prepare_text_line
from cluecode.copyrights import remove_non_chars
from cluecode import copyrights
from cluecode.copyrights import prepare_text_line, remove_non_chars

def test_copyright_symbol_inside_url_is_ignored():
    """
    A "(c)" that only shows up as a URL path segment must not turn the
    line into a copyright candidate.
    """
    line = "See http://example.com/(c)/path for more information."
    prepared = prepare_text_line(line)

    # guard: the "(c)" marker must still be present after line preparation,
    # otherwise the check below would pass for the wrong reason
    assert '(c)' in prepared

    candidate = copyrights.is_candidate(prepared)
    assert not candidate

def test_copyright_with_url_is_still_candidate():
    """
    A regular copyright statement stays a candidate even when a URL
    follows it on the same line.
    """
    prepared = prepare_text_line("Copyright (c) 2000 Foo, http://example.com")
    assert copyrights.is_candidate(prepared)


class TestTextPreparation(FileBasedTesting):
Expand Down