diff --git a/.github/workflows/compute-entropy.py b/.github/workflows/compute-entropy.py
new file mode 100644
--- /dev/null
+++ b/.github/workflows/compute-entropy.py
@@ -0,0 +1,101 @@
+#!/usr/bin/env python3
+"""Compute the average Shannon entropy of the files changed by a push or PR.
+
+Intended to be run from the repository root by the entropy-beauty-scan
+workflow. Writes a JSON summary to /tmp/beauty.json and prints the average
+entropy and verdict to stdout.
+"""
+import json
+import math
+import os
+import subprocess
+import sys
+from collections import Counter
+from pathlib import Path
+from typing import List
+
+# File suffixes that are skipped rather than read as text.
+SKIPPED_SUFFIXES = {'.png', '.jpg', '.gif', '.bin', '.lock', '.exe', '.dll', '.so'}
+OUTPUT_PATH = '/tmp/beauty.json'
+
+
+def shannon_entropy(text: str) -> float:
+    """Return the Shannon entropy of *text* in bits per character.
+
+    Texts shorter than 10 characters (including the empty string) score 0.0.
+    """
+    if not text or len(text) < 10:
+        return 0.0
+    freq = Counter(text)
+    probs = [count / len(text) for count in freq.values()]
+    return -sum(p * math.log2(p) for p in probs if p > 0)
+
+
+def git_lines(*args: str) -> List[str]:
+    """Run ``git`` with *args* and return its stdout split into lines."""
+    return subprocess.check_output(['git', *args], text=True).splitlines()
+
+
+def get_changed_files() -> List[str]:
+    """Return the paths changed by the triggering event (best effort)."""
+    # GITHUB_BASE_REF is only set for pull_request events. A "${{ ... }}"
+    # expression cannot be used here: GitHub expands those in workflow YAML,
+    # not inside a script file, so git would receive the literal text.
+    base_ref = os.environ.get('GITHUB_BASE_REF')
+    if base_ref:
+        try:
+            return git_lines('diff', '--name-only', f'origin/{base_ref}', 'HEAD')
+        except subprocess.CalledProcessError:
+            pass  # base branch not available locally; try the parent commit
+    try:
+        return git_lines('diff', '--name-only', 'HEAD~1', 'HEAD')
+    except subprocess.CalledProcessError:
+        # Last resort (e.g. HEAD has no parent): all files tracked in the repo.
+        return git_lines('ls-files')
+
+
+def main() -> int:
+    """Score the changed files, write the JSON summary, and print the verdict."""
+    results = []
+    total_ent = 0.0
+    count = 0
+
+    for name in get_changed_files():
+        path = Path(name.strip())
+        # is_file() skips deleted paths and directories (e.g. submodules).
+        if not path.is_file() or path.suffix in SKIPPED_SUFFIXES:
+            continue
+        try:
+            content = path.read_text(encoding='utf-8', errors='ignore')
+        except OSError as err:
+            # Best effort: report the unreadable file instead of hiding it.
+            print(f"Skipping {name}: {err}", file=sys.stderr)
+            continue
+        ent = shannon_entropy(content)
+        results.append(f"{name}: {ent:.3f}")
+        total_ent += ent
+        count += 1
+
+    avg = round(total_ent / count, 3) if count > 0 else 0.0
+
+    if 4.3 <= avg <= 4.7:
+        verdict = "✅ Mid-4 beauty detected (thoughtful human code!)"
+    elif avg > 0:
+        verdict = "⚠️ Consider review — entropy outside sweet spot"
+    else:
+        verdict = "No source files changed"
+
+    with open(OUTPUT_PATH, 'w', encoding='utf-8') as out:
+        json.dump({
+            "average_entropy": avg,
+            "verdict": verdict,
+            "files": results[:20]
+        }, out, indent=2)
+
+    print(f"Average entropy: {avg}")
+    print(verdict)
+    return 0
+
+
+if __name__ == '__main__':
+    sys.exit(main())
diff --git a/.github/workflows/entropy-beauty-scan.yml b/.github/workflows/entropy-beauty-scan.yml
new file mode 100644
--- /dev/null
+++ b/.github/workflows/entropy-beauty-scan.yml
@@ -0,0 +1,89 @@
+name: Entropy Beauty + TruffleHog Scan
+
+on: [push, pull_request, release]
+
+permissions:
+  contents: read
+  pull-requests: write
+  issues: write # must be at workflow level for push/merge events
+
+jobs:
+  scan:
+    runs-on: ubuntu-latest
+    steps:
+      - name: Checkout code (full history)
+        uses: actions/checkout@v4
+        with:
+          fetch-depth: 0
+
+      - name: Run TruffleHog
+        id: trufflehog
+        # NOTE(review): @main is a mutable ref; consider pinning a release tag or SHA.
+        uses: trufflesecurity/trufflehog@main
+        with:
+          path: .
+          extra_args: --results=verified,unknown --filter-entropy=3.5 --json
+
+      - name: Compute mid-4 beauty entropy
+        # Run even if the TruffleHog step failed so the PR summary can be built.
+        if: ${{ !cancelled() }}
+        run: python .github/workflows/compute-entropy.py
+
+      - name: Post summary comment (PR only)
+        # GITHUB_TOKEN is read-only for pull requests from forks, so only
+        # comment when the PR branch lives in this repository.
+        if: ${{ !cancelled() && github.event_name == 'pull_request' && github.event.pull_request.head.repo.full_name == github.repository }}
+        uses: actions/github-script@v7
+        env:
+          TRUFFLEHOG_OUTCOME: ${{ steps.trufflehog.outcome }}
+        with:
+          github-token: ${{ secrets.GITHUB_TOKEN }}
+          script: |
+            const fs = require('fs');
+
+            // Read TruffleHog findings if a results file exists: one JSON object per line (NDJSON).
+            // NOTE(review): no step in this workflow writes trufflehog.json, so the
+            // step outcome below is what actually drives the reported scan status.
+            let findings = [];
+            if (fs.existsSync('trufflehog.json')) {
+              try {
+                const lines = fs.readFileSync('trufflehog.json', 'utf8').trim().split('\n');
+                findings = lines.map(line => {
+                  try { return JSON.parse(line); } catch(e) { return null; }
+                }).filter(Boolean);
+              } catch(e) {}
+            } else {
+              console.log("No trufflehog.json found, using empty findings");
+            }
+            const scanOutcome = process.env.TRUFFLEHOG_OUTCOME;
+
+            // The entropy step may not have produced its summary (e.g. it failed).
+            let beauty = { average_entropy: 'n/a', verdict: 'Entropy summary unavailable', files: [] };
+            if (fs.existsSync('/tmp/beauty.json')) {
+              beauty = JSON.parse(fs.readFileSync('/tmp/beauty.json', 'utf8'));
+            }
+
+            let body = `## 🐷 TruffleHog + Entropy Beauty Scan\n\n`;
+            body += `**Average entropy of changed code:** ${beauty.average_entropy} bits/char\n`;
+            body += `**Verdict:** ${beauty.verdict}\n\n`;
+
+            if (beauty.files && beauty.files.length) {
+              body += `**Changed files entropy:**\n\`\`\`\n${beauty.files.join('\n')}\n\`\`\`\n\n`;
+            }
+
+            if (findings.length > 0) {
+              body += `⚠️ **TruffleHog found ${findings.length} potential issue(s)**\n`;
+            } else if (scanOutcome === 'success') {
+              body += `✅ No secrets or suspicious high-entropy strings found.\n`;
+            } else {
+              body += `⚠️ **TruffleHog step outcome: ${scanOutcome}** — see the workflow log.\n`;
+            }
+
+            body += `\n*Mid-4 beauty heuristic in action — powered by our entropy chats! 😊*`;
+
+            await github.rest.issues.createComment({
+              owner: context.repo.owner,
+              repo: context.repo.repo,
+              issue_number: context.issue.number,
+              body: body
+            });