Files
budget-app/scripts/bus-factor.mjs
Christian Hood 066d50fa89 Add bus-factor analyzer script
Implements scripts/bus-factor.mjs — a standalone Node.js CLI that shells
out to git log --numstat to collect per-file, per-author commit stats,
then computes bus-factor scores (number of contributors with >10%
ownership) for every source file in server/src/, client/src/, and
db/migrations/.

High-risk files (bus-factor=1) are surfaced prominently in the default
report. A --json flag emits machine-readable output for CI. Pure
analysis functions (parseGitLog, computeOwnership, scoreFiles, repoStats)
are unit-tested with Vitest (19 tests, all passing).

No new runtime dependencies — only Node.js built-ins and git are required.
Run via: node scripts/bus-factor.mjs [--json] [--min-commits N] [--threshold N] [--top N]

Nightshift-Task: bus-factor
Nightshift-Ref: https://github.com/marcus/nightshift
2026-03-20 03:03:26 -04:00

214 lines
7.2 KiB
JavaScript

#!/usr/bin/env node
/**
* Bus-Factor Analyzer
* Analyzes code ownership concentration by examining git commit history.
* Usage: node scripts/bus-factor.mjs [--json] [--min-commits N] [--threshold N] [--top N]
*/
import { execSync } from 'child_process';
import { fileURLToPath } from 'url';
import path from 'path';
// --- Pure analysis functions (exported for testing) ---
/**
 * Parse raw `git log --numstat` output into a map of file -> author -> commitCount.
 *
 * Each numstat row (`<added>\t<deleted>\t<path>`) that follows an `Author:` line
 * counts as one commit by that author on that path. Rows whose first two columns
 * are not both numeric (for example `-\t-\t<path>`) are skipped. Renamed paths
 * (`old => new` or `prefix/{old => new}/suffix`) are attributed to the new path.
 *
 * @param {string} rawLog - Output from git log --numstat
 * @returns {Object} { [filePath]: { [author]: number } }
 */
export function parseGitLog(rawLog) {
  const ownership = {};
  let currentAuthor = null;

  for (const line of rawLog.split('\n')) {
    // Commit header line: "commit <hash>" - forget the previous commit's author
    // so stray rows are never credited to the wrong person.
    if (line.startsWith('commit ')) {
      currentAuthor = null;
      continue;
    }

    // Author line: "Author: Name <email>"
    const authorMatch = line.match(/^Author:\s+(.+?)\s+<[^>]+>/);
    if (authorMatch) {
      currentAuthor = authorMatch[1].trim();
      continue;
    }

    // Numstat line: "<added>\t<deleted>\t<filename>"
    if (!currentAuthor || !/^\d+\t\d+\t/.test(line)) continue;

    let filePath = line.split('\t')[2];
    if (filePath.includes('{') && filePath.includes('=>')) {
      // "{old => new}/suffix": keep only the new side. When that side is empty
      // ("dir/{old => }/file") the substitution leaves "dir//file", so collapse
      // repeated slashes. Whitespace inside the path is preserved because file
      // names may legitimately contain spaces.
      filePath = filePath
        .replace(/\{([^}]*?)\s*=>\s*([^}]*?)\}/g, '$2')
        .replace(/\/{2,}/g, '/');
    } else if (filePath.includes(' => ')) {
      // "old/path => new/path"
      filePath = filePath.split(' => ')[1];
    }

    filePath = filePath.trim();
    if (!filePath) continue;

    if (!ownership[filePath]) ownership[filePath] = {};
    ownership[filePath][currentAuthor] = (ownership[filePath][currentAuthor] || 0) + 1;
  }

  return ownership;
}
/**
 * Summarise how the commits on one file are spread across its authors.
 * @param {Object} authorCounts - { [author]: commitCount }
 * @param {number} ownershipThreshold - minimum share of the file's commits an
 *   author needs to count toward the bus-factor (default 0.1)
 * @returns {Object} { totalCommits, authors, busFactor, primaryOwner } where
 *   `authors` is sorted by commit count (descending) and `primaryOwner` is its
 *   first element, or null when there are no authors
 */
export function computeOwnership(authorCounts, ownershipThreshold = 0.1) {
  const ranked = Object.entries(authorCounts);
  ranked.sort(([, left], [, right]) => right - left);

  let totalCommits = 0;
  for (const [, count] of ranked) {
    totalCommits += count;
  }

  const authors = [];
  let busFactor = 0;
  for (const [name, commits] of ranked) {
    // Guard the division so a file with zero recorded commits yields 0, not NaN.
    const pct = totalCommits > 0 ? commits / totalCommits : 0;
    if (pct >= ownershipThreshold) {
      busFactor += 1;
    }
    authors.push({ name, commits, pct });
  }

  const primaryOwner = authors.length > 0 ? authors[0] : null;
  return { totalCommits, authors, busFactor, primaryOwner };
}
/**
 * Attach ownership metrics to every file and order the result by risk.
 * @param {Object} ownership - Output from parseGitLog
 * @param {Object} options
 * @param {number} [options.minCommits=2] - files with fewer total commits are dropped
 * @param {number} [options.ownershipThreshold=0.1] - forwarded to computeOwnership
 * @returns {Array} File entries ({ file, ...metrics }) sorted by ascending
 *   bus-factor, ties broken by descending total commits
 */
export function scoreFiles(ownership, { minCommits = 2, ownershipThreshold = 0.1 } = {}) {
  // Lowest bus-factor first; among equals, the busiest file first.
  const byRisk = (left, right) =>
    left.busFactor - right.busFactor || right.totalCommits - left.totalCommits;

  return Object.entries(ownership)
    .map(([file, authorCounts]) => ({
      file,
      ...computeOwnership(authorCounts, ownershipThreshold),
    }))
    .filter((entry) => !(entry.totalCommits < minCommits))
    .sort(byRisk);
}
/**
 * Aggregate per-file scores into repo-level numbers.
 * @param {Array} scoredFiles - entries carrying `busFactor` and `totalCommits`
 * @returns {Object} { avgBusFactor, highRiskCount, totalFiles } where
 *   avgBusFactor is the bus-factor averaged with each file weighted by its
 *   commit count, and highRiskCount is the number of files with bus-factor 1
 */
export function repoStats(scoredFiles) {
  const totalFiles = scoredFiles.length;
  if (totalFiles === 0) {
    return { avgBusFactor: 0, highRiskCount: 0, totalFiles: 0 };
  }

  let commitSum = 0;
  let weightedSum = 0;
  let highRiskCount = 0;
  for (const { busFactor, totalCommits } of scoredFiles) {
    commitSum += totalCommits;
    weightedSum += busFactor * totalCommits;
    if (busFactor === 1) {
      highRiskCount += 1;
    }
  }

  // Avoid dividing by zero when no file has any commits.
  const avgBusFactor = commitSum > 0 ? weightedSum / commitSum : 0;
  return { avgBusFactor, highRiskCount, totalFiles };
}
// --- CLI ---
/**
 * Run `git log --numstat` limited to the analysed source directories and
 * return its stdout as a string.
 * @param {string} repoRoot - directory git is run from
 * @returns {string} raw git log output
 * @throws {Error} whatever execSync throws when the command fails
 */
function collectGitLog(repoRoot) {
  const dirs = ['server/src', 'client/src', 'db/migrations'];
  // Select the repository through `cwd` instead of interpolating repoRoot into
  // the shell string: a checkout path containing quotes, `$` or backticks would
  // otherwise be interpreted by the shell and break (or alter) the command.
  const cmd = `git log --numstat -- ${dirs.join(' ')}`;
  return execSync(cmd, { cwd: repoRoot, maxBuffer: 50 * 1024 * 1024 }).toString();
}
/**
 * Render the human-readable report: a summary header, up to `topN` files whose
 * bus-factor is 1, and a per-author commit summary across all scored files.
 * @param {Array} scoredFiles - entries with file, totalCommits, busFactor, authors, primaryOwner
 * @param {Object} stats - { totalFiles, highRiskCount, avgBusFactor }
 * @param {number} topN - maximum number of high-risk files to list (default 10)
 * @returns {string} the report text, lines joined with '\n'
 */
function formatReport(scoredFiles, stats, topN = 10) {
  const wholePercent = (fraction) => (fraction * 100).toFixed(0);

  const out = [
    '',
    '=== Bus-Factor Analysis ===',
    '',
    `Files analyzed : ${stats.totalFiles}`,
    `High-risk files: ${stats.highRiskCount} (bus-factor = 1)`,
    `Avg bus-factor : ${stats.avgBusFactor.toFixed(2)} (weighted by commits)`,
    '',
  ];

  const risky = scoredFiles.filter(({ busFactor }) => busFactor === 1);
  if (risky.length > 0) {
    out.push(`--- Top ${Math.min(topN, risky.length)} High-Risk Files (bus-factor = 1) ---`, '');
    risky.slice(0, topN).forEach((entry) => {
      const { primaryOwner, authors } = entry;
      out.push(` ${entry.file}`);
      out.push(` commits: ${entry.totalCommits} owner: ${primaryOwner.name} (${wholePercent(primaryOwner.pct)}%)`);
      if (authors.length > 1) {
        // Show at most the two next-largest contributors after the owner.
        const runnersUp = authors
          .slice(1, 3)
          .map((author) => `${author.name} ${wholePercent(author.pct)}%`)
          .join(', ');
        out.push(` others: ${runnersUp}`);
      }
      out.push('');
    });
  } else {
    out.push('No high-risk files found.');
  }

  out.push('--- Author Contribution Summary ---', '');

  // Sum each author's commits over every scored file.
  const commitsByAuthor = {};
  for (const { authors } of scoredFiles) {
    for (const { name, commits } of authors) {
      commitsByAuthor[name] = (commitsByAuthor[name] || 0) + commits;
    }
  }

  let grandTotal = 0;
  for (const count of Object.values(commitsByAuthor)) {
    grandTotal += count;
  }

  const ranking = Object.entries(commitsByAuthor).sort(([, left], [, right]) => right - left);
  for (const [name, commits] of ranking) {
    const share = grandTotal > 0 ? (commits / grandTotal * 100).toFixed(1) : '0.0';
    out.push(` ${name.padEnd(30)} ${String(commits).padStart(5)} commits (${share}%)`);
  }

  out.push('');
  return out.join('\n');
}
// Detect if running as main script (ESM equivalent of require.main === module)
const isMain = process.argv[1] === fileURLToPath(import.meta.url);
if (isMain) {
  const args = process.argv.slice(2);

  // Read the numeric value that follows `flag`, or return `fallback` when the
  // flag is absent. A missing or malformed value aborts with exit code 1:
  // letting NaN through would not fail loudly, because every comparison
  // against NaN is false, so filtering and thresholds would silently stop
  // working.
  const readNumberFlag = (flag, fallback, parse, isValid, expected) => {
    const flagIdx = args.indexOf(flag);
    if (flagIdx === -1) return fallback;
    const raw = args[flagIdx + 1];
    const value = parse(raw);
    if (!isValid(value)) {
      process.stderr.write(`Invalid value for ${flag}: ${raw ?? '(missing)'} (expected ${expected})\n`);
      process.exit(1);
    }
    return value;
  };
  const parseCount = (text) => Number.parseInt(text, 10);
  const isCount = (n) => Number.isInteger(n) && n >= 0;
  const isFraction = (n) => n >= 0 && n <= 1;

  const jsonMode = args.includes('--json');
  const minCommits = readNumberFlag('--min-commits', 2, parseCount, isCount, 'a non-negative integer');
  const threshold = readNumberFlag('--threshold', 0.1, (text) => Number.parseFloat(text), isFraction, 'a fraction between 0 and 1');
  const topN = readNumberFlag('--top', 10, parseCount, isCount, 'a non-negative integer');

  // The script lives one directory below the repo root: <root>/scripts/<this file>.
  const repoRoot = path.resolve(fileURLToPath(import.meta.url), '..', '..');

  let rawLog;
  try {
    rawLog = collectGitLog(repoRoot);
  } catch (err) {
    process.stderr.write(`Error running git log: ${err.message}\n`);
    process.exit(1);
  }

  const ownership = parseGitLog(rawLog);
  const scoredFiles = scoreFiles(ownership, { minCommits, ownershipThreshold: threshold });
  const stats = repoStats(scoredFiles);

  if (jsonMode) {
    // Machine-readable output: the repo stats plus every scored file.
    process.stdout.write(JSON.stringify({ stats, files: scoredFiles }, null, 2) + '\n');
  } else {
    process.stdout.write(formatReport(scoredFiles, stats, topN));
  }
}