Files
2025-11-29 17:56:41 +08:00

258 lines
7.6 KiB
JavaScript
Executable File

#!/usr/bin/env node
/**
* trace-analyzer.js
* Analyze distributed tracing data to identify bottlenecks
*
* Usage: node trace-analyzer.js <trace-id>
* node trace-analyzer.js <trace-id> --format=json
* node trace-analyzer.js --file=trace.json
*/
const fs = require('fs');
const path = require('path');
// Parse arguments
const args = process.argv.slice(2);
let traceId = null;
let traceFile = null;
let outputFormat = 'text'; // text or json
for (const arg of args) {
if (arg.startsWith('--file=')) {
traceFile = arg.split('=')[1];
} else if (arg.startsWith('--format=')) {
outputFormat = arg.split('=')[1];
} else if (!arg.startsWith('--')) {
traceId = arg;
}
}
// Mock trace data (in production, fetch from APM/tracing system)
function getMockTraceData(id) {
return {
traceId: id,
rootSpan: {
spanId: 'span-1',
service: 'frontend',
operation: 'GET /dashboard',
startTime: 1698345600000,
duration: 8250, // ms
children: [
{
spanId: 'span-2',
service: 'api',
operation: 'GET /api/dashboard',
startTime: 1698345600010,
duration: 8200,
children: [
{
spanId: 'span-3',
service: 'api',
operation: 'db.query',
startTime: 1698345600020,
duration: 7800, // SLOW!
tags: {
'db.statement': 'SELECT * FROM users WHERE last_login_at > ...',
'db.type': 'postgresql',
},
children: [],
},
{
spanId: 'span-4',
service: 'api',
operation: 'cache.get',
startTime: 1698345608200,
duration: 5,
children: [],
},
],
},
],
},
};
}
// Load trace from file or mock
function loadTrace() {
if (traceFile) {
try {
const data = fs.readFileSync(traceFile, 'utf8');
return JSON.parse(data);
} catch (error) {
console.error(`❌ Error loading trace file: ${error.message}`);
process.exit(1);
}
} else if (traceId) {
return getMockTraceData(traceId);
} else {
console.error('Usage: node trace-analyzer.js <trace-id> OR --file=trace.json');
process.exit(1);
}
}
// Analyze trace
function analyzeTrace(trace) {
const analysis = {
traceId: trace.traceId,
totalDuration: trace.rootSpan.duration,
rootOperation: trace.rootSpan.operation,
spanCount: 0,
slowSpans: [],
bottlenecks: [],
serviceBreakdown: {},
};
// Traverse spans
function traverseSpans(span, depth = 0) {
analysis.spanCount++;
// Track service time
if (!analysis.serviceBreakdown[span.service]) {
analysis.serviceBreakdown[span.service] = {
totalTime: 0,
calls: 0,
};
}
analysis.serviceBreakdown[span.service].totalTime += span.duration;
analysis.serviceBreakdown[span.service].calls++;
// Identify slow spans (>1s)
if (span.duration > 1000) {
analysis.slowSpans.push({
service: span.service,
operation: span.operation,
duration: span.duration,
percentage: ((span.duration / analysis.totalDuration) * 100).toFixed(1),
depth,
});
}
// Traverse children
if (span.children) {
span.children.forEach(child => traverseSpans(child, depth + 1));
}
}
traverseSpans(trace.rootSpan);
// Sort slow spans by duration
analysis.slowSpans.sort((a, b) => b.duration - a.duration);
// Identify bottlenecks (spans taking >50% of total time)
analysis.bottlenecks = analysis.slowSpans.filter(
span => parseFloat(span.percentage) > 50
);
return analysis;
}
// Format duration
function formatDuration(ms) {
if (ms < 1000) return `${ms}ms`;
return `${(ms / 1000).toFixed(2)}s`;
}
// Print analysis (text format)
function printAnalysis(analysis) {
console.log('========================================');
console.log('DISTRIBUTED TRACE ANALYSIS');
console.log('========================================');
console.log(`Trace ID: ${analysis.traceId}`);
console.log(`Root Operation: ${analysis.rootOperation}`);
console.log(`Total Duration: ${formatDuration(analysis.totalDuration)}`);
console.log(`Total Spans: ${analysis.spanCount}`);
console.log('');
// Service breakdown
console.log('📊 SERVICE BREAKDOWN');
console.log('-------------------');
console.log(`${'Service'.padEnd(20)} ${'Time'.padEnd(15)} ${'Calls'.padEnd(10)} ${'% of Total'.padEnd(15)}`);
console.log('-'.repeat(70));
for (const [service, data] of Object.entries(analysis.serviceBreakdown)) {
const percentage = ((data.totalTime / analysis.totalDuration) * 100).toFixed(1);
console.log(
`${service.padEnd(20)} ${formatDuration(data.totalTime).padEnd(15)} ${String(data.calls).padEnd(10)} ${percentage}%`
);
}
console.log('');
// Slow spans
if (analysis.slowSpans.length > 0) {
console.log(`🐌 SLOW SPANS (>${formatDuration(1000)})`);
console.log('-------------------');
console.log(`${'Service'.padEnd(15)} ${'Operation'.padEnd(30)} ${'Duration'.padEnd(15)} ${'% of Total'.padEnd(15)}`);
console.log('-'.repeat(80));
for (const span of analysis.slowSpans.slice(0, 10)) {
console.log(
`${span.service.padEnd(15)} ${span.operation.padEnd(30)} ${formatDuration(span.duration).padEnd(15)} ${span.percentage}%`
);
}
console.log('');
}
// Bottlenecks
if (analysis.bottlenecks.length > 0) {
console.log('🚨 BOTTLENECKS (>50% of total time)');
console.log('-----------------------------------');
for (const bottleneck of analysis.bottlenecks) {
console.log(`⚠️ ${bottleneck.service} - ${bottleneck.operation}`);
console.log(` Duration: ${formatDuration(bottleneck.duration)} (${bottleneck.percentage}% of trace)`);
console.log('');
}
}
// Recommendations
console.log('💡 RECOMMENDATIONS');
console.log('-----------------');
if (analysis.bottlenecks.length > 0) {
console.log('🔴 CRITICAL: Bottlenecks detected!');
for (const bottleneck of analysis.bottlenecks) {
console.log(` - Optimize ${bottleneck.service}.${bottleneck.operation} (${bottleneck.percentage}% of trace)`);
// Specific recommendations based on operation
if (bottleneck.operation.includes('db.query')) {
console.log(' → Add database index, optimize query, add caching');
} else if (bottleneck.operation.includes('http')) {
console.log(' → Add timeout, cache response, use async processing');
} else if (bottleneck.operation.includes('cache')) {
console.log(' → Check cache hit rate, optimize cache key');
}
}
} else if (analysis.slowSpans.length > 0) {
console.log('🟡 Some slow spans detected:');
for (const span of analysis.slowSpans.slice(0, 3)) {
console.log(` - ${span.service}.${span.operation}: ${formatDuration(span.duration)}`);
}
} else {
console.log('✅ No obvious performance issues detected.');
console.log(' All spans complete in reasonable time.');
}
console.log('');
console.log('Next steps:');
console.log(' - Profile slowest spans');
console.log(' - Check for N+1 queries, missing indexes');
console.log(' - Add caching where appropriate');
console.log(' - Review external API timeouts');
console.log('');
}
// Main
function main() {
const trace = loadTrace();
const analysis = analyzeTrace(trace);
if (outputFormat === 'json') {
console.log(JSON.stringify(analysis, null, 2));
} else {
printAnalysis(analysis);
}
}
main();