skills/mlops/llmops-guardian/token-cost-analyzer/SKILL.md
Use this skill when analyzing and optimizing LLM API costs. Activate when the user wants to reduce AI API spending, understand token usage, audit LLM costs, optimize prompts for cost efficiency, or track and report on AI expenditure.
`npx skillsauth add latestaiagents/agent-skills token-cost-analyzer`

Install this skill globally with one command. Works with Claude Code, Cursor, and Windsurf.
3 of 9 scanners reported clean
Some scanners were skipped, did not run, or reported a non-clean status. Review each row below.
Audit, analyze, and optimize your LLM API spending.
| Model | Input (per 1M tokens) | Output (per 1M tokens) |
|-------|----------------------|------------------------|
| Claude 3 Opus | $15.00 | $75.00 |
| Claude 3.5 Sonnet | $3.00 | $15.00 |
| Claude 3 Haiku | $0.25 | $1.25 |
| Model | Input (per 1M tokens) | Output (per 1M tokens) |
|-------|----------------------|------------------------|
| GPT-4 Turbo | $10.00 | $30.00 |
| GPT-4o | $5.00 | $15.00 |
| GPT-4o mini | $0.15 | $0.60 |
| o1 | $15.00 | $60.00 |
| Model | Input (per 1M tokens) | Output (per 1M tokens) |
|-------|----------------------|------------------------|
| Gemini 1.5 Pro | $3.50 | $10.50 |
| Gemini 1.5 Flash | $0.075 | $0.30 |
/** Token counts for a single request, as consumed by `calculateCost`. */
interface TokenUsage {
/** Tokens billed at the model's input rate. */
inputTokens: number;
/** Tokens billed at the model's output rate. */
outputTokens: number;
/**
 * Tokens billed at the cached rate; when omitted, no cached cost is added.
 * NOTE(review): `calculateCost` adds this on top of `inputTokens`, so it is
 * presumably NOT already included in `inputTokens` — confirm with callers.
 */
cachedTokens?: number;
}
/** Per-model price list, expressed as price per one million tokens. */
interface ModelPricing {
/** Price for one million input tokens. */
inputPer1M: number;
/** Price for one million output tokens. */
outputPer1M: number;
/** Price for one million cached tokens; when omitted, `calculateCost` falls back to 10% of `inputPer1M`. */
cachedPer1M?: number;
}
/**
 * Computes the cost of one request from its token counts and a model price list.
 *
 * @param usage - Input, output and (optionally) cached token counts.
 * @param pricing - Prices per one million tokens for the model that served the request.
 * @returns The sum of the input, output and cached-token costs, in the same
 *   currency unit as `pricing`.
 */
function calculateCost(usage: TokenUsage, pricing: ModelPricing): number {
  const TOKENS_PER_MILLION = 1_000_000;

  const inputCost = (usage.inputTokens / TOKENS_PER_MILLION) * pricing.inputPer1M;
  const outputCost = (usage.outputTokens / TOKENS_PER_MILLION) * pricing.outputPer1M;

  // `??` rather than `||`: an explicit cached price of 0 (free cache reads) must
  // be honoured instead of being replaced by the 10%-of-input fallback.
  const cachedRate = pricing.cachedPer1M ?? pricing.inputPer1M * 0.1;
  const cachedTokens = usage.cachedTokens ?? 0;
  const cachedCost = cachedTokens > 0 ? (cachedTokens / TOKENS_PER_MILLION) * cachedRate : 0;

  return inputCost + outputCost + cachedCost;
}
// Example: price one request of 50,000 input tokens and 10,000 output tokens
// at $3.00 (input) / $15.00 (output) per 1M tokens.
const usage = { inputTokens: 50000, outputTokens: 10000 };
const claude35Sonnet = { inputPer1M: 3.00, outputPer1M: 15.00 };
const cost = calculateCost(usage, claude35Sonnet);
// Input:  (50,000 / 1,000,000) * $3.00  = $0.15
// Output: (10,000 / 1,000,000) * $15.00 = $0.15
// Total:  $0.15 + $0.15 = $0.30
/** One logged LLM call together with its computed cost. */
interface UsageRecord {
/** When the record was created; `CostTracker.record` sets this to the current time. */
timestamp: Date;
/** Model identifier; used to look up pricing and to group costs by model. */
model: string;
/** Caller-supplied label for the kind of work; used to group costs by operation. */
operation: string;
/** Optional identifier of the user the call is attributed to. */
userId?: string;
/** Input tokens consumed by the call. */
inputTokens: number;
/** Output tokens produced by the call. */
outputTokens: number;
/** Cost of the call; `CostTracker.record` fills this in via `calculateCost`. */
cost: number;
/** Free-form extra data attached by the caller; not interpreted by the code shown here. */
metadata: Record<string, unknown>;
}
/**
 * In-memory ledger of LLM calls and their computed cost.
 *
 * Pricing is looked up per model from the table passed to the constructor.
 * Only input and output tokens are priced here; `UsageRecord` carries no
 * cached-token count.
 */
class CostTracker {
  private records: UsageRecord[] = [];
  private readonly pricingByModel: Map<string, ModelPricing>;

  /**
   * @param pricing - Price list keyed by model identifier. Defaults to an empty
   *   table, in which case `record()` throws for every model until pricing is
   *   supplied.
   */
  constructor(pricing: Record<string, ModelPricing> = {}) {
    // A Map avoids accidental hits on Object.prototype keys such as "constructor".
    this.pricingByModel = new Map(Object.entries(pricing));
  }

  /**
   * Prices a call and appends it to the ledger, stamped with the current time.
   *
   * @param usage - The call to log; `timestamp` and `cost` are filled in here.
   * @throws Error if no pricing is configured for `usage.model`.
   */
  record(usage: Omit<UsageRecord, 'timestamp' | 'cost'>): void {
    const pricing = this.getPricing(usage.model);
    const cost = calculateCost(
      { inputTokens: usage.inputTokens, outputTokens: usage.outputTokens },
      pricing
    );
    this.records.push({
      ...usage,
      timestamp: new Date(),
      cost
    });
  }

  // Aggregation methods

  /** Sums the cost of every record whose timestamp is at or after `since`. */
  getTotalCost(since: Date): number {
    return this.records
      .filter(r => r.timestamp >= since)
      .reduce((sum, r) => sum + r.cost, 0);
  }

  /** Total cost per `operation` label, across all records. */
  getCostByOperation(): Map<string, number> {
    const byOp = new Map<string, number>();
    for (const r of this.records) {
      byOp.set(r.operation, (byOp.get(r.operation) ?? 0) + r.cost);
    }
    return byOp;
  }

  /** Total cost per model identifier, across all records. */
  getCostByModel(): Map<string, number> {
    const byModel = new Map<string, number>();
    for (const r of this.records) {
      byModel.set(r.model, (byModel.get(r.model) ?? 0) + r.cost);
    }
    return byModel;
  }

  /**
   * Returns the individual records with the highest cost, most expensive first.
   * The internal ledger is copied before sorting, so it is not reordered.
   *
   * @param limit - Maximum number of records to return (default 10).
   */
  getTopExpensiveOperations(limit: number = 10): UsageRecord[] {
    return [...this.records]
      .sort((a, b) => b.cost - a.cost)
      .slice(0, limit);
  }

  /**
   * Looks up the price list for a model.
   *
   * @throws Error if the model has no entry in the pricing table, so an
   *   unpriced call is never silently logged with a wrong cost.
   */
  private getPricing(model: string): ModelPricing {
    const pricing = this.pricingByModel.get(model);
    if (pricing === undefined) {
      throw new Error(`No pricing configured for model "${model}"`);
    }
    return pricing;
  }
}
/** Shape of the report returned by `generateCostReport`. */
interface CostReport {
/** Reporting window; `generateCostReport` includes records with start <= timestamp <= end. */
period: { start: Date; end: Date };
/** Totals over all records inside the period. */
summary: {
totalCost: number;
totalInputTokens: number;
totalOutputTokens: number;
/** Number of distinct `operation` labels seen in the period. */
uniqueOperations: number;
/** `totalCost` divided by the number of records in the period. */
avgCostPerRequest: number;
};
/** Cost split along different dimensions. */
breakdown: {
/** Cost per model; `percentage` is that model's share of `totalCost`, scaled to 0-100. */
byModel: { model: string; cost: number; percentage: number }[];
/** Cost and request count per `operation` label. */
byOperation: { operation: string; cost: number; requests: number }[];
/** Cost per day; produced by `groupByDay`, which is not shown here, so the date format is not established. */
byDay: { date: string; cost: number }[];
};
/** Derived observations. */
insights: {
/** Operation label with the highest total cost, or 'none' when there are no records. */
mostExpensiveOperation: string;
/** Produced by `calculateGrowth`, which is not shown here — presumably names the fastest-growing cost driver; confirm. */
fastestGrowingCost: string;
/** Human-readable suggestions generated from the period's usage pattern. */
optimizationOpportunities: string[];
};
}
/**
 * Builds a cost report for the records whose timestamp lies inside `period`
 * (both ends inclusive).
 *
 * An empty period yields zeros rather than NaN: `avgCostPerRequest` is 0 when
 * there are no records, and each model's `percentage` is 0 when the total cost
 * is 0.
 *
 * @param records - All available usage records; they are filtered by `period` here.
 * @param period - Reporting window.
 * @returns Totals, per-model / per-operation / per-day breakdowns and derived insights.
 */
function generateCostReport(
  records: UsageRecord[],
  period: { start: Date; end: Date }
): CostReport {
  const filtered = records.filter(
    r => r.timestamp >= period.start && r.timestamp <= period.end
  );

  // One pass collects every total and both groupings.
  let totalCost = 0;
  let totalInput = 0;
  let totalOutput = 0;
  const byModel = new Map<string, number>();
  const byOperation = new Map<string, { cost: number; count: number }>();

  for (const r of filtered) {
    totalCost += r.cost;
    totalInput += r.inputTokens;
    totalOutput += r.outputTokens;

    byModel.set(r.model, (byModel.get(r.model) ?? 0) + r.cost);

    const existing = byOperation.get(r.operation) ?? { cost: 0, count: 0 };
    byOperation.set(r.operation, {
      cost: existing.cost + r.cost,
      count: existing.count + 1
    });
  }

  // Find optimization opportunities
  const opportunities: string[] = [];

  // Check for expensive model usage on simple tasks
  const haiku = byModel.get('claude-haiku-4-5') ?? 0;
  const opus = byModel.get('claude-opus-4-6') ?? 0;
  if (opus > haiku * 10) {
    opportunities.push('Consider using Haiku for simpler tasks - Opus usage is 10x higher');
  }

  // Check for high output token ratio
  if (totalOutput > totalInput * 2) {
    opportunities.push('High output ratio - consider asking for more concise responses');
  }

  const requestCount = filtered.length;

  return {
    period,
    summary: {
      totalCost,
      totalInputTokens: totalInput,
      totalOutputTokens: totalOutput,
      uniqueOperations: byOperation.size,
      // Guard 0/0: an empty period has an average of 0, not NaN.
      avgCostPerRequest: requestCount > 0 ? totalCost / requestCount : 0
    },
    breakdown: {
      byModel: Array.from(byModel.entries()).map(([model, cost]) => ({
        model,
        cost,
        // Guard division by a zero total (e.g. only free or zero-token calls).
        percentage: totalCost > 0 ? (cost / totalCost) * 100 : 0
      })),
      byOperation: Array.from(byOperation.entries()).map(([op, data]) => ({
        operation: op,
        cost: data.cost,
        requests: data.count
      })),
      byDay: groupByDay(filtered)
    },
    insights: {
      mostExpensiveOperation: [...byOperation.entries()]
        .sort((a, b) => b[1].cost - a[1].cost)[0]?.[0] || 'none',
      fastestGrowingCost: calculateGrowth(filtered),
      optimizationOpportunities: opportunities
    }
  };
}
/** Coarse difficulty tier of a task; returned by `assessComplexity` and mapped to a model by `selectCostEffectiveModel`. */
type TaskComplexity = 'simple' | 'medium' | 'complex';
/**
 * Picks the model identifier to use for a task of the given complexity tier.
 *
 * @param task - The task text. Not consulted by the current implementation;
 *   only `complexity` decides the result.
 * @param complexity - Difficulty tier of the task.
 * @returns The model identifier assigned to that tier.
 */
function selectCostEffectiveModel(
  task: string,
  complexity: TaskComplexity
): string {
  switch (complexity) {
    case 'simple':
      return 'gpt-4o-mini'; // $0.15/$0.60 per 1M
    case 'medium':
      // $0.25/$1.25 per 1M
      // NOTE(review): these figures match the "Claude 3 Haiku" row of the
      // pricing table in this document — confirm them for this model id.
      return 'claude-haiku-4-5';
    case 'complex':
      return 'claude-sonnet-4-6'; // $3/$15 per 1M
  }
}
/**
 * Guesses a task's complexity tier from keywords in its text.
 *
 * Matching is case-insensitive and substring-based, so a keyword also matches
 * inside a longer word (for example "format" inside "information"). Complex
 * keywords take precedence over simple ones; text with no keyword is 'medium'.
 *
 * @param task - Free-text description of the task.
 * @returns 'complex', 'simple' or 'medium'.
 */
function assessComplexity(task: string): TaskComplexity {
  const normalized = task.toLowerCase();
  const mentionsAny = (keywords: readonly string[]): boolean =>
    keywords.some(keyword => normalized.includes(keyword));

  if (mentionsAny(['analyze', 'compare', 'synthesize', 'evaluate', 'complex'])) {
    return 'complex';
  }

  return mentionsAny(['format', 'extract', 'classify', 'summarize short'])
    ? 'simple'
    : 'medium';
}
// Before: Verbose prompt (many input tokens)
// The instruction boilerplate below is part of the prompt text itself, so it is
// sent along with the question every time this template is used.
// NOTE: `question` is not defined in this snippet; it is assumed to hold the user's question.
const verbosePrompt = `
You are an expert assistant. Your task is to help users with their questions.
Please be thorough and comprehensive in your responses. Make sure to consider
all aspects of the question and provide detailed explanations. If you're unsure
about something, please say so. Always be polite and professional.
User question: ${question}
`;
// After: Concise prompt (fewer input tokens)
// Same question, with the boilerplate reduced to a two-word instruction.
const concisePrompt = `Answer concisely: ${question}`;
// Savings: ~80% reduction in input tokens
// (approximate figure; the actual saving depends on how long `question` is)
// Instruct the model to be concise
// Option 1: ask for brevity in the prompt text itself.
// NOTE: `task` is not defined in this snippet; it is assumed to hold the task description.
const prompt = `
${task}
Respond in under 100 words. Use bullet points.
`;
// Or use max_tokens parameter
// Option 2: cap the response length through the request's `max_tokens` field.
// NOTE: `client` is not defined in this snippet — presumably an Anthropic SDK client; confirm.
const response = await client.messages.create({
model: 'claude-haiku-4-5',
max_tokens: 500, // Limit output
messages: [{ role: 'user', content: prompt }]
});
// Use Anthropic prompt caching
// The large, reusable context goes first and is marked with `cache_control`, so
// later requests that start with the same text can read it from the cache. The
// part that changes per request (the question) follows it.
// NOTE: `client`, `longSystemContext` and `userQuestion` are not defined in this snippet.
const response = await client.messages.create({
  model: 'claude-sonnet-4-6',
  max_tokens: 1024, // Required by the Messages API; the request is rejected without it
  messages: [{
    role: 'user',
    content: [
      {
        type: 'text',
        text: longSystemContext,
        cache_control: { type: 'ephemeral' } // Cache this
      },
      { type: 'text', text: userQuestion }
    ]
  }]
});
// Cache reads cost 90% less than regular input tokens. Writing to the cache
// costs more than regular input (1.25x for the default 5-minute cache), so
// caching only pays off when the same prefix is reused.
/** One alerting rule evaluated by `CostAlertSystem.checkAlerts`. */
interface CostAlert {
/**
 * Rule kind. `checkAlerts` handles 'threshold' (absolute cost) and 'spike'
 * (multiple of the historical average); 'anomaly' is declared but not
 * evaluated by the code shown here.
 */
type: 'threshold' | 'spike' | 'anomaly';
/** For 'threshold': the cost above which the rule fires. For 'spike': the multiple of the historical average above which it fires. */
threshold: number;
/** What should happen when the rule fires; passed along with the alert to `triggerAlert`. */
action: 'notify' | 'throttle' | 'block';
}
/**
 * Evaluates a fixed set of cost alert rules and reports every rule that fires.
 *
 * Delivery is delegated to a handler supplied at construction time; without
 * one, alerts are written to `console.warn`.
 */
class CostAlertSystem {
  private alerts: CostAlert[] = [
    { type: 'threshold', threshold: 100, action: 'notify' }, // $100/day
    { type: 'threshold', threshold: 500, action: 'throttle' }, // $500/day
    { type: 'spike', threshold: 5, action: 'notify' } // 5x normal
  ];

  /**
   * @param onAlert - Called once for every rule that fires, with the rule and a
   *   human-readable message. May be async; `checkAlerts` awaits it.
   */
  constructor(
    private readonly onAlert: (alert: CostAlert, message: string) => void | Promise<void> =
      (alert, message) => {
        console.warn(`[cost-alert:${alert.action}] ${message}`);
      }
  ) {}

  /**
   * Checks every rule against the current cost, in declaration order.
   *
   * @param currentCost - Cost accumulated so far in the current day.
   * @param historicalAvg - Typical daily cost used as the baseline for spike rules.
   *   Spike rules are skipped when this is not positive, because a ratio against
   *   a zero baseline is meaningless (it would read "Infinityx normal").
   */
  async checkAlerts(currentCost: number, historicalAvg: number): Promise<void> {
    const hasBaseline = historicalAvg > 0;

    for (const alert of this.alerts) {
      if (alert.type === 'threshold' && currentCost > alert.threshold) {
        await this.triggerAlert(alert, `Daily cost $${currentCost} exceeds $${alert.threshold}`);
      }
      if (alert.type === 'spike' && hasBaseline && currentCost > historicalAvg * alert.threshold) {
        await this.triggerAlert(alert, `Cost spike: $${currentCost} is ${(currentCost / historicalAvg).toFixed(1)}x normal`);
      }
    }
  }

  /** Hands a fired rule and its message to the configured handler. */
  private async triggerAlert(alert: CostAlert, message: string): Promise<void> {
    await this.onAlert(alert, message);
  }
}
/**
 * Data model for a cost dashboard view.
 * NOTE(review): nothing in the code shown here populates this interface, so
 * units and update frequency below are taken from the field names only — confirm.
 */
interface CostDashboard {
// Real-time
/** Cost accumulated in the current hour, day and month respectively. */
currentHourCost: number;
currentDayCost: number;
currentMonthCost: number;
// Trends
/** Per-day cost points; presumably the last 7 days — the date format is not established here. */
costTrend7d: { date: string; cost: number }[];
/** Projected cost at the end of the current month; the projection method is not defined here. */
projectedMonthEnd: number;
// Breakdown
/** Most costly operations, each with the direction its cost is moving. */
topOperations: { name: string; cost: number; trend: 'up' | 'down' | 'stable' }[];
/** Share per model; whether `percentage` is 0-1 or 0-100 is not established here. */
modelMix: { model: string; percentage: number }[];
// Alerts
/** Messages of alerts that are currently active. */
activeAlerts: string[];
/** How much of the budget has been used; scale (fraction vs. percent) is not established here. */
budgetUtilization: number;
}
development
Test skills for correct activation, content quality, and regression — both automated checks (frontmatter validity, lint) and manual verification (query-suite activation testing). Covers CI integration and how to catch skill regressions before users do. Use this skill when adding skills to a repo, setting up CI for a skill library, or debugging "the skill exists but doesn't work". Activate when: test skills, validate skills, skill CI, skill linting, skill activation test, skill regression.
documentation
Write the YAML frontmatter for a SKILL.md file so it activates reliably — name, description, and activation keywords that the model matches against. Covers length, tone, and the most common frontmatter mistakes. Use this skill when authoring a new skill, fixing a skill that isn't auto-activating, or reviewing skills for publication. Activate when: SKILL.md frontmatter, skill description, skill activation, skill YAML, write a skill, author a skill.
development
Design skills that fire at the right moment — neither over-eager (noise) nor under-eager (silent). Covers activation specificity, trigger phrases, disambiguation between overlapping skills, and debugging activation. Use this skill when multiple skills could fire on the same query, a skill never fires, or a skill fires too often. Activate when: skill won't activate, skill over-activates, overlapping skills, skill triggers, skill selection, skill disambiguation.
development
Structure SKILL.md content so the model reads just enough — concise summary up front, progressively deeper detail, examples on demand. Covers section ordering, length budgets, when to split into multiple skills. Use this skill when writing or refactoring a skill body, one skill has grown too long, or a skill is wordy but not useful. Activate when: SKILL.md structure, skill content, skill too long, split skill, progressive disclosure, skill body.