skills/mlops/llmops-guardian/ai-audit-logging/SKILL.md
Use this skill when implementing audit logging for AI systems. Activate when the user needs to track AI decisions for compliance, implement audit trails for LLM usage, meet regulatory requirements (EU AI Act, SOC2), or create accountability records for AI-generated content.
npx skillsauth add latestaiagents/agent-skills ai-audit-logging
Install this skill globally with one command. Works with Claude Code, Cursor, and Windsurf.
3 of 9 scanners reported clean
Some scanners were skipped, did not run, or reported a non-clean status. Review each row below.
Implement compliance-ready audit trails for AI system decisions and outputs.
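Under the EU AI Act (Article 12, record-keeping), high-risk AI systems must maintain logs that enable: identifying situations in which the system may present a risk, post-market monitoring by the provider, and monitoring of the system's operation by deployers.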
The high-risk obligations apply from August 2026; fines under the EU AI Act reach up to 7% of global annual turnover for the most serious violations.
/**
 * Shape of a single audit record describing one AI operation: who triggered
 * it, which model ran, what went in and came out (as hashes), and the safety,
 * performance, cost and deployment context around it.
 */
interface AIAuditLog {
  // ---- Identification ----
  id: string;
  timestamp: Date;
  correlationId: string;
  sessionId: string;

  // ---- Actor: who (or what) initiated the operation ----
  actor: {
    type: 'user' | 'system' | 'automated';
    id: string;
    name?: string;
    ip?: string;
    userAgent?: string;
  };

  // ---- AI operation: what was run, and on which model ----
  operation: {
    type: 'inference' | 'generation' | 'classification' | 'analysis';
    model: string;
    modelVersion: string;
    provider: string;
  };

  // ---- Input / output ----
  io: {
    /** Hash of the input, kept instead of the raw text for privacy. */
    inputHash: string;
    inputTokens: number;
    /** Hash of the output. */
    outputHash: string;
    outputTokens: number;
    /** Optional human-readable summary of the input. */
    inputSummary?: string;
    outputSummary?: string;
  };

  // ---- Decision taken by the AI system, when there is one ----
  decision?: {
    action: string;
    confidence: number;
    alternatives?: Array<{ action: string; confidence: number }>;
    reasoning?: string;
  };

  // ---- Safety ----
  safety: {
    contentFiltered: boolean;
    filterReasons?: string[];
    humanReviewRequired: boolean;
    humanReviewCompleted?: boolean;
    reviewerId?: string;
  };

  // ---- Performance ----
  performance: {
    latencyMs: number;
    queueTimeMs?: number;
    tokensPerSecond?: number;
  };

  // ---- Cost ----
  cost: {
    inputCost: number;
    outputCost: number;
    totalCost: number;
    currency: string;
  };

  // ---- Deployment context ----
  context: {
    application: string;
    environment: 'production' | 'staging' | 'development';
    feature?: string;
    tags: string[];
  };

  // ---- Free-form extras ----
  metadata: Record<string, unknown>;
}
import { createHash, randomUUID } from 'crypto';
/**
 * Turns partial audit events into complete {@link AIAuditLog} records,
 * replaces the raw input/output text with SHA-256 digests, writes the record
 * to storage and pushes records that need human review onto the review queue.
 */
class AIAuditLogger {
  constructor(
    private storage: AuditStorage,
    private config: AuditConfig
  ) {}

  /**
   * Completes, validates, stores and (when required) escalates one audit event.
   *
   * @param event Partial record. `operation.model` and both `io` token counts
   *   are mandatory. `io.inputHash` / `io.outputHash` are expected to carry the
   *   raw text; they are stored as SHA-256 hex digests. The caller's `event`
   *   object is never modified.
   * @returns The generated id of the stored record.
   * @throws Error when model information or token counts are missing.
   */
  async log(event: Partial<AIAuditLog>): Promise<string> {
    // Validate first so a missing `io` yields the documented error rather than
    // a TypeError from dereferencing undefined.
    const { operation, io } = this.validate(event);

    const log: AIAuditLog = {
      id: randomUUID(),
      timestamp: new Date(),
      // `||` is deliberate for the two ids: an empty string counts as missing.
      correlationId: event.correlationId || randomUUID(),
      sessionId: event.sessionId || 'unknown',
      actor: event.actor ?? { type: 'system', id: 'unknown' },
      operation,
      // Copy `io` instead of hashing in place: `event.io` belongs to the caller.
      io: {
        ...io,
        inputHash: this.hashContent(io.inputHash),
        outputHash: this.hashContent(io.outputHash)
      },
      // Keep the decision when one was supplied (it used to be dropped).
      ...(event.decision ? { decision: event.decision } : {}),
      safety: event.safety ?? { contentFiltered: false, humanReviewRequired: false },
      performance: event.performance ?? { latencyMs: 0 },
      cost: event.cost ?? { inputCost: 0, outputCost: 0, totalCost: 0, currency: 'USD' },
      context: event.context ?? { application: 'unknown', environment: 'production', tags: [] },
      metadata: event.metadata ?? {}
    };

    await this.storage.write(log);

    if (log.safety.humanReviewRequired) {
      await this.alertForReview(log);
    }

    return log.id;
  }

  /** Returns the SHA-256 digest of `content` as a lowercase hex string. */
  private hashContent(content: string): string {
    return createHash('sha256').update(content).digest('hex');
  }

  /**
   * Checks the fields every audit record must carry and returns them narrowed
   * to their non-optional types.
   *
   * @throws Error when `operation.model` is absent or either token count is undefined.
   */
  private validate(event: Partial<AIAuditLog>): Pick<AIAuditLog, 'operation' | 'io'> {
    const { operation, io } = event;
    if (!operation?.model) {
      throw new Error('Audit log must include model information');
    }
    if (!io || io.inputTokens === undefined || io.outputTokens === undefined) {
      throw new Error('Audit log must include token counts');
    }
    return { operation, io };
  }

  /**
   * Pushes the record onto the configured review queue with high priority.
   * Does nothing when no review queue is configured.
   */
  private async alertForReview(log: AIAuditLog): Promise<void> {
    await this.config.reviewQueue?.push({
      logId: log.id,
      // Undefined when the record carries no filter reasons.
      reason: log.safety.filterReasons?.join(', '),
      priority: 'high'
    });
  }
}
/**
 * Wraps an LLM client so that every `complete` call produces an audit record,
 * whether the call succeeds or fails.
 *
 * - The provider call and the audit write are separate steps: if writing the
 *   success record fails, that error propagates, but no additional "error"
 *   record is written for a completion that actually succeeded.
 * - Success and failure records both use the same context defaults.
 * - Other members of `client` (such as `provider`) are carried over to the
 *   returned object via object spread.
 *
 * @param client  The client to wrap; its `complete` and `provider` are used.
 * @param logger  Receives one audit event per call.
 * @param context Deployment context; missing `application`, `environment`
 *   and `tags` default to `'default'`, `'production'` and `[]`.
 * @returns A client whose `complete` is audited.
 */
function withAuditLogging(
  client: LLMClient,
  logger: AIAuditLogger,
  context: Partial<AIAuditLog['context']>
): LLMClient {
  // Evaluated per call so later changes to `context` are still picked up.
  const resolveContext = (): AIAuditLog['context'] => ({
    ...context,
    application: context.application || 'default',
    environment: context.environment || 'production',
    tags: context.tags || []
  });

  return {
    ...client,
    async complete(params: CompletionParams): Promise<CompletionResponse> {
      const startTime = Date.now();
      // Raw prompt text; the logger replaces it with a hash before storage.
      const inputText = params.messages.map(m => m.content).join('');

      let response: CompletionResponse;
      try {
        response = await client.complete(params);
      } catch (error: unknown) {
        // Anything can be thrown; normalise before reading message/name.
        const failure = error instanceof Error ? error : new Error(String(error));
        await logger.log({
          operation: {
            type: 'generation',
            model: params.model,
            modelVersion: 'unknown',
            provider: client.provider
          },
          io: {
            inputHash: inputText,
            inputTokens: 0,
            outputHash: '',
            outputTokens: 0
          },
          performance: {
            latencyMs: Date.now() - startTime
          },
          metadata: {
            error: failure.message,
            errorType: failure.name
          },
          context: resolveContext()
        });
        throw error;
      }

      await logger.log({
        operation: {
          type: 'generation',
          model: params.model,
          modelVersion: response.model,
          provider: client.provider
        },
        io: {
          inputHash: inputText,
          inputTokens: response.usage.input_tokens,
          // Only the first content item is recorded; tolerate an empty list.
          outputHash: response.content[0]?.text ?? '',
          outputTokens: response.usage.output_tokens
        },
        performance: {
          latencyMs: Date.now() - startTime
        },
        cost: calculateCost(params.model, response.usage),
        context: resolveContext()
      });
      return response;
    }
  };
}
import { PrismaClient } from '@prisma/client';
/**
 * Audit storage backed by a Prisma `aiAuditLog` model.
 *
 * Only a flat subset of each record is persisted (see `write`). `query` maps
 * the flat rows back into the nested {@link AIAuditLog} shape so consumers can
 * read `log.actor.id`, `log.cost.totalCost`, etc.; fields that were never
 * persisted come back as neutral placeholders.
 */
class DatabaseAuditStorage implements AuditStorage {
  constructor(private prisma: PrismaClient) {}

  /** Persists the flat subset of `log` that the `aiAuditLog` table stores. */
  async write(log: AIAuditLog): Promise<void> {
    await this.prisma.aiAuditLog.create({
      data: {
        id: log.id,
        timestamp: log.timestamp,
        correlationId: log.correlationId,
        sessionId: log.sessionId,
        actorType: log.actor.type,
        actorId: log.actor.id,
        model: log.operation.model,
        operationType: log.operation.type,
        inputTokens: log.io.inputTokens,
        outputTokens: log.io.outputTokens,
        inputHash: log.io.inputHash,
        outputHash: log.io.outputHash,
        latencyMs: log.performance.latencyMs,
        totalCost: log.cost.totalCost,
        contentFiltered: log.safety.contentFiltered,
        humanReviewRequired: log.safety.humanReviewRequired,
        application: log.context.application,
        environment: log.context.environment,
        metadata: log.metadata as any
      }
    });
  }

  /**
   * Returns matching records, newest first.
   *
   * @param filters Date range plus optional actor/model/application filters.
   *   At most `filters.limit` rows are returned (100 when unset or 0).
   */
  async query(filters: AuditQueryFilters): Promise<AIAuditLog[]> {
    const rows = await this.prisma.aiAuditLog.findMany({
      where: {
        timestamp: {
          gte: filters.startDate,
          lte: filters.endDate
        },
        actorId: filters.actorId,
        model: filters.model,
        application: filters.application
      },
      orderBy: { timestamp: 'desc' },
      take: filters.limit || 100
    });
    return rows.map(row => this.toAuditLog(row));
  }

  /**
   * Rebuilds the nested record from one flat row. The parameter type lists
   * exactly the columns `write` stores.
   */
  private toAuditLog(row: {
    id: string;
    timestamp: Date;
    correlationId: string;
    sessionId: string;
    actorType: string;
    actorId: string;
    model: string;
    operationType: string;
    inputTokens: number;
    outputTokens: number;
    inputHash: string;
    outputHash: string;
    latencyMs: number;
    totalCost: number;
    contentFiltered: boolean;
    humanReviewRequired: boolean;
    application: string;
    environment: string;
    metadata: unknown;
  }): AIAuditLog {
    return {
      id: row.id,
      timestamp: row.timestamp,
      correlationId: row.correlationId,
      sessionId: row.sessionId,
      actor: {
        type: row.actorType as AIAuditLog['actor']['type'],
        id: row.actorId
      },
      operation: {
        type: row.operationType as AIAuditLog['operation']['type'],
        model: row.model,
        // Not persisted by write(): placeholders.
        modelVersion: 'unknown',
        provider: 'unknown'
      },
      io: {
        inputHash: row.inputHash,
        inputTokens: row.inputTokens,
        outputHash: row.outputHash,
        outputTokens: row.outputTokens
      },
      safety: {
        contentFiltered: row.contentFiltered,
        humanReviewRequired: row.humanReviewRequired
      },
      performance: { latencyMs: row.latencyMs },
      cost: {
        // Only the total is persisted; the split and currency are placeholders.
        inputCost: 0,
        outputCost: 0,
        totalCost: row.totalCost,
        currency: 'USD'
      },
      context: {
        application: row.application,
        environment: row.environment as AIAuditLog['context']['environment'],
        tags: []
      },
      metadata: (row.metadata ?? {}) as Record<string, unknown>
    };
  }
}
// For compliance, logs should be immutable and tamper-evident
/**
 * Append-only audit storage that chains records with SHA-256 hashes: every
 * stored entry carries the hash of its predecessor (`previousLogHash`) and its
 * own hash (`logHash`), so an edited, removed or reordered entry breaks the chain.
 *
 * The chain head is held in memory and starts at `'0'` for each new instance.
 *
 * NOTE(review): no `query` method is defined here, although the sibling
 * database storage has one; confirm what `AuditStorage` requires.
 */
class ImmutableAuditStorage implements AuditStorage {
  private previousHash = '0';

  /**
   * @param storage Append-only sink that receives each chained entry.
   */
  constructor(
    private readonly storage: { append(entry: unknown): Promise<void> }
  ) {}

  /** Appends `log` together with its chain fields and advances the chain head. */
  async write(log: AIAuditLog): Promise<void> {
    // Chain logs with hashes for tamper evidence
    const logWithChain = {
      ...log,
      previousLogHash: this.previousHash,
      logHash: this.hashLog(log, this.previousHash)
    };
    await this.storage.append(logWithChain);
    // Advance only after the append succeeded, so a failed write leaves the chain intact.
    this.previousHash = logWithChain.logHash;
  }

  /** SHA-256 (hex) over the JSON of the record plus its predecessor's hash. */
  private hashLog(log: AIAuditLog, previousHash: string): string {
    const content = JSON.stringify({ ...log, previousHash });
    return createHash('sha256').update(content).digest('hex');
  }

  /**
   * Verifies a chain of stored entries.
   *
   * @param logs Entries as produced by `write`, in write order, starting with
   *   the first entry of the chain (the one whose predecessor hash is `'0'`).
   * @returns `true` when every entry links to its predecessor and its hash
   *   matches its content; `false` on the first mismatch.
   */
  async verifyIntegrity(logs: AIAuditLog[]): Promise<boolean> {
    let expectedHash = '0';
    for (const entry of logs) {
      // The chain fields were added after hashing in write(); strip them so
      // the same content is hashed here as at write time.
      const { previousLogHash, logHash, ...log } = entry as AIAuditLog & {
        previousLogHash?: string;
        logHash?: string;
      };
      if (previousLogHash !== expectedHash) {
        return false; // Broken link: entry removed, reordered or relinked
      }
      const computedHash = this.hashLog(log, expectedHash);
      if (computedHash !== logHash) {
        return false; // Tampering detected
      }
      expectedHash = computedHash;
    }
    return true;
  }
}
/**
 * Aggregated view of the audit logs for one reporting period: headline
 * counts, per-model usage, the events flagged by the safety fields, and
 * retention information.
 */
interface ComplianceReport {
  /** Reporting window the figures refer to. */
  period: { start: Date; end: Date };

  /** Headline totals for the period. */
  summary: {
    totalAIOperations: number;
    uniqueUsers: number;
    totalCost: number;
    contentFilteredCount: number;
    humanReviewCount: number;
  };

  /** Operation count and cost per model. */
  modelUsage: Array<{ model: string; count: number; cost: number }>;

  /** Flagged events with their severity. */
  riskEvents: Array<{
    id: string;
    timestamp: Date;
    type: string;
    severity: 'low' | 'medium' | 'high';
    resolution?: string;
  }>;

  /** Retention figures and the policy they are reported against. */
  dataRetention: {
    logsRetained: number;
    oldestLog: Date;
    retentionPolicy: string;
  };
}
/**
 * Builds a compliance report from the audit logs stored for `period`.
 *
 * The logs are aggregated in a single pass. `oldestLog` is the minimum
 * timestamp among the returned logs, so it does not depend on the order in
 * which the store returns them.
 *
 * NOTE(review): the figures cover only what `storage.query` returns for these
 * filters; a store that applies a default row limit will truncate the report.
 *
 * @param storage         Source of audit logs.
 * @param period          Inclusive reporting window passed to the store.
 * @param retentionPolicy Text reported as the retention policy; defaults to `'90 days'`.
 * @returns The aggregated report. With no logs, all counts are zero and
 *   `oldestLog` is the current time.
 */
async function generateComplianceReport(
  storage: AuditStorage,
  period: { start: Date; end: Date },
  retentionPolicy = '90 days'
): Promise<ComplianceReport> {
  const logs = await storage.query({ startDate: period.start, endDate: period.end });

  const actorIds = new Set<string>();
  const usageByModel = new Map<string, { count: number; cost: number }>();
  const riskEvents: ComplianceReport['riskEvents'] = [];
  let totalCost = 0;
  let contentFilteredCount = 0;
  let humanReviewCount = 0;
  let oldestLog: Date | undefined;

  for (const log of logs) {
    actorIds.add(log.actor.id);
    totalCost += log.cost.totalCost;

    const usage = usageByModel.get(log.operation.model) ?? { count: 0, cost: 0 };
    usageByModel.set(log.operation.model, {
      count: usage.count + 1,
      cost: usage.cost + log.cost.totalCost
    });

    const { contentFiltered, humanReviewRequired, filterReasons } = log.safety;
    if (contentFiltered) contentFilteredCount += 1;
    if (humanReviewRequired) humanReviewCount += 1;
    if (contentFiltered || humanReviewRequired) {
      riskEvents.push({
        id: log.id,
        timestamp: log.timestamp,
        type: filterReasons?.[0] || 'unknown',
        // Anything escalated to a human is high; filtered-only is medium.
        severity: humanReviewRequired ? 'high' : 'medium'
      });
    }

    if (oldestLog === undefined || log.timestamp.getTime() < oldestLog.getTime()) {
      oldestLog = log.timestamp;
    }
  }

  return {
    period,
    summary: {
      totalAIOperations: logs.length,
      uniqueUsers: actorIds.size,
      totalCost,
      contentFilteredCount,
      humanReviewCount
    },
    modelUsage: Array.from(usageByModel, ([model, usage]) => ({ model, ...usage })),
    riskEvents,
    dataRetention: {
      logsRetained: logs.length,
      oldestLog: oldestLog ?? new Date(),
      retentionPolicy
    }
  };
}
development
Test skills for correct activation, content quality, and regression — both automated checks (frontmatter validity, lint) and manual verification (query-suite activation testing). Covers CI integration and how to catch skill regressions before users do. Use this skill when adding skills to a repo, setting up CI for a skill library, or debugging "the skill exists but doesn't work". Activate when: test skills, validate skills, skill CI, skill linting, skill activation test, skill regression.
documentation
Write the YAML frontmatter for a SKILL.md file so it activates reliably — name, description, and activation keywords that the model matches against. Covers length, tone, and the most common frontmatter mistakes. Use this skill when authoring a new skill, fixing a skill that isn't auto-activating, or reviewing skills for publication. Activate when: SKILL.md frontmatter, skill description, skill activation, skill YAML, write a skill, author a skill.
development
Design skills that fire at the right moment — neither over-eager (noise) nor under-eager (silent). Covers activation specificity, trigger phrases, disambiguation between overlapping skills, and debugging activation. Use this skill when multiple skills could fire on the same query, a skill never fires, or a skill fires too often. Activate when: skill won't activate, skill over-activates, overlapping skills, skill triggers, skill selection, skill disambiguation.
development
Structure SKILL.md content so the model reads just enough — concise summary up front, progressively deeper detail, examples on demand. Covers section ordering, length budgets, when to split into multiple skills. Use this skill when writing or refactoring a skill body, one skill has grown too long, or a skill is wordy but not useful. Activate when: SKILL.md structure, skill content, skill too long, split skill, progressive disclosure, skill body.