toolchains/ai/services/openrouter/SKILL.md
OpenRouter unified AI API - Access 200+ LLMs through a single interface with intelligent routing, streaming, cost optimization, and model fallbacks
npx skillsauth add bobmatnyc/claude-mpm-skills openrouter
Install this skill globally with one command. Works with Claude Code, Cursor, and Windsurf.
3 of 9 scanners reported clean
Some scanners were skipped, did not run, or reported a non-clean status. Review each row below.
OpenRouter provides a single API to access 200+ language models from OpenAI, Anthropic, Google, Meta, Mistral, and more. It offers intelligent routing, streaming, cost optimization, and a standardized, OpenAI-compatible interface.
Key Features:
Pricing Model:
Installation:
# Install the OpenAI SDK; OpenRouter is reached through its OpenAI-compatible interface.
npm install openai # Use OpenAI SDK (Node.js)
# or
pip install openai # Python
# Sign up at https://openrouter.ai/keys to obtain a key, then expose it to the examples below.
export OPENROUTER_API_KEY="sk-or-v1-..."
import OpenAI from 'openai';
// Optional headers sent with every request made through the client below.
const openRouterHeaders = {
  'HTTP-Referer': 'https://your-app.com', // Optional
  'X-Title': 'Your App Name', // Optional
};

// OpenAI SDK client pointed at the OpenRouter endpoint; the key is read from the environment.
const client = new OpenAI({
  baseURL: 'https://openrouter.ai/api/v1',
  apiKey: process.env.OPENROUTER_API_KEY,
  defaultHeaders: openRouterHeaders,
});
/**
 * Sends one fixed user prompt to Claude 3.5 Sonnet and logs the reply text.
 */
async function chat() {
  const request = {
    model: 'anthropic/claude-3.5-sonnet',
    messages: [
      { role: 'user', content: 'Explain quantum computing in simple terms' },
    ],
  };
  const { choices } = await client.chat.completions.create(request);
  const firstChoice = choices[0];
  console.log(firstChoice.message.content);
}
/**
 * Streams a completion for a fixed prompt and writes each text fragment to
 * stdout as it arrives.
 */
async function streamChat() {
  const stream = await client.chat.completions.create({
    stream: true,
    model: 'openai/gpt-4-turbo',
    messages: [{ role: 'user', content: 'Write a short story about AI' }],
  });

  for await (const chunk of stream) {
    // Chunks without a choice/delta/content contribute an empty string.
    const fragment = chunk.choices[0]?.delta?.content;
    process.stdout.write(fragment ? fragment : '');
  }
}
Flagship Models (Highest Quality):
// Short alias -> model ID string for the highest-quality tier.
const flagshipModels = {
claude: 'anthropic/claude-3.5-sonnet', // Best reasoning
gpt4: 'openai/gpt-4-turbo', // Best general purpose
gemini: 'google/gemini-pro-1.5', // Best long context
opus: 'anthropic/claude-3-opus', // Best complex tasks
};
Fast Models (Low Latency):
// Short alias -> model ID string for the low-latency tier.
const fastModels = {
claude: 'anthropic/claude-3-haiku', // Fastest Claude
gpt35: 'openai/gpt-3.5-turbo', // Fast GPT
gemini: 'google/gemini-flash-1.5', // Fast Gemini
llama: 'meta-llama/llama-3.1-8b-instruct', // Fast open source
};
Cost-Optimized Models:
// Short alias -> model ID string for the cost-optimized tier.
// Trailing comments give two prices per 1M tokens — presumably input/output; confirm against current pricing.
const budgetModels = {
haiku: 'anthropic/claude-3-haiku', // $0.25/$1.25 per 1M tokens
gemini: 'google/gemini-flash-1.5', // $0.075/$0.30 per 1M tokens
llama: 'meta-llama/llama-3.1-8b-instruct', // $0.06/$0.06 per 1M tokens
mixtral: 'mistralai/mixtral-8x7b-instruct', // $0.24/$0.24 per 1M tokens
};
Specialized Models:
// Capability name -> model ID string for task-specific use.
const specializedModels = {
vision: 'openai/gpt-4-vision-preview', // Image understanding
code: 'anthropic/claude-3.5-sonnet', // Code generation
longContext: 'google/gemini-pro-1.5', // 2M token context
function: 'openai/gpt-4-turbo', // Function calling
};
/** Criteria consumed by `selectModel` to choose a model ID. */
interface ModelSelector {
  /** Kind of work the model will perform. */
  task: 'chat' | 'code' | 'vision' | 'function' | 'summary';
  /** Trade-off that decides between otherwise suitable models. */
  priority: 'quality' | 'speed' | 'cost';
  /** Max cost per 1M tokens; only consulted when `priority` is `'cost'`. */
  maxCost?: number; // Max cost per 1M tokens
  /** Required context size (presumably tokens); values above 100000 select the long-context model. */
  contextSize?: number;
}

/**
 * Picks a model ID for the given criteria.
 *
 * Rules are evaluated in order: vision tasks, code tasks, large-context
 * requests, then a default chosen by `priority`.
 *
 * @param criteria - Task, priority and optional budget/context constraints.
 * @returns The selected model ID string.
 */
function selectModel(criteria: ModelSelector): string {
  if (criteria.task === 'vision') {
    return 'openai/gpt-4-vision-preview';
  }

  if (criteria.task === 'code') {
    return criteria.priority === 'quality'
      ? 'anthropic/claude-3.5-sonnet'
      : 'meta-llama/llama-3.1-70b-instruct';
  }

  if (criteria.contextSize !== undefined && criteria.contextSize > 100000) {
    return 'google/gemini-pro-1.5'; // 2M context
  }

  // Default selection by priority
  switch (criteria.priority) {
    case 'quality':
      return 'anthropic/claude-3.5-sonnet';
    case 'speed':
      return 'anthropic/claude-3-haiku';
    case 'cost':
      // Compare against undefined rather than truthiness so that a budget of
      // 0 still counts as "below 0.5" and selects the cheapest model.
      return criteria.maxCost !== undefined && criteria.maxCost < 0.5
        ? 'google/gemini-flash-1.5'
        : 'anthropic/claude-3-haiku';
    default:
      return 'openai/gpt-4-turbo';
  }
}
// Usage
// A 'code' task with 'quality' priority resolves to 'anthropic/claude-3.5-sonnet'.
const model = selectModel({
task: 'code',
priority: 'quality',
});
/**
 * Streams a completion for `prompt`, echoing text to stdout as it arrives.
 *
 * Function-call deltas and finish reasons are logged. Any error is logged
 * (message only, when it is an `Error`) and then rethrown.
 *
 * @param prompt - User message to send.
 * @param model - Model ID to use.
 * @returns The concatenated text of all content deltas.
 */
async function robustStreamingChat(
  prompt: string,
  model: string = 'anthropic/claude-3.5-sonnet'
) {
  const pieces: string[] = [];

  try {
    const stream = await client.chat.completions.create({
      model,
      messages: [{ role: 'user', content: prompt }],
      stream: true,
      max_tokens: 4000,
    });

    for await (const chunk of stream) {
      const choice = chunk.choices[0];
      const delta = choice?.delta;

      const text = delta?.content;
      if (text) {
        pieces.push(text);
        process.stdout.write(text);
      }

      // Handle function calls
      if (delta?.function_call) {
        console.log('\nFunction call:', delta.function_call);
      }

      // Check for finish reason
      const finishReason = choice?.finish_reason;
      if (finishReason) {
        console.log(`\n[Finished: ${finishReason}]`);
      }
    }
  } catch (error) {
    if (error instanceof Error) {
      console.error('Streaming error:', error.message);
    }
    throw error;
  }

  return pieces.join('');
}
import os

from openai import OpenAI

# OpenAI SDK client pointed at the OpenRouter endpoint.
# The API key is read from the OPENROUTER_API_KEY environment variable.
client = OpenAI(
    base_url="https://openrouter.ai/api/v1",
    api_key=os.environ.get("OPENROUTER_API_KEY"),
)
def stream_chat(prompt: str, model: str = "anthropic/claude-3.5-sonnet"):
    """Stream a completion for ``prompt``, printing text as it arrives.

    Args:
        prompt: User message to send.
        model: Model ID to use.

    Returns:
        The concatenated text of all content deltas.
    """
    stream = client.chat.completions.create(
        model=model,
        messages=[{"role": "user", "content": prompt}],
        stream=True,
    )
    full_response = ""
    for chunk in stream:
        # A chunk may carry no choices at all (e.g. a trailing usage-only
        # chunk); indexing [0] on it would raise IndexError.
        if not chunk.choices:
            continue
        content = chunk.choices[0].delta.content
        if content:
            full_response += content
            print(content, end="", flush=True)
    print()  # New line
    return full_response
import { useState } from 'react';
/**
 * React component that streams a chat completion into a read-only textarea.
 *
 * The response body is read as server-sent events. Bytes are decoded in
 * streaming mode and buffered so that a line (or a multi-byte character)
 * split across two network reads is still parsed correctly.
 *
 * NOTE(review): this reads OPENROUTER_API_KEY in component code. If the
 * component is bundled for the browser the key is exposed to users; route the
 * request through a server-side proxy in production.
 */
function StreamingChat() {
  const [response, setResponse] = useState('');
  const [isStreaming, setIsStreaming] = useState(false);

  // Appends the text delta carried by one SSE line; non-data lines, the
  // [DONE] sentinel and unparsable payloads are ignored.
  function appendFromSseLine(rawLine: string) {
    const line = rawLine.trim();
    if (!line.startsWith('data: ')) return;
    const data = line.slice(6);
    if (data === '[DONE]') return;
    try {
      const parsed = JSON.parse(data);
      const content = parsed.choices?.[0]?.delta?.content || '';
      if (content) {
        setResponse(prev => prev + content);
      }
    } catch (e) {
      // Skip invalid JSON
    }
  }

  async function handleSubmit(prompt: string) {
    setIsStreaming(true);
    setResponse('');
    try {
      const res = await fetch('https://openrouter.ai/api/v1/chat/completions', {
        method: 'POST',
        headers: {
          'Authorization': `Bearer ${process.env.OPENROUTER_API_KEY}`,
          'Content-Type': 'application/json',
        },
        body: JSON.stringify({
          model: 'anthropic/claude-3.5-sonnet',
          messages: [{ role: 'user', content: prompt }],
          stream: true,
        }),
      });
      // Surface HTTP failures instead of trying to parse an error body as SSE.
      if (!res.ok || !res.body) {
        throw new Error(`Request failed with status ${res.status}`);
      }
      const reader = res.body.getReader();
      const decoder = new TextDecoder();
      let buffer = '';
      while (true) {
        const { done, value } = await reader.read();
        if (done) break;
        // stream: true keeps incomplete multi-byte sequences for the next read.
        buffer += decoder.decode(value, { stream: true });
        const lines = buffer.split('\n');
        // The last element is a partial line (or ''); keep it for the next read.
        buffer = lines.pop() ?? '';
        lines.forEach(appendFromSseLine);
      }
      // Flush whatever the decoder and buffer still hold.
      buffer += decoder.decode();
      if (buffer) {
        appendFromSseLine(buffer);
      }
    } catch (error) {
      console.error('Streaming error:', error);
    } finally {
      setIsStreaming(false);
    }
  }

  return (
    <div>
      <textarea
        value={response}
        readOnly
        rows={20}
        cols={80}
        placeholder="Response will appear here..."
      />
      {/* Disabled while streaming so two responses cannot interleave. */}
      <button onClick={() => handleSubmit('Explain AI')} disabled={isStreaming}>
        {isStreaming ? 'Streaming...' : 'Send'}
      </button>
    </div>
  );
}
// Tool definitions offered to the model: a single `get_weather` function
// taking a required `location` and an optional `unit`.
const tools = [
  {
    // `as const` keeps the literal type 'function'; without it the property
    // widens to `string` and no longer matches a tool definition type that
    // requires the literal.
    type: 'function' as const,
    function: {
      name: 'get_weather',
      description: 'Get current weather for a location',
      parameters: {
        type: 'object',
        properties: {
          location: {
            type: 'string',
            description: 'City name, e.g. San Francisco',
          },
          unit: {
            type: 'string',
            enum: ['celsius', 'fahrenheit'],
          },
        },
        required: ['location'],
      },
    },
  },
];
/**
 * Asks a weather question with tools enabled, runs every requested tool call,
 * then sends all results back in one follow-up request and logs the answer.
 *
 * The follow-up includes the assistant message that requested the tools, so
 * it must carry a `tool` message for every `tool_call_id` in that message.
 * All results are therefore collected before the single follow-up is sent.
 */
async function chatWithFunctions() {
  const userMessage = {
    role: 'user' as const,
    content: 'What is the weather in Tokyo?',
  };

  const completion = await client.chat.completions.create({
    model: 'openai/gpt-4-turbo',
    messages: [userMessage],
    tools,
    tool_choice: 'auto',
  });

  const message = completion.choices[0].message;
  if (!message.tool_calls || message.tool_calls.length === 0) {
    return;
  }

  const toolMessages = [];
  for (const toolCall of message.tool_calls) {
    console.log('Function:', toolCall.function.name);
    console.log('Arguments:', toolCall.function.arguments);

    // Execute function
    const args = JSON.parse(toolCall.function.arguments);
    const result = await getWeather(args.location, args.unit);

    toolMessages.push({
      role: 'tool' as const,
      tool_call_id: toolCall.id,
      content: JSON.stringify(result),
    });
  }

  // Send all results back in one request
  const followUp = await client.chat.completions.create({
    model: 'openai/gpt-4-turbo',
    messages: [userMessage, message, ...toolMessages],
    tools,
  });
  console.log(followUp.choices[0].message.content);
}
/**
 * Runs a tool-calling loop: request a completion, execute any requested tool
 * calls, append their results, and repeat until the model answers without
 * tool calls.
 *
 * @param userQuery - Initial user message.
 * @returns The content of the first assistant message with no tool calls.
 * @throws Error('Max iterations reached') after 5 rounds that all requested tools.
 */
async function multiStepFunctionCall(userQuery: string) {
  const messages = [{ role: 'user', content: userQuery }];
  const maxIterations = 5;

  for (let iterationCount = 0; iterationCount < maxIterations; iterationCount++) {
    const completion = await client.chat.completions.create({
      model: 'openai/gpt-4-turbo',
      messages,
      tools,
      tool_choice: 'auto',
    });
    const message = completion.choices[0].message;
    messages.push(message);

    // An empty array is truthy, so test the length: a reply with
    // `tool_calls: []` is a final answer, not another round.
    const toolCalls = message.tool_calls ?? [];
    if (toolCalls.length === 0) {
      // No more function calls, return final response
      return message.content;
    }

    // Execute all function calls
    for (const toolCall of toolCalls) {
      const functionName = toolCall.function.name;
      const args = JSON.parse(toolCall.function.arguments);
      // Execute function (implement your function registry)
      const result = await executeFunctionCall(functionName, args);
      messages.push({
        role: 'tool',
        tool_call_id: toolCall.id,
        content: JSON.stringify(result),
      });
    }
  }

  throw new Error('Max iterations reached');
}
import { encoding_for_model } from 'tiktoken';
/** Result of `estimateCost`: token counts and the costs derived from them. */
interface CostEstimate {
/** Number of tokens counted in the prompt. */
promptTokens: number;
/** Number of completion tokens assumed by the caller. */
completionTokens: number;
/** Cost attributed to the prompt tokens. */
promptCost: number;
/** Cost attributed to the completion tokens. */
completionCost: number;
/** Sum of `promptCost` and `completionCost`. */
totalCost: number;
}
// Price table keyed by model ID; `input` and `output` are prices per 1M tokens.
// Typed as a string-keyed record so it can be indexed with an arbitrary model
// ID (lookups for unlisted models yield `undefined` at runtime).
const modelPricing: Record<string, { input: number; output: number }> = {
  'anthropic/claude-3.5-sonnet': { input: 3.00, output: 15.00 }, // per 1M tokens
  'anthropic/claude-3-haiku': { input: 0.25, output: 1.25 },
  'openai/gpt-4-turbo': { input: 10.00, output: 30.00 },
  'openai/gpt-3.5-turbo': { input: 0.50, output: 1.50 },
  'google/gemini-flash-1.5': { input: 0.075, output: 0.30 },
};
/**
 * Estimates the cost of a request from the prompt text and an expected
 * completion length.
 *
 * Prompt tokens are counted with the GPT-4 encoding regardless of `model`, so
 * the count is an approximation for other model families. Models missing from
 * `modelPricing` are priced at zero.
 *
 * @param prompt - Prompt text to tokenize.
 * @param expectedCompletion - Assumed number of completion tokens.
 * @param model - Model ID used to look up per-1M-token prices.
 * @returns Token counts plus prompt, completion and total cost.
 */
function estimateCost(
  prompt: string,
  expectedCompletion: number,
  model: string
): CostEstimate {
  const encoder = encoding_for_model('gpt-4'); // Approximation
  let promptTokens: number;
  try {
    promptTokens = encoder.encode(prompt).length;
  } finally {
    // The encoder must be released explicitly once it is no longer needed.
    encoder.free();
  }
  const completionTokens = expectedCompletion;

  const pricing = modelPricing[model] || { input: 0, output: 0 };
  const promptCost = (promptTokens / 1_000_000) * pricing.input;
  const completionCost = (completionTokens / 1_000_000) * pricing.output;

  return {
    promptTokens,
    completionTokens,
    promptCost,
    completionCost,
    totalCost: promptCost + completionCost,
  };
}
// Usage
// Estimates a short prompt plus 500 completion tokens on Claude 3.5 Sonnet
// and prints the total rounded to four decimal places.
const estimate = estimateCost(
'Explain quantum computing',
500, // Expected response tokens
'anthropic/claude-3.5-sonnet'
);
console.log(`Estimated cost: $${estimate.totalCost.toFixed(4)}`);
/**
 * Sends `prompt` to the most capable model whose estimated cost (assuming a
 * 1000-token completion) does not exceed the budget.
 *
 * Candidates are tried in order of preference; if none fits, the request goes
 * to 'google/gemini-flash-1.5' without a further estimate.
 *
 * @param prompt - User message to send.
 * @param maxCostPerRequest - Budget for one request.
 * @returns The content of the first choice's message.
 */
async function budgetOptimizedChat(
  prompt: string,
  maxCostPerRequest: number = 0.01 // $0.01 max
) {
  const preferred = ['anthropic/claude-3.5-sonnet', 'anthropic/claude-3-haiku'];
  const withinBudget = (candidate: string) =>
    !(estimateCost(prompt, 1000, candidate).totalCost > maxCostPerRequest);

  // `find` stops at the first fit, so cheaper candidates are only estimated
  // when the more expensive ones exceed the budget.
  const selectedModel = preferred.find(withinBudget) ?? 'google/gemini-flash-1.5';
  console.log(`Selected model: ${selectedModel}`);

  const completion = await client.chat.completions.create({
    model: selectedModel,
    messages: [{ role: 'user', content: prompt }],
  });
  return completion.choices[0].message.content;
}
/**
 * Sends many prompts in fixed-size batches, pausing between batches.
 *
 * Requests within a batch run in parallel; batches run sequentially. If any
 * request in a batch rejects, the whole call rejects.
 *
 * @param prompts - Prompts to send, one request each.
 * @param model - Model ID used for every request.
 * @param concurrency - Number of requests per batch (positive integer).
 * @param delayMs - Pause between consecutive batches, in milliseconds.
 * @returns Completions in the same order as `prompts`.
 * @throws RangeError if `concurrency` is not a positive integer.
 */
async function batchProcess(
  prompts: string[],
  model: string,
  concurrency: number = 5,
  delayMs: number = 1000
) {
  // A step of 0 or less would keep the loop below from ever advancing.
  if (!Number.isInteger(concurrency) || concurrency < 1) {
    throw new RangeError('concurrency must be a positive integer');
  }

  const results = [];
  for (let i = 0; i < prompts.length; i += concurrency) {
    const batch = prompts.slice(i, i + concurrency);
    const batchResults = await Promise.all(
      batch.map(prompt =>
        client.chat.completions.create({
          model,
          messages: [{ role: 'user', content: prompt }],
          max_tokens: 500, // Limit tokens to control cost
        })
      )
    );
    results.push(...batchResults);

    // Rate limiting delay (skipped after the final batch)
    if (i + concurrency < prompts.length) {
      await new Promise(resolve => setTimeout(resolve, delayMs));
    }
  }
  return results;
}
// Models to try, in order of preference.
const modelFallbackChain = [
  'anthropic/claude-3.5-sonnet',
  'openai/gpt-4-turbo',
  'anthropic/claude-3-haiku',
  'google/gemini-flash-1.5',
];

/**
 * Sends `prompt` to the first model in `modelFallbackChain`, falling back to
 * the next model each time a request fails.
 *
 * @param prompt - User message to send.
 * @param maxRetries - Maximum number of fallbacks after the first attempt.
 *   The default of 3 covers the whole four-model chain.
 * @returns The first successful reply's content, or '' if it has none.
 * @throws Error('No models available') if the chain is empty.
 * @throws Error('All models failed') if every attempted model failed.
 */
async function chatWithFallback(
  prompt: string,
  maxRetries: number = 3
): Promise<string> {
  if (modelFallbackChain.length === 0) {
    throw new Error('No models available');
  }

  // One initial attempt plus at most `maxRetries` fallbacks, capped by the
  // chain length. NaN or negative values mean "no fallbacks".
  const retries = Number.isNaN(maxRetries) ? 0 : Math.max(0, Math.floor(maxRetries));
  const attempts = Math.min(modelFallbackChain.length, retries + 1);

  for (const model of modelFallbackChain.slice(0, attempts)) {
    try {
      console.log(`Trying model: ${model}`);
      const completion = await client.chat.completions.create({
        model,
        messages: [{ role: 'user', content: prompt }],
        max_tokens: 2000,
      });
      return completion.choices[0].message.content || '';
    } catch (error) {
      console.warn(`Model ${model} failed:`, error);
      // Continue to next model
    }
  }

  throw new Error('All models failed');
}
/**
 * Calls `fn`, retrying with exponential backoff while it fails with a
 * rate-limit error (an error whose `status` property is 429).
 *
 * Any other error is rethrown immediately. The delay before retry `i`
 * (0-based) is `2^i * baseDelayMs`. No delay is taken after the final attempt.
 *
 * @param fn - Operation to run.
 * @param maxRetries - Maximum number of attempts.
 * @param baseDelayMs - Delay before the first retry, in milliseconds.
 * @returns The value `fn` resolves with.
 * @throws The last error from `fn`, or an `Error` if no attempt was made.
 */
async function retryWithBackoff<T>(
  fn: () => Promise<T>,
  maxRetries: number = 5,
  baseDelayMs: number = 1000
): Promise<T> {
  // Thrown as-is only when the loop never runs (maxRetries < 1).
  let lastError: unknown = new Error('retryWithBackoff: maxRetries must be at least 1');

  for (let attempt = 0; attempt < maxRetries; attempt++) {
    try {
      return await fn();
    } catch (error: unknown) {
      lastError = error;

      // Read `status` defensively: the thrown value may be anything.
      const status = (error as { status?: unknown } | null | undefined)?.status;
      if (status !== 429) {
        throw error; // Non-retryable error
      }

      // Out of attempts: fail now rather than sleeping first.
      if (attempt === maxRetries - 1) {
        break;
      }

      const delay = Math.pow(2, attempt) * baseDelayMs; // Exponential backoff
      console.log(`Rate limited. Retrying in ${delay}ms...`);
      await new Promise(resolve => setTimeout(resolve, delay));
    }
  }

  throw lastError;
}
// Usage
// Top-level `await`: run this inside an ES module or wrap it in an async function.
const result = await retryWithBackoff(() =>
client.chat.completions.create({
model: 'anthropic/claude-3.5-sonnet',
messages: [{ role: 'user', content: 'Hello' }],
})
);
// System-prompt text keyed by personality name; `chatWithPersonality` sends
// the selected entry as the system message.
const systemPrompts = {
concise: 'You are a helpful assistant. Be concise and direct.',
detailed: 'You are a knowledgeable expert. Provide comprehensive answers with examples.',
code: 'You are an expert programmer. Provide clean, well-commented code with explanations.',
creative: 'You are a creative writing assistant. Be imaginative and engaging.',
};
/**
 * Sends `prompt` preceded by the system prompt registered for `personality`.
 *
 * @param prompt - User message to send.
 * @param personality - Key into `systemPrompts`.
 * @returns The content of the first choice's message.
 */
async function chatWithPersonality(
  prompt: string,
  personality: keyof typeof systemPrompts
) {
  const systemMessage = { role: 'system', content: systemPrompts[personality] };
  const userMessage = { role: 'user', content: prompt };

  const { choices } = await client.chat.completions.create({
    model: 'anthropic/claude-3.5-sonnet',
    messages: [systemMessage, userMessage],
  });
  return choices[0].message.content;
}
/**
 * Classifies the sentiment of `text` using three worked examples
 * (positive, negative, neutral) placed before the real input.
 *
 * @param text - Text to classify.
 * @returns The content of the first choice's message.
 */
async function fewShotClassification(text: string) {
  // [input, expected label] pairs shown to the model as prior turns.
  const examples: Array<[string, string]> = [
    ['I love this product!', 'positive'],
    ['This is terrible.', 'negative'],
    ['It works fine.', 'neutral'],
  ];

  const demonstrations = examples.flatMap(([sample, label]) => [
    { role: 'user', content: sample },
    { role: 'assistant', content: label },
  ]);

  const completion = await client.chat.completions.create({
    model: 'openai/gpt-4-turbo',
    messages: [
      {
        role: 'system',
        content: 'Classify text sentiment as positive, negative, or neutral.',
      },
      ...demonstrations,
      { role: 'user', content: text },
    ],
  });
  return completion.choices[0].message.content;
}
/**
 * Asks the model to work through `problem` step by step by appending a
 * numbered-list lead-in to the prompt.
 *
 * @param problem - Problem statement.
 * @returns The content of the first choice's message.
 */
async function reasoningTask(problem: string) {
  const steppedPrompt = `${problem}\n\nLet's solve this step by step:\n1.`;

  const { choices } = await client.chat.completions.create({
    model: 'anthropic/claude-3.5-sonnet',
    messages: [{ role: 'user', content: steppedPrompt }],
    max_tokens: 3000,
  });
  return choices[0].message.content;
}
/**
 * Serializes requests through a FIFO queue and spaces them so that at most
 * `requestsPerMinute` are started per minute.
 *
 * Requests run one at a time. After each one completes (successfully or not)
 * the queue waits `60000 / requestsPerMinute` ms before starting the next.
 */
class RateLimitedClient {
  private requestQueue: Array<() => Promise<void>> = [];
  private processing = false;
  private readonly requestsPerMinute: number;
  private readonly requestInterval: number;

  /**
   * @param requestsPerMinute - Maximum request rate.
   * @throws RangeError if `requestsPerMinute` is not a positive, finite number.
   */
  constructor(requestsPerMinute: number = 60) {
    if (!Number.isFinite(requestsPerMinute) || requestsPerMinute <= 0) {
      throw new RangeError('requestsPerMinute must be a positive, finite number');
    }
    this.requestsPerMinute = requestsPerMinute;
    this.requestInterval = 60000 / this.requestsPerMinute;
  }

  /**
   * Queues `request` and resolves or rejects with its outcome once it has run.
   *
   * @param request - Factory that starts the request when its turn comes.
   */
  async enqueue<T>(request: () => Promise<T>): Promise<T> {
    return new Promise<T>((resolve, reject) => {
      this.requestQueue.push(async () => {
        // Settle the caller's promise here so the queue loop never throws.
        try {
          resolve(await request());
        } catch (error) {
          reject(error);
        }
      });
      // Fire-and-forget: the loop reports results through each job's promise.
      void this.processQueue();
    });
  }

  /** Drains the queue; a no-op if a drain is already in progress. */
  private async processQueue(): Promise<void> {
    if (this.processing || this.requestQueue.length === 0) return;
    this.processing = true;
    try {
      while (this.requestQueue.length > 0) {
        const request = this.requestQueue.shift()!;
        await request();
        await new Promise(resolve => setTimeout(resolve, this.requestInterval));
      }
    } finally {
      this.processing = false;
    }
  }
}
// Usage
// The request is passed as a factory so it only starts when the queue reaches it.
// Top-level `await`: run this inside an ES module or wrap it in an async function.
const rateLimitedClient = new RateLimitedClient();
const result = await rateLimitedClient.enqueue(() =>
client.chat.completions.create({
model: 'anthropic/claude-3.5-sonnet',
messages: [{ role: 'user', content: 'Hello' }],
})
);
/**
 * Asks a question about a single image.
 *
 * @param imageUrl - URL of the image to send.
 * @param question - Question text sent alongside the image.
 * @returns The content of the first choice's message.
 */
async function analyzeImage(imageUrl: string, question: string) {
  const questionPart = { type: 'text', text: question };
  const imagePart = { type: 'image_url', image_url: { url: imageUrl } };

  const { choices } = await client.chat.completions.create({
    model: 'openai/gpt-4-vision-preview',
    messages: [{ role: 'user', content: [questionPart, imagePart] }],
    max_tokens: 1000,
  });
  return choices[0].message.content;
}
// Usage
// Top-level `await`: run this inside an ES module or wrap it in an async function.
const result = await analyzeImage(
'https://example.com/image.jpg',
'What objects are in this image?'
);
/**
 * Sends several images in one user message and asks for their differences.
 *
 * @param imageUrls - URLs of the images, sent in the given order.
 * @returns The content of the first choice's message.
 */
async function compareImages(imageUrls: string[]) {
  const instruction = {
    type: 'text',
    text: 'Compare these images and describe the differences:',
  };
  const imageParts = imageUrls.map(url => ({
    type: 'image_url' as const,
    image_url: { url },
  }));

  const completion = await client.chat.completions.create({
    model: 'openai/gpt-4-vision-preview',
    messages: [{ role: 'user', content: [instruction, ...imageParts] }],
  });
  return completion.choices[0].message.content;
}
/**
 * Error payload wrapper with a single `error` object.
 * NOTE(review): presumably the JSON error body returned by the API; nothing in
 * the visible code references this type — confirm before relying on it.
 */
interface ErrorResponse {
error: {
/** Error message text. */
message: string;
/** Error category string. */
type: string;
/** Error code string. */
code: string;
};
}
/**
 * Requests a completion and maps HTTP failures onto stable error codes.
 *
 * @param prompt - User message to send.
 * @returns The content of the first choice's message.
 * @throws Error('RATE_LIMIT_EXCEEDED') when the failure has status 429.
 * @throws Error('INVALID_API_KEY') when the failure has status 401.
 * @throws Error('MODEL_NOT_FOUND') when the failure has status 404.
 * @throws Error('SERVER_ERROR') when the failure has status 500 or above.
 * @throws The original error, unchanged, in every other case.
 */
async function robustCompletion(prompt: string) {
  try {
    const completion = await client.chat.completions.create({
      model: 'anthropic/claude-3.5-sonnet',
      messages: [{ role: 'user', content: prompt }],
    });
    return completion.choices[0].message.content;
  } catch (error: unknown) {
    // A thrown value can be anything, so read `status` without assuming a shape.
    const rawStatus = (error as { status?: unknown } | null | undefined)?.status;
    const status = typeof rawStatus === 'number' ? rawStatus : undefined;

    // Rate limit errors
    if (status === 429) {
      console.error('Rate limit exceeded. Please wait.');
      throw new Error('RATE_LIMIT_EXCEEDED');
    }
    // Invalid API key
    if (status === 401) {
      console.error('Invalid API key');
      throw new Error('INVALID_API_KEY');
    }
    // Model not found
    if (status === 404) {
      console.error('Model not found');
      throw new Error('MODEL_NOT_FOUND');
    }
    // Server errors
    if (status !== undefined && status >= 500) {
      console.error('OpenRouter server error');
      throw new Error('SERVER_ERROR');
    }
    // Unknown error
    console.error('Unknown error:', error);
    throw error;
  }
}
/**
 * Wrapper that logs a request entry, then either a response entry (duration,
 * usage, finish reason) or an error entry (duration, error) per chat call.
 */
class LoggingClient {
  /**
   * Sends `prompt` to `model`, logging timing and outcome.
   *
   * @returns The full completion object.
   * @throws Whatever the underlying request (or response logging) throws,
   *   after logging it.
   */
  async chat(prompt: string, model: string) {
    const startTime = Date.now();
    const elapsed = () => Date.now() - startTime;
    const now = () => new Date().toISOString();

    console.log('[Request]', {
      timestamp: now(),
      model,
      promptLength: prompt.length,
    });

    try {
      const request = { model, messages: [{ role: 'user', content: prompt }] };
      const completion = await client.chat.completions.create(request);

      const duration = elapsed();
      console.log('[Response]', {
        timestamp: now(),
        duration,
        usage: completion.usage,
        finishReason: completion.choices[0].finish_reason,
      });
      return completion;
    } catch (error) {
      console.error('[Error]', {
        timestamp: now(),
        duration: elapsed(),
        error,
      });
      throw error;
    }
  }
}
Model Selection:
Cost Optimization:
Streaming:
Error Handling:
Prompt Engineering:
Rate Limiting:
Security:
Monitoring:
❌ Exposing API keys in frontend:
// WRONG - API key exposed
// A key written as a literal in client code is readable by anyone who receives that code.
const client = new OpenAI({
baseURL: 'https://openrouter.ai/api/v1',
apiKey: 'sk-or-v1-...', // Exposed!
});
✅ Correct - Server-side proxy:
// Backend proxy
// Accepts { prompt } in the JSON body and returns the completion, so the API
// key stays with the server-side `client`.
app.post('/api/chat', async (req, res) => {
  const prompt = req.body?.prompt;

  // Reject missing or non-string prompts before spending an upstream request.
  if (typeof prompt !== 'string' || prompt.trim() === '') {
    res.status(400).json({ error: 'prompt must be a non-empty string' });
    return;
  }

  try {
    const completion = await client.chat.completions.create({
      model: 'anthropic/claude-3.5-sonnet',
      messages: [{ role: 'user', content: prompt }],
    });
    res.json(completion);
  } catch (error) {
    // Without this, a rejected request leaves the HTTP response unanswered
    // in frameworks that do not catch async handler rejections.
    console.error('Chat proxy error:', error);
    res.status(502).json({ error: 'Upstream model request failed' });
  }
});
❌ Not handling streaming errors:
// WRONG - no error handling
// A failure while iterating propagates uncaught, and a chunk with no
// choices[0] or no delta makes the property access itself throw.
for await (const chunk of stream) {
console.log(chunk.choices[0].delta.content);
}
✅ Correct - with error handling:
// Consume the stream inside try/catch so a mid-stream failure is reported
// instead of escaping; optional chaining tolerates chunks without content.
try {
  for await (const chunk of stream) {
    const fragment = chunk.choices[0]?.delta?.content;
    process.stdout.write(fragment ? fragment : '');
  }
} catch (streamError) {
  console.error('Stream error:', streamError);
  // Implement retry or fallback
}
❌ Ignoring rate limits:
// WRONG - no rate limiting
// Every prompt's request is started at once; nothing bounds how many are in flight.
const promises = prompts.map(prompt => chat(prompt));
await Promise.all(promises); // May hit rate limits
✅ Correct - with rate limiting:
// Process prompts five at a time, pausing one second between batches.
const results = [];
const batchSize = 5;
for (let i = 0; i < prompts.length; i += batchSize) {
  const batch = prompts.slice(i, i + batchSize);
  // Wrap the call so `map`'s extra (index, array) arguments are not forwarded to `chat`.
  const batchResults = await Promise.all(batch.map(prompt => chat(prompt)));
  results.push(...batchResults);
  // Delay between batches; nothing follows the last one, so skip it there.
  if (i + batchSize < prompts.length) {
    await new Promise(r => setTimeout(r, 1000));
  }
}
// In-memory cache of reply text, keyed by "<model>:<prompt>".
const responseCache = new Map<string, string>();

/**
 * Returns the cached reply for this model/prompt pair, or requests one,
 * stores it, and returns it.
 *
 * @param prompt - User message to send.
 * @param model - Model ID to use.
 * @returns The reply text ('' when the reply has no content).
 */
async function cachedChat(prompt: string, model: string) {
  const cacheKey = `${model}:${prompt}`;

  if (!responseCache.has(cacheKey)) {
    const { choices } = await client.chat.completions.create({
      model,
      messages: [{ role: 'user', content: prompt }],
    });
    const fresh = choices[0].message.content || '';
    responseCache.set(cacheKey, fresh);
    return fresh;
  }

  console.log('Cache hit');
  return responseCache.get(cacheKey)!;
}
/**
 * Sends every prompt at once and returns the reply texts in prompt order.
 * Rejects if any single request rejects.
 *
 * @param prompts - Prompts to send, one request each.
 * @param model - Model ID used for every request.
 * @returns Content of the first choice of each completion.
 */
async function parallelChat(prompts: string[], model: string) {
  const pending = prompts.map(prompt =>
    client.chat.completions.create({
      model,
      messages: [{ role: 'user', content: prompt }],
    })
  );
  const completions = await Promise.all(pending);
  return completions.map(completion => completion.choices[0].message.content);
}
development
Optimize web performance using Core Web Vitals, modern patterns (View Transitions, Speculation Rules), and framework-specific techniques
development
Best practices for documenting APIs and code interfaces, eliminating redundant documentation guidance per agent.
development
Comprehensive API design patterns covering REST, GraphQL, gRPC, versioning, authentication, and modern API best practices
development
Visual verification workflow for UI changes to accelerate code review and catch ...